diff --git a/R/plot_cluster_overview.R b/R/plot_cluster_overview.R index ee6c2eb23d4cb23c575871932a75bc33db9fabab..99dd2d189e5a6477894e8a67b7e33d739156b640 100644 --- a/R/plot_cluster_overview.R +++ b/R/plot_cluster_overview.R @@ -1,35 +1,62 @@ -# Complete plotting and saving of dendrogram, sequence abundance and sample -# abundance from cluster in named list of DNAStringSets and named list of tbls -# containing sequence distribution in all samples. -plot_cluster_overview <- function(cluster, cluster_sequence_list, +#' Plotting of cluster overview (dendrogram, sequence abundance and sample +#' abundance) from cluster in named list of DNAStringSets and named list of tbls +#' containing sequence distribution in all samples. +#' +#' @param cluster Character string. The name of the cluster to plot and save. +#' @param cluster_sequence_list Named list of DNAStringSets containing the sequences. +#' @param cluster_tbl_list Named list of tbls containing the sequence distribution in all samples. +#' @param cm_width Width of the plot in centimeters. +#' @param cm_height Height of the plot in centimeters. +#' @param path Path to save the plot. +#' @return A list containing the plotted and saved cluster overview. 
+#' +#' @import Biostrings +#' @import ggplot2 +#' @import ggtree +#' +#' @examples +#' # Create example data +#' sequences <- DNAStringSet(c("AGTCTGATC", "ATCGTACG")) +#' tbl_1 <- tibble(ID = c("AGTCTGATC", "ATCGTACG", "AAAAA"), count = c(5, 3, 0)) +#' tbl_2 <- tibble(ID = c("AGTCTGATC", "ATCGTACG", "AAAAA"), count = c(6, 2, 1)) +#' cluster_sequence_list <- list(cluster1 = sequences) +#' cluster_tbl_list <- list(cluster1 = tbl_1, cluster2 = tbl_2) +#' +#' # Plot and save cluster overview +#' plot_cluster_overview(cluster = "cluster1", +#' cluster_sequence_list = cluster_sequence_list, +#' cluster_tbl_list = cluster_tbl_list, +#' cm_width = 25, cm_height = 15, +#' path = "path/to/save/plot.png") +plot_cluster_overview <- function(cluster, cluster_sequence_list, cluster_tbl_list, cm_width = 25, cm_height = 15, path = path){ - tree <- align_and_generate_upgma(cluster = cluster, + tree <- align_and_generate_upgma(cluster = cluster, sequence_list = cluster_sequence_list) tree_pl <- plot_cluster_dendrogram(tree) # Extract order of sequences according to dendrogram dendrogram_label_order <- rev(ggtree::get_taxa_name(tree_pl)) - + # Refactor sequences to achieve same order in the sequence tbl as in the # dendrogram and remove zero-counts without removing NAs - abundance_tbl <- full_join(cluster_tbl_list[[cluster]], + abundance_tbl <- full_join(cluster_tbl_list[[cluster]], tibble(ID = dendrogram_label_order)) %>% mutate(ID = fct_relevel(ID, dendrogram_label_order)) %>% filter(count > 0 | is.na(count)) - + sample_pl <- plot_abundance_per_sample(abundance_tbl) - + # Sum up abundances over all samples abundance_sums <- abundance_tbl %>% group_by(ID) %>% summarise(sum_count = sum(count)) - + abundance_pl <- plot_abundance_sums_per_sequence(abundance_sums) - - out <- combine_cluster_plots_and_save(plot_list = list(tree_pl, + + out <- combine_cluster_plots_and_save(plot_list = list(tree_pl, abundance_pl, sample_pl), cluster = cluster, w = cm_width, h = cm_height, 
out_path = path) return(out) -} \ No newline at end of file +} diff --git a/R/plot_dendrogram.R b/R/plot_dendrogram.R index d40bbb3850b250bc2942f36ddfc6910ff0d5f210..1318244e90b689ddeb47caf954765553e5d19cff 100644 --- a/R/plot_dendrogram.R +++ b/R/plot_dendrogram.R @@ -1,20 +1,34 @@ +#' Plot Dendrogram +#' +#' This function creates a plot of a dendrogram using hierarchical clustering on a given data table. +#' +#' @param distclust_table A tibble containing the data for clustering and plotting. +#' @return A plot of the dendrogram. +#' @import ggdendro +#' @import dplyr +#' @import tidyr +#' @import ggplot2 +#' @import tibble +#' +#' @export +#' plot_dendrogram <- function(distclust_table = mytibble){ library(ggdendro) - + # Prepare data from the plotting table for vegdist/hclust clustering - mts_vegan <- distclust_table %>% + mts_vegan <- distclust_table %>% select(var_type, seqnames, rel_var_abundance) %>% - pivot_wider(id_cols = var_type, names_from = seqnames, + pivot_wider(id_cols = var_type, names_from = seqnames, values_from = rel_var_abundance) - + # make hierarchical cluster from vegdist matrix and extract data for plotting dat <- dendrogram_hclust(veganify_generic_wide_tbl(mts_vegan), seed = 1) - - mock <- tibble(x = factor(round(segment(dat)$xend)), y = segment(dat)$y) + + mock<- tibble(x = factor(round(segment(dat)$xend)), y = segment(dat)$y) dendro_plot <- ggplot() + - geom_blank(data = mock, + geom_blank(data = mock, aes(x = x, y = y)) + - geom_segment(data = segment(dat), + geom_segment(data = segment(dat), aes(x = x, y = y, xend = xend, yend = yend) ) + coord_flip() + @@ -25,5 +39,6 @@ plot_dendrogram <- function(distclust_table = mytibble){ axis.line = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) - -} \ No newline at end of file + + dendro_plot +} diff --git a/R/plot_distance_matrix.R b/R/plot_distance_matrix.R index d51066bf422105897504f6f1d4272dd18b82d2ca..52b6814992bc23d01addc013add619a64afe0dd6 100644 --- 
a/R/plot_distance_matrix.R +++ b/R/plot_distance_matrix.R @@ -1,11 +1,35 @@ -# All vs. all point plot of tidy tbl from distance matrix (deprecated) +#' All vs. all tile plot of tidy tbl from distance matrix (deprecated) +#' +#' This function creates a heatmap-like plot that shows all pairwise distances of a distance matrix. +#' +#' @param dists_matrix A distance matrix containing pairwise distances. +#' +#' @return A tile plot showing all pairwise distances of the distance matrix. +#' +#' @import dplyr +#' @import ggplot2 +#' @import tidyr +#' @import viridis +#' +#' @keywords plotting +#' @seealso \code{\link{heatmap}} +#' +#' @examples +#' # Create a random distance matrix +#' dists_matrix <- matrix(rnorm(16), nrow = 4) +#' +#' # Call the plot_distance_matrix function +#' plot_distance_matrix(dists_matrix) +#' +#' @export plot_distance_matrix <- function(dists_matrix) { - dists_tbl <- as_tibble(dists_matrix, rownames = "ref") %>% + dists_tbl <- as_tibble(dists_matrix, rownames = 'ref') %>% pivot_longer(cols = -ref) - - ggplot(dists_tbl, aes(x = ref, y = name, color = value)) + - geom_point() + scale_color_viridis_c() + - theme(axis.text = element_blank(), - axis.title = element_blank(), + + ggplot(dists_tbl, aes(x = ref, y = name, fill = value)) + + geom_tile() + scale_fill_viridis_c() + + theme(axis.text = element_blank(), + axis.title = element_blank(), axis.ticks = element_blank()) -} \ No newline at end of file +} + diff --git a/R/plot_longest_reading_frame.R b/R/plot_longest_reading_frame.R index ce708a9511c30f8a624d7f264d1cad63cd3afd98..01d30ee66091a8ddf10fe08061b5488350ea1cc0 100644 --- a/R/plot_longest_reading_frame.R +++ b/R/plot_longest_reading_frame.R @@ -1,10 +1,25 @@ +#' Plot longest reading frame +#' +#' This function generates a plot of the longest reading frames for each sequence in a given table. 
+#' +#' @param reading_frames_tbl A table containing the reading frame data (default: tbl) +#' +#' @return A ggplot object displaying the longest reading frames for each sequence. +#' +#' @import ggplot2 +#' @import dplyr +#' @import tidyr +#' @import viridis +#' +#' @examples +#' plot_longest_reading_frame() plot_longest_reading_frame <- function(reading_frames_tbl = tbl) { clus_name <- reading_frames_tbl$cluster_name %>% unique() - ggplot(reading_frames_tbl, aes(x = max_width, y = seqnames, fill = rframe)) + - geom_col() + - facet_grid(rows = vars(max_type), scales = "free_y", space = "free_y") + + ggplot(reading_frames_tbl, aes(x = max_width, y = seqnames, fill = rframe)) + + geom_col() + + facet_grid(rows = vars(max_type), scales = 'free_y', space = 'free_y') + theme(strip.text.y = element_text(angle = 0)) + - scale_fill_viridis_d(name = "Reading frame") + - ylab("Sequence") + xlab("Sequence length (bp)") + - ggtitle(paste("Longest reading frames in cluster", clus_name)) -} \ No newline at end of file + scale_fill_viridis_d(name = 'Reading frame') + + ylab('Sequence') + xlab('Sequence length (bp)') + + ggtitle(paste('Longest reading frames in cluster', clus_name)) +} diff --git a/R/save_plot.R b/R/save_plot.R index b023b06b003d939e92d3ce42159a0836a868ed7a..4bc7995e38eef5a69fae05ff0c4fd42fc4e6344b 100644 --- a/R/save_plot.R +++ b/R/save_plot.R @@ -1,17 +1,32 @@ +#' Save Plot +#' +#' Saves a ggplot object as a file with the specified file type, plot name, and output directory. If the plot name already exists in the output directory and overwrite is set to FALSE, the plot name will be appended with the current date and time. +#' +#' @param pl The ggplot object to be saved. +#' @param filetype The desired file type for the saved plot. Default is '.pdf'. +#' @param plot_name The desired plot name. Default is 'my_plot'. +#' @param overwrite Logical value indicating whether to overwrite an existing plot with the same name in the output directory. Default is FALSE. 
+#' @param outp The output directory where the plot will be saved; it is created if it does not exist. Must be supplied by the caller, because the declared default (`outp = outp`) refers to itself. +#' @section Plot dimensions: `set` is not an argument of this function. It is an environment or list that must already be defined outside the function before it is called; its entries `wp`, `hp` and `res` override the defaults below when they are not NULL. +#' `set$wp`: the desired width of the plot in cm. Default is 20. +#' `set$hp`: the desired height of the plot in cm. Default is 20. +#' `set$res`: the desired resolution of the plot (dots per inch). Default is 300. +#' +#' @return None +#' +#' @examples +#' save_plot(ggplot(mtcars, aes(x = mpg, y = disp)), outp = tempdir()) + save_plot <- function( - pl, filetype = ".pdf", plot_name = "my_plot", overwrite=FALSE, outp = outp){ + pl, filetype = '.pdf', plot_name = 'my_plot', overwrite=FALSE, outp = outp){ dir.create(outp, showWarnings = FALSE) wp <- if (!is.null(set$wp)) set$wp else 20 hp <- if (!is.null(set$hp)) set$hp else 20 res <- if (!is.null(set$res)) set$res else 300 name <- paste0(plot_name,filetype) if (file.exists(file.path(outp, name)) & !overwrite) { - name <- paste0(plot_name, "_", - format(Sys.time(), "%d-%m-%y_%H%M%S"),filetype)} - ggsave(file.path(outp, name), pl, - width = wp, height = hp, unit = "cm", dpi = res) + name <- paste0(plot_name, '_', + format(Sys.time(), '%d-%m-%y_%H%M%S'),filetype)} + ggsave(file.path(outp, name), pl, + width = wp, height = hp, unit = 'cm', dpi = res) } - -str_pad_to_max <- function(vec = c(), ...){ - str_pad(vec, max(str_length(vec)), ...) -} \ No newline at end of file