From 741ea9da6bdc856740e1d99566749d340f497a79 Mon Sep 17 00:00:00 2001 From: Simeon <51403284+simeross@users.noreply.github.com> Date: Fri, 20 Oct 2023 12:50:38 +0200 Subject: [PATCH] roxygen tags --- R/plot_cluster_overview.R | 53 +++++++++++++++++++++++++--------- R/plot_dendrogram.R | 35 +++++++++++++++------- R/plot_distance_matrix.R | 40 ++++++++++++++++++++----- R/plot_longest_reading_frame.R | 29 ++++++++++++++----- R/save_plot.R | 33 +++++++++++++++------ 5 files changed, 143 insertions(+), 47 deletions(-) diff --git a/R/plot_cluster_overview.R b/R/plot_cluster_overview.R index ee6c2eb..99dd2d1 100644 --- a/R/plot_cluster_overview.R +++ b/R/plot_cluster_overview.R @@ -1,35 +1,62 @@ -# Complete plotting and saving of dendrogram, sequence abundance and sample -# abundance from cluster in named list of DNAStringSets and named list of tbls -# containing sequence distribution in all samples. -plot_cluster_overview <- function(cluster, cluster_sequence_list, +#' Plotting of cluster overview (dendrogram, sequence abundance and sample +#' abundance from cluster in named list of DNAStringSets and named list of tbls +#' containing sequence distribution in all samples. +#' +#' @param cluster Character string. The name of the cluster to plot and save. +#' @param cluster_sequence_list Named list of DNAStringSets containing the sequences. +#' @param cluster_tbl_list Named list of tbls containing the sequence distribution in all samples. +#' @param cm_width Width of the plot in centimeters. +#' @param cm_height Height of the plot in centimeters. +#' @param path Path to save the plot. +#' @return A list containing the plotted and saved cluster overview. 
+#' +#' @import Biostrings +#' @import ggplot2 +#' @import ggtree +#' +#' @examples +#' # Create example data +#' sequences <- DNAStringSet(c("AGTCTGATC", "ATCGTACG")) +#' tbl_1 <- tibble(ID = c("AGTCTGATC", "ATCGTACG", "AAAAA"), count = c(5, 3, 0)) +#' tbl_2 <- tibble(ID = c("AGTCTGATC", "ATCGTACG", "AAAAA"), count = c(6, 2, 1)) +#' cluster_sequence_list <- list(cluster1 = sequences) +#' cluster_tbl_list <- list(cluster1 = tbl_1, cluster2 = tbl_2) +#' +#' # Plot and save cluster overview +#' plot_cluster_overview(cluster = "cluster1", +#' cluster_sequence_list = cluster_sequence_list, +#' cluster_tbl_list = cluster_tbl_list, +#' cm_width = 25, cm_height = 15, +#' path = "path/to/save/plot.png") +plot_cluster_overview <- function(cluster, cluster_sequence_list, cluster_tbl_list, cm_width = 25, cm_height = 15, path = path){ - tree <- align_and_generate_upgma(cluster = cluster, + tree <- align_and_generate_upgma(cluster = cluster, sequence_list = cluster_sequence_list) tree_pl <- plot_cluster_dendrogram(tree) # Extract order of sequences according to dendrogram dendrogram_label_order <- rev(ggtree::get_taxa_name(tree_pl)) - + # Refactor sequences to achieve same order in the sequence tbl as in the # dendrogram and remove zero-counts without removing NAs - abundance_tbl <- full_join(cluster_tbl_list[[cluster]], + abundance_tbl <- full_join(cluster_tbl_list[[cluster]], tibble(ID = dendrogram_label_order)) %>% mutate(ID = fct_relevel(ID, dendrogram_label_order)) %>% filter(count > 0 | is.na(count)) - + sample_pl <- plot_abundance_per_sample(abundance_tbl) - + # Sum up abundances over all samples abundance_sums <- abundance_tbl %>% group_by(ID) %>% summarise(sum_count = sum(count)) - + abundance_pl <- plot_abundance_sums_per_sequence(abundance_sums) - + out <- combine_cluster_plots_and_save(plot_list = list(tree_pl, abundance_pl, sample_pl), cluster = cluster, w = cm_width, h = cm_height, 
out_path = path) return(out) -} \ No newline at end of file +} diff --git a/R/plot_dendrogram.R b/R/plot_dendrogram.R index d40bbb3..1318244 100644 --- a/R/plot_dendrogram.R +++ b/R/plot_dendrogram.R @@ -1,20 +1,34 @@ +#' Plot Dendrogram +#' +#' This function creates a plot of a dendrogram using hierarchical clustering on a given data table. +#' +#' @param distclust_table A tibble containing the data for clustering and plotting. +#' @return A plot of the dendrogram. +#' @import ggdendro +#' @import dplyr +#' @import tidyr +#' @import ggplot2 +#' @import tibble +#' +#' @export +#' plot_dendrogram <- function(distclust_table = mytibble){ library(ggdendro) - + # Prepare data from the plotting table for vegdist/hclust clustering - mts_vegan <- distclust_table %>% + mts_vegan <- distclust_table %>% select(var_type, seqnames, rel_var_abundance) %>% - pivot_wider(id_cols = var_type, names_from = seqnames, + pivot_wider(id_cols = var_type, names_from = seqnames, values_from = rel_var_abundance) - + # make hierarchical cluster from vegdist matrix and extract data for plotting dat <- dendrogram_hclust(veganify_generic_wide_tbl(mts_vegan), seed = 1) - - mock <- tibble(x = factor(round(segment(dat)$xend)), y = segment(dat)$y) + + mock<- tibble(x = factor(round(segment(dat)$xend)), y = segment(dat)$y) dendro_plot <- ggplot() + - geom_blank(data = mock, + geom_blank(data = mock, aes(x = x, y = y)) + - geom_segment(data = segment(dat), + geom_segment(data = segment(dat), aes(x = x, y = y, xend = xend, yend = yend) ) + coord_flip() + @@ -25,5 +39,6 @@ plot_dendrogram <- function(distclust_table = mytibble){ axis.line = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) - -} \ No newline at end of file + + dendro_plot +} diff --git a/R/plot_distance_matrix.R b/R/plot_distance_matrix.R index d51066b..52b6814 100644 --- a/R/plot_distance_matrix.R +++ b/R/plot_distance_matrix.R @@ -1,11 +1,35 @@ -# All vs. 
all point plot of tidy tbl from distance matrix (deprecated) +#' All vs. all tile plot of tidy tbl from distance matrix (deprecated) +#' +#' This function creates a heatmap-like plot that shows all pairwise distances of a distance matrix. +#' +#' @param dists_matrix A distance matrix containing pairwise distances. +#' +#' @return A tile plot showing all pairwise distances of the distance matrix. +#' +#' @import dplyr +#' @import ggplot2 +#' @import tidyr +#' @import viridis +#' +#' @keywords plotting +#' @seealso \code{\link{heatmap}} +#' +#' @examples +#' # Create a random distance matrix +#' dists_matrix <- matrix(rnorm(16), nrow = 4) +#' +#' # Call the plot_distance_matrix function +#' plot_distance_matrix(dists_matrix) +#' +#' @export plot_distance_matrix <- function(dists_matrix) { - dists_tbl <- as_tibble(dists_matrix, rownames = "ref") %>% + dists_tbl <- as_tibble(dists_matrix, rownames = 'ref') %>% pivot_longer(cols = -ref) - - ggplot(dists_tbl, aes(x = ref, y = name, color = value)) + - geom_point() + scale_color_viridis_c() + - theme(axis.text = element_blank(), - axis.title = element_blank(), + + ggplot(dists_tbl, aes(x = ref, y = name, fill = value)) + + geom_tile() + scale_fill_viridis_c() + + theme(axis.text = element_blank(), + axis.title = element_blank(), axis.ticks = element_blank()) -} \ No newline at end of file +} + diff --git a/R/plot_longest_reading_frame.R b/R/plot_longest_reading_frame.R index ce708a9..01d30ee 100644 --- a/R/plot_longest_reading_frame.R +++ b/R/plot_longest_reading_frame.R @@ -1,10 +1,25 @@ +#' Plot longest reading frame +#' +#' This function generates a plot of the longest reading frames for each sequence in a given table. +#' +#' @param reading_frames_tbl A table containing the reading frame data (default: tbl) +#' +#' @return A ggplot object displaying the longest reading frames for each sequence. 
+#' +#' @import ggplot2 +#' @import dplyr +#' @import tidyr +#' @import viridis +#' +#' @examples +#' plot_longest_reading_frame() plot_longest_reading_frame <- function(reading_frames_tbl = tbl) { clus_name <- reading_frames_tbl$cluster_name %>% unique() - ggplot(reading_frames_tbl, aes(x = max_width, y = seqnames, fill = rframe)) + - geom_col() + - facet_grid(rows = vars(max_type), scales = "free_y", space = "free_y") + + ggplot(reading_frames_tbl, aes(x = max_width, y = seqnames, fill = rframe)) + + geom_col() + + facet_grid(rows = vars(max_type), scales = 'free_y', space = 'free_y') + theme(strip.text.y = element_text(angle = 0)) + - scale_fill_viridis_d(name = "Reading frame") + - ylab("Sequence") + xlab("Sequence length (bp)") + - ggtitle(paste("Longest reading frames in cluster", clus_name)) -} \ No newline at end of file + scale_fill_viridis_d(name = 'Reading frame') + + ylab('Sequence') + xlab('Sequence length (bp)') + + ggtitle(paste('Longest reading frames in cluster', clus_name)) +} diff --git a/R/save_plot.R b/R/save_plot.R index b023b06..4bc7995 100644 --- a/R/save_plot.R +++ b/R/save_plot.R @@ -1,17 +1,32 @@ +#' Save Plot +#' +#' Saves a ggplot object as a file with the specified file type, plot name, and output directory. If the plot name already exists in the output directory and overwrite is set to FALSE, the plot name will be appended with the current date and time. +#' +#' @param pl The ggplot object to be saved. +#' @param filetype The desired file type for the saved plot. Default is '.pdf'. +#' @param plot_name The desired plot name. Default is 'my_plot'. +#' @param overwrite Logical value indicating whether to overwrite an existing plot with the same name in the output directory. Default is FALSE. +#' @param outp The output directory where the plot will be saved. Default is the working directory. +#' @param set An environment or list containing custom parameters for the plot dimensions and resolution. Default is NULL. 
+#' @param set$wp The desired width of the plot. Default is 20. +#' @param set$hp The desired height of the plot. Default is 20. +#' @param set$res The desired resolution of the plot (dots per inch). Default is 300. +#' +#' @return None +#' +#' @examples +#' save_plot(ggplot(mtcars, aes(x = mpg, y = disp)) + save_plot <- function( - pl, filetype = ".pdf", plot_name = "my_plot", overwrite=FALSE, outp = outp){ + pl, filetype = '.pdf', plot_name = 'my_plot', overwrite=FALSE, outp = outp){ dir.create(outp, showWarnings = FALSE) wp <- if (!is.null(set$wp)) set$wp else 20 hp <- if (!is.null(set$hp)) set$hp else 20 res <- if (!is.null(set$res)) set$res else 300 name <- paste0(plot_name,filetype) if (file.exists(file.path(outp, name)) & !overwrite) { - name <- paste0(plot_name, "_", - format(Sys.time(), "%d-%m-%y_%H%M%S"),filetype)} - ggsave(file.path(outp, name), pl, - width = wp, height = hp, unit = "cm", dpi = res) + name <- paste0(plot_name, '_', + format(Sys.time(), '%d-%m-%y_%H%M%S'),filetype)} + ggsave(file.path(outp, name), pl, + width = wp, height = hp, unit = 'cm', dpi = res) } - -str_pad_to_max <- function(vec = c(), ...){ - str_pad(vec, max(str_length(vec)), ...) -} \ No newline at end of file -- GitLab