Skip to content
Snippets Groups Projects
Commit 741ea9da authored by Simeon's avatar Simeon
Browse files

roxygen tags

parent bbaf3a7c
Branches
Tags
No related merge requests found
#' Plotting of cluster overview (dendrogram, sequence abundance and sample
#' abundance) from a cluster in a named list of DNAStringSets and a named list
#' of tbls containing the sequence distribution in all samples.
#'
#' @param cluster Character string. The name of the cluster to plot and save.
#' @param cluster_sequence_list Named list of DNAStringSets containing the
#'   sequences.
#' @param cluster_tbl_list Named list of tbls containing the sequence
#'   distribution in all samples. The tbl selected by `cluster` must contain
#'   the columns `ID` and `count`.
#' @param cm_width Width of the plot in centimeters.
#' @param cm_height Height of the plot in centimeters.
#' @param path Path to save the plot. Must be supplied by the caller; the
#'   formal default is self-referential and cannot be evaluated.
#' @return The value returned by `combine_cluster_plots_and_save()`.
#'
#' @import Biostrings
#' @import ggplot2
#' @import ggtree
#'
#' @examples
#' \dontrun{
#' # Create example data
#' sequences <- DNAStringSet(c("AGTCTGATC", "ATCGTACG"))
#' tbl_1 <- tibble(ID = c("AGTCTGATC", "ATCGTACG", "AAAAA"), count = c(5, 3, 0))
#' tbl_2 <- tibble(ID = c("AGTCTGATC", "ATCGTACG", "AAAAA"), count = c(6, 2, 1))
#' cluster_sequence_list <- list(cluster1 = sequences)
#' cluster_tbl_list <- list(cluster1 = tbl_1, cluster2 = tbl_2)
#'
#' # Plot and save cluster overview
#' plot_cluster_overview(cluster = "cluster1",
#'                       cluster_sequence_list = cluster_sequence_list,
#'                       cluster_tbl_list = cluster_tbl_list,
#'                       cm_width = 25, cm_height = 15,
#'                       path = "path/to/save/plot.png")
#' }
plot_cluster_overview <- function(cluster, cluster_sequence_list,
                                  cluster_tbl_list, cm_width = 25,
                                  cm_height = 15, path = path) {
  # `path = path` refers to itself, so an omitted `path` would only surface
  # later as an obscure "recursive default argument" error. Fail early instead.
  if (missing(path)) {
    stop("`path` must be supplied.", call. = FALSE)
  }

  tree <- align_and_generate_upgma(cluster = cluster,
                                   sequence_list = cluster_sequence_list)
  tree_pl <- plot_cluster_dendrogram(tree)

  # Extract order of sequences according to dendrogram
  dendrogram_label_order <- rev(ggtree::get_taxa_name(tree_pl))

  # Refactor sequences to achieve same order in the sequence tbl as in the
  # dendrogram and remove zero-counts without removing NAs. The join key is
  # spelled out because `ID` is the only column of the right-hand tibble.
  abundance_tbl <- full_join(cluster_tbl_list[[cluster]],
                             tibble(ID = dendrogram_label_order),
                             by = "ID") %>%
    mutate(ID = fct_relevel(ID, dendrogram_label_order)) %>%
    filter(count > 0 | is.na(count))
  sample_pl <- plot_abundance_per_sample(abundance_tbl)

  # Sum up abundances over all samples
  abundance_sums <- abundance_tbl %>%
    group_by(ID) %>%
    summarise(sum_count = sum(count))
  abundance_pl <- plot_abundance_sums_per_sequence(abundance_sums)

  combine_cluster_plots_and_save(
    plot_list = list(tree_pl, abundance_pl, sample_pl),
    cluster = cluster, w = cm_width,
    h = cm_height, out_path = path
  )
}
\ No newline at end of file
#' Plot Dendrogram
#'
#' This function creates a plot of a dendrogram using hierarchical clustering on a given data table.
#'
#' @param distclust_table A tibble containing the data for clustering and plotting.
#' @return A plot of the dendrogram.
#' @import ggdendro
#' @import dplyr
#' @import tidyr
#' @import ggplot2
#' @import tibble
#'
#' @export
#'
plot_dendrogram <- function(distclust_table = mytibble){ plot_dendrogram <- function(distclust_table = mytibble){
library(ggdendro) library(ggdendro)
# Prepare data from the plotting table for vegdist/hclust clustering # Prepare data from the plotting table for vegdist/hclust clustering
mts_vegan <- distclust_table %>% mts_vegan <- distclust_table %>%
select(var_type, seqnames, rel_var_abundance) %>% select(var_type, seqnames, rel_var_abundance) %>%
pivot_wider(id_cols = var_type, names_from = seqnames, pivot_wider(id_cols = var_type, names_from = seqnames,
values_from = rel_var_abundance) values_from = rel_var_abundance)
# make hierarchical cluster from vegdist matrix and extract data for plotting # make hierarchical cluster from vegdist matrix and extract data for plotting
dat <- dendrogram_hclust(veganify_generic_wide_tbl(mts_vegan), seed = 1) dat <- dendrogram_hclust(veganify_generic_wide_tbl(mts_vegan), seed = 1)
mock <- tibble(x = factor(round(segment(dat)$xend)), y = segment(dat)$y) mock<- tibble(x = factor(round(segment(dat)$xend)), y = segment(dat)$y)
dendro_plot <- ggplot() + dendro_plot <- ggplot() +
geom_blank(data = mock, geom_blank(data = mock,
aes(x = x, y = y)) + aes(x = x, y = y)) +
geom_segment(data = segment(dat), geom_segment(data = segment(dat),
aes(x = x, y = y, xend = xend, yend = yend) aes(x = x, y = y, xend = xend, yend = yend)
) + ) +
coord_flip() + coord_flip() +
...@@ -25,5 +39,6 @@ plot_dendrogram <- function(distclust_table = mytibble){ ...@@ -25,5 +39,6 @@ plot_dendrogram <- function(distclust_table = mytibble){
axis.line = element_blank(), axis.line = element_blank(),
axis.text = element_blank(), axis.text = element_blank(),
axis.ticks = element_blank()) axis.ticks = element_blank())
} dendro_plot
\ No newline at end of file }
#' All vs. all tile plot of tidy tbl from distance matrix (deprecated)
#'
#' This function creates a heatmap-like plot that shows all pairwise distances
#' of a distance matrix.
#'
#' @param dists_matrix A distance matrix containing pairwise distances.
#'
#' @return A ggplot tile plot showing all pairwise distances of the distance
#'   matrix, with the distance mapped to the fill colour.
#'
#' @import dplyr
#' @import ggplot2
#' @import tidyr
#' @import viridis
#'
#' @keywords plotting
#' @seealso \code{\link{heatmap}}
#'
#' @examples
#' # Create a random matrix with named rows and columns
#' dists_matrix <- matrix(rnorm(16), nrow = 4,
#'                        dimnames = list(letters[1:4], letters[1:4]))
#'
#' # Call the plot_distance_matrix function
#' plot_distance_matrix(dists_matrix)
#'
#' @export
plot_distance_matrix <- function(dists_matrix) {
  # Long format: row names go to `ref`, former column names to `name`, and the
  # cell contents to `value` (the pivot_longer() defaults).
  dists_tbl <- as_tibble(dists_matrix, rownames = "ref") %>%
    pivot_longer(cols = -ref)

  ggplot(dists_tbl, aes(x = ref, y = name, fill = value)) +
    geom_tile() +
    scale_fill_viridis_c() +
    theme(axis.text = element_blank(),
          axis.title = element_blank(),
          axis.ticks = element_blank())
}
\ No newline at end of file
#' Plot longest reading frame
#'
#' This function generates a plot of the longest reading frames for each
#' sequence in a given table.
#'
#' @param reading_frames_tbl A data frame or tibble containing the reading
#'   frame data. It must provide the columns `cluster_name`, `max_width`,
#'   `seqnames`, `rframe` and `max_type`. The formal default refers to an
#'   object named `tbl` in the calling scope; pass the table explicitly.
#'
#' @return A ggplot object displaying the longest reading frames for each
#'   sequence.
#'
#' @import ggplot2
#' @import dplyr
#' @import tidyr
#' @import viridis
#'
#' @examples
#' reading_frames_tbl <- data.frame(
#'   cluster_name = "cluster1",
#'   seqnames = c("seq1", "seq2"),
#'   max_width = c(300, 450),
#'   max_type = c("A", "B"),
#'   rframe = c("1", "2")
#' )
#' plot_longest_reading_frame(reading_frames_tbl)
plot_longest_reading_frame <- function(reading_frames_tbl = tbl) {
  # Without an explicit argument the default usually resolves to something
  # that is not a table; report that clearly.
  if (!is.data.frame(reading_frames_tbl)) {
    stop("`reading_frames_tbl` must be a data frame or tibble.",
         call. = FALSE)
  }

  # Collapse to one string so the title stays a single label even when the
  # table holds more than one cluster name.
  clus_name <- paste(unique(reading_frames_tbl$cluster_name), collapse = ", ")

  ggplot(reading_frames_tbl,
         aes(x = max_width, y = seqnames, fill = rframe)) +
    geom_col() +
    facet_grid(rows = vars(max_type), scales = "free_y", space = "free_y") +
    theme(strip.text.y = element_text(angle = 0)) +
    scale_fill_viridis_d(name = "Reading frame") +
    ylab("Sequence") +
    xlab("Sequence length (bp)") +
    ggtitle(paste("Longest reading frames in cluster", clus_name))
}
\ No newline at end of file
#' Save Plot
#'
#' Saves a ggplot object as a file with the specified file type, plot name, and
#' output directory. If the plot name already exists in the output directory
#' and overwrite is set to FALSE, the plot name will be appended with the
#' current date and time.
#'
#' @param pl The ggplot object to be saved.
#' @param filetype The desired file type for the saved plot, including the
#'   leading dot. Default is '.pdf'.
#' @param plot_name The desired plot name. Default is 'my_plot'.
#' @param overwrite Logical value indicating whether to overwrite an existing
#'   plot with the same name in the output directory. Default is FALSE.
#' @param outp The output directory where the plot will be saved. It is
#'   created if it does not exist. Default is the working directory.
#' @param set An environment or list containing custom parameters for the plot
#'   dimensions and resolution: `wp` (width in cm, default 20), `hp` (height
#'   in cm, default 20) and `res` (resolution in dots per inch, default 300).
#'   Default is NULL. When NULL, a list or environment named `set` that is
#'   visible from the function's enclosing environment is used if one exists,
#'   which keeps the earlier global-settings behaviour working.
#'
#' @return The result of the final `ggplot2::ggsave()` call. The function is
#'   called for its side effect of writing the file.
#'
#' @examples
#' \dontrun{
#' pl <- ggplot(mtcars, aes(x = mpg, y = disp)) + geom_point()
#' save_plot(pl, plot_name = "mpg_vs_disp", outp = tempdir())
#' }
save_plot <- function(
    pl, filetype = ".pdf", plot_name = "my_plot", overwrite = FALSE,
    outp = getwd(), set = NULL) {
  dir.create(outp, showWarnings = FALSE, recursive = TRUE)

  # Earlier versions read a free variable `set`; keep honouring such an object
  # when no explicit settings are passed. Functions named `set` are ignored.
  if (is.null(set)) {
    legacy_set <- get0("set", envir = parent.env(environment()),
                       ifnotfound = NULL)
    if (is.list(legacy_set) || is.environment(legacy_set)) {
      set <- legacy_set
    }
  }

  wp <- if (!is.null(set$wp)) set$wp else 20
  hp <- if (!is.null(set$hp)) set$hp else 20
  res <- if (!is.null(set$res)) set$res else 300

  name <- paste0(plot_name, filetype)
  # Avoid clobbering an existing file unless explicitly requested: append a
  # timestamp to the file name instead.
  if (file.exists(file.path(outp, name)) && !overwrite) {
    name <- paste0(plot_name, "_",
                   format(Sys.time(), "%d-%m-%y_%H%M%S"), filetype)
  }

  ggsave(file.path(outp, name), pl,
         width = wp, height = hp, units = "cm", dpi = res)
}
#' Pad strings to the width of the longest element
#'
#' Pads every element of a character vector to the length of its longest
#' non-missing element using `stringr::str_pad()`.
#'
#' @param vec Character vector to pad. Default is an empty vector.
#' @param ... Further arguments passed on to `stringr::str_pad()`.
#'
#' @return A character vector of the same length as `vec`. Empty input yields
#'   `character(0)`; missing elements stay missing.
str_pad_to_max <- function(vec = c(), ...) {
  # max() of an empty vector warns and yields -Inf, so handle it up front.
  if (length(vec) == 0L) {
    return(character(0))
  }

  widths <- str_length(vec)
  # Nothing to measure when every element is missing.
  if (all(is.na(widths))) {
    return(as.character(vec))
  }

  # Ignore NAs so a single missing element does not turn the target width
  # (and with it every padded result) into NA.
  str_pad(vec, max(widths, na.rm = TRUE), ...)
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment