From 02892b7b3402952f92c73318e54da4af73767d6c Mon Sep 17 00:00:00 2001 From: Simeon <51403284+simeross@users.noreply.github.com> Date: Wed, 8 Nov 2023 08:46:31 +0100 Subject: [PATCH] more efficient cluster writing by only reading in seqtab_nochim once --- R/per_cluster_abundance.R | 9 +++------ R/read_and_write_cluster_abundance.R | 21 +++------------------ man/per_cluster_abundance.Rd | 4 ++-- 3 files changed, 8 insertions(+), 26 deletions(-) diff --git a/R/per_cluster_abundance.R b/R/per_cluster_abundance.R index 34b8c80..2348497 100644 --- a/R/per_cluster_abundance.R +++ b/R/per_cluster_abundance.R @@ -3,22 +3,19 @@ #' Primarily intended for use inside \code{\link{read_and_write_cluster_abundance}}. #' #' @param seqs_of_one_cluster DNAStringSet containing sequences of one cluster -#' @param seqtab_nochim Path to the seqtab_nochim file (in RDS format) +#' @param seqtab_nochim seqtab object, as imported by readRDS ("path/to/seqtab_nochim.rds") #' @param reference_seqs Named vector of reference sequences to exclude (Optional) or NULL (default) #' @return A table with the abundance of sequences within the cluster #' @import dplyr readr tibble tidyr #' @export per_cluster_abundance <- function(seqs_of_one_cluster = DNAStringSet, - seqtab_nochim = 'seqtab_nochim.rds', + seqtab_nochim = stab, reference_seqs = NULL){ - stab <- readRDS(seqtab_nochim) %>% - t() %>% as.data.frame() %>% - rownames_to_column(var = 'seqs') seq_tbl <- tibble(seqs = as.data.frame(seqs_of_one_cluster)[[1]], ID = names(seqs_of_one_cluster)) - named_stab <- left_join(seq_tbl, stab, by = 'seqs') + named_stab <- left_join(seq_tbl, seqtab_nochim, by = 'seqs') if(!is.null(reference_seqs)){ named_stab <- named_stab %>% diff --git a/R/read_and_write_cluster_abundance.R b/R/read_and_write_cluster_abundance.R index f6e5595..255b5fe 100644 --- a/R/read_and_write_cluster_abundance.R +++ b/R/read_and_write_cluster_abundance.R @@ -22,25 +22,10 @@ read_and_write_cluster_abundance <- function( seqtab_nochim = 'seqtab_nochim.rds', outpath = path) { -# stab <- readRDS(seqtab_nochim) %>% -# t() %>% as.data.frame() %>% -# rownames_to_column(var = 'seqs') + stab <- readRDS(seqtab_nochim) %>% + t() %>% as.data.frame() %>% + rownames_to_column(var = 'seqs') -# per_cluster_abundance <- function(seqs_of_one_cluster = DNAStringSet){ -# seq_tbl <- tibble(seqs = as.data.frame(seqs_of_one_cluster)[[1]], -# ID = names(seqs_of_one_cluster)) -# named_stab <- left_join(seq_tbl, stab, by = 'seqs') - -# if(!is.null(reference_seqs)){ -# named_stab <- named_stab %>% -# filter(!(ID %in% names(reference_seqs))) -# } - -# stab_tbl <- named_stab %>% -# select(-seqs) %>% -# pivot_longer(cols = -ID, names_to = 'Sample', values_to = 'count') -# } -# out <- lapply(cluster_sequence_list, per_cluster_abundance, seqtab_nochim = seqtab_nochim, reference_seqs = reference_seqs) diff --git a/man/per_cluster_abundance.Rd b/man/per_cluster_abundance.Rd index bd02656..f61d293 100644 --- a/man/per_cluster_abundance.Rd +++ b/man/per_cluster_abundance.Rd @@ -6,14 +6,14 @@ \usage{ per_cluster_abundance( seqs_of_one_cluster = DNAStringSet, - seqtab_nochim = "seqtab_nochim.rds", + seqtab_nochim = stab, reference_seqs = NULL ) } \arguments{ \item{seqs_of_one_cluster}{DNAStringSet containing sequences of one cluster} -\item{seqtab_nochim}{Path to the seqtab_nochim file (in RDS format)} +\item{seqtab_nochim}{seqtab object, as imported by readRDS ("path/to/seqtab_nochim.rds")} \item{reference_seqs}{Named vector of reference sequences to exclude (Optional) or NULL (default)} } -- GitLab