diff --git a/NAMESPACE b/NAMESPACE
index 35f8144d00ca8e5e2e21299a40ecaa58d88bc64c..157c3f0add2328d80bd4ddfd691b9213b7ef0679 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -18,6 +18,7 @@ export(find_longest_reading_frames)
 export(find_repeat_positions)
 export(kmer_based_distance_matrix)
 export(meshclustR)
+export(per_cluster_abundance)
 export(pivot_cluster_tbl_wider)
 export(plot_abundance_per_sample)
 export(plot_abundance_sums_per_sequence)
diff --git a/R/per_cluster_abundance.R b/R/per_cluster_abundance.R
new file mode 100644
index 0000000000000000000000000000000000000000..34b8c8030da25023b107ba376158314e5ca45190
--- /dev/null
+++ b/R/per_cluster_abundance.R
@@ -0,0 +1,50 @@
+#' Extract abundance of sequences from a single cluster
+#'
+#' Looks up every sequence of one cluster in the sequence table and returns
+#' the per-sample counts in long format. Primarily intended for use inside
+#' \code{\link{read_and_write_cluster_abundance}}.
+#'
+#' @param seqs_of_one_cluster DNAStringSet containing sequences of one cluster
+#' @param seqtab_nochim Path to the seqtab_nochim file (in RDS format), or the
+#'   already loaded table itself (a matrix or data frame whose column names
+#'   are the sequences), so the file need not be re-read for every cluster
+#' @param reference_seqs Named vector of reference sequences to exclude (Optional) or NULL (default)
+#' @return A tibble with the columns \code{ID}, \code{Sample} and
+#'   \code{count}: the abundance of each sequence of the cluster per sample
+#' @import dplyr readr tibble tidyr
+#' @export
+
+per_cluster_abundance <- function(seqs_of_one_cluster = DNAStringSet,
+                                  seqtab_nochim = 'seqtab_nochim.rds',
+                                  reference_seqs = NULL){
+  # Accept an in-memory table as well as a path, so that a caller looping over
+  # many clusters can load the RDS file once instead of once per cluster.
+  if(!(is.matrix(seqtab_nochim) || is.data.frame(seqtab_nochim))){
+    seqtab_nochim <- readRDS(seqtab_nochim)
+  }
+
+  # Transpose so that each row is one sequence, keyed by the 'seqs' column.
+  stab <- seqtab_nochim %>%
+    t() %>% as.data.frame() %>%
+    rownames_to_column(var = 'seqs')
+
+  # NOTE(review): relies on as.data.frame() of a DNAStringSet giving the
+  # sequence strings in its first column -- confirm.
+  seq_tbl <- tibble(seqs = as.data.frame(seqs_of_one_cluster)[[1]],
+                    ID = names(seqs_of_one_cluster))
+  # left_join keeps cluster sequences that are missing from the table; their
+  # counts come out as NA.
+  named_stab <- left_join(seq_tbl, stab, by = 'seqs')
+
+  # Drop rows whose ID is the name of one of the reference sequences.
+  if(!is.null(reference_seqs)){
+    named_stab <- named_stab %>%
+      filter(!(ID %in% names(reference_seqs)))
+  }
+
+  # End on the pipeline itself, not on an assignment: a function whose last
+  # expression is `x <- ...` hands back its value invisibly.
+  named_stab %>%
+    select(-seqs) %>%
+    pivot_longer(cols = -ID, names_to = 'Sample', values_to = 'count')
+}
diff --git a/R/read_and_write_cluster_abundance.R b/R/read_and_write_cluster_abundance.R index
66a9a9e684a477be65de75b1b2361f05104fc234..f6e5595fcbdcc482d80fcde021c770a52c1d032a 100644 --- a/R/read_and_write_cluster_abundance.R +++ b/R/read_and_write_cluster_abundance.R @@ -22,26 +22,27 @@ read_and_write_cluster_abundance <- function( seqtab_nochim = 'seqtab_nochim.rds', outpath = path) { - stab <- readRDS(seqtab_nochim) %>% - t() %>% as.data.frame() %>% - rownames_to_column(var = 'seqs') +# stab <- readRDS(seqtab_nochim) %>% +# t() %>% as.data.frame() %>% +# rownames_to_column(var = 'seqs') - per_cluster_abundance <- function(seqs_of_one_cluster = DNAStringSet){ - seq_tbl <- tibble(seqs = as.data.frame(seqs_of_one_cluster)[[1]], - ID = names(seqs_of_one_cluster)) - named_stab <- left_join(seq_tbl, stab, by = 'seqs') +# per_cluster_abundance <- function(seqs_of_one_cluster = DNAStringSet){ +# seq_tbl <- tibble(seqs = as.data.frame(seqs_of_one_cluster)[[1]], +# ID = names(seqs_of_one_cluster)) +# named_stab <- left_join(seq_tbl, stab, by = 'seqs') - if(!is.null(reference_seqs)){ - named_stab <- named_stab %>% - filter(!(ID %in% names(reference_seqs))) - } - - stab_tbl <- named_stab %>% - select(-seqs) %>% - pivot_longer(cols = -ID, names_to = 'Sample', values_to = 'count') - } +# if(!is.null(reference_seqs)){ +# named_stab <- named_stab %>% +# filter(!(ID %in% names(reference_seqs))) +# } - out <- lapply(cluster_sequence_list, per_cluster_abundance) +# stab_tbl <- named_stab %>% +# select(-seqs) %>% +# pivot_longer(cols = -ID, names_to = 'Sample', values_to = 'count') +# } +# + out <- lapply(cluster_sequence_list, per_cluster_abundance, + seqtab_nochim = seqtab_nochim, reference_seqs = reference_seqs) write_cluster_abundance_tbl <- function(clus_name){ stab_tbl <- out[[clus_name]] diff --git a/man/per_cluster_abundance.Rd b/man/per_cluster_abundance.Rd new file mode 100644 index 0000000000000000000000000000000000000000..bd026560ac0c5c6b84c0a399e3a3088a04f33ad2 --- /dev/null +++ b/man/per_cluster_abundance.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not 
edit by hand +% Please edit documentation in R/per_cluster_abundance.R +\name{per_cluster_abundance} +\alias{per_cluster_abundance} +\title{Extract abundance of sequences from a single cluster} +\usage{ +per_cluster_abundance( + seqs_of_one_cluster = DNAStringSet, + seqtab_nochim = "seqtab_nochim.rds", + reference_seqs = NULL +) +} +\arguments{ +\item{seqs_of_one_cluster}{DNAStringSet containing sequences of one cluster} + +\item{seqtab_nochim}{Path to the seqtab_nochim file (in RDS format)} + +\item{reference_seqs}{Named vector of reference sequences to exclude (Optional) or NULL (default)} +} +\value{ +A table with the abundance of sequences within the cluster +} +\description{ +Primarily intended for use inside \code{\link{read_and_write_cluster_abundance}}. +}