From 8d1e165d2db56b1654e312592c0e25e761a2017c Mon Sep 17 00:00:00 2001 From: Simeon <51403284+simeross@users.noreply.github.com> Date: Wed, 1 Nov 2023 13:40:33 +0100 Subject: [PATCH] exporting per_cluster_abundance, which was previously a purely internal function --- NAMESPACE | 1 + R/per_cluster_abundance.R | 31 ++++++++++++++++++++++++ R/read_and_write_cluster_abundance.R | 35 ++++++++++++++-------------- man/per_cluster_abundance.Rd | 25 ++++++++++++++++++++ 4 files changed, 75 insertions(+), 17 deletions(-) create mode 100644 R/per_cluster_abundance.R create mode 100644 man/per_cluster_abundance.Rd diff --git a/NAMESPACE b/NAMESPACE index 35f8144..157c3f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(find_longest_reading_frames) export(find_repeat_positions) export(kmer_based_distance_matrix) export(meshclustR) +export(per_cluster_abundance) export(pivot_cluster_tbl_wider) export(plot_abundance_per_sample) export(plot_abundance_sums_per_sequence) diff --git a/R/per_cluster_abundance.R b/R/per_cluster_abundance.R new file mode 100644 index 0000000..34b8c80 --- /dev/null +++ b/R/per_cluster_abundance.R @@ -0,0 +1,31 @@ +#' Extract abundance of sequences from a single cluster +#' +#' Primarily intended for use inside \code{\link{read_and_write_cluster_abundance}}. 
+#' +#' @param seqs_of_one_cluster DNAStringSet containing sequences of one cluster +#' @param seqtab_nochim Path to the seqtab_nochim file (in RDS format) +#' @param reference_seqs Named vector of reference sequences to exclude (Optional) or NULL (default) +#' @return A table with the abundance of sequences within the cluster +#' @import dplyr readr tibble tidyr +#' @export + +per_cluster_abundance <- function(seqs_of_one_cluster = DNAStringSet, + seqtab_nochim = 'seqtab_nochim.rds', + reference_seqs = NULL){ + stab <- readRDS(seqtab_nochim) %>% + t() %>% as.data.frame() %>% + rownames_to_column(var = 'seqs') + + seq_tbl <- tibble(seqs = as.data.frame(seqs_of_one_cluster)[[1]], + ID = names(seqs_of_one_cluster)) + named_stab <- left_join(seq_tbl, stab, by = 'seqs') + + if(!is.null(reference_seqs)){ + named_stab <- named_stab %>% + filter(!(ID %in% names(reference_seqs))) + } + + stab_tbl <- named_stab %>% + select(-seqs) %>% + pivot_longer(cols = -ID, names_to = 'Sample', values_to = 'count') +} diff --git a/R/read_and_write_cluster_abundance.R b/R/read_and_write_cluster_abundance.R index 66a9a9e..f6e5595 100644 --- a/R/read_and_write_cluster_abundance.R +++ b/R/read_and_write_cluster_abundance.R @@ -22,26 +22,27 @@ read_and_write_cluster_abundance <- function( seqtab_nochim = 'seqtab_nochim.rds', outpath = path) { - stab <- readRDS(seqtab_nochim) %>% - t() %>% as.data.frame() %>% - rownames_to_column(var = 'seqs') +# stab <- readRDS(seqtab_nochim) %>% +# t() %>% as.data.frame() %>% +# rownames_to_column(var = 'seqs') - per_cluster_abundance <- function(seqs_of_one_cluster = DNAStringSet){ - seq_tbl <- tibble(seqs = as.data.frame(seqs_of_one_cluster)[[1]], - ID = names(seqs_of_one_cluster)) - named_stab <- left_join(seq_tbl, stab, by = 'seqs') +# per_cluster_abundance <- function(seqs_of_one_cluster = DNAStringSet){ +# seq_tbl <- tibble(seqs = as.data.frame(seqs_of_one_cluster)[[1]], +# ID = names(seqs_of_one_cluster)) +# named_stab <- left_join(seq_tbl, stab, by 
= 'seqs') - if(!is.null(reference_seqs)){ - named_stab <- named_stab %>% - filter(!(ID %in% names(reference_seqs))) - } - - stab_tbl <- named_stab %>% - select(-seqs) %>% - pivot_longer(cols = -ID, names_to = 'Sample', values_to = 'count') - } +# if(!is.null(reference_seqs)){ +# named_stab <- named_stab %>% +# filter(!(ID %in% names(reference_seqs))) +# } - out <- lapply(cluster_sequence_list, per_cluster_abundance) +# stab_tbl <- named_stab %>% +# select(-seqs) %>% +# pivot_longer(cols = -ID, names_to = 'Sample', values_to = 'count') +# } +# + out <- lapply(cluster_sequence_list, per_cluster_abundance, + seqtab_nochim = seqtab_nochim, reference_seqs = reference_seqs) write_cluster_abundance_tbl <- function(clus_name){ stab_tbl <- out[[clus_name]] diff --git a/man/per_cluster_abundance.Rd b/man/per_cluster_abundance.Rd new file mode 100644 index 0000000..bd02656 --- /dev/null +++ b/man/per_cluster_abundance.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/per_cluster_abundance.R +\name{per_cluster_abundance} +\alias{per_cluster_abundance} +\title{Extract abundance of sequences from a single cluster} +\usage{ +per_cluster_abundance( + seqs_of_one_cluster = DNAStringSet, + seqtab_nochim = "seqtab_nochim.rds", + reference_seqs = NULL +) +} +\arguments{ +\item{seqs_of_one_cluster}{DNAStringSet containing sequences of one cluster} + +\item{seqtab_nochim}{Path to the seqtab_nochim file (in RDS format)} + +\item{reference_seqs}{Named vector of reference sequences to exclude (Optional) or NULL (default)} +} +\value{ +A table with the abundance of sequences within the cluster +} +\description{ +Primarily intended for use inside \code{\link{read_and_write_cluster_abundance}}. +} -- GitLab