roxygen tags

6941973c · Simeon · ead693c0 · 6941973c · 6941973c · 6941973c
Commit 6941973c authored 2 years ago by Simeon
--- a/R/align_and_generate_upgma.R
+++ b/R/align_and_generate_upgma.R
-# UPGMA tree from DNAStringSet object in named sequence list
+#' UPGMA tree from DNAStringSet object in named sequence list
+#'
+#' This function generates a UPGMA tree from a DNAStringSet object in a named sequence list.
+#'
+#' @param cluster The name of the cluster to generate the UPGMA tree from
+#' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences
+#' @return A UPGMA tree object
+#' @import utils
+#' @importFrom DECIPHER AlignSeqs
+#' @importFrom phangorn phyDat
+#' @importFrom phangorn dist.ml
+#' @importFrom phangorn upgma
+#' @export
+
+# Define function to align a cluster of sequences and generate a UPGMA tree
 align_and_generate_upgma <- function(cluster, sequence_list) {
+
+  # Use AlignSeqs function to align the sequences in the given cluster
  alig <- AlignSeqs(sequence_list[[cluster]], verbose = FALSE)
-  tree <-  phangorn::phyDat(as(alig, "matrix"), type = "DNA") %>%
+
+  # Convert the aligned sequences to a matrix of DNA data and calculate the distance matrix using maximum likelihood
+  # Then use the upgma function to generate the tree
+  tree <- phangorn::phyDat(as(alig, 'matrix'), type = 'DNA') %>%
    phangorn::dist.ml() %>%
    phangorn::upgma()
+
+  # Return the generated UPGMA tree
  return(tree)
-}
\ No newline at end of file
+}
--- a/R/alignment_based_distance_matrix.R
+++ b/R/alignment_based_distance_matrix.R
-# Align seqs and get distance matrix using DECIPHER, multithreading supported
-alignment_based_distance_matrix <- function (seqs = DNAStringSet, 
+#' Generate an alignment-based distance matrix from a set of DNA sequences
+#'
+#' This function uses the DECIPHER package to align a set of DNA sequences and generate a distance matrix based on the alignments. The alignments can be performed using multiple processors for faster execution. This function requires the DECIPHER package to be installed.
+#'
+#' @param seqs A DNAStringSet object containing the DNA sequences to be used for the generation of the distance matrix.
+#' @param ncores An integer indicating the number of processors to be used for the alignment and generation of the distance matrix. The default value is 1.
+#'
+#' @return A matrix object containing the distance scores calculated based on the alignments of the input sequences.
+#'
+#' @import DECIPHER
+#' @import Biostrings
+#'
+#' @examples
+#' data(smallexample)
+#' dna_sequences <- DNAStringSet(smallexample)
+#' alignment_based_distance_matrix(dna_sequences, ncores = 2)
+#'
+#' @export
+alignment_based_distance_matrix <- function(seqs = DNAStringSet,
                                             ncores = 1) {
+  # Align the sequences using the specified number of processors
  seqs_alig <- AlignSeqs(seqs, processors = ncores, verbose = FALSE)
+  # Generate the distance matrix from the aligned sequences using the specified number of processors
  DistanceMatrix(seqs_alig, verbose = FALSE, processors = ncores)
-}
\ No newline at end of file
+}
--- a/R/calc_asv_nmds.R
+++ b/R/calc_asv_nmds.R
-calc_asv_nmds <- function(cleaned_seqtab = my_cleaned_seqtab, seed = 1905, ...){
+#' Calculate NMDS using ASV counts
+#'
+#' This function calculates NMDS (non-metric multidimensional scaling) using ASV counts data.
+#' It converts ASV counts to numeric data, performs NMDS analysis on the data,
+#' formats the NMDS results as a tibble, and combines the NMDS results and tibble into a list object.
+#'
+#' @param cleaned_seqtab A data frame of cleaned ASV counts data (from clean_seqtab())
+#' @param seed An optional integer specifying the seed for the random number generator (default: 1905)
+#' @param ... Additional arguments passed to the `metaMDS` function from the vegan package
+#'
+#' @return A list object with results including NMDS results and NMDS tibble
+#' @export
+#'
+#' @examples
+#' data("my_cleaned_seqtab")
+#' calc_asv_nmds(my_cleaned_seqtab)
+calc_asv_nmds <- function(cleaned_seqtab = my_cleaned_seqtab, seed = 1905, ...) {
+
  # Extract numeric asvs
-  num_asv <- veganify_asvcounts(cleaned_seqtab)
-  # Calculate NMDS
-  set.seed(seed)
-  nmds <- metaMDS(num_asv, "bray", ...)
-  nmds_tbl <- as_tibble(nmds$points, rownames = "sample")
-  out <- list(nmds, nmds_tbl)
-  names(out) <- c("NMDS", "NMDS tibble")
-  return(out)
-}
\ No newline at end of file
+  num_asv <- veganify_asvcounts(cleaned_seqtab) # Convert ASV counts to numeric data
+
+  ## Calculate NMDS
+  set.seed(seed) # Set seed for reproducibility
+  nmds <- metaMDS(num_asv, 'bray', ...) # Perform NMDS analysis on the data
+  nmds_tbl <- as_tibble(nmds$points, rownames = 'sample') # Format NMDS results as a tibble
+  out <- list(nmds, nmds_tbl) # Combine NMDS results and tibble into a list object
+  names(out) <- c('NMDS', 'NMDS tibble') # Give names to the list elements
+
+  return(out) # Return the list object with results
+}
--- a/R/clean_seqtab.R
+++ b/R/clean_seqtab.R
-clean_seqtab <- function(file = "seqtab_nochim.rds", 
-                         ASV_sequences = asvstrings,
-                         output = TRUE){
+clean_seqtab <- function(file = 'seqtab_nochim.rds',
+                         ASV_sequences = asvstrings,   # Specify a default value for 'ASV_sequences' if none given
+                         output = TRUE){    # Specify a default value for 'output' if none given
+
+  # Read in an RDS file and save it as 'st'
  st <- readRDS(file)
-  
-  st_clean <- st %>% 
-    t() %>% 
-    tibble::as_tibble() %>% 
+
+  # Transpose 'st', convert to tibble, add column 'seqs' to it
+  # Note the use of the pipe operator '%>%'
+  st_clean <- st %>%
+    t() %>%
+    tibble::as_tibble() %>%
    dplyr::mutate(seqs = colnames(st))
-  
-  st_clean <- tibble(seqs = as.character(ASV_sequences), 
-                     ASV = names(ASV_sequences))  %>% 
-    left_join(st_clean, by = "seqs")
-  
+
+  # Create a tibble with column 'seqs' containing ASV sequences as character strings
+  # and another column 'ASV' containing their names. Then left join with 'st_clean'.
+  st_clean <- tibble(seqs = as.character(ASV_sequences),
+                     ASV = names(ASV_sequences))  %>%
+    left_join(st_clean, by = 'seqs')
+
+  # If 'output' is TRUE, write a csv filecontaining the cleaned sequence table in the same directory as the original file
+  # Note the use of file.path()
  if(output){
-    readr::write_csv(st_clean, file.path(dirname(file), "clean_seqtab.csv"))
+    readr::write_csv(st_clean, file.path(dirname(file), 'clean_seqtab.csv'))
  }
-  
+
+  # Return the cleaned sequence table
  return(st_clean)
-}
\ No newline at end of file
+}
--- a/R/cluster_longest_reading_frames.R
+++ b/R/cluster_longest_reading_frames.R
-# Include cluster info in reading frame tbl
+#' Cluster Longest Reading Frames
+#'
+#' This function takes two inputs: clustered_sequences (which should be a \code{\link{DNAStringSetList}}) and a reading_frame_tbl from \code{\link{find_longest_reading_frames()}}
+#'
+#' @param clustered_sequences A list of DNA sequences that have been clustered, e.g. using \code{\link{meshclustR()}}
+#' @param reading_frame_tbl A tibble object with columns "seqnames," "strand," "start," "end," and "width"
+#' @return A tibble object with the same column structure as the \code{\link{reading_frame_tbl}}, but with an additional column for "cluster_name"
+#' @export
+#'
+#' @examples
+#' clustered_sequences <- DNAStringSetList(c("ATGTCGTA", "CGTACTG", "AGTGTCAGT"))
+#' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9))
+#' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl)
+#'
+#' @import dplyr
+#' @import Biostrings
+#'
+
+# Define a function that clusters DNA sequences and determines their longest reading frame
 cluster_longest_reading_frames <- function(
-    clustered_sequences = DNAStringSetList, reading_frame_tbl = tbl) {
-  
-  seqnames_clus_order <- map(clustered_sequences, names) %>% 
-    unlist() %>% 
+  clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered
+  reading_frame_tbl = tbl) { # A variable that holds a table of reading frames
+
+  # Create a vector of sequence names in the order they appear in the clustered_sequences variable
+  seqnames_clus_order <- map(clustered_sequences, names) %>%
+    unlist() %>%
    unname()
-  
+
+  # Create a data frame with a column that holds the group names of the DNA sequences
  group_names_clus_order <- as.data.frame(
-    DNAStringSetList(clustered_sequences))[["group_name"]]
-  
+    DNAStringSetList(clustered_sequences))[['group_name']]
+
+  # Combine the reading frame table with the vector of sequence names and group names
+  # to get the maximum reading frame for each DNA sequence cluster
  clus_max_rf <- left_join(reading_frame_tbl,
                           tibble(seqnames = seqnames_clus_order,
-                                  cluster_name = group_names_clus_order),
-                           by = "seqnames") 
-  
+                                  cluster_name =group_names_clus_order),
+                           by = 'seqnames')
+
+  # Return a table with the maximum reading frame for each DNA sequence cluster
  return(clus_max_rf)
-}
\ No newline at end of file
+}