Clean up, filled DESCRIPTION

fdf13598 · Simeon · 5f5c378c · fdf13598 · fdf13598 · fdf13598
Commit fdf13598 authored 1 year ago by Simeon
--- a/DESCRIPTION
+++ b/DESCRIPTION
 Package: cAmpSeqR
 Type: Package
-Title: What the Package Does (Title Case)
+Title: Custom Processing and Visualization for Multi-Amplicon Sequencing Projects
 Version: 0.1.0
-Author: Who wrote it
-Maintainer: The package maintainer <yourself@somewhere.net>
-Description: More about what it does (maybe more than one line)
-    Use four spaces when indenting paragraphs within the Description.
-License: What license is it under?
+Authors@R: person(
+    "Simeon", "Lim Rossmann", 
+    email = "simeon.rossmann@nmbu.no", 
+    role = c("aut", "cre"), 
+    comment = c(ORCID = "0000-0003-0435-8221")
+    )
+Description: Provides a range of functions to process
+    and visualize next-generation sequencing data from multi-amplicon
+    sequencing projects. It may work with a range of input variants
+    consisting of a sequence list and count table for these sequences but
+    was initially designed for data generated by the DADA2 package.
+    The functions range from very simple utilities and parsers to complex
+    plotting functions and are packaged as they are here for the 
+    convenience of the author and collaborators.
+    A commented pipeline incorporating most of these functions in the
+    intended sequence can be obtained from the author upon request.
+Imports:
+    Biostrings,
+    DECIPHER,
+    ggdendro,
+    ggtree,
+    tidyverse,
+    vegan,
+    viridis,
+    ape,
+    cowplot,
+    phangorn
+License: GPL (>= 3)
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
 export(align_and_generate_upgma)
 export(alignment_based_distance_matrix)
 export(calc_asv_nmds)
+export(clean_seqtab)
 export(cluster_longest_reading_frames)
 export(cluster_tbl_named)
 export(combine_cluster_plots_and_save)
@@ -18,6 +19,7 @@ export(find_repeat_positions)
 export(kmer_based_distance_matrix)
 export(meshclustR)
 export(pivot_cluster_tbl_wider)
+export(plot_abundance_per_sample)
 export(plot_abundance_sums_per_sequence)
 export(plot_asv_nmds)
 export(plot_cluster_dendrogram)
@@ -45,24 +47,11 @@ export(veganify_asvcounts)
 export(veganify_generic_wide_tbl)
 import(Biostrings)
 import(DECIPHER)
-import(dplyr)
+import(GenomicRanges)
+import(ape)
+import(cowplot)
 import(ggdendro)
-import(ggplot2)
 import(ggtree)
-import(magrittr)
-import(purrr)
-import(readr)
-import(scales)
-import(stats)
-import(stringr)
-import(tibble)
-import(tidyr)
+import(phangorn)
 import(tidyverse)
-import(utils)
-import(vegan)
 import(viridis)
-importFrom(GenomicRanges,GRanges)
-import(ape)
-import(cowplot)
-import(phangorn)
-
--- a/R/align_and_generate_upgma.R
+++ b/R/align_and_generate_upgma.R
@@ -5,14 +5,10 @@
 #' @param cluster The name of the cluster to generate the UPGMA tree from
 #' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences
 #' @return A UPGMA tree object
-#' @import utils
-#' @importFrom DECIPHER AlignSeqs
-#' @importFrom phangorn phyDat
-#' @importFrom phangorn dist.ml
-#' @importFrom phangorn upgma
+#' @import DECIPHER
+#' @import phangorn
+#' @import tidyverse
 #' @export
-
-# Define function to align a cluster of sequences and generate a UPGMA tree
 align_and_generate_upgma <- function(cluster, sequence_list) {

  # Use AlignSeqs function to align the sequences in the given cluster

--- a/R/calc_asv_nmds.R
+++ b/R/calc_asv_nmds.R
@@ -9,6 +9,7 @@
 #' @param ... Additional arguments passed to the `metaMDS` function from the vegan package
 #'
 #' @return A list object with results including NMDS results and NMDS tibble
+#' @import tidyverse
 #' @export
 #'
 #' @examples

--- a/R/clean_seqtab.R
+++ b/R/clean_seqtab.R
+#' Clean Sequence Table
+#'
+#' Cleans a typical sequence table as output by our internal DADA2 pipeline by
+#' transposing it, converting it to a tibble format, and adding a column of sequence names.
+#'
+#' @param file The path to the RDS file containing the sequence table.
+#' @param ASV_sequences A character vector specifying the ASV sequences.
+#' @param output A logical value indicating whether to output a CSV file.
+#' @return A tibble containing the cleaned sequence table.
+#'
+#' @import tidyverse
+#'
+#' @examples
+#' clean_seqtab()
+#' clean_seqtab(file = 'seqtab.rds', output = FALSE)
+#' clean_seqtab(ASV_sequences = asvstrings)
+#' @export
 clean_seqtab <- function(file = 'seqtab_nochim.rds',
                         ASV_sequences = asvstrings,   # Specify a default value for 'ASV_sequences' if none given
                         output = TRUE){    # Specify a default value for 'output' if none given

--- a/R/cluster_longest_reading_frames.R
+++ b/R/cluster_longest_reading_frames.R
@@ -12,11 +12,8 @@
 #' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9))
 #' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl)
 #'
-#' @import dplyr
 #' @import Biostrings
-#'
-
-# Define a function that clusters DNA sequences and determines their longest reading frame
+#' @import tidyverse
 cluster_longest_reading_frames <- function(
  clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered
  reading_frame_tbl = tbl) { # A variable that holds a table of reading frames

--- a/R/cluster_tbl_named.R
+++ b/R/cluster_tbl_named.R
@@ -12,9 +12,8 @@
 #'   name, sequence number within the cluster, and cluster size.
 #'
 #' @import tidyverse
-#' @import Biostrings DNAStringSet
+#' @import Biostrings
 #' @export
-#'
 cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){
  # First: get names of each cluster
  cluster_names <- tibble(clus_name = names(clustered_sequences),

--- a/R/combine_cluster_plots_and_save.R
+++ b/R/combine_cluster_plots_and_save.R
@@ -11,10 +11,8 @@
 #' @param w The width of the plot. Default is 'cm_width'.
 #' @param h The height of the plot. Default is 'cm_height'.
 #' @return combined plot
+#' @import tidyverse
 #' @export
-
-# plot list of three plots in three columns and save to "path" with filename
-# "Cluster_overview_'cluster'.pdf"
 combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path,
                                           w = cm_width, h = cm_height) {
  dir.create(out_path, showWarnings = FALSE)

--- a/R/count_clusters.R
+++ b/R/count_clusters.R
@@ -18,11 +18,8 @@
 #' )
 #' count_clusters(clus_tbl_list)
 #'
-#' @import dplyr
-#' @import purrr
-#' @import tidyr
+#' @import tidyverse
 #' @export
-
 count_clusters <- function(clus_tbl_list){

   # Remove the non-numeric column "seqnames" from each cluster table in the list.

--- a/R/define_plateau.R
+++ b/R/define_plateau.R
@@ -12,9 +12,8 @@
 #' @examples
 #' define_plateau(cluster_counts = cluster_counts_df)
 #'
-#' @import dplyr
+#' @import tidyverse
 #' @export
-# The following code defines a function called "define_plateau"
 define_plateau <- function(cluster_counts){
  # "cluster_counts" is a tibble of cluster counts passed as a parameter to the function
  # "clus_plateau" filters the cluster counts by selecting only those with cluster_number greater than or equal to 2

--- a/R/dendrogram_hclust.R
+++ b/R/dendrogram_hclust.R
@@ -9,8 +9,7 @@
 #' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram.
 #'
 #' @import ggdendro
-#' @import vegan
-#' @import stats
+#' @import tidyverse
 #'
 #' @examples
 #' # Generate dendrogram with default parameters
@@ -21,7 +20,6 @@
 #' dendrogram_hclust(daisy_dist)
 #'
 #' @export
-
 dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) {
  require(ggdendro)
  set.seed(seed)

--- a/R/export_longest_reading_frame.R
+++ b/R/export_longest_reading_frame.R
@@ -13,12 +13,9 @@
 #' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE)
 #'
 #' @import Biostrings
-#' @import dplyr
-#' @import tidyr
-#' @import utils
+#' @import tidyverse
 #'
 #' @export
-# Define a function that exports the longest reading frames
 export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table
                                          seqs = myDNAStringSet, # function argument for DNA sequence set
                                          outpath = path, # function argument for output file path

--- a/R/find_contiguous_multi_repeats.R
+++ b/R/find_contiguous_multi_repeats.R
@@ -23,11 +23,9 @@
 #'
 #' # Expected output: c(2, 1)
 #'
-#' @import stringr
+#' @import tidyverse
 #' @import Biostrings
 #'
-#' @keywords sequence, repeats
-#'
 find_contiguous_multi_repeats <- function(sequences = DNAStringSet,
                                          repeat_sequence = 'string',
                                          singlet_count = 100) {

--- a/R/find_longest_hrf.R
+++ b/R/find_longest_hrf.R
@@ -10,7 +10,7 @@
 #' @examples
 #' find_longest_hrf(seqs)
 #'
-#' @import dplyr
+#' @import tidyverse
 #'
 #' @export
 find_longest_hrf <- function(seqs = DNAStringSet){

--- a/R/find_longest_orf.R
+++ b/R/find_longest_orf.R
@@ -8,18 +8,14 @@
 #' @return A tibble containing the start and end positions, strand, and length of the longest ORF in each sequence.
 #'
 #' @import Biostrings
-#' @importFrom GenomicRanges GRanges
-#' @import tibble
-#' @import dplyr
-#' @export
+#' @import GenomicRanges
+#' @import tidyverse
 #'
 #' @examples
 #' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC"))
 #' find_longest_orf(seqs)
 #'
-#' @seealso \code{\link{findORFs}}
-#'
-
+#' @export
 find_longest_orf <- function(seqs = DNAStringSet) {
  # Find ORFs in the sequences, return longest ORF, and convert to a vector
  orfs <- findORFs(seqs, longestORF = TRUE, startCodon = startDefinition(6)) %>%

--- a/R/find_longest_reading_frames.R
+++ b/R/find_longest_reading_frames.R
@@ -10,10 +10,9 @@
 #' @return A data frame containing the longest reading frames for each sequence.
 #' The data frame includes the sequence names, reading frame, and the width of the reading frame.
 #'
-#' @import dplyr, tidyr
+#' @import tidyverse
 #'
 #' @export
-## Reading frame finder (longest orf or hrf)
 find_longest_reading_frames <- function(seqs = myDNAStringSet){
  orfs <- find_longest_orf(seqs)
  hrfs <- find_longest_hrf(seqs)

--- a/R/find_repeat_positions.R
+++ b/R/find_repeat_positions.R
@@ -6,18 +6,14 @@
 #' @param repeat_sequence A string specifying the repeat sequence to search for.
 #'
 #' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity.
-#'
-#' @import stringr
-#' @import dplyr
-#' @import tibble
-#'
-#' @export
+#' @import tidyverse
 #'
 #' @examples
 #' sequences <- DNAStringSet(c("AGTCAGT",
 #'                             "ACGTAGT",
 #'                             "AGTCGAT"))
 #' find_repeat_positions(sequences, "AGT")
+#' @export
 find_repeat_positions <- function(sequences = DNAStringSet, repeat_sequence = 'string'){

  repeat_positions <- str_locate_all(as.character(sequences), repeat_sequence)

--- a/R/kmer_based_distance_matrix.R
+++ b/R/kmer_based_distance_matrix.R
@@ -6,12 +6,10 @@
 #'
 #' @return A distance matrix in bin format.
 #'
-#' @importFrom ape as.DNAbin
-#'
-#' @importFrom Biostrings DNAStringSet
+#' @import ape
+#' @import Biostrings
 #'
 #' @export
-#'
 kmer_based_distance_matrix <- function (seqs) {
  seqbins <- ape::as.DNAbin(seqs)
  as.matrix(kdistance(seqbins))

--- a/R/meshclustR.R
+++ b/R/meshclustR.R
@@ -2,6 +2,7 @@
 #'
 #' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
 #' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
+#' Meshclust has to be installed and executable via system2() to run this function.
 #'
 #' James, Benjamin T. et al. (2018),
 #' MeShClust: an intelligent tool for clustering DNA sequences.
@@ -17,12 +18,8 @@
 #' @examples
 #' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path)
 #' @import Biostrings
-#' @import readr
-#' @import magrittr
-#' @import stringr
-#' @importFrom tools file_path_sans_ext
+#' @import tidyverse
 #' @export
-
 meshclustR <- function(seqs = MyDNAStringSet,
                       meshclust_bin = meshclust,
                       filepath = path){

--- a/R/pivot_cluster_tbl_wider.R
+++ b/R/pivot_cluster_tbl_wider.R
@@ -4,10 +4,9 @@
 #'
 #' @return A wide table of clusters with all sequences in each cluster listed
 #'
-#' @import dplyr
-#' @import tidyr
-#' @export
+#' @import tidyverse
 #'
+#' @export
 pivot_cluster_tbl_wider <- function(cluster_tbl) {
  # First: get names of each cluster
  cluster_names <- cluster_tbl %>%