From fdf135982c8f0d63be1dd2c0c5a3bb6b1f3ac46e Mon Sep 17 00:00:00 2001
From: Simeon <51403284+simeross@users.noreply.github.com>
Date: Thu, 26 Oct 2023 14:14:06 +0200
Subject: [PATCH] Clean up, filled DESCRIPTION

---
 DESCRIPTION                             | 35 ++++++++++++++++++++-----
 NAMESPACE                               | 23 +++++-----------
 R/align_and_generate_upgma.R            | 10 +++----
 R/calc_asv_nmds.R                       |  1 +
 R/clean_seqtab.R                        | 17 ++++++++++++
 R/cluster_longest_reading_frames.R      |  5 +---
 R/cluster_tbl_named.R                   |  3 +--
 R/combine_cluster_plots_and_save.R      |  4 +--
 R/count_clusters.R                      |  5 +---
 R/define_plateau.R                      |  3 +--
 R/dendrogram_hclust.R                   |  4 +--
 R/export_longest_reading_frame.R        |  5 +---
 R/find_contiguous_multi_repeats.R       |  4 +--
 R/find_longest_hrf.R                    |  2 +-
 R/find_longest_orf.R                    | 10 +++----
 R/find_longest_reading_frames.R         |  3 +--
 R/find_repeat_positions.R               |  8 ++----
 R/kmer_based_distance_matrix.R          |  6 ++---
 R/meshclustR.R                          |  7 ++---
 R/pivot_cluster_tbl_wider.R             |  5 ++--
 R/plot_abundance_per_sample.R           |  6 ++---
 R/plot_abundance_sums_per_sequence.R    |  2 +-
 R/plot_asv_nmds.R                       |  3 +--
 R/plot_cluster_dendrogram.R             |  2 +-
 R/plot_cluster_overview.R               |  2 +-
 R/plot_cluster_thresholds.R             |  2 +-
 R/plot_clusters.R                       |  4 +--
 R/plot_dendrogram.R                     |  6 +----
 R/plot_distance_matrix.R                |  4 +--
 R/plot_longest_reading_frame.R          |  4 +--
 R/plot_repeat_positions.R               |  4 +--
 R/plot_repeat_quantity.R                |  6 +++--
 R/plot_repeats.R                        |  3 +--
 R/plot_variants_per_sample.R            | 10 ++-----
 R/quantify_repeats.R                    |  1 +
 R/read_and_write_cluster_abundance.R    |  2 +-
 R/save_plot.R                           |  3 ++-
 R/similiarity_to_reference.R            | 12 ++-------
 R/str_pad_to_max.R                      |  2 +-
 R/subset_by_clusters.R                  |  2 +-
 R/subset_variant_table.R                |  4 +--
 R/test_clustering_thresholds.R          |  4 +--
 R/translate_and_count_stops.R           |  3 +--
 R/variant_classifier.R                  |  2 +-
 R/veganify_asvcounts.R                  |  1 +
 R/veganify_generic_wide_tbl.R           |  5 +---
 man/clean_seqtab.Rd                     | 31 ++++++++++++++++++++++
 man/find_contiguous_multi_repeats.Rd    |  2 --
 man/find_longest_orf.Rd                 |  3 ---
 man/meshclustR.Rd                       |  1 +
 man/plot_cluster_thresholds.Rd          |  1 -
 man/plot_repeat_positions.Rd            |  1 -
 man/plot_repeat_quantity.Rd             |  1 +
 man/read_and_write_cluster_abundance.Rd |  1 -
 man/save_plot.Rd                        |  1 +
 55 files changed, 145 insertions(+), 156 deletions(-)
 create mode 100644 man/clean_seqtab.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 66468af..8b0ac1f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,12 +1,35 @@
 Package: cAmpSeqR
 Type: Package
-Title: What the Package Does (Title Case)
+Title: Custom Processing and Visualization for Multi-Amplicon Sequencing Projects
 Version: 0.1.0
-Author: Who wrote it
-Maintainer: The package maintainer <yourself@somewhere.net>
-Description: More about what it does (maybe more than one line)
-    Use four spaces when indenting paragraphs within the Description.
-License: What license is it under?
+Authors@R: person(
+    "Simeon", "Lim Rossmann", 
+    email = "simeon.rossmann@nmbu.no", 
+    role = c("aut", "cre"), 
+    comment = c(ORCID = "0000-0003-0435-8221")
+    )
+Description: This package provides a range of functions to process
+    and visualize next-generation sequencing data from multi-amplicon
+    sequencing projects. It may work with a range of input variants
+    consisting of a sequence list and count table for these sequences but
+    was initially designed for data generated by the DADA2 package.
+    The functions range from very simple utilities and parsers to complex
+    plotting functions and are packaged as they are here for the 
+    convenience of the author and collaborators.
+    A commented pipeline incorporating most of these functions in the
+    intended sequence can be obtained from the author upon request.
+Imports:
+    Biostrings,
+    DECIPHER,
+    ggdendro,
+    ggtree,
+    tidyverse,
+    vegan,
+    viridis,
+    ape,
+    cowplot,
+    phangorn
+License: GPL (>= 3)
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
diff --git a/NAMESPACE b/NAMESPACE
index 8814070..a87f056 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
 export(align_and_generate_upgma)
 export(alignment_based_distance_matrix)
 export(calc_asv_nmds)
+export(clean_seqtab)
 export(cluster_longest_reading_frames)
 export(cluster_tbl_named)
 export(combine_cluster_plots_and_save)
@@ -18,6 +19,7 @@ export(find_repeat_positions)
 export(kmer_based_distance_matrix)
 export(meshclustR)
 export(pivot_cluster_tbl_wider)
+export(plot_abundance_per_sample)
 export(plot_abundance_sums_per_sequence)
 export(plot_asv_nmds)
 export(plot_cluster_dendrogram)
@@ -45,24 +47,11 @@ export(veganify_asvcounts)
 export(veganify_generic_wide_tbl)
 import(Biostrings)
 import(DECIPHER)
-import(dplyr)
+import(GenomicRanges)
+import(ape)
+import(cowplot)
 import(ggdendro)
-import(ggplot2)
 import(ggtree)
-import(magrittr)
-import(purrr)
-import(readr)
-import(scales)
-import(stats)
-import(stringr)
-import(tibble)
-import(tidyr)
+import(phangorn)
 import(tidyverse)
-import(utils)
-import(vegan)
 import(viridis)
-importFrom(GenomicRanges,GRanges)
-import(ape)
-import(cowplot)
-import(phangorn)
-
diff --git a/R/align_and_generate_upgma.R b/R/align_and_generate_upgma.R
index 872b741..6a48be9 100644
--- a/R/align_and_generate_upgma.R
+++ b/R/align_and_generate_upgma.R
@@ -5,14 +5,10 @@
 #' @param cluster The name of the cluster to generate the UPGMA tree from
 #' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences
 #' @return A UPGMA tree object
-#' @import utils
-#' @importFrom DECIPHER AlignSeqs
-#' @importFrom phangorn phyDat
-#' @importFrom phangorn dist.ml
-#' @importFrom phangorn upgma
+#' @import DECIPHER
+#' @import phangorn
+#' @import tidyverse
 #' @export
-
-# Define function to align a cluster of sequences and generate a UPGMA tree
 align_and_generate_upgma <- function(cluster, sequence_list) {
 
   # Use AlignSeqs function to align the sequences in the given cluster
diff --git a/R/calc_asv_nmds.R b/R/calc_asv_nmds.R
index 198ca42..0d43687 100644
--- a/R/calc_asv_nmds.R
+++ b/R/calc_asv_nmds.R
@@ -9,6 +9,7 @@
 #' @param ... Additional arguments passed to the `metaMDS` function from the vegan package
 #'
 #' @return A list object with results including NMDS results and NMDS tibble
+#' @import tidyverse
 #' @export
 #'
 #' @examples
diff --git a/R/clean_seqtab.R b/R/clean_seqtab.R
index 95dc41c..be41425 100644
--- a/R/clean_seqtab.R
+++ b/R/clean_seqtab.R
@@ -1,3 +1,20 @@
+#' Clean Sequence Table
+#'
+#' Cleans a typical sequence table as output by our internal DADA2 pipeline by
+#' transposing it, converting it to a tibble format, and adding a column of sequence names.
+#'
+#' @param file The path to the RDS file containing the sequence table.
+#' @param ASV_sequences A character vector specifying the ASV sequences.
+#' @param output A logical value indicating whether to output a CSV file.
+#' @return A tibble containing the cleaned sequence table.
+#'
+#' @import tidyverse
+#'
+#' @examples
+#' clean_seqtab()
+#' clean_seqtab(file = 'seqtab.rds', output = FALSE)
+#' clean_seqtab(ASV_sequences = asvstrings)
+#' @export
 clean_seqtab <- function(file = 'seqtab_nochim.rds',
                          ASV_sequences = asvstrings,   # Specify a default value for 'ASV_sequences' if none given
                          output = TRUE){    # Specify a default value for 'output' if none given
diff --git a/R/cluster_longest_reading_frames.R b/R/cluster_longest_reading_frames.R
index 088c1f2..0a578b5 100644
--- a/R/cluster_longest_reading_frames.R
+++ b/R/cluster_longest_reading_frames.R
@@ -12,11 +12,8 @@
 #' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9))
 #' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl)
 #'
-#' @import dplyr
 #' @import Biostrings
-#'
-
-# Define a function that clusters DNA sequences and determines their longest reading frame
+#' @import tidyverse
 cluster_longest_reading_frames <- function(
   clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered
   reading_frame_tbl = tbl) { # A variable that holds a table of reading frames
diff --git a/R/cluster_tbl_named.R b/R/cluster_tbl_named.R
index 75ac14e..e1e9f24 100644
--- a/R/cluster_tbl_named.R
+++ b/R/cluster_tbl_named.R
@@ -12,9 +12,8 @@
 #'   name, sequence number within the cluster, and cluster size.
 #'
 #' @import tidyverse
-#' @import Biostrings DNAStringSet
+#' @import Biostrings
 #' @export
-#'
 cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){
   # First: get names of each cluster
   cluster_names <- tibble(clus_name = names(clustered_sequences),
diff --git a/R/combine_cluster_plots_and_save.R b/R/combine_cluster_plots_and_save.R
index 7bab1d5..9f6c9e0 100644
--- a/R/combine_cluster_plots_and_save.R
+++ b/R/combine_cluster_plots_and_save.R
@@ -11,10 +11,8 @@
 #' @param w The width of the plot. Default is 'cm_width'.
 #' @param h The height of the plot. Default is 'cm_height'.
 #' @return combined plot
+#' @import tidyverse
 #' @export
-
-# plot list of three plots in three columns and save to "path" with filename
-# "Cluster_overview_'cluster'.pdf"
 combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path,
                                            w = cm_width, h = cm_height) {
   dir.create(out_path, showWarnings = FALSE)
diff --git a/R/count_clusters.R b/R/count_clusters.R
index a0884b4..238cbdf 100644
--- a/R/count_clusters.R
+++ b/R/count_clusters.R
@@ -18,11 +18,8 @@
 #' )
 #' count_clusters(clus_tbl_list)
 #'
-#' @import dplyr
-#' @import purrr
-#' @import tidyr
+#' @import tidyverse
 #' @export
-
 count_clusters <- function(clus_tbl_list){
 
    # Remove the non-numeric column "seqnames" from each cluster table in the list.
diff --git a/R/define_plateau.R b/R/define_plateau.R
index 98534eb..0c44275 100644
--- a/R/define_plateau.R
+++ b/R/define_plateau.R
@@ -12,9 +12,8 @@
 #' @examples
 #' define_plateau(cluster_counts = cluster_counts_df)
 #'
-#' @import dplyr
+#' @import tidyverse
 #' @export
-# The following code defines a function called "define_plateau"
 define_plateau <- function(cluster_counts){
   # "cluster_counts" is a tibble of cluster counts passed as a parameter to the function
   # "clus_plateau" filters the cluster counts by selecting only those with cluster_number greater than or equal to 2
diff --git a/R/dendrogram_hclust.R b/R/dendrogram_hclust.R
index 87848ca..61fa758 100644
--- a/R/dendrogram_hclust.R
+++ b/R/dendrogram_hclust.R
@@ -9,8 +9,7 @@
 #' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram.
 #'
 #' @import ggdendro
-#' @import vegan
-#' @import stats
+#' @import tidyverse
 #'
 #' @examples
 #' # Generate dendrogram with default parameters
@@ -21,7 +20,6 @@
 #' dendrogram_hclust(daisy_dist)
 #'
 #' @export
-
 dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) {
   require(ggdendro)
   set.seed(seed)
diff --git a/R/export_longest_reading_frame.R b/R/export_longest_reading_frame.R
index 64e3d6e..1535835 100644
--- a/R/export_longest_reading_frame.R
+++ b/R/export_longest_reading_frame.R
@@ -13,12 +13,9 @@
 #' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE)
 #'
 #' @import Biostrings
-#' @import dplyr
-#' @import tidyr
-#' @import utils
+#' @import tidyverse
 #'
 #' @export
-# Define a function that exports the longest reading frames
 export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table
                                           seqs = myDNAStringSet, # function argument for DNA sequence set
                                           outpath = path, # function argument for output file path
diff --git a/R/find_contiguous_multi_repeats.R b/R/find_contiguous_multi_repeats.R
index 7661b09..14e021a 100644
--- a/R/find_contiguous_multi_repeats.R
+++ b/R/find_contiguous_multi_repeats.R
@@ -23,11 +23,9 @@
 #'
 #' # Expected output: c(2, 1)
 #'
-#' @import stringr
+#' @import tidyverse
 #' @import Biostrings
 #'
-#' @keywords sequence, repeats
-#'
 find_contiguous_multi_repeats <- function(sequences = DNAStringSet,
                                           repeat_sequence = 'string',
                                           singlet_count = 100) {
diff --git a/R/find_longest_hrf.R b/R/find_longest_hrf.R
index c5d5da9..bd4ccab 100644
--- a/R/find_longest_hrf.R
+++ b/R/find_longest_hrf.R
@@ -10,7 +10,7 @@
 #' @examples
 #' find_longest_hrf(seqs)
 #'
-#' @import dplyr
+#' @import tidyverse
 #'
 #' @export
 find_longest_hrf <- function(seqs = DNAStringSet){
diff --git a/R/find_longest_orf.R b/R/find_longest_orf.R
index 1e5fdee..80f1dea 100644
--- a/R/find_longest_orf.R
+++ b/R/find_longest_orf.R
@@ -8,18 +8,14 @@
 #' @return A tibble containing the start and end positions, strand, and length of the longest ORF in each sequence.
 #'
 #' @import Biostrings
-#' @importFrom GenomicRanges GRanges
-#' @import tibble
-#' @import dplyr
-#' @export
+#' @import GenomicRanges
+#' @import tidyverse
 #'
 #' @examples
 #' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC"))
 #' find_longest_orf(seqs)
 #'
-#' @seealso \code{\link{findORFs}}
-#'
-
+#' @export
 find_longest_orf <- function(seqs = DNAStringSet) {
   # Find ORFs in the sequences, return longest ORF, and convert to a vector
   orfs <- findORFs(seqs, longestORF = TRUE, startCodon = startDefinition(6)) %>%
diff --git a/R/find_longest_reading_frames.R b/R/find_longest_reading_frames.R
index de34c89..36ce62a 100644
--- a/R/find_longest_reading_frames.R
+++ b/R/find_longest_reading_frames.R
@@ -10,10 +10,9 @@
 #' @return A data frame containing the longest reading frames for each sequence.
 #' The data frame includes the sequence names, reading frame, and the width of the reading frame.
 #'
-#' @import dplyr, tidyr
+#' @import tidyverse
 #'
 #' @export
-## Reading frame finder (longest orf or hrf)
 find_longest_reading_frames <- function(seqs = myDNAStringSet){
   orfs <- find_longest_orf(seqs)
   hrfs <- find_longest_hrf(seqs)
diff --git a/R/find_repeat_positions.R b/R/find_repeat_positions.R
index f5ab40a..9ac1953 100644
--- a/R/find_repeat_positions.R
+++ b/R/find_repeat_positions.R
@@ -6,18 +6,14 @@
 #' @param repeat_sequence A string specifying the repeat sequence to search for.
 #'
 #' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity.
-#'
-#' @import stringr
-#' @import dplyr
-#' @import tibble
-#'
-#' @export
+#' @import tidyverse
 #'
 #' @examples
 #' sequences <- DNAStringSet(c("AGTCAGT",
 #'                             "ACGTAGT",
 #'                             "AGTCGAT"))
 #' find_repeat_positions(sequences, "AGT")
+#' @export
 find_repeat_positions <- function(sequences = DNAStringSet, repeat_sequence = 'string'){
 
   repeat_positions <- str_locate_all(as.character(sequences), repeat_sequence)
diff --git a/R/kmer_based_distance_matrix.R b/R/kmer_based_distance_matrix.R
index 4a97370..99bd16e 100644
--- a/R/kmer_based_distance_matrix.R
+++ b/R/kmer_based_distance_matrix.R
@@ -6,12 +6,10 @@
 #'
 #' @return A distance matrix in bin format.
 #'
-#' @importFrom ape as.DNAbin
-#'
-#' @importFrom Biostrings DNAStringSet
+#' @import ape
+#' @import Biostrings
 #'
 #' @export
-#'
 kmer_based_distance_matrix <- function (seqs) {
   seqbins <- ape::as.DNAbin(seqs)
   as.matrix(kdistance(seqbins))
diff --git a/R/meshclustR.R b/R/meshclustR.R
index 6b2517c..954c712 100644
--- a/R/meshclustR.R
+++ b/R/meshclustR.R
@@ -2,6 +2,7 @@
 #'
 #' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
 #' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
+#' Meshclust has to be installed and executable via system2() to run this function.
 #'
 #' James, Benjamin T. et al. (2018),
 #' MeShClust: an intelligent tool for clustering DNA sequences.
@@ -17,12 +18,8 @@
 #' @examples
 #' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path)
 #' @import Biostrings
-#' @import readr
-#' @import magrittr
-#' @import stringr
-#' @importFrom tools file_path_sans_ext
+#' @import tidyverse
 #' @export
-
 meshclustR <- function(seqs = MyDNAStringSet,
                        meshclust_bin = meshclust,
                        filepath = path){
diff --git a/R/pivot_cluster_tbl_wider.R b/R/pivot_cluster_tbl_wider.R
index c428396..7ca1e0c 100644
--- a/R/pivot_cluster_tbl_wider.R
+++ b/R/pivot_cluster_tbl_wider.R
@@ -4,10 +4,9 @@
 #'
 #' @return A wide table of clusters with all sequences in each cluster listed
 #'
-#' @import dplyr
-#' @import tidyr
-#' @export
+#' @import tidyverse
 #'
+#' @export
 pivot_cluster_tbl_wider <- function(cluster_tbl) {
   # First: get names of each cluster
   cluster_names <- cluster_tbl %>%
diff --git a/R/plot_abundance_per_sample.R b/R/plot_abundance_per_sample.R
index 48dfee3..9f8f936 100644
--- a/R/plot_abundance_per_sample.R
+++ b/R/plot_abundance_per_sample.R
@@ -2,8 +2,9 @@
 #'
 #' @param tbl_of_abundance A table containing sequence abundance data.
 #' @return A plot displaying sequence abundance per sample.
-#' @import ggplot2
-#' @import scales
+#'
+#' @import tidyverse
+#'
 #' @examples
 #' tbl_of_abundance <- data.frame(ID = c("ASV1", "ASV2", "ASV3", "ASV4"),
 #'                                 Sample = c("Sample1", "Sample1", "Sample2", "Sample2"),
@@ -11,7 +12,6 @@
 #' plot_abundance_per_sample(tbl_of_abundance)
 #'
 #' @export
-#' Specifically intended to plot sequence abundance per sampled within plot_cluster_overview from tbl of sequence abundance
 plot_abundance_per_sample <- function(tbl_of_abundance) {
   ggplot(tbl_of_abundance, aes(y = ID, x = Sample, fill = count/1000)) +
     geom_tile() + theme(legend.position = 'none',
diff --git a/R/plot_abundance_sums_per_sequence.R b/R/plot_abundance_sums_per_sequence.R
index f871d70..e39ddfe 100644
--- a/R/plot_abundance_sums_per_sequence.R
+++ b/R/plot_abundance_sums_per_sequence.R
@@ -6,7 +6,7 @@
 #' tbl_of_sums <- data.frame(ID = c("ASV_001", "ASV_002", "ASV_003"), sum_count = c(1000, 2000, 3000))
 #' plot_abundance_sums_per_sequence(tbl_of_sums)
 #'
-#' @import ggplot2
+#' @import tidyverse
 #'
 #' @export
 plot_abundance_sums_per_sequence <- function(tbl_of_sums) {
diff --git a/R/plot_asv_nmds.R b/R/plot_asv_nmds.R
index 1b62a21..2c791e4 100644
--- a/R/plot_asv_nmds.R
+++ b/R/plot_asv_nmds.R
@@ -12,8 +12,7 @@
 #' # Assume nmds_df has been created through NMDS analysis
 #' plot_asv_nmds(asv_nmds = nmds_df, color_by = 'Sample', centroids = TRUE)
 #'
-#' @import ggplot2
-#' @importFrom stats aggregate
+#' @import tidyverse
 #'
 #' @export
 plot_asv_nmds <- function(asv_nmds = my_asv_nmds,
diff --git a/R/plot_cluster_dendrogram.R b/R/plot_cluster_dendrogram.R
index adc21f7..7d8e1a2 100644
--- a/R/plot_cluster_dendrogram.R
+++ b/R/plot_cluster_dendrogram.R
@@ -5,7 +5,7 @@
 #' @param upgma_tree An object of class 'phylo' representing the tree.
 #'
 #' @import ggtree
-#' @import ggplot2
+#' @import tidyverse
 #'
 #' @return A dendrogram plot.
 #'
diff --git a/R/plot_cluster_overview.R b/R/plot_cluster_overview.R
index 7adcc5c..3fcc3f0 100644
--- a/R/plot_cluster_overview.R
+++ b/R/plot_cluster_overview.R
@@ -11,7 +11,7 @@
 #' @return A list containing the plotted and saved cluster overview.
 #'
 #' @import Biostrings
-#' @import ggplot2
+#' @import tidyverse
 #' @import ggtree
 #'
 #' @examples
diff --git a/R/plot_cluster_thresholds.R b/R/plot_cluster_thresholds.R
index 81bf058..679a077 100644
--- a/R/plot_cluster_thresholds.R
+++ b/R/plot_cluster_thresholds.R
@@ -29,9 +29,9 @@
 #'
 #' plot_cluster_thresholds(clus_counts_tbl, plateaus)
 #'
+#' @import tidyverse
 #'
 #' @export
-
 plot_cluster_thresholds <- function(clus_counts_tbl, plateaus) {
   ggplot(clus_counts_tbl, aes(x = threshold, y = cluster_number)) +
     geom_rect(aes(xmin = plateaus['plateau_start'],
diff --git a/R/plot_clusters.R b/R/plot_clusters.R
index e8240bd..c6c3c57 100644
--- a/R/plot_clusters.R
+++ b/R/plot_clusters.R
@@ -7,14 +7,12 @@
 #'
 #' @return A plot with thresholds and plateaus highlighted.
 #'
-#' @import ggplot2
+#' @import tidyverse
 #'
 #' @examples
 #' plot_clusters(clus_counts_tbl, plateaus)
 #'
 #' @export
-
-# Plot thresholds
 plot_clusters <- function(clus_counts_tbl, plateaus) {
   ggplot(clus_counts_tbl, aes(x = threshold, y = cluster_number)) +
     geom_rect(aes(xmin = plateaus["plateau_start"],
diff --git a/R/plot_dendrogram.R b/R/plot_dendrogram.R
index 1318244..83ceb8a 100644
--- a/R/plot_dendrogram.R
+++ b/R/plot_dendrogram.R
@@ -5,13 +5,9 @@
 #' @param distclust_table A tibble containing the data for clustering and plotting.
 #' @return A plot of the dendrogram.
 #' @import ggdendro
-#' @import dplyr
-#' @import tidyr
-#' @import ggplot2
-#' @import tibble
+#' @import tidyverse
 #'
 #' @export
-#'
 plot_dendrogram <- function(distclust_table = mytibble){
   library(ggdendro)
 
diff --git a/R/plot_distance_matrix.R b/R/plot_distance_matrix.R
index 52b6814..26a3bd4 100644
--- a/R/plot_distance_matrix.R
+++ b/R/plot_distance_matrix.R
@@ -6,9 +6,7 @@
 #'
 #' @return A tile plot showing all pairwise distances of the distance matrix.
 #'
-#' @import dplyr
-#' @import ggplot2
-#' @import tidyr
+#' @import tidyverse
 #' @import viridis
 #'
 #' @keywords plotting
diff --git a/R/plot_longest_reading_frame.R b/R/plot_longest_reading_frame.R
index fbd4ea6..952f8f1 100644
--- a/R/plot_longest_reading_frame.R
+++ b/R/plot_longest_reading_frame.R
@@ -6,9 +6,7 @@
 #'
 #' @return A ggplot object displaying the longest reading frames for each sequence.
 #'
-#' @import ggplot2
-#' @import dplyr
-#' @import tidyr
+#' @import tidyverse
 #' @import viridis
 #'
 #' @examples
diff --git a/R/plot_repeat_positions.R b/R/plot_repeat_positions.R
index 58eba23..8bfe0f0 100644
--- a/R/plot_repeat_positions.R
+++ b/R/plot_repeat_positions.R
@@ -7,9 +7,7 @@
 #' @param repeat_positions A data frame with columns 'start', 'end', 'seqname',
 #' 'fragment', and 'plot_intensity'.
 #' @return A ggplot object representing the repeat positions plot.
-#' @import ggplot2
-#' @import viridis
-#' @keywords plot
+#' @import tidyverse
 #' @export
 plot_repeat_positions <- function(repeat_positions){
   legend_name <- ''
diff --git a/R/plot_repeat_quantity.R b/R/plot_repeat_quantity.R
index b38d49d..0c00746 100644
--- a/R/plot_repeat_quantity.R
+++ b/R/plot_repeat_quantity.R
@@ -6,13 +6,15 @@
 #'
 #' @return A ggplot object representing the plot.
 #'
-#' @export
-#'
 #' @examples
 #' quantified_repeats <- data.frame(seqname = c('A', 'B', 'C'),
 #'                                 repeat_count = c(1, 2, 3),
 #'                                 count_type = c('Type1', 'Type2', 'Type3'))
 #' plot_repeat_quantity(quantified_repeats)
+#'
+#' @import tidyverse
+#'
+#' @export
 plot_repeat_quantity <- function(quantified_repeats) {
   legend_name <- ''
 
diff --git a/R/plot_repeats.R b/R/plot_repeats.R
index d986420..10a9bdb 100644
--- a/R/plot_repeats.R
+++ b/R/plot_repeats.R
@@ -6,9 +6,8 @@
 #' @param repeat_sequence A character vector specifying the repeat sequence to be searched. Default is 'GATC'.
 #'
 #' @return A ggplot object displaying both positions and quantities of repeated sequences.
+#' @import tidyverse
 #' @export
-#' @name plot_repeats
-
 plot_repeats <- function(sequences = DNAStringSet(),
                          repeat_sequence = 'GATC') {
   clus_name <- names(sequences)[[1]]
diff --git a/R/plot_variants_per_sample.R b/R/plot_variants_per_sample.R
index 3dc413e..f8696f9 100644
--- a/R/plot_variants_per_sample.R
+++ b/R/plot_variants_per_sample.R
@@ -8,14 +8,8 @@
 #'
 #' @return A ggplot object representing the variants per sample plot
 #'
-#' @import ggplot2
-#' @import dplyr
-#' @importFrom cowplot plot_grid
-#' @importFrom cowplot get_legend
-#' @importFrom viridisLite viridis
-#' @importFrom viridisLite viridisLite
-#' @importFrom gridExtra unit
-#' @importFrom gridExtra margin
+#' @import tidyverse
+#' @import cowplot
 #'
 #' @examples
 #' plot_variants_per_sample()
diff --git a/R/quantify_repeats.R b/R/quantify_repeats.R
index c7e854a..1e08ed6 100644
--- a/R/quantify_repeats.R
+++ b/R/quantify_repeats.R
@@ -11,6 +11,7 @@
 #'   \item \code{singlets}: The number of occurrences of the repeat sequence as singlets in each sequence.
 #'   \item \code{largest_repeat_contig}: The number of contiguous repeats of the repeat sequence in each sequence.
 #' }
+#' @import tidyverse
 #' @export
 quantify_repeats <- function(sequences = DNAStringSet, repeat_sequence = 'string') {
   singlet_count <- str_count(as.character(sequences), repeat_sequence)
diff --git a/R/read_and_write_cluster_abundance.R b/R/read_and_write_cluster_abundance.R
index 8ac0b24..38a3746 100644
--- a/R/read_and_write_cluster_abundance.R
+++ b/R/read_and_write_cluster_abundance.R
@@ -14,7 +14,7 @@
 #' # Read and write cluster abundance
 #' read_and_write_cluster_abundance(cluster_sequence_list, reference_seqs, seqtab_nochim = 'seqtab_nochim.rds', outpath = path)
 #' }
-#'
+#' @import tidyverse
 #' @export
 read_and_write_cluster_abundance <- function(
     cluster_sequence_list = DNAStringSetList,
diff --git a/R/save_plot.R b/R/save_plot.R
index a8e314d..d99903a 100644
--- a/R/save_plot.R
+++ b/R/save_plot.R
@@ -14,10 +14,11 @@
 #'
 #' @return None
 #'
+#' @import tidyverse
 #' @examples
 #' save_plot(ggplot(mtcars, aes(x = mpg, y = disp))
+#'
 #' @export
-
 save_plot <- function(
     pl, filetype = '.pdf', plot_name = 'my_plot', overwrite=FALSE, outp = outp){
   dir.create(outp, showWarnings = FALSE)
diff --git a/R/similiarity_to_reference.R b/R/similiarity_to_reference.R
index f6af472..3e98b84 100644
--- a/R/similiarity_to_reference.R
+++ b/R/similiarity_to_reference.R
@@ -5,14 +5,9 @@
 #' @param seqs A DNAStringSet object containing the sequences.
 #' @param ncores An integer specifying the number of cores to use for parallel processing. Defaults to 1.
 #'
+#' @import tidyverse
+#' @import Biostrings
 #' @export
-#' @importFrom DNAtools alignment_based_distance_matrix
-#' @importFrom dplyr as_tibble filter mutate pivot_longer select
-#' @importFrom magrittr %>%
-#' @importFrom glue if_else
-#' @importFrom stringr str_c
-#' @importFrom Biostrings DNAStringSet
-
 similiarity_to_reference <- function (seqs = DNAStringSet,
                                       ncores = 1) {
   if(length(seqs) > 1){
@@ -26,6 +21,3 @@ similiarity_to_reference <- function (seqs = DNAStringSet,
       select(-seq_var, -query)
     }
 }
-
-#' @rdname similiarity_to_reference
-#' @keywords internal
diff --git a/R/str_pad_to_max.R b/R/str_pad_to_max.R
index c222201..67c6ae8 100644
--- a/R/str_pad_to_max.R
+++ b/R/str_pad_to_max.R
@@ -10,7 +10,7 @@
 #' @examples
 #' str_pad_to_max(c("hello", "world", "foo", "bar", "x"))
 #'
-#' @importFrom stringr str_pad str_length
+#' @import tidyverse
 #'
 #' @export
 str_pad_to_max <- function(vec = c(), ...){
diff --git a/R/subset_by_clusters.R b/R/subset_by_clusters.R
index 6b87a5a..48c9c8e 100644
--- a/R/subset_by_clusters.R
+++ b/R/subset_by_clusters.R
@@ -6,8 +6,8 @@
 #' @param cluster_tbl A data frame or tibble containing the cluster assignments. It should have two columns, 'cluster' and 'seqnames', where 'cluster' contains the cluster numbers and 'seqnames' contains the corresponding sequence names.
 #' @param save_to_file Logical value indicating whether to save the resulting sequences to separate files for each cluster.
 #' @return A list of sequence objects, where each list element corresponds to a cluster and contains the sequences in that cluster
+#' @import tidyverse
 #' @export
-
 subset_by_clusters <- function(seqs, cluster_tbl, save_to_file = TRUE){
   cluster_seqs <- cluster_tbl %>%
     select(cluster, seqnames) %>%
diff --git a/R/subset_variant_table.R b/R/subset_variant_table.R
index d0ca548..344fccd 100644
--- a/R/subset_variant_table.R
+++ b/R/subset_variant_table.R
@@ -9,13 +9,11 @@
 #'
 #' @return A tibble containing the subsetted variant table.
 #'
-#' @import dplyr
-#' @importFrom stats setdiff
+#' @import tidyverse
 #'
 #' @examples
 #' subset_variant_table(mytibble, c("cluster1", "cluster2"), c("variant1", "variant2"), c("sample1", "sample2"))
 #' @export
-
 subset_variant_table <- function(variant_classified_table = mytibble,
                                  exclude_clusters = c(),
                                  exclude_variants = c(),
diff --git a/R/test_clustering_thresholds.R b/R/test_clustering_thresholds.R
index 787e47c..56ddefc 100644
--- a/R/test_clustering_thresholds.R
+++ b/R/test_clustering_thresholds.R
@@ -11,9 +11,7 @@
 #' @return A list of clustering results, where each element in the list corresponds to a specific threshold value.
 #'
 #' @import DECIPHER
-#' @import tibble
-#' @import dplyr
-#' @import tidyr
+#' @import tidyverse
 #'
 #' @examples
 #' # Create a DNAStringSet object
diff --git a/R/translate_and_count_stops.R b/R/translate_and_count_stops.R
index 9380240..cd6de20 100644
--- a/R/translate_and_count_stops.R
+++ b/R/translate_and_count_stops.R
@@ -18,8 +18,7 @@
 #' translate_and_count_stops(seqs)
 #'
 #' @import Biostrings
-#' @import dplyr
-#' @import stringr
+#' @import tidyverse
 #' @export
 translate_and_count_stops <- function(seqs = DNAStringSet) {
   # Make reading frames and translate to protein
diff --git a/R/variant_classifier.R b/R/variant_classifier.R
index 5700fca..c0e2353 100644
--- a/R/variant_classifier.R
+++ b/R/variant_classifier.R
@@ -7,9 +7,9 @@
 #' @param reference_informed Logical value indicating whether the classification should be reference informed (default: FALSE)
 #'
 #' @return A modified master table with variant classifications
+#' @import tidyverse
 #' @export
 
-# Define a function called variant_classifier with two arguments: seqtab_file and clustered_sequences
 variant_classifier <- function(
     seqtab_file = file.path(path, 'seqtab_nochim.rds'),
     clustered_sequences = myDNAStringSetList){
diff --git a/R/veganify_asvcounts.R b/R/veganify_asvcounts.R
index 2070bd4..5d513fd 100644
--- a/R/veganify_asvcounts.R
+++ b/R/veganify_asvcounts.R
@@ -5,6 +5,7 @@
 #' @param cleaned_seqtab A cleaned sequence table.
 #'
 #' @return A vegan formatted count matrix.
+#' @import tidyverse
 #' @export
 veganify_asvcounts <- function(cleaned_seqtab = my_cleaned_seqtab){
   out <- cleaned_seqtab %>%
diff --git a/R/veganify_generic_wide_tbl.R b/R/veganify_generic_wide_tbl.R
index d08fcd7..151c94f 100644
--- a/R/veganify_generic_wide_tbl.R
+++ b/R/veganify_generic_wide_tbl.R
@@ -5,10 +5,7 @@
 #' @param data A wide tibble with rownames in the first column and input data for vegdist in all other columns.
 #'
 #' @return A data.frame that is compatible with the vegdist function.
-#'
-#' @importFrom dplyr %>%
-#' @importFrom dplyr replace
-#' @importFrom stats as.data.frame
+#' @import tidyverse
 #'
 #' @examples
 #' library(tibble)
diff --git a/man/clean_seqtab.Rd b/man/clean_seqtab.Rd
new file mode 100644
index 0000000..dc56f19
--- /dev/null
+++ b/man/clean_seqtab.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clean_seqtab.R
+\name{clean_seqtab}
+\alias{clean_seqtab}
+\title{Clean Sequence Table}
+\usage{
+clean_seqtab(
+  file = "seqtab_nochim.rds",
+  ASV_sequences = asvstrings,
+  output = TRUE
+)
+}
+\arguments{
+\item{file}{The path to the RDS file containing the sequence table.}
+
+\item{ASV_sequences}{A character vector specifying the ASV sequences.}
+
+\item{output}{A logical value indicating whether to output a CSV file.}
+}
+\value{
+A tibble containing the cleaned sequence table.
+}
+\description{
+Cleans a typical sequence table as output by our internal DADA2 pipeline by
+transposing it, converting it to a tibble format, and adding a column of sequence names.
+}
+\examples{
+clean_seqtab()
+clean_seqtab(file = 'seqtab.rds', output = FALSE)
+clean_seqtab(ASV_sequences = asvstrings)
+}
diff --git a/man/find_contiguous_multi_repeats.Rd b/man/find_contiguous_multi_repeats.Rd
index b5cb905..c6be0c8 100644
--- a/man/find_contiguous_multi_repeats.Rd
+++ b/man/find_contiguous_multi_repeats.Rd
@@ -37,5 +37,3 @@ find_contiguous_multi_repeats(sequences, 'AT', 3)
 # Expected output: c(2, 1)
 
 }
-\keyword{repeats}
-\keyword{sequence,}
diff --git a/man/find_longest_orf.Rd b/man/find_longest_orf.Rd
index a459d74..6d88245 100644
--- a/man/find_longest_orf.Rd
+++ b/man/find_longest_orf.Rd
@@ -21,6 +21,3 @@ seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC"))
 find_longest_orf(seqs)
 
 }
-\seealso{
-\code{\link{findORFs}}
-}
diff --git a/man/meshclustR.Rd b/man/meshclustR.Rd
index ae60c36..f4d6efe 100644
--- a/man/meshclustR.Rd
+++ b/man/meshclustR.Rd
@@ -21,6 +21,7 @@ A data frame with information regarding the clustering analysis.
 \description{
 This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
 The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
+Meshclust has to be installed and executable via system2() to run this function.
 }
 \details{
 James, Benjamin T. et al. (2018),
diff --git a/man/plot_cluster_thresholds.Rd b/man/plot_cluster_thresholds.Rd
index d1996f9..b5d9406 100644
--- a/man/plot_cluster_thresholds.Rd
+++ b/man/plot_cluster_thresholds.Rd
@@ -38,5 +38,4 @@ plateaus <- c(
 
 plot_cluster_thresholds(clus_counts_tbl, plateaus)
 
-
 }
diff --git a/man/plot_repeat_positions.Rd b/man/plot_repeat_positions.Rd
index d21c6e3..8430f75 100644
--- a/man/plot_repeat_positions.Rd
+++ b/man/plot_repeat_positions.Rd
@@ -18,4 +18,3 @@ This function plots repeat positions on a sequence. It takes a data frame
 as input which should have columns 'start', 'end', 'seqname', 'fragment',
 and 'plot_intensity'. It uses ggplot2 to create the plot.
 }
-\keyword{plot}
diff --git a/man/plot_repeat_quantity.Rd b/man/plot_repeat_quantity.Rd
index a7b944f..9aeb8e3 100644
--- a/man/plot_repeat_quantity.Rd
+++ b/man/plot_repeat_quantity.Rd
@@ -20,4 +20,5 @@ quantified_repeats <- data.frame(seqname = c('A', 'B', 'C'),
                                 repeat_count = c(1, 2, 3),
                                 count_type = c('Type1', 'Type2', 'Type3'))
 plot_repeat_quantity(quantified_repeats)
+
 }
diff --git a/man/read_and_write_cluster_abundance.Rd b/man/read_and_write_cluster_abundance.Rd
index 465240e..9374ed2 100644
--- a/man/read_and_write_cluster_abundance.Rd
+++ b/man/read_and_write_cluster_abundance.Rd
@@ -31,5 +31,4 @@ This function reads a sequence table, calculates the abundance of sequences belo
 # Read and write cluster abundance
 read_and_write_cluster_abundance(cluster_sequence_list, reference_seqs, seqtab_nochim = 'seqtab_nochim.rds', outpath = path)
 }
-
 }
diff --git a/man/save_plot.Rd b/man/save_plot.Rd
index c6ea917..972f7b4 100644
--- a/man/save_plot.Rd
+++ b/man/save_plot.Rd
@@ -39,4 +39,5 @@ Saves a ggplot object as a file with the specified file type, plot name, and out
 }
 \examples{
 save_plot(ggplot(mtcars, aes(x = mpg, y = disp))
+
 }
-- 
GitLab