diff --git a/DESCRIPTION b/DESCRIPTION index 66468af24cfb3a58894d952674ca16934f23acd7..8b0ac1f26d86585885449708ea47f66458758678 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,35 @@ Package: cAmpSeqR Type: Package -Title: What the Package Does (Title Case) +Title: Custom Processing and Visualization for Multi-Amplicon Sequencing Projects Version: 0.1.0 -Author: Who wrote it -Maintainer: The package maintainer <yourself@somewhere.net> -Description: More about what it does (maybe more than one line) - Use four spaces when indenting paragraphs within the Description. -License: What license is it under? +Authors@R: person( + "Simeon", "Lim Rossmann", + email = "simeon.rossmann@nmbu.no", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0003-0435-8221") + ) +Description: This package provides a range of functions to process + and visualize next-generation sequencing data from multi-amplicon + sequencing projects. It may work with a range of input variants + consisting of a sequence list and count table for these sequences but + was initially designed for data generated by the DADA2 package. + The functions range from very simple utilities and parsers to complex + plotting functions and are packaged as they are here for the + convenience of the author and collaborators. + A commented pipeline incorporating most of these functions in the + intended sequence can be obtained from the author upon request. 
+Imports: + Biostrings, + DECIPHER, + ggdendro, + ggtree, + tidyverse, + vegan, + viridis, + ape, + cowplot, + phangorn +License: GPL (>= 3) Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE index 88140709df0a12c4930a70dc85c826ac9a070e26..a87f056f501f1c60e45a81cd45614b666e44e587 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(align_and_generate_upgma) export(alignment_based_distance_matrix) export(calc_asv_nmds) +export(clean_seqtab) export(cluster_longest_reading_frames) export(cluster_tbl_named) export(combine_cluster_plots_and_save) @@ -18,6 +19,7 @@ export(find_repeat_positions) export(kmer_based_distance_matrix) export(meshclustR) export(pivot_cluster_tbl_wider) +export(plot_abundance_per_sample) export(plot_abundance_sums_per_sequence) export(plot_asv_nmds) export(plot_cluster_dendrogram) @@ -45,24 +47,11 @@ export(veganify_asvcounts) export(veganify_generic_wide_tbl) import(Biostrings) import(DECIPHER) -import(dplyr) +import(GenomicRanges) +import(ape) +import(cowplot) import(ggdendro) -import(ggplot2) import(ggtree) -import(magrittr) -import(purrr) -import(readr) -import(scales) -import(stats) -import(stringr) -import(tibble) -import(tidyr) +import(phangorn) import(tidyverse) -import(utils) -import(vegan) import(viridis) -importFrom(GenomicRanges,GRanges) -import(ape) -import(cowplot) -import(phangorn) - diff --git a/R/align_and_generate_upgma.R b/R/align_and_generate_upgma.R index 872b7410476a7f45bb5ee4bd0a0c7b427502076d..6a48be966f8327e80dc7f511dc71b7e8d5d8801f 100644 --- a/R/align_and_generate_upgma.R +++ b/R/align_and_generate_upgma.R @@ -5,14 +5,10 @@ #' @param cluster The name of the cluster to generate the UPGMA tree from #' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences #' @return A UPGMA tree object -#' @import utils -#' @importFrom DECIPHER AlignSeqs -#' @importFrom phangorn phyDat -#' @importFrom 
phangorn dist.ml -#' @importFrom phangorn upgma +#' @import DECIPHER +#' @import phangorn +#' @import tidyverse #' @export - -# Define function to align a cluster of sequences and generate a UPGMA tree align_and_generate_upgma <- function(cluster, sequence_list) { # Use AlignSeqs function to align the sequences in the given cluster diff --git a/R/calc_asv_nmds.R b/R/calc_asv_nmds.R index 198ca427a8b6033acd83357f4673ddf16772b60a..0d43687ed8be403eaaac5c7bbe3e388678bee2c0 100644 --- a/R/calc_asv_nmds.R +++ b/R/calc_asv_nmds.R @@ -9,6 +9,7 @@ #' @param ... Additional arguments passed to the `metaMDS` function from the vegan package #' #' @return A list object with results including NMDS results and NMDS tibble +#' @import tidyverse #' @export #' #' @examples diff --git a/R/clean_seqtab.R b/R/clean_seqtab.R index 95dc41ce98421dd24660def9d4044f887320f66b..be414252a25a05ec674016e2b866eb479f04b678 100644 --- a/R/clean_seqtab.R +++ b/R/clean_seqtab.R @@ -1,3 +1,20 @@ +#' Clean Sequence Table +#' +#' Cleans a typical sequence table as output by our internal DADA2 pipeline by +#' transposing it, converting it to a tibble format, and adding a column of sequence names. +#' +#' @param file The path to the RDS file containing the sequence table. +#' @param ASV_sequences A character vector specifying the ASV sequences. +#' @param output A logical value indicating whether to output a CSV file. +#' @return A tibble containing the cleaned sequence table. 
+#' +#' @import tidyverse +#' +#' @examples +#' clean_seqtab() +#' clean_seqtab(file = 'seqtab.rds', output = FALSE) +#' clean_seqtab(ASV_sequences = asvstrings) +#' @export clean_seqtab <- function(file = 'seqtab_nochim.rds', ASV_sequences = asvstrings, # Specify a default value for 'ASV_sequences' if none given output = TRUE){ # Specify a default value for 'output' if none given diff --git a/R/cluster_longest_reading_frames.R b/R/cluster_longest_reading_frames.R index 088c1f2f71ca3dc558c33ed0b6782e8c44dbbcaf..0a578b548d3fd9b039699b93bafc5a2b36466059 100644 --- a/R/cluster_longest_reading_frames.R +++ b/R/cluster_longest_reading_frames.R @@ -12,11 +12,8 @@ #' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9)) #' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl) #' -#' @import dplyr #' @import Biostrings -#' - -# Define a function that clusters DNA sequences and determines their longest reading frame +#' @import tidyverse cluster_longest_reading_frames <- function( clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered reading_frame_tbl = tbl) { # A variable that holds a table of reading frames diff --git a/R/cluster_tbl_named.R b/R/cluster_tbl_named.R index 75ac14e59c04e233b5ec04104359662269a7e12d..e1e9f241fe3753688f5de5434328eeaa3f76a701 100644 --- a/R/cluster_tbl_named.R +++ b/R/cluster_tbl_named.R @@ -12,9 +12,8 @@ #' name, sequence number within the cluster, and cluster size. 
#' #' @import tidyverse -#' @import Biostrings DNAStringSet +#' @import Biostrings #' @export -#' cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){ # First: get names of each cluster cluster_names <- tibble(clus_name = names(clustered_sequences), diff --git a/R/combine_cluster_plots_and_save.R b/R/combine_cluster_plots_and_save.R index 7bab1d56284155e80c12c3b04096acb1feca25fa..9f6c9e051d8cfba9da5c7643568f03ee663490e3 100644 --- a/R/combine_cluster_plots_and_save.R +++ b/R/combine_cluster_plots_and_save.R @@ -11,10 +11,8 @@ #' @param w The width of the plot. Default is 'cm_width'. #' @param h The height of the plot. Default is 'cm_height'. #' @return combined plot +#' @import tidyverse #' @export - -# plot list of three plots in three columns and save to "path" with filename -# "Cluster_overview_'cluster'.pdf" combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path, w = cm_width, h = cm_height) { dir.create(out_path, showWarnings = FALSE) diff --git a/R/count_clusters.R b/R/count_clusters.R index a0884b4cb2b6277e75e82cf114049402f73a82de..238cbdfdbfc07cbc4dde6d2072bfca69cf9c12f0 100644 --- a/R/count_clusters.R +++ b/R/count_clusters.R @@ -18,11 +18,8 @@ #' ) #' count_clusters(clus_tbl_list) #' -#' @import dplyr -#' @import purrr -#' @import tidyr +#' @import tidyverse #' @export - count_clusters <- function(clus_tbl_list){ # Remove the non-numeric column "seqnames" from each cluster table in the list. 
diff --git a/R/define_plateau.R b/R/define_plateau.R index 98534ebc203c36698c767a2613058021669a6f16..0c4427552ca299b544138ca5782a59222bd73289 100644 --- a/R/define_plateau.R +++ b/R/define_plateau.R @@ -12,9 +12,8 @@ #' @examples #' define_plateau(cluster_counts = cluster_counts_df) #' -#' @import dplyr +#' @import tidyverse #' @export -# The following code defines a function called "define_plateau" define_plateau <- function(cluster_counts){ # "cluster_counts" is a tibble of cluster counts passed as a parameter to the function # "clus_plateau" filters the cluster counts by selecting only those with cluster_number greater than or equal to 2 diff --git a/R/dendrogram_hclust.R b/R/dendrogram_hclust.R index 87848ca1e628c653892c936a62ce21e0cbf3e911..61fa75831c766a427e6041650b962ad3e95ce261 100644 --- a/R/dendrogram_hclust.R +++ b/R/dendrogram_hclust.R @@ -9,8 +9,7 @@ #' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram. #' #' @import ggdendro -#' @import vegan -#' @import stats +#' @import tidyverse #' #' @examples #' # Generate dendrogram with default parameters @@ -21,7 +20,6 @@ #' dendrogram_hclust(daisy_dist) #' #' @export - dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) 
{ require(ggdendro) set.seed(seed) diff --git a/R/export_longest_reading_frame.R b/R/export_longest_reading_frame.R index 64e3d6e975b7e575254609bc19af35775f30d061..1535835c94fcd32c496530986738a7e7ff286c7d 100644 --- a/R/export_longest_reading_frame.R +++ b/R/export_longest_reading_frame.R @@ -13,12 +13,9 @@ #' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE) #' #' @import Biostrings -#' @import dplyr -#' @import tidyr -#' @import utils +#' @import tidyverse #' #' @export -# Define a function that exports the longest reading frames export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table seqs = myDNAStringSet, # function argument for DNA sequence set outpath = path, # function argument for output file path diff --git a/R/find_contiguous_multi_repeats.R b/R/find_contiguous_multi_repeats.R index 7661b0930a33640f588e808cc54d950a7c7e1cc8..14e021aebe833eaa44a702b48e4b3f7531baa7df 100644 --- a/R/find_contiguous_multi_repeats.R +++ b/R/find_contiguous_multi_repeats.R @@ -23,11 +23,9 @@ #' #' # Expected output: c(2, 1) #' -#' @import stringr +#' @import tidyverse #' @import Biostrings #' -#' @keywords sequence, repeats -#' find_contiguous_multi_repeats <- function(sequences = DNAStringSet, repeat_sequence = 'string', singlet_count = 100) { diff --git a/R/find_longest_hrf.R b/R/find_longest_hrf.R index c5d5da9ea72115114c8fcd983e48689165f60a1a..bd4ccab4092960672e4e04e6d81ae4c87c21477e 100644 --- a/R/find_longest_hrf.R +++ b/R/find_longest_hrf.R @@ -10,7 +10,7 @@ #' @examples #' find_longest_hrf(seqs) #' -#' @import dplyr +#' @import tidyverse #' #' @export find_longest_hrf <- function(seqs = DNAStringSet){ diff --git a/R/find_longest_orf.R b/R/find_longest_orf.R index 1e5fdee18d3e94b0356959ffd8376e8884cb0ca3..80f1deadd2694a07fb8a8a1c77d49a236960bf33 100644 --- a/R/find_longest_orf.R +++ b/R/find_longest_orf.R @@ -8,18 +8,14 @@ #' @return A tibble containing the 
start and end positions, strand, and length of the longest ORF in each sequence. #' #' @import Biostrings -#' @importFrom GenomicRanges GRanges -#' @import tibble -#' @import dplyr -#' @export +#' @import GenomicRanges +#' @import tidyverse #' #' @examples #' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC")) #' find_longest_orf(seqs) #' -#' @seealso \code{\link{findORFs}} -#' - +#' @export find_longest_orf <- function(seqs = DNAStringSet) { # Find ORFs in the sequences, return longest ORF, and convert to a vector orfs <- findORFs(seqs, longestORF = TRUE, startCodon = startDefinition(6)) %>% diff --git a/R/find_longest_reading_frames.R b/R/find_longest_reading_frames.R index de34c89a96f7ebf61372d27cb87c12b943c66431..36ce62adba1dd1fb052e458734644cee2a45349d 100644 --- a/R/find_longest_reading_frames.R +++ b/R/find_longest_reading_frames.R @@ -10,10 +10,9 @@ #' @return A data frame containing the longest reading frames for each sequence. #' The data frame includes the sequence names, reading frame, and the width of the reading frame. #' -#' @import dplyr, tidyr +#' @import tidyverse #' #' @export -## Reading frame finder (longest orf or hrf) find_longest_reading_frames <- function(seqs = myDNAStringSet){ orfs <- find_longest_orf(seqs) hrfs <- find_longest_hrf(seqs) diff --git a/R/find_repeat_positions.R b/R/find_repeat_positions.R index f5ab40a6146ce00013c95e0b3681674861387358..9ac195325318460afb74961351c559a4e5532b88 100644 --- a/R/find_repeat_positions.R +++ b/R/find_repeat_positions.R @@ -6,18 +6,14 @@ #' @param repeat_sequence A string specifying the repeat sequence to search for. #' #' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity. 
-#' -#' @import stringr -#' @import dplyr -#' @import tibble -#' -#' @export +#' @import tidyverse #' #' @examples #' sequences <- DNAStringSet(c("AGTCAGT", #' "ACGTAGT", #' "AGTCGAT")) #' find_repeat_positions(sequences, "AGT") +#' @export find_repeat_positions <- function(sequences = DNAStringSet, repeat_sequence = 'string'){ repeat_positions <- str_locate_all(as.character(sequences), repeat_sequence) diff --git a/R/kmer_based_distance_matrix.R b/R/kmer_based_distance_matrix.R index 4a9737074cba1a5174db678daa62e5f030ffd7a7..99bd16eda84f4d8f2fca9b425fdd3f36de928c05 100644 --- a/R/kmer_based_distance_matrix.R +++ b/R/kmer_based_distance_matrix.R @@ -6,12 +6,10 @@ #' #' @return A distance matrix in bin format. #' -#' @importFrom ape as.DNAbin -#' -#' @importFrom Biostrings DNAStringSet +#' @import ape +#' @import Biostrings #' #' @export -#' kmer_based_distance_matrix <- function (seqs) { seqbins <- ape::as.DNAbin(seqs) as.matrix(kdistance(seqbins)) diff --git a/R/meshclustR.R b/R/meshclustR.R index 6b2517cbffd117435c26ffed86f964ecdfe96f2b..954c71211419bc46bde91dc66a7b27460845e8ea 100644 --- a/R/meshclustR.R +++ b/R/meshclustR.R @@ -2,6 +2,7 @@ #' #' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences. #' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool. +#' Meshclust has to be installed and executable via system2() to run this function. #' #' James, Benjamin T. et al. (2018), #' MeShClust: an intelligent tool for clustering DNA sequences. 
@@ -17,12 +18,8 @@ #' @examples #' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path) #' @import Biostrings -#' @import readr -#' @import magrittr -#' @import stringr -#' @importFrom tools file_path_sans_ext +#' @import tidyverse #' @export - meshclustR <- function(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path){ diff --git a/R/pivot_cluster_tbl_wider.R b/R/pivot_cluster_tbl_wider.R index c42839631f180074205ed76eda6af93ba9a06cca..7ca1e0c1472ea877a7adb1c7a98cea56973a8a75 100644 --- a/R/pivot_cluster_tbl_wider.R +++ b/R/pivot_cluster_tbl_wider.R @@ -4,10 +4,9 @@ #' #' @return A wide table of clusters with all sequences in each cluster listed #' -#' @import dplyr -#' @import tidyr -#' @export +#' @import tidyverse #' +#' @export pivot_cluster_tbl_wider <- function(cluster_tbl) { # First: get names of each cluster cluster_names <- cluster_tbl %>% diff --git a/R/plot_abundance_per_sample.R b/R/plot_abundance_per_sample.R index 48dfee3fe230590b13ace238db1558e7717a4fa1..9f8f9363d1508eb1e45cfd6e1307c06cb1311df4 100644 --- a/R/plot_abundance_per_sample.R +++ b/R/plot_abundance_per_sample.R @@ -2,8 +2,9 @@ #' #' @param tbl_of_abundance A table containing sequence abundance data. #' @return A plot displaying sequence abundance per sample. 
-#' @import ggplot2 -#' @import scales +#' +#' @import tidyverse +#' #' @examples #' tbl_of_abundance <- data.frame(ID = c("ASV1", "ASV2", "ASV3", "ASV4"), #' Sample = c("Sample1", "Sample1", "Sample2", "Sample2"), @@ -11,7 +12,6 @@ #' plot_abundance_per_sample(tbl_of_abundance) #' #' @export -#' Specifically intended to plot sequence abundance per sampled within plot_cluster_overview from tbl of sequence abundance plot_abundance_per_sample <- function(tbl_of_abundance) { ggplot(tbl_of_abundance, aes(y = ID, x = Sample, fill = count/1000)) + geom_tile() + theme(legend.position = 'none', diff --git a/R/plot_abundance_sums_per_sequence.R b/R/plot_abundance_sums_per_sequence.R index f871d709486cd467511d6a4307b79ca4508c6c66..e39ddfeb55997ce2214c6005abe0091eee6e0896 100644 --- a/R/plot_abundance_sums_per_sequence.R +++ b/R/plot_abundance_sums_per_sequence.R @@ -6,7 +6,7 @@ #' tbl_of_sums <- data.frame(ID = c("ASV_001", "ASV_002", "ASV_003"), sum_count = c(1000, 2000, 3000)) #' plot_abundance_sums_per_sequence(tbl_of_sums) #' -#' @import ggplot2 +#' @import tidyverse #' #' @export plot_abundance_sums_per_sequence <- function(tbl_of_sums) { diff --git a/R/plot_asv_nmds.R b/R/plot_asv_nmds.R index 1b62a215dfe6cce35ef5ff4bc60cb3dc05b79ea8..2c791e4ce850473d8840b083613c2e741e40726a 100644 --- a/R/plot_asv_nmds.R +++ b/R/plot_asv_nmds.R @@ -12,8 +12,7 @@ #' # Assume nmds_df has been created through NMDS analysis #' plot_asv_nmds(asv_nmds = nmds_df, color_by = 'Sample', centroids = TRUE) #' -#' @import ggplot2 -#' @importFrom stats aggregate +#' @import tidyverse #' #' @export plot_asv_nmds <- function(asv_nmds = my_asv_nmds, diff --git a/R/plot_cluster_dendrogram.R b/R/plot_cluster_dendrogram.R index adc21f717188200cc58bf81342d8f577cac994e4..7d8e1a26e366243e01cc7c5384d18902473d0f9d 100644 --- a/R/plot_cluster_dendrogram.R +++ b/R/plot_cluster_dendrogram.R @@ -5,7 +5,7 @@ #' @param upgma_tree An object of class 'phylo' representing the tree. 
#' #' @import ggtree -#' @import ggplot2 +#' @import tidyverse #' #' @return A dendrogram plot. #' diff --git a/R/plot_cluster_overview.R b/R/plot_cluster_overview.R index 7adcc5cc8a494136fa28b0df21fa748a886a8fa8..3fcc3f012f013b4a27793f6af40bb21a9f85b4e8 100644 --- a/R/plot_cluster_overview.R +++ b/R/plot_cluster_overview.R @@ -11,7 +11,7 @@ #' @return A list containing the plotted and saved cluster overview. #' #' @import Biostrings -#' @import ggplot2 +#' @import tidyverse #' @import ggtree #' #' @examples diff --git a/R/plot_cluster_thresholds.R b/R/plot_cluster_thresholds.R index 81bf0587210b65b3ae288f8ace6c8a01f1c5a9eb..679a077d344a53a1c671d68f7d80a303a9412381 100644 --- a/R/plot_cluster_thresholds.R +++ b/R/plot_cluster_thresholds.R @@ -29,9 +29,9 @@ #' #' plot_cluster_thresholds(clus_counts_tbl, plateaus) #' +#' @import tidyverse #' #' @export - plot_cluster_thresholds <- function(clus_counts_tbl, plateaus) { ggplot(clus_counts_tbl, aes(x = threshold, y = cluster_number)) + geom_rect(aes(xmin = plateaus['plateau_start'], diff --git a/R/plot_clusters.R b/R/plot_clusters.R index e8240bd121c6beae76b3f28a7c1207c7f1160210..c6c3c577d7aa643ca870118581073129035e3763 100644 --- a/R/plot_clusters.R +++ b/R/plot_clusters.R @@ -7,14 +7,12 @@ #' #' @return A plot with thresholds and plateaus highlighted. #' -#' @import ggplot2 +#' @import tidyverse #' #' @examples #' plot_clusters(clus_counts_tbl, plateaus) #' #' @export - -# Plot thresholds plot_clusters <- function(clus_counts_tbl, plateaus) { ggplot(clus_counts_tbl, aes(x = threshold, y = cluster_number)) + geom_rect(aes(xmin = plateaus["plateau_start"], diff --git a/R/plot_dendrogram.R b/R/plot_dendrogram.R index 1318244e90b689ddeb47caf954765553e5d19cff..83ceb8a360a2873fee931110a58df5a360dc9cda 100644 --- a/R/plot_dendrogram.R +++ b/R/plot_dendrogram.R @@ -5,13 +5,9 @@ #' @param distclust_table A tibble containing the data for clustering and plotting. #' @return A plot of the dendrogram. 
#' @import ggdendro -#' @import dplyr -#' @import tidyr -#' @import ggplot2 -#' @import tibble +#' @import tidyverse #' #' @export -#' plot_dendrogram <- function(distclust_table = mytibble){ library(ggdendro) diff --git a/R/plot_distance_matrix.R b/R/plot_distance_matrix.R index 52b6814992bc23d01addc013add619a64afe0dd6..26a3bd4c35dbb9fee1900cae1ec37e8b35416aa7 100644 --- a/R/plot_distance_matrix.R +++ b/R/plot_distance_matrix.R @@ -6,9 +6,7 @@ #' #' @return A tile plot showing all pairwise distances of the distance matrix. #' -#' @import dplyr -#' @import ggplot2 -#' @import tidyr +#' @import tidyverse #' @import viridis #' #' @keywords plotting diff --git a/R/plot_longest_reading_frame.R b/R/plot_longest_reading_frame.R index fbd4ea6c166ada327c4547d3e44e7c342a11b7fc..952f8f163e464ea7f86efb13918a9730e4862722 100644 --- a/R/plot_longest_reading_frame.R +++ b/R/plot_longest_reading_frame.R @@ -6,9 +6,7 @@ #' #' @return A ggplot object displaying the longest reading frames for each sequence. #' -#' @import ggplot2 -#' @import dplyr -#' @import tidyr +#' @import tidyverse #' @import viridis #' #' @examples diff --git a/R/plot_repeat_positions.R b/R/plot_repeat_positions.R index 58eba23a49dfdb52fef872efcf376defd5111a34..8bfe0f0581a4055b11e0be4ece557a995e5b88d6 100644 --- a/R/plot_repeat_positions.R +++ b/R/plot_repeat_positions.R @@ -7,9 +7,7 @@ #' @param repeat_positions A data frame with columns 'start', 'end', 'seqname', #' 'fragment', and 'plot_intensity'. #' @return A ggplot object representing the repeat positions plot. 
-#' @import ggplot2 -#' @import viridis -#' @keywords plot +#' @import tidyverse #' @export plot_repeat_positions <- function(repeat_positions){ legend_name <- '' diff --git a/R/plot_repeat_quantity.R b/R/plot_repeat_quantity.R index b38d49dcf537c9848dbdc00e4f74259ccae38cb9..0c00746bbc4809739f6dcf7dca853c3250655a12 100644 --- a/R/plot_repeat_quantity.R +++ b/R/plot_repeat_quantity.R @@ -6,13 +6,15 @@ #' #' @return A ggplot object representing the plot. #' -#' @export -#' #' @examples #' quantified_repeats <- data.frame(seqname = c('A', 'B', 'C'), #' repeat_count = c(1, 2, 3), #' count_type = c('Type1', 'Type2', 'Type3')) #' plot_repeat_quantity(quantified_repeats) +#' +#' @import tidyverse +#' +#' @export plot_repeat_quantity <- function(quantified_repeats) { legend_name <- '' diff --git a/R/plot_repeats.R b/R/plot_repeats.R index d986420a9e6af2a4c069af99d28dd8231aa8c9db..10a9bdb5b9a3ea3542525788530b83fc7bc5b207 100644 --- a/R/plot_repeats.R +++ b/R/plot_repeats.R @@ -6,9 +6,8 @@ #' @param repeat_sequence A character vector specifying the repeat sequence to be searched. Default is 'GATC'. #' #' @return A ggplot object displaying both positions and quantities of repeated sequences. 
+#' @import tidyverse #' @export -#' @name plot_repeats - plot_repeats <- function(sequences = DNAStringSet(), repeat_sequence = 'GATC') { clus_name <- names(sequences)[[1]] diff --git a/R/plot_variants_per_sample.R b/R/plot_variants_per_sample.R index 3dc413ea84b738f39fb5453856518b62bb19147e..f8696f92f9b3e81c155bfc49e857348737e2bab6 100644 --- a/R/plot_variants_per_sample.R +++ b/R/plot_variants_per_sample.R @@ -8,14 +8,8 @@ #' #' @return A ggplot object representing the variants per sample plot #' -#' @import ggplot2 -#' @import dplyr -#' @importFrom cowplot plot_grid -#' @importFrom cowplot get_legend -#' @importFrom viridisLite viridis -#' @importFrom viridisLite viridisLite -#' @importFrom gridExtra unit -#' @importFrom gridExtra margin +#' @import tidyverse +#' @import cowplot #' #' @examples #' plot_variants_per_sample() diff --git a/R/quantify_repeats.R b/R/quantify_repeats.R index c7e854ab1ac0341b8b0ef2ed7ebda0cae5f86634..1e08ed6afcc392f602560e0e572cb19efd547a1e 100644 --- a/R/quantify_repeats.R +++ b/R/quantify_repeats.R @@ -11,6 +11,7 @@ #' \item \code{singlets}: The number of occurrences of the repeat sequence as singlets in each sequence. #' \item \code{largest_repeat_contig}: The number of contiguous repeats of the repeat sequence in each sequence. 
#' } +#' @import tidyverse #' @export quantify_repeats <- function(sequences = DNAStringSet, repeat_sequence = 'string') { singlet_count <- str_count(as.character(sequences), repeat_sequence) diff --git a/R/read_and_write_cluster_abundance.R b/R/read_and_write_cluster_abundance.R index 8ac0b24397a3960b478bfb68a5ff84ea10d505ee..38a37468c9be48b7a6e3f1ddc23bcc4d274ac5de 100644 --- a/R/read_and_write_cluster_abundance.R +++ b/R/read_and_write_cluster_abundance.R @@ -14,7 +14,7 @@ #' # Read and write cluster abundance #' read_and_write_cluster_abundance(cluster_sequence_list, reference_seqs, seqtab_nochim = 'seqtab_nochim.rds', outpath = path) #' } -#' +#' @import tidyverse #' @export read_and_write_cluster_abundance <- function( cluster_sequence_list = DNAStringSetList, diff --git a/R/save_plot.R b/R/save_plot.R index a8e314d6ddec4b6f7369a39aff682be8f817c005..d99903a21dde10bd8e7d18d5ee8d36c5b683594a 100644 --- a/R/save_plot.R +++ b/R/save_plot.R @@ -14,10 +14,11 @@ #' #' @return None #' +#' @import tidyverse #' @examples #' save_plot(ggplot(mtcars, aes(x = mpg, y = disp)) +#' #' @export - save_plot <- function( pl, filetype = '.pdf', plot_name = 'my_plot', overwrite=FALSE, outp = outp){ dir.create(outp, showWarnings = FALSE) diff --git a/R/similiarity_to_reference.R b/R/similiarity_to_reference.R index f6af472b41cd536980e2a8d48ba50f97833ef2b5..3e98b84040993ce6ed15a61ad2f3a434bb5aa510 100644 --- a/R/similiarity_to_reference.R +++ b/R/similiarity_to_reference.R @@ -5,14 +5,9 @@ #' @param seqs A DNAStringSet object containing the sequences. #' @param ncores An integer specifying the number of cores to use for parallel processing. Defaults to 1. 
#' +#' @import tidyverse +#' @import Biostrings #' @export -#' @importFrom DNAtools alignment_based_distance_matrix -#' @importFrom dplyr as_tibble filter mutate pivot_longer select -#' @importFrom magrittr %>% -#' @importFrom glue if_else -#' @importFrom stringr str_c -#' @importFrom Biostrings DNAStringSet - similiarity_to_reference <- function (seqs = DNAStringSet, ncores = 1) { if(length(seqs) > 1){ @@ -26,6 +21,3 @@ similiarity_to_reference <- function (seqs = DNAStringSet, select(-seq_var, -query) } } - -#' @rdname similiarity_to_reference -#' @keywords internal diff --git a/R/str_pad_to_max.R b/R/str_pad_to_max.R index c222201a2d5606c53af2f598071842beb0694ecf..67c6ae87a38cbb7074b3a0bd56c15b51d8fbf8d3 100644 --- a/R/str_pad_to_max.R +++ b/R/str_pad_to_max.R @@ -10,7 +10,7 @@ #' @examples #' str_pad_to_max(c("hello", "world", "foo", "bar", "x")) #' -#' @importFrom stringr str_pad str_length +#' @import tidyverse #' #' @export str_pad_to_max <- function(vec = c(), ...){ diff --git a/R/subset_by_clusters.R b/R/subset_by_clusters.R index 6b87a5a7ef37135aca8b1a3962457434c18ea339..48c9c8ec6fc195dbbb58c0016d3a8d0b017c4e85 100644 --- a/R/subset_by_clusters.R +++ b/R/subset_by_clusters.R @@ -6,8 +6,8 @@ #' @param cluster_tbl A data frame or tibble containing the cluster assignments. It should have two columns, 'cluster' and 'seqnames', where 'cluster' contains the cluster numbers and 'seqnames' contains the corresponding sequence names. #' @param save_to_file Logical value indicating whether to save the resulting sequences to separate files for each cluster. 
#' @return A list of sequence objects, where each list element corresponds to a cluster and contains the sequences in that cluster +#' @import tidyverse #' @export - subset_by_clusters <- function(seqs, cluster_tbl, save_to_file = TRUE){ cluster_seqs <- cluster_tbl %>% select(cluster, seqnames) %>% diff --git a/R/subset_variant_table.R b/R/subset_variant_table.R index d0ca54860830a2ee9e90c5cba968e9dad89095e9..344fccdc0ba47ea6a6394732761c53eeb6af8577 100644 --- a/R/subset_variant_table.R +++ b/R/subset_variant_table.R @@ -9,13 +9,11 @@ #' #' @return A tibble containing the subsetted variant table. #' -#' @import dplyr -#' @importFrom stats setdiff +#' @import tidyverse #' #' @examples #' subset_variant_table(mytibble, c("cluster1", "cluster2"), c("variant1", "variant2"), c("sample1", "sample2")) #' @export - subset_variant_table <- function(variant_classified_table = mytibble, exclude_clusters = c(), exclude_variants = c(), diff --git a/R/test_clustering_thresholds.R b/R/test_clustering_thresholds.R index 787e47c69d5022f349891c1b1dd88238bb122b5d..56ddefc5959abb20a0b1e98fdd1184eb488b87b1 100644 --- a/R/test_clustering_thresholds.R +++ b/R/test_clustering_thresholds.R @@ -11,9 +11,7 @@ #' @return A list of clustering results, where each element in the list corresponds to a specific threshold value. 
#' #' @import DECIPHER -#' @import tibble -#' @import dplyr -#' @import tidyr +#' @import tidyverse #' #' @examples #' # Create a DNAStringSet object diff --git a/R/translate_and_count_stops.R b/R/translate_and_count_stops.R index 93802407c220712bd250dccaf70cba1543452edb..cd6de20715a32bd6e12727d3048ff7e1b02ffe74 100644 --- a/R/translate_and_count_stops.R +++ b/R/translate_and_count_stops.R @@ -18,8 +18,7 @@ #' translate_and_count_stops(seqs) #' #' @import Biostrings -#' @import dplyr -#' @import stringr +#' @import tidyverse #' @export translate_and_count_stops <- function(seqs = DNAStringSet) { # Make reading frames and translate to protein diff --git a/R/variant_classifier.R b/R/variant_classifier.R index 5700fca84667fdfb331ce21c2ce67b6df5af41ef..c0e2353d35123988df97954818176e1a5cdc843d 100644 --- a/R/variant_classifier.R +++ b/R/variant_classifier.R @@ -7,9 +7,9 @@ #' @param reference_informed Logical value indicating whether the classification should be reference informed (default: FALSE) #' #' @return A modified master table with variant classifications +#' @import tidyverse #' @export -# Define a function called variant_classifier with two arguments: seqtab_file and clustered_sequences variant_classifier <- function( seqtab_file = file.path(path, 'seqtab_nochim.rds'), clustered_sequences = myDNAStringSetList){ diff --git a/R/veganify_asvcounts.R b/R/veganify_asvcounts.R index 2070bd44ea4ceaf023881cd9001974cba0600429..5d513fdbd9162d9264071418141f1855c76d51d4 100644 --- a/R/veganify_asvcounts.R +++ b/R/veganify_asvcounts.R @@ -5,6 +5,7 @@ #' @param cleaned_seqtab A cleaned sequence table. #' #' @return A vegan formatted count matrix. 
+#' @import tidyverse #' @export veganify_asvcounts <- function(cleaned_seqtab = my_cleaned_seqtab){ out <- cleaned_seqtab %>% diff --git a/R/veganify_generic_wide_tbl.R b/R/veganify_generic_wide_tbl.R index d08fcd79734b57848b73d499b92b9fb655ca59d4..151c94fe5792931a7eb512a093b071be934372bc 100644 --- a/R/veganify_generic_wide_tbl.R +++ b/R/veganify_generic_wide_tbl.R @@ -5,10 +5,7 @@ #' @param data A wide tibble with rownames in the first column and input data for vegdist in all other columns. #' #' @return A data.frame that is compatible with the vegdist function. -#' -#' @importFrom dplyr %>% -#' @importFrom dplyr replace -#' @importFrom stats as.data.frame +#' @import tidyverse #' #' @examples #' library(tibble) diff --git a/man/clean_seqtab.Rd b/man/clean_seqtab.Rd new file mode 100644 index 0000000000000000000000000000000000000000..dc56f198b474eea6c1885efac3dc99462956768e --- /dev/null +++ b/man/clean_seqtab.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_seqtab.R +\name{clean_seqtab} +\alias{clean_seqtab} +\title{Clean Sequence Table} +\usage{ +clean_seqtab( + file = "seqtab_nochim.rds", + ASV_sequences = asvstrings, + output = TRUE +) +} +\arguments{ +\item{file}{The path to the RDS file containing the sequence table.} + +\item{ASV_sequences}{A character vector specifying the ASV sequences.} + +\item{output}{A logical value indicating whether to output a CSV file.} +} +\value{ +A tibble containing the cleaned sequence table. +} +\description{ +Cleans a typical sequence table as output by our internal DADA2 pipeline by +transposing it, converting it to a tibble format, and adding a column of sequence names. 
+} +\examples{ +clean_seqtab() +clean_seqtab(file = 'seqtab.rds', output = FALSE) +clean_seqtab(ASV_sequences = asvstrings) +} diff --git a/man/find_contiguous_multi_repeats.Rd b/man/find_contiguous_multi_repeats.Rd index b5cb905e9ecf796949fa9d1dc9182b444a6fc5a0..c6be0c8cc53d1e060745f9ae7903a23ae8968e0e 100644 --- a/man/find_contiguous_multi_repeats.Rd +++ b/man/find_contiguous_multi_repeats.Rd @@ -37,5 +37,3 @@ find_contiguous_multi_repeats(sequences, 'AT', 3) # Expected output: c(2, 1) } -\keyword{repeats} -\keyword{sequence,} diff --git a/man/find_longest_orf.Rd b/man/find_longest_orf.Rd index a459d74a44c70d17ea3af8100745aa1e20526b95..6d88245b5c69c27263f39b728551687588d06536 100644 --- a/man/find_longest_orf.Rd +++ b/man/find_longest_orf.Rd @@ -21,6 +21,3 @@ seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC")) find_longest_orf(seqs) } -\seealso{ -\code{\link{findORFs}} -} diff --git a/man/meshclustR.Rd b/man/meshclustR.Rd index ae60c36d49e0623631571e9c567080ffb0508ce0..f4d6efe80fcf76e4976568649006593f3fc8a8a1 100644 --- a/man/meshclustR.Rd +++ b/man/meshclustR.Rd @@ -21,6 +21,7 @@ A data frame with information regarding the clustering analysis. \description{ This function writes a temporary file to perform a clustering analysis on a set of DNA sequences. The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool. +Meshclust has to be installed and executable via system2() to run this function. } \details{ James, Benjamin T. et al. 
(2018), diff --git a/man/plot_cluster_thresholds.Rd b/man/plot_cluster_thresholds.Rd index d1996f9abeef455822e162ac06848f9d06a4faf0..b5d94065edae31ab472dd0533272a6452047638c 100644 --- a/man/plot_cluster_thresholds.Rd +++ b/man/plot_cluster_thresholds.Rd @@ -38,5 +38,4 @@ plateaus <- c( plot_cluster_thresholds(clus_counts_tbl, plateaus) - } diff --git a/man/plot_repeat_positions.Rd b/man/plot_repeat_positions.Rd index d21c6e3c79c1c7ea8803852097ecfdd0b9ea0708..8430f7542e43eba0a038ba525d27c077d915d947 100644 --- a/man/plot_repeat_positions.Rd +++ b/man/plot_repeat_positions.Rd @@ -18,4 +18,3 @@ This function plots repeat positions on a sequence. It takes a data frame as input which should have columns 'start', 'end', 'seqname', 'fragment', and 'plot_intensity'. It uses ggplot2 to create the plot. } -\keyword{plot} diff --git a/man/plot_repeat_quantity.Rd b/man/plot_repeat_quantity.Rd index a7b944fa8fa273eaf2cc6cff9ffee4512a8ebab6..9aeb8e3cee148a8d3257b4b22c6e917b3e24ea8a 100644 --- a/man/plot_repeat_quantity.Rd +++ b/man/plot_repeat_quantity.Rd @@ -20,4 +20,5 @@ quantified_repeats <- data.frame(seqname = c('A', 'B', 'C'), repeat_count = c(1, 2, 3), count_type = c('Type1', 'Type2', 'Type3')) plot_repeat_quantity(quantified_repeats) + } diff --git a/man/read_and_write_cluster_abundance.Rd b/man/read_and_write_cluster_abundance.Rd index 465240e7a59e12f586978a777addeb766e631da6..9374ed2c3738cf842d2da2ca3f8672a5010b559f 100644 --- a/man/read_and_write_cluster_abundance.Rd +++ b/man/read_and_write_cluster_abundance.Rd @@ -31,5 +31,4 @@ This function reads a sequence table, calculates the abundance of sequences belo # Read and write cluster abundance read_and_write_cluster_abundance(cluster_sequence_list, reference_seqs, seqtab_nochim = 'seqtab_nochim.rds', outpath = path) } - } diff --git a/man/save_plot.Rd b/man/save_plot.Rd index c6ea917f1c6d4a9fecef10147bf583023adda573..972f7b4abfd371313856bc7686ea9df4c498516b 100644 --- a/man/save_plot.Rd +++ b/man/save_plot.Rd @@ 
-39,4 +39,5 @@ Saves a ggplot object as a file with the specified file type, plot name, and out } \examples{ save_plot(ggplot(mtcars, aes(x = mpg, y = disp)) + }