diff --git a/DESCRIPTION b/DESCRIPTION index 145a7baea03eea961e1bad335d718bd6d0b188bf..d286e2b15bea57373934569ba1a715415a8e1f66 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,12 +24,22 @@ Imports: Biostrings, cowplot, DECIPHER, + dplyr, + forcats, + GenomicRanges, + kmer, ggdendro, + ggplot2, ggtree, - tidyverse, + magrittr, + phangorn, + purrr, + readr, + stringr, + tibble, + tidyr, vegan, - viridis, - phangorn + viridisLite License: use_gpl_license(version = 3, include_future = TRUE) Encoding: UTF-8 LazyData: true diff --git a/NAMESPACE b/NAMESPACE index 3b5b311b629a32f00a8b6ff9e1d1920f3bdceaae..664c393761d191cdbc0bd5ef6b978857f87b1871 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,14 +45,35 @@ export(translate_and_count_stops) export(variant_classifier) export(veganify_asvcounts) export(veganify_generic_wide_tbl) -import(Biostrings) import(DECIPHER) -import(GenomicRanges) -import(ape) -import(cowplot) import(dplyr) -import(ggdendro) -import(ggtree) -import(phangorn) -import(tidyverse) -import(viridis) +import(forcats) +import(ggplot2) +import(purrr) +import(readr) +import(stringr) +import(tibble) +import(tidyr) +importFrom(Biostrings,AAStringSet) +importFrom(Biostrings,DNAStringSet) +importFrom(Biostrings,DNAStringSetList) +importFrom(Biostrings,translate) +importFrom(Biostrings,writeXStringSet) +importFrom(DECIPHER,AlignSeqs) +importFrom(DECIPHER,DistanceMatrix) +importFrom(GenomicRanges,GRanges) +importFrom(IRanges,ranges) +importFrom(ape,as.DNAbin) +importFrom(cowplot,get_legend) +importFrom(cowplot,plot_grid) +importFrom(ggdendro,dendro_data) +importFrom(ggdendro,segment) +importFrom(ggtree,geom_tiplab) +importFrom(ggtree,get_taxa_name) +importFrom(ggtree,ggtree) +importFrom(kmer,kdistance) +importFrom(magrittr,"%>%") +importFrom(phangorn,dist.ml) +importFrom(phangorn,phyDat) +importFrom(phangorn,upgma) +importFrom(viridisLite,viridis) diff --git a/R/align_and_generate_upgma.R b/R/align_and_generate_upgma.R index 
6a48be966f8327e80dc7f511dc71b7e8d5d8801f..1991c0aac8381d5f3f9ff13beb2ee7f511ac062c 100644 --- a/R/align_and_generate_upgma.R +++ b/R/align_and_generate_upgma.R @@ -5,14 +5,14 @@ #' @param cluster The name of the cluster to generate the UPGMA tree from #' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences #' @return A UPGMA tree object -#' @import DECIPHER -#' @import phangorn -#' @import tidyverse +#' @importFrom DECIPHER AlignSeqs +#' @importFrom phangorn dist.ml upgma phyDat +#' @importFrom magrittr %>% #' @export align_and_generate_upgma <- function(cluster, sequence_list) { # Use AlignSeqs function to align the sequences in the given cluster - alig <- AlignSeqs(sequence_list[[cluster]], verbose = FALSE) + alig <- DECIPHER::AlignSeqs(sequence_list[[cluster]], verbose = FALSE) # Convert the aligned sequences to a matrix of DNA data and calculate the distance matrix using maximum likelihood # Then use the upgma function to generate the tree diff --git a/R/alignment_based_distance_matrix.R b/R/alignment_based_distance_matrix.R index d4ffcd240e7034cb9593e12ff597a05fff6d89ab..feb65c3463623b21dcc20fe8f221de8000d8735e 100644 --- a/R/alignment_based_distance_matrix.R +++ b/R/alignment_based_distance_matrix.R @@ -7,19 +7,18 @@ #' #' @return A matrix object containing the distance scores calculated based on the alignments of the input sequences. 
#' -#' @import DECIPHER -#' @import Biostrings +#' @importFrom DECIPHER AlignSeqs DistanceMatrix #' #' @examples -#' data(smallexample) -#' dna_sequences <- DNAStringSet(smallexample) +#' dna_sequences <- Biostrings::DNAStringSet(c("AGACCACTCC", "GCATGTAGCT", +#' "GTGGTACGGC", "TCAAACGGCT")) #' alignment_based_distance_matrix(dna_sequences, ncores = 2) #' #' @export alignment_based_distance_matrix <- function(seqs = DNAStringSet, ncores = 1) { # Align the sequences using the specified number of processors - seqs_alig <- AlignSeqs(seqs, processors = ncores, verbose = FALSE) + seqs_alig <- DECIPHER::AlignSeqs(seqs, processors = ncores, verbose = FALSE) # Generate the distance matrix from the aligned sequences using the specified number of processors - DistanceMatrix(seqs_alig, verbose = FALSE, processors = ncores) + DECIPHER::DistanceMatrix(seqs_alig, verbose = FALSE, processors = ncores) } diff --git a/R/calc_asv_nmds.R b/R/calc_asv_nmds.R index 0d43687ed8be403eaaac5c7bbe3e388678bee2c0..00747814c0a63b8b37a5e5303b5b6a9d79cafb33 100644 --- a/R/calc_asv_nmds.R +++ b/R/calc_asv_nmds.R @@ -9,7 +9,7 @@ #' @param ... Additional arguments passed to the `metaMDS` function from the vegan package #' #' @return A list object with results including NMDS results and NMDS tibble -#' @import tidyverse +#' @import dplyr tibble tidyr #' @export #' #' @examples diff --git a/R/clean_seqtab.R b/R/clean_seqtab.R index be414252a25a05ec674016e2b866eb479f04b678..8737033a576df38541c551b2bd8526ea34686cc1 100644 --- a/R/clean_seqtab.R +++ b/R/clean_seqtab.R @@ -8,7 +8,7 @@ #' @param output A logical value indicating whether to output a CSV file. #' @return A tibble containing the cleaned sequence table.
#' -#' @import tidyverse +#' @import dplyr readr tibble tidyr #' #' @examples #' clean_seqtab() diff --git a/R/cluster_longest_reading_frames.R b/R/cluster_longest_reading_frames.R index 0a578b548d3fd9b039699b93bafc5a2b36466059..0728631d4f6518c6dd4e4276067be3df060a9f5c 100644 --- a/R/cluster_longest_reading_frames.R +++ b/R/cluster_longest_reading_frames.R @@ -12,8 +12,9 @@ #' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9)) #' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl) #' -#' @import Biostrings -#' @import tidyverse +#' @import dplyr purrr tibble tidyr +#' @importFrom Biostrings DNAStringSetList +#' cluster_longest_reading_frames <- function( clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered reading_frame_tbl = tbl) { # A variable that holds a table of reading frames diff --git a/R/cluster_tbl_named.R b/R/cluster_tbl_named.R index e1e9f241fe3753688f5de5434328eeaa3f76a701..43d1fc05390514642c0de48a3b53f0d028af50f7 100644 --- a/R/cluster_tbl_named.R +++ b/R/cluster_tbl_named.R @@ -11,8 +11,7 @@ #' @return A tibble that contains the cluster number, sequence name, cluster #' name, sequence number within the cluster, and cluster size. #' -#' @import tidyverse -#' @import Biostrings +#' @import dplyr purrr tibble tidyr #' @export cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){ # First: get names of each cluster diff --git a/R/combine_cluster_plots_and_save.R b/R/combine_cluster_plots_and_save.R index 9f6c9e051d8cfba9da5c7643568f03ee663490e3..007f1e624c30c8b4a7a1e1a403d475bc61dc7a20 100644 --- a/R/combine_cluster_plots_and_save.R +++ b/R/combine_cluster_plots_and_save.R @@ -11,7 +11,7 @@ #' @param w The width of the plot. Default is 'cm_width'. #' @param h The height of the plot. Default is 'cm_height'. 
#' @return combined plot -#' @import tidyverse +#' @import ggplot2 #' @export combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path, w = cm_width, h = cm_height) { diff --git a/R/count_clusters.R b/R/count_clusters.R index 238cbdfdbfc07cbc4dde6d2072bfca69cf9c12f0..4a14635fc5acbd87e8bebcacd2fc6ce43ed59a52 100644 --- a/R/count_clusters.R +++ b/R/count_clusters.R @@ -18,7 +18,7 @@ #' ) #' count_clusters(clus_tbl_list) #' -#' @import tidyverse +#' @import dplyr purrr tidyr #' @export count_clusters <- function(clus_tbl_list){ diff --git a/R/define_plateau.R b/R/define_plateau.R index 0c4427552ca299b544138ca5782a59222bd73289..bb18605c359f0684e20574369d165a1ff30b64c4 100644 --- a/R/define_plateau.R +++ b/R/define_plateau.R @@ -12,7 +12,7 @@ #' @examples #' define_plateau(cluster_counts = cluster_counts_df) #' -#' @import tidyverse +#' @import dplyr #' @export define_plateau <- function(cluster_counts){ # "cluster_counts" is a tibble of cluster counts passed as a parameter to the function diff --git a/R/dendrogram_hclust.R b/R/dendrogram_hclust.R index 61fa75831c766a427e6041650b962ad3e95ce261..ad2269b78d65139f1890acf3b8caadd69c199f32 100644 --- a/R/dendrogram_hclust.R +++ b/R/dendrogram_hclust.R @@ -8,8 +8,8 @@ #' #' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram. #' -#' @import ggdendro -#' @import tidyverse +#' @importFrom ggdendro dendro_data +#' @importFrom magrittr %>% #' #' @examples #' # Generate dendrogram with default parameters @@ -21,7 +21,6 @@ #' #' @export dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) 
{ - require(ggdendro) set.seed(seed) # make hierarchical cluster from vegdist matrix and extract data for plotting diff --git a/R/export_longest_reading_frame.R b/R/export_longest_reading_frame.R index 1535835c94fcd32c496530986738a7e7ff286c7d..ca681194fc1c63c0a5939fd0bcbf85772bf1b66f 100644 --- a/R/export_longest_reading_frame.R +++ b/R/export_longest_reading_frame.R @@ -12,8 +12,8 @@ #' @examples #' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE) #' -#' @import Biostrings -#' @import tidyverse +#' @importFrom Biostrings AAStringSet writeXStringSet +#' @import dplyr #' #' @export export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table diff --git a/R/find_contiguous_multi_repeats.R b/R/find_contiguous_multi_repeats.R index 14e021aebe833eaa44a702b48e4b3f7531baa7df..b69f06186a4859ee078ef3b98f0b9f2a9e7ac93e 100644 --- a/R/find_contiguous_multi_repeats.R +++ b/R/find_contiguous_multi_repeats.R @@ -23,8 +23,8 @@ #' #' # Expected output: c(2, 1) #' -#' @import tidyverse -#' @import Biostrings +#' @import stringr +#' @importFrom Biostrings DNAStringSet #' find_contiguous_multi_repeats <- function(sequences = DNAStringSet, repeat_sequence = 'string', diff --git a/R/find_longest_hrf.R b/R/find_longest_hrf.R index bd4ccab4092960672e4e04e6d81ae4c87c21477e..c5d5da9ea72115114c8fcd983e48689165f60a1a 100644 --- a/R/find_longest_hrf.R +++ b/R/find_longest_hrf.R @@ -10,7 +10,7 @@ #' @examples #' find_longest_hrf(seqs) #' -#' @import tidyverse +#' @import dplyr #' #' @export find_longest_hrf <- function(seqs = DNAStringSet){ diff --git a/R/find_longest_orf.R b/R/find_longest_orf.R index 80f1deadd2694a07fb8a8a1c77d49a236960bf33..07b7eac79857b4929001b062793c95aec750ec15 100644 --- a/R/find_longest_orf.R +++ b/R/find_longest_orf.R @@ -6,10 +6,10 @@ #' @param seqs A DNAStringSet object containing the DNA sequences to search for ORFs. 
#' #' @return A tibble containing the start and end positions, strand, and length of the longest ORF in each sequence. -#' -#' @import Biostrings -#' @import GenomicRanges -#' @import tidyverse +#' @importFrom Biostrings DNAStringSet +#' @importFrom GenomicRanges GRanges +#' @importFrom IRanges ranges +#' @import dplyr tibble tidyr #' #' @examples #' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC")) diff --git a/R/find_longest_reading_frames.R b/R/find_longest_reading_frames.R index 36ce62adba1dd1fb052e458734644cee2a45349d..5638462955d63b41fdb3d5b4f9012769a91a1b35 100644 --- a/R/find_longest_reading_frames.R +++ b/R/find_longest_reading_frames.R @@ -10,7 +10,7 @@ #' @return A data frame containing the longest reading frames for each sequence. #' The data frame includes the sequence names, reading frame, and the width of the reading frame. #' -#' @import tidyverse +#' @import dplyr tidyr #' #' @export find_longest_reading_frames <- function(seqs = myDNAStringSet){ diff --git a/R/find_repeat_positions.R b/R/find_repeat_positions.R index 9ac195325318460afb74961351c559a4e5532b88..bad7257284860f0c20a293a057d2fc4af4121a06 100644 --- a/R/find_repeat_positions.R +++ b/R/find_repeat_positions.R @@ -6,7 +6,8 @@ #' @param repeat_sequence A string specifying the repeat sequence to search for. #' #' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity. 
-#' @import tidyverse +#' @import dplyr stringr tibble tidyr +#' @importFrom Biostrings DNAStringSet #' #' @examples #' sequences <- DNAStringSet(c("AGTCAGT", diff --git a/R/kmer_based_distance_matrix.R b/R/kmer_based_distance_matrix.R index 99bd16eda84f4d8f2fca9b425fdd3f36de928c05..729a04ac1bbbc3d00da0b503c008a98428503deb 100644 --- a/R/kmer_based_distance_matrix.R +++ b/R/kmer_based_distance_matrix.R @@ -1,16 +1,16 @@ #' Convert to bin format and get distance matrix using kmers #' -#' This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from XXXX and returns it as a matrix. +#' This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from the "kmer" package and returns it as a matrix. #' #' @param seqs DNAStringSet containing the DNA sequences #' #' @return A distance matrix in bin format. #' -#' @import ape -#' @import Biostrings +#' @importFrom ape as.DNAbin +#' @importFrom kmer kdistance #' #' @export kmer_based_distance_matrix <- function (seqs) { seqbins <- ape::as.DNAbin(seqs) - as.matrix(kdistance(seqbins)) + as.matrix(kmer::kdistance(seqbins)) } diff --git a/R/meshclustR.R b/R/meshclustR.R index 1eecb069194ea07ed49644534ce427de9139c57c..990a9df62cfc0b22b284406db1148c35aa32080e 100644 --- a/R/meshclustR.R +++ b/R/meshclustR.R @@ -2,7 +2,7 @@ #' #' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences. #' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool. -#' Meshclust has to be installed and executlable via system2() to run this function. +#' Meshclust has to be installed and executable via system2() to run this function. This is difficult to set up on Windows; the function is intended for use on Linux. #' #' James, Benjamin T. et al.
(2018), #' MeShClust: an intelligent tool for clustering DNA sequences. @@ -17,9 +17,8 @@ #' #' @examples #' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path) -#' @import Biostrings -#' @import tidyverse -#' @import dplyr +#' @importFrom Biostrings writeXStringSet +#' @import dplyr readr stringr #' #' @export meshclustR <- function(seqs = MyDNAStringSet, @@ -39,7 +38,7 @@ meshclustR <- function(seqs = MyDNAStringSet, '-o', out_file)) #read output file and parse - stable_cluster <- read_delim(out_file, delim = '\t', + stable_cluster <- readr::read_delim(out_file, delim = '\t', col_names = c('cluster', 'seqnames', 'identity_with_center', 'cluster_class'), col_types = 'fcdc') %>% diff --git a/R/pivot_cluster_tbl_wider.R b/R/pivot_cluster_tbl_wider.R index 7ca1e0c1472ea877a7adb1c7a98cea56973a8a75..37152c9cfaabd5d08e2c8b74a16fe6116282ef5a 100644 --- a/R/pivot_cluster_tbl_wider.R +++ b/R/pivot_cluster_tbl_wider.R @@ -4,7 +4,7 @@ #' #' @return A wide table of clusters with all sequences in each cluster listed #' -#' @import tidyverse +#' @import dplyr tidyr #' #' @export pivot_cluster_tbl_wider <- function(cluster_tbl) { diff --git a/R/plot_abundance_per_sample.R b/R/plot_abundance_per_sample.R index 9f8f9363d1508eb1e45cfd6e1307c06cb1311df4..0a4616c1c87cf5b34fb26b433f6f89b6bdc27259 100644 --- a/R/plot_abundance_per_sample.R +++ b/R/plot_abundance_per_sample.R @@ -3,7 +3,7 @@ #' @param tbl_of_abundance A table containing sequence abundance data. #' @return A plot displaying sequence abundance per sample. 
#' -#' @import tidyverse +#' @import ggplot2 #' #' @examples #' tbl_of_abundance <- data.frame(ID = c("ASV1", "ASV2", "ASV3", "ASV4"), diff --git a/R/plot_abundance_sums_per_sequence.R b/R/plot_abundance_sums_per_sequence.R index e39ddfeb55997ce2214c6005abe0091eee6e0896..f871d709486cd467511d6a4307b79ca4508c6c66 100644 --- a/R/plot_abundance_sums_per_sequence.R +++ b/R/plot_abundance_sums_per_sequence.R @@ -6,7 +6,7 @@ #' tbl_of_sums <- data.frame(ID = c("ASV_001", "ASV_002", "ASV_003"), sum_count = c(1000, 2000, 3000)) #' plot_abundance_sums_per_sequence(tbl_of_sums) #' -#' @import tidyverse +#' @import ggplot2 #' #' @export plot_abundance_sums_per_sequence <- function(tbl_of_sums) { diff --git a/R/plot_asv_nmds.R b/R/plot_asv_nmds.R index 2c791e4ce850473d8840b083613c2e741e40726a..b2f12eee4982d0df4eb64263a9acf2af1780d2de 100644 --- a/R/plot_asv_nmds.R +++ b/R/plot_asv_nmds.R @@ -12,7 +12,7 @@ #' # Assume nmds_df has been created through NMDS analysis #' plot_asv_nmds(asv_nmds = nmds_df, color_by = 'Sample', centroids = TRUE) #' -#' @import tidyverse +#' @import ggplot2 #' #' @export plot_asv_nmds <- function(asv_nmds = my_asv_nmds, diff --git a/R/plot_cluster_dendrogram.R b/R/plot_cluster_dendrogram.R index 7d8e1a26e366243e01cc7c5384d18902473d0f9d..c371079bc31765994e9d015ad7b7718e4b6dabfd 100644 --- a/R/plot_cluster_dendrogram.R +++ b/R/plot_cluster_dendrogram.R @@ -4,8 +4,8 @@ #' #' @param upgma_tree An object of class 'phylo' representing the tree. #' -#' @import ggtree -#' @import tidyverse +#' @importFrom ggtree ggtree geom_tiplab +#' @import ggplot2 #' #' @return A dendrogram plot. #' diff --git a/R/plot_cluster_overview.R b/R/plot_cluster_overview.R index 3fcc3f012f013b4a27793f6af40bb21a9f85b4e8..0bab586d73c4c9ee3310f969b21bf247e15c97c5 100644 --- a/R/plot_cluster_overview.R +++ b/R/plot_cluster_overview.R @@ -9,10 +9,9 @@ #' @param cm_height Height of the plot in centimeters. #' @param path Path to save the plot. 
#' @return A list containing the plotted and saved cluster overview. -#' -#' @import Biostrings -#' @import tidyverse -#' @import ggtree +#' @importFrom Biostrings DNAStringSet +#' @import dplyr forcats tibble tidyr +#' @importFrom ggtree get_taxa_name #' #' @examples #' # Create example data diff --git a/R/plot_cluster_thresholds.R b/R/plot_cluster_thresholds.R index 679a077d344a53a1c671d68f7d80a303a9412381..c42d5cae15910cf67528ea6ccd1f8c10851c17ed 100644 --- a/R/plot_cluster_thresholds.R +++ b/R/plot_cluster_thresholds.R @@ -29,7 +29,7 @@ #' #' plot_cluster_thresholds(clus_counts_tbl, plateaus) #' -#' @import tidyverse +#' @import ggplot2 #' #' @export plot_cluster_thresholds <- function(clus_counts_tbl, plateaus) { diff --git a/R/plot_clusters.R b/R/plot_clusters.R index c6c3c577d7aa643ca870118581073129035e3763..e1455116af58f76328478a8c7880fff46a377958 100644 --- a/R/plot_clusters.R +++ b/R/plot_clusters.R @@ -7,7 +7,7 @@ #' #' @return A plot with thresholds and plateaus highlighted. #' -#' @import tidyverse +#' @import ggplot2 #' #' @examples #' plot_clusters(clus_counts_tbl, plateaus) diff --git a/R/plot_dendrogram.R b/R/plot_dendrogram.R index 83ceb8a360a2873fee931110a58df5a360dc9cda..da155b68df72755a7e46ee2b987edce4797e2a6d 100644 --- a/R/plot_dendrogram.R +++ b/R/plot_dendrogram.R @@ -4,13 +4,11 @@ #' #' @param distclust_table A tibble containing the data for clustering and plotting. #' @return A plot of the dendrogram. 
-#' @import ggdendro -#' @import tidyverse +#' @importFrom ggdendro segment +#' @import dplyr ggplot2 tibble tidyr #' #' @export plot_dendrogram <- function(distclust_table = mytibble){ - library(ggdendro) - # Prepare data from the plotting table for vegdist/hclust clustering mts_vegan <- distclust_table %>% select(var_type, seqnames, rel_var_abundance) %>% diff --git a/R/plot_distance_matrix.R b/R/plot_distance_matrix.R index 26a3bd4c35dbb9fee1900cae1ec37e8b35416aa7..f18ceacd6a58feb66544ca9b96e66f420802ea23 100644 --- a/R/plot_distance_matrix.R +++ b/R/plot_distance_matrix.R @@ -6,8 +6,7 @@ #' #' @return A tile plot showing all pairwise distances of the distance matrix. #' -#' @import tidyverse -#' @import viridis +#' @import dplyr ggplot2 tibble tidyr #' #' @keywords plotting #' @seealso \code{\link{heatmap}} diff --git a/R/plot_longest_reading_frame.R b/R/plot_longest_reading_frame.R index 952f8f163e464ea7f86efb13918a9730e4862722..b45756867b000cacec146291c776d0697b621df0 100644 --- a/R/plot_longest_reading_frame.R +++ b/R/plot_longest_reading_frame.R @@ -6,8 +6,7 @@ #' #' @return A ggplot object displaying the longest reading frames for each sequence. #' -#' @import tidyverse -#' @import viridis +#' @import dplyr ggplot2 #' #' @examples #' plot_longest_reading_frame() diff --git a/R/plot_repeat_positions.R b/R/plot_repeat_positions.R index 8bfe0f0581a4055b11e0be4ece557a995e5b88d6..cb82811e4173e531e71cdc0c6d95667f19b9162e 100644 --- a/R/plot_repeat_positions.R +++ b/R/plot_repeat_positions.R @@ -7,7 +7,7 @@ #' @param repeat_positions A data frame with columns 'start', 'end', 'seqname', #' 'fragment', and 'plot_intensity'. #' @return A ggplot object representing the repeat positions plot. 
-#' @import tidyverse +#' @import ggplot2 #' @export plot_repeat_positions <- function(repeat_positions){ legend_name <- '' diff --git a/R/plot_repeat_quantity.R b/R/plot_repeat_quantity.R index 0c00746bbc4809739f6dcf7dca853c3250655a12..54fcee6ab7eb2e01ff026f65f8a1141ba3d7576e 100644 --- a/R/plot_repeat_quantity.R +++ b/R/plot_repeat_quantity.R @@ -12,7 +12,7 @@ #' count_type = c('Type1', 'Type2', 'Type3')) #' plot_repeat_quantity(quantified_repeats) #' -#' @import tidyverse +#' @import ggplot2 #' #' @export plot_repeat_quantity <- function(quantified_repeats) { diff --git a/R/plot_repeats.R b/R/plot_repeats.R index 10a9bdb5b9a3ea3542525788530b83fc7bc5b207..b8b79adaf8b23fdad4ac76a514c387236745bc6b 100644 --- a/R/plot_repeats.R +++ b/R/plot_repeats.R @@ -6,7 +6,8 @@ #' @param repeat_sequence A character vector specifying the repeat sequence to be searched. Default is 'GATC'. #' #' @return A ggplot object displaying both positions and quantities of repeated sequences. -#' @import tidyverse +#' @import dplyr forcats stringr tidyr +#' @importFrom Biostrings DNAStringSet #' @export plot_repeats <- function(sequences = DNAStringSet(), repeat_sequence = 'GATC') { diff --git a/R/plot_variants_per_sample.R b/R/plot_variants_per_sample.R index f8696f92f9b3e81c155bfc49e857348737e2bab6..ab0c70853e7f3455e4a044b3c1d5a22b0c16c676 100644 --- a/R/plot_variants_per_sample.R +++ b/R/plot_variants_per_sample.R @@ -8,8 +8,9 @@ #' #' @return A ggplot object representing the variants per sample plot #' -#' @import tidyverse -#' @import cowplot +#' @import dplyr ggplot2 tidyr +#' @importFrom cowplot get_legend plot_grid +#' @importFrom viridisLite viridis #' #' @examples #' plot_variants_per_sample() diff --git a/R/quantify_repeats.R b/R/quantify_repeats.R index 1e08ed6afcc392f602560e0e572cb19efd547a1e..2ce78f8ce26a65a33d2403146621b5c6c1e78eb0 100644 --- a/R/quantify_repeats.R +++ b/R/quantify_repeats.R @@ -11,7 +11,7 @@ #' \item \code{singlets}: The number of occurrences of the repeat 
sequence as singlets in each sequence. #' \item \code{largest_repeat_contig}: The number of contiguous repeats of the repeat sequence in each sequence. #' } -#' @import tidyverse +#' @import dplyr stringr tibble tidyr #' @export quantify_repeats <- function(sequences = DNAStringSet, repeat_sequence = 'string') { singlet_count <- str_count(as.character(sequences), repeat_sequence) diff --git a/R/read_and_write_cluster_abundance.R b/R/read_and_write_cluster_abundance.R index 38a37468c9be48b7a6e3f1ddc23bcc4d274ac5de..66a9a9e684a477be65de75b1b2361f05104fc234 100644 --- a/R/read_and_write_cluster_abundance.R +++ b/R/read_and_write_cluster_abundance.R @@ -14,7 +14,7 @@ #' # Read and write cluster abundance #' read_and_write_cluster_abundance(cluster_sequence_list, reference_seqs, seqtab_nochim = 'seqtab_nochim.rds', outpath = path) #' } -#' @import tidyverse +#' @import dplyr readr tibble tidyr #' @export read_and_write_cluster_abundance <- function( cluster_sequence_list = DNAStringSetList, diff --git a/R/save_plot.R b/R/save_plot.R index d99903a21dde10bd8e7d18d5ee8d36c5b683594a..be0f808bf6fd91235a5a1d6bb3cee580136e1f35 100644 --- a/R/save_plot.R +++ b/R/save_plot.R @@ -14,7 +14,7 @@ #' #' @return None #' -#' @import tidyverse +#' @import ggplot2 #' @examples #' save_plot(ggplot(mtcars, aes(x = mpg, y = disp)) #' diff --git a/R/similiarity_to_reference.R b/R/similiarity_to_reference.R index 3e98b84040993ce6ed15a61ad2f3a434bb5aa510..5da43161dbcb0decb2662084f1cf8d4f067b6273 100644 --- a/R/similiarity_to_reference.R +++ b/R/similiarity_to_reference.R @@ -5,8 +5,7 @@ #' @param seqs A DNAStringSet object containing the sequences. #' @param ncores An integer specifying the number of cores to use for parallel processing. Defaults to 1. 
#' -#' @import tidyverse -#' @import Biostrings +#' @import dplyr tibble tidyr #' @export similiarity_to_reference <- function (seqs = DNAStringSet, ncores = 1) { diff --git a/R/str_pad_to_max.R b/R/str_pad_to_max.R index 67c6ae87a38cbb7074b3a0bd56c15b51d8fbf8d3..a9e828adf2b309d7480ba9b40cd918f697e08688 100644 --- a/R/str_pad_to_max.R +++ b/R/str_pad_to_max.R @@ -10,7 +10,7 @@ #' @examples #' str_pad_to_max(c("hello", "world", "foo", "bar", "x")) #' -#' @import tidyverse +#' @import stringr #' #' @export str_pad_to_max <- function(vec = c(), ...){ diff --git a/R/subset_by_clusters.R b/R/subset_by_clusters.R index 48c9c8ec6fc195dbbb58c0016d3a8d0b017c4e85..82111186016b81f68027ac40465d50082368ec7c 100644 --- a/R/subset_by_clusters.R +++ b/R/subset_by_clusters.R @@ -6,7 +6,8 @@ #' @param cluster_tbl A data frame or tibble containing the cluster assignments. It should have two columns, 'cluster' and 'seqnames', where 'cluster' contains the cluster numbers and 'seqnames' contains the corresponding sequence names. #' @param save_to_file Logical value indicating whether to save the resulting sequences to separate files for each cluster. #' @return A list of sequence objects, where each list element corresponds to a cluster and contains the sequences in that cluster -#' @import tidyverse +#' @import dplyr purrr +#' @importFrom Biostrings writeXStringSet #' @export subset_by_clusters <- function(seqs, cluster_tbl, save_to_file = TRUE){ cluster_seqs <- cluster_tbl %>% diff --git a/R/subset_variant_table.R b/R/subset_variant_table.R index 344fccdc0ba47ea6a6394732761c53eeb6af8577..8eb75dd365d6b99fcd834d7dcc557fca333ce116 100644 --- a/R/subset_variant_table.R +++ b/R/subset_variant_table.R @@ -9,7 +9,7 @@ #' #' @return A tibble containing the subsetted variant table. 
#' -#' @import tidyverse +#' @import dplyr #' #' @examples #' subset_variant_table(mytibble, c("cluster1", "cluster2"), c("variant1", "variant2"), c("sample1", "sample2")) diff --git a/R/test_clustering_thresholds.R b/R/test_clustering_thresholds.R index 56ddefc5959abb20a0b1e98fdd1184eb488b87b1..766885c90090854105eaeaadae5d32f846143a00 100644 --- a/R/test_clustering_thresholds.R +++ b/R/test_clustering_thresholds.R @@ -11,7 +11,8 @@ #' @return A list of clustering results, where each element in the list corresponds to a specific threshold value. #' #' @import DECIPHER -#' @import tidyverse +#' @import dplyr tibble tidyr +#' @importFrom Biostrings DNAStringSet #' #' @examples #' # Create a DNAStringSet object diff --git a/R/translate_and_count_stops.R b/R/translate_and_count_stops.R index cd6de20715a32bd6e12727d3048ff7e1b02ffe74..f9c616fb954894b5475c3d833170c64529071e14 100644 --- a/R/translate_and_count_stops.R +++ b/R/translate_and_count_stops.R @@ -17,8 +17,8 @@ #' seqs <- DNAStringSet("ATGTCGATAGCCTAGGTCAGTAA") #' translate_and_count_stops(seqs) #' -#' @import Biostrings -#' @import tidyverse +#' @import dplyr stringr tidyr +#' @importFrom Biostrings translate #' @export translate_and_count_stops <- function(seqs = DNAStringSet) { # Make reading frames and translate to protein diff --git a/R/variant_classifier.R b/R/variant_classifier.R index c0e2353d35123988df97954818176e1a5cdc843d..dda813384ec0a75c67953c0619b5517dbbc35ea4 100644 --- a/R/variant_classifier.R +++ b/R/variant_classifier.R @@ -7,7 +7,7 @@ #' @param reference_informed Logical value indicating whether the classification should be reference informed (default: FALSE) #' #' @return A modified master table with variant classifications -#' @import tidyverse +#' @import dplyr purrr stringr tibble tidyr #' @export variant_classifier <- function( @@ -22,20 +22,20 @@ variant_classifier <- function( mutate(sample_count_sum = sum(asv_counts)) %>% ungroup() %>% mutate(rel_abundance = 
asv_counts/sample_count_sum) %>% - filter(sample_count_sum > 0) + dplyr::filter(sample_count_sum > 0) # Create a table for clustered sequences and filter out clusters with only one sequence clustab_tbl <- cluster_tbl_named(clustered_sequences) %>% left_join(tibble(seqnames = unlist(map(clustered_seqs, names)), seqs = as.character(unlist(clustered_sequences))), by = 'seqnames') %>% - filter(clus_size > 1) + dplyr::filter(clus_size > 1) # If reference informed, add reference info and name variants if(reference_informed){ # Join the clustered sequences table with the long seqtab table by sequence name master_tbl <- left_join(clustab_tbl, seqtab_tbl_long, by = 'seqs') %>% - filter(seqnames != clus_name) + dplyr::filter(seqnames != clus_name) # Calculate the similarity of each cluster to the reference sequence sim_to_ref_tbl <- map(clustered_seqs, similiarity_to_reference, @@ -83,7 +83,7 @@ variant_classifier <- function( # Call variant types using top 2 most abundant sequences per sample and cluster based on relative abundance variant_types <- master_tbl %>% - filter(rel_abundance > 0) %>% + dplyr::filter(rel_abundance > 0) %>% group_by(sample, clus_name) %>% top_n(2, rel_abundance) %>% ungroup() %>% @@ -114,7 +114,7 @@ variant_classifier <- function( mutate(rel_var_abundance = rel_abundance/cluster_sum_rel_abu) %>% top_n(2, rel_abundance) %>% ungroup() %>% - filter(rel_abundance > 0) + dplyr::filter(rel_abundance > 0) aa_info <- find_longest_reading_frames(unlist(unname(clustered_sequences))) %>% mutate(longest_aa_seq = max_width/3) %>% diff --git a/R/veganify_asvcounts.R b/R/veganify_asvcounts.R index 5d513fdbd9162d9264071418141f1855c76d51d4..5d72327c65fc9462921169bd330382064300af06 100644 --- a/R/veganify_asvcounts.R +++ b/R/veganify_asvcounts.R @@ -5,7 +5,7 @@ #' @param cleaned_seqtab A cleaned sequence table. #' #' @return A vegan formatted count matrix. 
-#' @import tidyverse +#' @import dplyr #' @export veganify_asvcounts <- function(cleaned_seqtab = my_cleaned_seqtab){ out <- cleaned_seqtab %>% diff --git a/R/veganify_generic_wide_tbl.R b/R/veganify_generic_wide_tbl.R index 151c94fe5792931a7eb512a093b071be934372bc..b7a844a224136b05c19c0465816f414c3e207202 100644 --- a/R/veganify_generic_wide_tbl.R +++ b/R/veganify_generic_wide_tbl.R @@ -5,7 +5,7 @@ #' @param data A wide tibble with rownames in the first column and input data for vegdist in all other columns. #' #' @return A data.frame that is compatible with the vegdist function. -#' @import tidyverse +#' @import dplyr tibble tidyr #' #' @examples #' library(tibble) diff --git a/man/alignment_based_distance_matrix.Rd b/man/alignment_based_distance_matrix.Rd index 6e141714e27a6c069bf0798610430e08f2b5bd7d..c5dd8fb444a3a9596b22df82ee57c4c1e05c4a16 100644 --- a/man/alignment_based_distance_matrix.Rd +++ b/man/alignment_based_distance_matrix.Rd @@ -18,8 +18,8 @@ A matrix object containing the distance scores calculated based on the alignment This function uses the DECIPHER package to align a set of DNA sequences and generate a distance matrix based on the alignments. The alignments can be performed using multiple processors for faster execution. This function requires the DECIPHER package to be installed. } \examples{ -data(smallexample) -dna_sequences <- DNAStringSet(smallexample) +dna_sequences <- Biostrings::DNAStringSet(c("AGACCACTCC", "GCATGTAGCT", +"GTGGTACGGC", "TCAAACGGCT")) alignment_based_distance_matrix(dna_sequences, ncores = 2) } diff --git a/man/kmer_based_distance_matrix.Rd b/man/kmer_based_distance_matrix.Rd index 111084a0b4ee09fca33a0e3c236207d60d500ed6..d0454cb1b382092d7353c1d0536414f31286b884 100644 --- a/man/kmer_based_distance_matrix.Rd +++ b/man/kmer_based_distance_matrix.Rd @@ -13,5 +13,5 @@ kmer_based_distance_matrix(seqs) A distance matrix in bin format.
} \description{ -This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from XXXX and returns it as a matrix. +This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from the "kmer" package and returns it as a matrix. } diff --git a/man/meshclustR.Rd b/man/meshclustR.Rd index f4d6efe80fcf76e4976568649006593f3fc8a8a1..5c459fcbaba1594671759a4654b01ae89cfac25c 100644 --- a/man/meshclustR.Rd +++ b/man/meshclustR.Rd @@ -21,7 +21,7 @@ A data frame with information regarding the clustering analysis. \description{ This function writes a temporary file to perform a clustering analysis on a set of DNA sequences. The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool. -Meshclust has to be installed and executlable via system2() to run this function. +Meshclust has to be installed and executable via system2() to run this function. This is difficult to set up on Windows; the function is intended for use on Linux. } \details{ James, Benjamin T. et al. (2018),