Better dependency handling

de14a9b4 · Simeon · a71f6342 · de14a9b4 · de14a9b4 · de14a9b4
Commit de14a9b4 authored 1 year ago by Simeon
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -24,12 +24,22 @@ Imports:
    Biostrings,
    cowplot,
    DECIPHER,
+    dplyr,
+    forcats,
+    GenomicRanges,
+    kmer,
    ggdendro,
+    ggplot2,
    ggtree,
-    tidyverse,
+    magrittr,
+    phangorn,
+    purrr,
+    readr,
+    stringr,
+    tibble,
+    tidyr,
    vegan,
-    viridis,
-    phangorn
+    viridisLite
 License: use_gpl_license(version = 3, include_future = TRUE)
 Encoding: UTF-8
 LazyData: true

--- a/NAMESPACE
+++ b/NAMESPACE
@@ -45,14 +45,35 @@ export(translate_and_count_stops)
 export(variant_classifier)
 export(veganify_asvcounts)
 export(veganify_generic_wide_tbl)
-import(Biostrings)
 import(DECIPHER)
-import(GenomicRanges)
-import(ape)
-import(cowplot)
 import(dplyr)
-import(ggdendro)
-import(ggtree)
-import(phangorn)
-import(tidyverse)
-import(viridis)
+import(forcats)
+import(ggplot2)
+import(purrr)
+import(readr)
+import(stringr)
+import(tibble)
+import(tidyr)
+importFrom(Biostrings,AAStringSet)
+importFrom(Biostrings,DNAStringSet)
+importFrom(Biostrings,DNAStringSetList)
+importFrom(Biostrings,translate)
+importFrom(Biostrings,writeXStringSet)
+importFrom(DECIPHER,AlignSeqs)
+importFrom(DECIPHER,DistanceMatrix)
+importFrom(GenomicRanges,GRanges)
+importFrom(IRanges,ranges)
+importFrom(ape,as.DNAbin)
+importFrom(cowplot,get_legend)
+importFrom(cowplot,plot_grid)
+importFrom(ggdendro,dendro_data)
+importFrom(ggdendro,segment)
+importFrom(ggtree,geom_tiplab)
+importFrom(ggtree,get_taxa_name)
+importFrom(ggtree,ggtree)
+importFrom(kmer,kdistance)
+importFrom(magrittr,"%>%")
+importFrom(phangorn,dist.ml)
+importFrom(phangorn,phyDat)
+importFrom(phangorn,upgma)
+importFrom(viridisLite,viridis)
--- a/R/align_and_generate_upgma.R
+++ b/R/align_and_generate_upgma.R
@@ -5,14 +5,14 @@
 #' @param cluster The name of the cluster to generate the UPGMA tree from
 #' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences
 #' @return A UPGMA tree object
-#' @import DECIPHER
-#' @import phangorn
-#' @import tidyverse
+#' @importFrom DECIPHER AlignSeqs
+#' @importFrom phangorn dist.ml upgma phyDat
+#' @importFrom magrittr %>%
 #' @export
 align_and_generate_upgma <- function(cluster, sequence_list) {

  # Use AlignSeqs function to align the sequences in the given cluster
-  alig <- AlignSeqs(sequence_list[[cluster]], verbose = FALSE)
+  alig <- DECIPHER::AlignSeqs(sequence_list[[cluster]], verbose = FALSE)

  # Convert the aligned sequences to a matrix of DNA data and calculate the distance matrix using maximum likelihood
  # Then use the upgma function to generate the tree

--- a/R/alignment_based_distance_matrix.R
+++ b/R/alignment_based_distance_matrix.R
@@ -7,19 +7,18 @@
 #'
 #' @return A matrix object containing the distance scores calculated based on the alignments of the input sequences.
 #'
-#' @import DECIPHER
-#' @import Biostrings
+#' @importFrom DECIPHER AlignSeqs DistanceMatrix
 #'
 #' @examples
-#' data(smallexample)
-#' dna_sequences <- DNAStringSet(smallexample)
+#' dna_sequences <- DNAStringSet(c("AGACCACTCC", "GCATGTAGCT",
+#'                                 "GTGGTACGGC", "TCAAACGGCT"))
 #' alignment_based_distance_matrix(dna_sequences, ncores = 2)
 #'
 #' @export
 alignment_based_distance_matrix <- function(seqs = DNAStringSet,
                                             ncores = 1) {
  # Align the sequences using the specified number of processors
-  seqs_alig <- AlignSeqs(seqs, processors = ncores, verbose = FALSE)
+  seqs_alig <- DECIPHER::AlignSeqs(seqs, processors = ncores, verbose = FALSE)
  # Generate the distance matrix from the aligned sequences using the specified number of processors
-  DistanceMatrix(seqs_alig, verbose = FALSE, processors = ncores)
+  DECIPHER::DistanceMatrix(seqs_alig, verbose = FALSE, processors = ncores)
 }
--- a/R/calc_asv_nmds.R
+++ b/R/calc_asv_nmds.R
@@ -9,7 +9,7 @@
 #' @param ... Additional arguments passed to the `metaMDS` function from the vegan package
 #'
 #' @return A list object with results including NMDS results and NMDS tibble
-#' @import tidyverse
+#' @import dplyr tibble tidyr
 #' @export
 #'
 #' @examples

--- a/R/clean_seqtab.R
+++ b/R/clean_seqtab.R
@@ -8,7 +8,7 @@
 #' @param output A logical value indicating whether to output a CSV file.
 #' @return A tibble containing the cleaned sequence table.
 #'
-#' @import tidyverse
+#' @import dplyr readr tibble tidyr
 #'
 #' @examples
 #' clean_seqtab()

--- a/R/cluster_longest_reading_frames.R
+++ b/R/cluster_longest_reading_frames.R
@@ -12,8 +12,9 @@
 #' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9))
 #' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl)
 #'
-#' @import Biostrings
-#' @import tidyverse
+#' @import dplyr purrr tibble tidyr
+#' @importFrom Biostrings DNAStringSetList
+#'
 cluster_longest_reading_frames <- function(
  clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered
  reading_frame_tbl = tbl) { # A variable that holds a table of reading frames

--- a/R/cluster_tbl_named.R
+++ b/R/cluster_tbl_named.R
@@ -11,8 +11,7 @@
 #' @return A tibble that contains the cluster number, sequence name, cluster
 #'   name, sequence number within the cluster, and cluster size.
 #'
-#' @import tidyverse
-#' @import Biostrings
+#' @import dplyr purrr tibble tidyr
 #' @export
 cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){
  # First: get names of each cluster

--- a/R/combine_cluster_plots_and_save.R
+++ b/R/combine_cluster_plots_and_save.R
@@ -11,7 +11,7 @@
 #' @param w The width of the plot. Default is 'cm_width'.
 #' @param h The height of the plot. Default is 'cm_height'.
 #' @return combined plot
-#' @import tidyverse
+#' @import ggplot2
 #' @export
 combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path,
                                           w = cm_width, h = cm_height) {

--- a/R/count_clusters.R
+++ b/R/count_clusters.R
@@ -18,7 +18,7 @@
 #' )
 #' count_clusters(clus_tbl_list)
 #'
-#' @import tidyverse
+#' @import dplyr purrr tidyr
 #' @export
 count_clusters <- function(clus_tbl_list){


--- a/R/define_plateau.R
+++ b/R/define_plateau.R
@@ -12,7 +12,7 @@
 #' @examples
 #' define_plateau(cluster_counts = cluster_counts_df)
 #'
-#' @import tidyverse
+#' @import dplyr
 #' @export
 define_plateau <- function(cluster_counts){
  # "cluster_counts" is a tibble of cluster counts passed as a parameter to the function

--- a/R/dendrogram_hclust.R
+++ b/R/dendrogram_hclust.R
@@ -8,8 +8,8 @@
 #'
 #' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram.
 #'
-#' @import ggdendro
-#' @import tidyverse
+#' @importFrom ggdendro dendro_data
+#' @importFrom magrittr %>%
 #'
 #' @examples
 #' # Generate dendrogram with default parameters
@@ -21,7 +21,6 @@
 #'
 #' @export
 dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) {
-  require(ggdendro)
  set.seed(seed)

  # make hierarchical cluster from vegdist matrix and extract data for plotting

--- a/R/export_longest_reading_frame.R
+++ b/R/export_longest_reading_frame.R
@@ -12,8 +12,8 @@
 #' @examples
 #' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE)
 #'
-#' @import Biostrings
-#' @import tidyverse
+#' @importFrom Biostrings AAStringSet writeXStringSet
+#' @import dplyr
 #'
 #' @export
 export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table

--- a/R/find_contiguous_multi_repeats.R
+++ b/R/find_contiguous_multi_repeats.R
@@ -23,8 +23,8 @@
 #'
 #' # Expected output: c(2, 1)
 #'
-#' @import tidyverse
-#' @import Biostrings
+#' @import stringr
+#' @importFrom Biostrings DNAStringSet
 #'
 find_contiguous_multi_repeats <- function(sequences = DNAStringSet,
                                          repeat_sequence = 'string',

--- a/R/find_longest_hrf.R
+++ b/R/find_longest_hrf.R
@@ -10,7 +10,7 @@
 #' @examples
 #' find_longest_hrf(seqs)
 #'
-#' @import tidyverse
+#' @import dplyr
 #'
 #' @export
 find_longest_hrf <- function(seqs = DNAStringSet){

--- a/R/find_longest_orf.R
+++ b/R/find_longest_orf.R
@@ -6,10 +6,10 @@
 #' @param seqs A DNAStringSet object containing the DNA sequences to search for ORFs.
 #'
 #' @return A tibble containing the start and end positions, strand, and length of the longest ORF in each sequence.
-#'
-#' @import Biostrings
-#' @import GenomicRanges
-#' @import tidyverse
+#' @importFrom Biostrings DNAStringSet
+#' @importFrom GenomicRanges GRanges
+#' @importFrom IRanges ranges
+#' @import dplyr tibble tidyr
 #'
 #' @examples
 #' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC"))

--- a/R/find_longest_reading_frames.R
+++ b/R/find_longest_reading_frames.R
@@ -10,7 +10,7 @@
 #' @return A data frame containing the longest reading frames for each sequence.
 #' The data frame includes the sequence names, reading frame, and the width of the reading frame.
 #'
-#' @import tidyverse
+#' @import dplyr tidyr
 #'
 #' @export
 find_longest_reading_frames <- function(seqs = myDNAStringSet){

--- a/R/find_repeat_positions.R
+++ b/R/find_repeat_positions.R
@@ -6,7 +6,8 @@
 #' @param repeat_sequence A string specifying the repeat sequence to search for.
 #'
 #' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity.
-#' @import tidyverse
+#' @import dplyr stringr tibble tidyr
+#' @importFrom Biostrings DNAStringSet
 #'
 #' @examples
 #' sequences <- DNAStringSet(c("AGTCAGT",

--- a/R/kmer_based_distance_matrix.R
+++ b/R/kmer_based_distance_matrix.R
 #' Convert to bin format and get distance matrix using kmers
 #'
-#' This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from XXXX and returns it as a matrix.
+#' This function takes a DNAStringSet and converts it to a DNAbin object using the ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from the "kmer" package and returns it as a matrix.
 #'
 #' @param seqs DNAStringSet containing the DNA sequences
 #'
 #' @return A distance matrix in bin format.
 #'
-#' @import ape
-#' @import Biostrings
+#' @importFrom ape as.DNAbin
+#' @importFrom kmer kdistance
 #'
 #' @export
 kmer_based_distance_matrix <- function (seqs) {
  seqbins <- ape::as.DNAbin(seqs)
-  as.matrix(kdistance(seqbins))
+  as.matrix(kmer::kdistance(seqbins))
 }
--- a/R/meshclustR.R
+++ b/R/meshclustR.R
@@ -2,7 +2,7 @@
 #'
 #' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
 #' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
-#' Meshclust has to be installed and executlable via system2() to run this function.
+#' Meshclust has to be installed and executable via system2() to run this function. This is hard to achieve on Windows; the function is intended for use on Linux.
 #'
 #' James, Benjamin T. et al. (2018),
 #' MeShClust: an intelligent tool for clustering DNA sequences.
@@ -17,9 +17,8 @@
 #'
 #' @examples
 #' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path)
-#' @import Biostrings
-#' @import tidyverse
-#' @import dplyr
+#' @importFrom Biostrings writeXStringSet
+#' @import dplyr readr stringr
 #'
 #' @export
 meshclustR <- function(seqs = MyDNAStringSet,
@@ -39,7 +38,7 @@ meshclustR <- function(seqs = MyDNAStringSet,
                                  '-o', out_file))

  #read output file and parse
-  stable_cluster <- read_delim(out_file, delim = '\t',
+  stable_cluster <- readr::read_delim(out_file, delim = '\t',
                               col_names = c('cluster', 'seqnames',
                                             'identity_with_center', 'cluster_class'),
                               col_types = 'fcdc') %>%