Better dependency handling

de14a9b4 · Simeon · a71f6342 · de14a9b4 · de14a9b4 · de14a9b4
Commit de14a9b4 authored 1 year ago by Simeon
--- a/R/subset_by_clusters.R
+++ b/R/subset_by_clusters.R
@@ -6,7 +6,8 @@
 #' @param cluster_tbl A data frame or tibble containing the cluster assignments. It should have two columns, 'cluster' and 'seqnames', where 'cluster' contains the cluster numbers and 'seqnames' contains the corresponding sequence names.
 #' @param save_to_file Logical value indicating whether to save the resulting sequences to separate files for each cluster.
 #' @return A list of sequence objects, where each list element corresponds to a cluster and contains the sequences in that cluster
-#' @import tidyverse
+#' @import dplyr purrr
+#' @importFrom Biostrings writeXStringSet
 #' @export
 subset_by_clusters <- function(seqs, cluster_tbl, save_to_file = TRUE){
  cluster_seqs <- cluster_tbl %>%

--- a/R/subset_variant_table.R
+++ b/R/subset_variant_table.R
@@ -9,7 +9,7 @@
 #'
 #' @return A tibble containing the subsetted variant table.
 #'
-#' @import tidyverse
+#' @import dplyr
 #'
 #' @examples
 #' subset_variant_table(mytibble, c("cluster1", "cluster2"), c("variant1", "variant2"), c("sample1", "sample2"))

--- a/R/test_clustering_thresholds.R
+++ b/R/test_clustering_thresholds.R
@@ -11,7 +11,8 @@
 #' @return A list of clustering results, where each element in the list corresponds to a specific threshold value.
 #'
 #' @import DECIPHER
-#' @import tidyverse
+#' @import dplyr tibble tidyr
+#' @importFrom Biostrings DNAStringSet
 #'
 #' @examples
 #' # Create a DNAStringSet object

--- a/R/translate_and_count_stops.R
+++ b/R/translate_and_count_stops.R
@@ -17,8 +17,8 @@
 #' seqs <- DNAStringSet("ATGTCGATAGCCTAGGTCAGTAA")
 #' translate_and_count_stops(seqs)
 #'
-#' @import Biostrings
-#' @import tidyverse
+#' @import dplyr stringr tidyr
+#' @importFrom Biostrings translate
 #' @export
 translate_and_count_stops <- function(seqs = DNAStringSet) {
  # Make reading frames and translate to protein

--- a/R/variant_classifier.R
+++ b/R/variant_classifier.R
@@ -7,7 +7,7 @@
 #' @param reference_informed Logical value indicating whether the classification should be reference informed (default: FALSE)
 #'
 #' @return A modified master table with variant classifications
-#' @import tidyverse
+#' @import dplyr purrr stringr tibble tidyr
 #' @export

 variant_classifier <- function(
@@ -22,20 +22,20 @@ variant_classifier <- function(
    mutate(sample_count_sum = sum(asv_counts)) %>%
    ungroup() %>%
    mutate(rel_abundance = asv_counts/sample_count_sum) %>%
-    filter(sample_count_sum > 0)
+    dplyr::filter(sample_count_sum > 0)

  # Create a table for clustered sequences and filter out clusters with only one sequence
  clustab_tbl <- cluster_tbl_named(clustered_sequences) %>%
    left_join(tibble(seqnames = unlist(map(clustered_seqs, names)),
                     seqs = as.character(unlist(clustered_sequences))),
              by = 'seqnames') %>%
-    filter(clus_size > 1)
+    dplyr::filter(clus_size > 1)

  # If reference informed, add reference info and name variants
  if(reference_informed){
    # Join the clustered sequences table with the long seqtab table by sequence name
    master_tbl <- left_join(clustab_tbl, seqtab_tbl_long, by = 'seqs') %>%
-      filter(seqnames != clus_name)
+      dplyr::filter(seqnames != clus_name)

    # Calculate the similarity of each cluster to the reference sequence
    sim_to_ref_tbl <- map(clustered_seqs, similiarity_to_reference,
@@ -83,7 +83,7 @@ variant_classifier <- function(

  # Call variant types using top 2 most abundant sequences per sample and cluster based on relative abundance
  variant_types <- master_tbl %>%
-    filter(rel_abundance > 0) %>%
+    dplyr::filter(rel_abundance > 0) %>%
    group_by(sample, clus_name) %>%
    top_n(2, rel_abundance) %>%
    ungroup() %>%
@@ -114,7 +114,7 @@ variant_classifier <- function(
    mutate(rel_var_abundance = rel_abundance/cluster_sum_rel_abu) %>%
    top_n(2, rel_abundance) %>%
    ungroup() %>%
-    filter(rel_abundance > 0)
+    dplyr::filter(rel_abundance > 0)

  aa_info <- find_longest_reading_frames(unlist(unname(clustered_sequences))) %>%
    mutate(longest_aa_seq = max_width/3) %>%

--- a/R/veganify_asvcounts.R
+++ b/R/veganify_asvcounts.R
@@ -5,7 +5,7 @@
 #' @param cleaned_seqtab A cleaned sequence table.
 #'
 #' @return A vegan formatted count matrix.
-#' @import tidyverse
+#' @import dplyr
 #' @export
 veganify_asvcounts <- function(cleaned_seqtab = my_cleaned_seqtab){
  out <- cleaned_seqtab %>%

--- a/R/veganify_generic_wide_tbl.R
+++ b/R/veganify_generic_wide_tbl.R
@@ -5,7 +5,7 @@
 #' @param data A wide tibble with rownames in the first column and input data for vegdist in all other columns.
 #'
 #' @return A data.frame that is compatible with the vegdist function.
-#' @import tidyverse
+#' @import dplyr tibble tidyr
 #'
 #' @examples
 #' library(tibble)

--- a/man/alignment_based_distance_matrix.Rd
+++ b/man/alignment_based_distance_matrix.Rd
@@ -18,8 +18,8 @@ A matrix object containing the distance scores calculated based on the alignment
 This function uses the DECIPHER package to align a set of DNA sequences and generate a distance matrix based on the alignments. The alignments can be performed using multiple processors for faster execution. This function requires the DECIPHER package to be installed.
 }
 \examples{
-data(smallexample)
-dna_sequences <- DNAStringSet(smallexample)
+dna_sequences <- DNAStringSet(c("AGACCACTCC", "GCATGTAGCT",
+"GTGGTACGGC", "TCAAACGGCT"))
 alignment_based_distance_matrix(dna_sequences, ncores = 2)

 }
--- a/man/kmer_based_distance_matrix.Rd
+++ b/man/kmer_based_distance_matrix.Rd
@@ -13,5 +13,5 @@ kmer_based_distance_matrix(seqs)
 A distance matrix in bin format.
 }
 \description{
-This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from XXXX and returns it as a matrix.
+This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from the "kmer" package and returns it as a matrix.
 }
--- a/man/meshclustR.Rd
+++ b/man/meshclustR.Rd
@@ -21,7 +21,7 @@ A data frame with information regarding the clustering analysis.
 \description{
 This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
 The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
-Meshclust has to be installed and executlable via system2() to run this function.
+Meshclust has to be installed and executable via system2() to run this function. This is hard to achieve on Windows; the function is intended for use on Linux.
 }
 \details{
 James, Benjamin T. et al. (2018),