Skip to content
Snippets Groups Projects
Commit de14a9b4 authored by Simeon's avatar Simeon
Browse files

Better dependency handling

parent a71f6342
Branches
No related tags found
No related merge requests found
Showing
with 83 additions and 54 deletions
...@@ -24,12 +24,22 @@ Imports: ...@@ -24,12 +24,22 @@ Imports:
Biostrings, Biostrings,
cowplot, cowplot,
DECIPHER, DECIPHER,
dplyr,
forcats,
GenomicRanges,
kmer,
ggdendro, ggdendro,
ggplot2,
ggtree, ggtree,
tidyverse, magrittr,
phangorn,
purrr,
readr,
stringr,
tibble,
tidyr,
vegan, vegan,
viridis, viridisLite
phangorn
License: use_gpl_license(version = 3, include_future = TRUE) License: use_gpl_license(version = 3, include_future = TRUE)
Encoding: UTF-8 Encoding: UTF-8
LazyData: true LazyData: true
......
...@@ -45,14 +45,35 @@ export(translate_and_count_stops) ...@@ -45,14 +45,35 @@ export(translate_and_count_stops)
export(variant_classifier) export(variant_classifier)
export(veganify_asvcounts) export(veganify_asvcounts)
export(veganify_generic_wide_tbl) export(veganify_generic_wide_tbl)
import(Biostrings)
import(DECIPHER) import(DECIPHER)
import(GenomicRanges)
import(ape)
import(cowplot)
import(dplyr) import(dplyr)
import(ggdendro) import(forcats)
import(ggtree) import(ggplot2)
import(phangorn) import(purrr)
import(tidyverse) import(readr)
import(viridis) import(stringr)
import(tibble)
import(tidyr)
importFrom(Biostrings,AAStringSet)
importFrom(Biostrings,DNAStringSet)
importFrom(Biostrings,DNAStringSetList)
importFrom(Biostrings,translate)
importFrom(Biostrings,writeXStringSet)
importFrom(DECIPHER,AlignSeqs)
importFrom(DECIPHER,DistanceMatrix)
importFrom(GenomicRanges,GRanges)
importFrom(IRanges,ranges)
importFrom(ape,as.DNAbin)
importFrom(cowplot,get_legend)
importFrom(cowplot,plot_grid)
importFrom(ggdendro,dendro_data)
importFrom(ggdendro,segment)
importFrom(ggtree,geom_tiplab)
importFrom(ggtree,get_taxa_name)
importFrom(ggtree,ggtree)
importFrom(kmer,kdistance)
importFrom(magrittr,"%>%")
importFrom(phangorn,dist.ml)
importFrom(phangorn,phyDat)
importFrom(phangorn,upgma)
importFrom(viridisLite,viridis)
...@@ -5,14 +5,14 @@ ...@@ -5,14 +5,14 @@
#' @param cluster The name of the cluster to generate the UPGMA tree from #' @param cluster The name of the cluster to generate the UPGMA tree from
#' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences #' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences
#' @return A UPGMA tree object #' @return A UPGMA tree object
#' @import DECIPHER #' @importFrom DECIPHER AlignSeqs
#' @import phangorn #' @importFrom phangorn dist.ml upgma phyDat
#' @import tidyverse #' @importFrom magrittr %>%
#' @export #' @export
align_and_generate_upgma <- function(cluster, sequence_list) { align_and_generate_upgma <- function(cluster, sequence_list) {
# Use AlignSeqs function to align the sequences in the given cluster # Use AlignSeqs function to align the sequences in the given cluster
alig <- AlignSeqs(sequence_list[[cluster]], verbose = FALSE) alig <- DECIPHER::AlignSeqs(sequence_list[[cluster]], verbose = FALSE)
# Convert the aligned sequences to a matrix of DNA data and calculate the distance matrix using maximum likelihood # Convert the aligned sequences to a matrix of DNA data and calculate the distance matrix using maximum likelihood
# Then use the upgma function to generate the tree # Then use the upgma function to generate the tree
......
...@@ -7,19 +7,18 @@ ...@@ -7,19 +7,18 @@
#' #'
#' @return A matrix object containing the distance scores calculated based on the alignments of the input sequences. #' @return A matrix object containing the distance scores calculated based on the alignments of the input sequences.
#' #'
#' @import DECIPHER #' @importFrom DECIPHER AlignSeqs DistanceMatrix
#' @import Biostrings
#' #'
#' @examples #' @examples
#' data(smallexample) #' dna_sequences <- DNAStringSet(c("AGACCACTCC", "GCATGTAGCT",
#' dna_sequences <- DNAStringSet(smallexample) #' "GTGGTACGGC", "TCAAACGGCT"))
#' alignment_based_distance_matrix(dna_sequences, ncores = 2) #' alignment_based_distance_matrix(dna_sequences, ncores = 2)
#' #'
#' @export #' @export
alignment_based_distance_matrix <- function(seqs = DNAStringSet, alignment_based_distance_matrix <- function(seqs = DNAStringSet,
ncores = 1) { ncores = 1) {
# Align the sequences using the specified number of processors # Align the sequences using the specified number of processors
seqs_alig <- AlignSeqs(seqs, processors = ncores, verbose = FALSE) seqs_alig <- DECIPHER::AlignSeqs(seqs, processors = ncores, verbose = FALSE)
# Generate the distance matrix from the aligned sequences using the specified number of processors # Generate the distance matrix from the aligned sequences using the specified number of processors
DistanceMatrix(seqs_alig, verbose = FALSE, processors = ncores) DECIPHER::DistanceMatrix(seqs_alig, verbose = FALSE, processors = ncores)
} }
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
#' @param ... Additional arguments passed to the `metaMDS` function from the vegan package #' @param ... Additional arguments passed to the `metaMDS` function from the vegan package
#' #'
#' @return A list object with results including NMDS results and NMDS tibble #' @return A list object with results including NMDS results and NMDS tibble
#' @import tidyverse #' @import dplyr tibble tidyr
#' @export #' @export
#' #'
#' @examples #' @examples
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#' @param output A logical value indicating whether to output a CSV file. #' @param output A logical value indicating whether to output a CSV file.
#' @return A tibble containing the cleaned sequence table. #' @return A tibble containing the cleaned sequence table.
#' #'
#' @import tidyverse #' @import dplyr readr tibble tidyr
#' #'
#' @examples #' @examples
#' clean_seqtab() #' clean_seqtab()
......
...@@ -12,8 +12,9 @@ ...@@ -12,8 +12,9 @@
#' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9)) #' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9))
#' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl) #' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl)
#' #'
#' @import Biostrings #' @import dplyr purrr tibble tidyr
#' @import tidyverse #' @importFrom Biostrings DNAStringSetList
#'
cluster_longest_reading_frames <- function( cluster_longest_reading_frames <- function(
clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered
reading_frame_tbl = tbl) { # A variable that holds a table of reading frames reading_frame_tbl = tbl) { # A variable that holds a table of reading frames
......
...@@ -11,8 +11,7 @@ ...@@ -11,8 +11,7 @@
#' @return A tibble that contains the cluster number, sequence name, cluster #' @return A tibble that contains the cluster number, sequence name, cluster
#' name, sequence number within the cluster, and cluster size. #' name, sequence number within the cluster, and cluster size.
#' #'
#' @import tidyverse #' @import dplyr purrr tibble tidyr
#' @import Biostrings
#' @export #' @export
cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){ cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){
# First: get names of each cluster # First: get names of each cluster
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#' @param w The width of the plot. Default is 'cm_width'. #' @param w The width of the plot. Default is 'cm_width'.
#' @param h The height of the plot. Default is 'cm_height'. #' @param h The height of the plot. Default is 'cm_height'.
#' @return combined plot #' @return combined plot
#' @import tidyverse #' @import ggplot2
#' @export #' @export
combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path, combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path,
w = cm_width, h = cm_height) { w = cm_width, h = cm_height) {
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#' ) #' )
#' count_clusters(clus_tbl_list) #' count_clusters(clus_tbl_list)
#' #'
#' @import tidyverse #' @import dplyr purrr tidyr
#' @export #' @export
count_clusters <- function(clus_tbl_list){ count_clusters <- function(clus_tbl_list){
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#' @examples #' @examples
#' define_plateau(cluster_counts = cluster_counts_df) #' define_plateau(cluster_counts = cluster_counts_df)
#' #'
#' @import tidyverse #' @import dplyr
#' @export #' @export
define_plateau <- function(cluster_counts){ define_plateau <- function(cluster_counts){
# "cluster_counts" is a tibble of cluster counts passed as a parameter to the function # "cluster_counts" is a tibble of cluster counts passed as a parameter to the function
......
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
#' #'
#' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram. #' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram.
#' #'
#' @import ggdendro #' @importFrom ggdendro dendro_data
#' @import tidyverse #' @importFrom magrittr %>%
#' #'
#' @examples #' @examples
#' # Generate dendrogram with default parameters #' # Generate dendrogram with default parameters
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#' #'
#' @export #' @export
dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) { dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) {
require(ggdendro)
set.seed(seed) set.seed(seed)
# make hierarchical cluster from vegdist matrix and extract data for plotting # make hierarchical cluster from vegdist matrix and extract data for plotting
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
#' @examples #' @examples
#' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE) #' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE)
#' #'
#' @import Biostrings #' @importFrom Biostrings AAStringSet writeXStringSet
#' @import tidyverse #' @import dplyr
#' #'
#' @export #' @export
export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table
......
...@@ -23,8 +23,8 @@ ...@@ -23,8 +23,8 @@
#' #'
#' # Expected output: c(2, 1) #' # Expected output: c(2, 1)
#' #'
#' @import tidyverse #' @import stringr
#' @import Biostrings #' @importFrom Biostrings DNAStringSet
#' #'
find_contiguous_multi_repeats <- function(sequences = DNAStringSet, find_contiguous_multi_repeats <- function(sequences = DNAStringSet,
repeat_sequence = 'string', repeat_sequence = 'string',
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#' @examples #' @examples
#' find_longest_hrf(seqs) #' find_longest_hrf(seqs)
#' #'
#' @import tidyverse #' @import dplyr
#' #'
#' @export #' @export
find_longest_hrf <- function(seqs = DNAStringSet){ find_longest_hrf <- function(seqs = DNAStringSet){
......
...@@ -6,10 +6,10 @@ ...@@ -6,10 +6,10 @@
#' @param seqs A DNAStringSet object containing the DNA sequences to search for ORFs. #' @param seqs A DNAStringSet object containing the DNA sequences to search for ORFs.
#' #'
#' @return A tibble containing the start and end positions, strand, and length of the longest ORF in each sequence. #' @return A tibble containing the start and end positions, strand, and length of the longest ORF in each sequence.
#' #' @importFrom Biostrings DNAStringSet
#' @import Biostrings #' @importFrom GenomicRanges GRanges
#' @import GenomicRanges #' @importFrom IRanges ranges
#' @import tidyverse #' @import dplyr tibble tidyr
#' #'
#' @examples #' @examples
#' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC")) #' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC"))
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#' @return A data frame containing the longest reading frames for each sequence. #' @return A data frame containing the longest reading frames for each sequence.
#' The data frame includes the sequence names, reading frame, and the width of the reading frame. #' The data frame includes the sequence names, reading frame, and the width of the reading frame.
#' #'
#' @import tidyverse #' @import dplyr tidyr
#' #'
#' @export #' @export
find_longest_reading_frames <- function(seqs = myDNAStringSet){ find_longest_reading_frames <- function(seqs = myDNAStringSet){
......
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
#' @param repeat_sequence A string specifying the repeat sequence to search for. #' @param repeat_sequence A string specifying the repeat sequence to search for.
#' #'
#' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity. #' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity.
#' @import tidyverse #' @import dplyr stringr tibble tidyr
#' @importFrom Biostrings DNAStringSet
#' #'
#' @examples #' @examples
#' sequences <- DNAStringSet(c("AGTCAGT", #' sequences <- DNAStringSet(c("AGTCAGT",
......
#' Convert to bin format and get distance matrix using kmers #' Convert to bin format and get distance matrix using kmers
#' #'
#' This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from XXXX and returns it as a matrix. #' This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from the "kmer" package and returns it as a matrix.
#' #'
#' @param seqs DNAStringSet containing the DNA sequences #' @param seqs DNAStringSet containing the DNA sequences
#' #'
#' @return A distance matrix in bin format. #' @return A distance matrix in bin format.
#' #'
#' @import ape #' @importFrom ape as.DNAbin
#' @import Biostrings #' @importFrom kmer kdistance
#' #'
#' @export #' @export
kmer_based_distance_matrix <- function (seqs) { kmer_based_distance_matrix <- function (seqs) {
seqbins <- ape::as.DNAbin(seqs) seqbins <- ape::as.DNAbin(seqs)
as.matrix(kdistance(seqbins)) as.matrix(kmer::kdistance(seqbins))
} }
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#' #'
#' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences. #' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
#' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool. #' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
#' Meshclust has to be installed and executable via system2() to run this function. #' Meshclust has to be installed and executable via system2() to run this function. This is hard to achieve on Windows and intended for use on Linux.
#' #'
#' James, Benjamin T. et al. (2018), #' James, Benjamin T. et al. (2018),
#' MeShClust: an intelligent tool for clustering DNA sequences. #' MeShClust: an intelligent tool for clustering DNA sequences.
...@@ -17,9 +17,8 @@ ...@@ -17,9 +17,8 @@
#' #'
#' @examples #' @examples
#' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path) #' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path)
#' @import Biostrings #' @importFrom Biostrings writeXStringSet
#' @import tidyverse #' @import dplyr readr stringr
#' @import dplyr
#' #'
#' @export #' @export
meshclustR <- function(seqs = MyDNAStringSet, meshclustR <- function(seqs = MyDNAStringSet,
...@@ -39,7 +38,7 @@ meshclustR <- function(seqs = MyDNAStringSet, ...@@ -39,7 +38,7 @@ meshclustR <- function(seqs = MyDNAStringSet,
'-o', out_file)) '-o', out_file))
#read output file and parse #read output file and parse
stable_cluster <- read_delim(out_file, delim = '\t', stable_cluster <- readr::read_delim(out_file, delim = '\t',
col_names = c('cluster', 'seqnames', col_names = c('cluster', 'seqnames',
'identity_with_center', 'cluster_class'), 'identity_with_center', 'cluster_class'),
col_types = 'fcdc') %>% col_types = 'fcdc') %>%
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment