Skip to content
Snippets Groups Projects
Commit de14a9b4 authored by Simeon's avatar Simeon
Browse files

Better dependency handling

parent a71f6342
Branches
No related tags found
No related merge requests found
Showing
with 83 additions and 54 deletions
......@@ -24,12 +24,22 @@ Imports:
Biostrings,
cowplot,
DECIPHER,
dplyr,
forcats,
GenomicRanges,
kmer,
ggdendro,
ggplot2,
ggtree,
tidyverse,
magrittr,
phangorn,
purrr,
readr,
stringr,
tibble,
tidyr,
vegan,
viridis,
phangorn
viridisLite
License: use_gpl_license(version = 3, include_future = TRUE)
Encoding: UTF-8
LazyData: true
......
......@@ -45,14 +45,35 @@ export(translate_and_count_stops)
export(variant_classifier)
export(veganify_asvcounts)
export(veganify_generic_wide_tbl)
import(Biostrings)
import(DECIPHER)
import(GenomicRanges)
import(ape)
import(cowplot)
import(dplyr)
import(ggdendro)
import(ggtree)
import(phangorn)
import(tidyverse)
import(viridis)
import(forcats)
import(ggplot2)
import(purrr)
import(readr)
import(stringr)
import(tibble)
import(tidyr)
importFrom(Biostrings,AAStringSet)
importFrom(Biostrings,DNAStringSet)
importFrom(Biostrings,DNAStringSetList)
importFrom(Biostrings,translate)
importFrom(Biostrings,writeXStringSet)
importFrom(DECIPHER,AlignSeqs)
importFrom(DECIPHER,DistanceMatrix)
importFrom(GenomicRanges,GRanges)
importFrom(IRanges,ranges)
importFrom(ape,as.DNAbin)
importFrom(cowplot,get_legend)
importFrom(cowplot,plot_grid)
importFrom(ggdendro,dendro_data)
importFrom(ggdendro,segment)
importFrom(ggtree,geom_tiplab)
importFrom(ggtree,get_taxa_name)
importFrom(ggtree,ggtree)
importFrom(kmer,kdistance)
importFrom(magrittr,"%>%")
importFrom(phangorn,dist.ml)
importFrom(phangorn,phyDat)
importFrom(phangorn,upgma)
importFrom(viridisLite,viridis)
......@@ -5,14 +5,14 @@
#' @param cluster The name of the cluster to generate the UPGMA tree from
#' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences
#' @return A UPGMA tree object
#' @import DECIPHER
#' @import phangorn
#' @import tidyverse
#' @importFrom DECIPHER AlignSeqs
#' @importFrom phangorn dist.ml upgma phyDat
#' @importFrom magrittr %>%
#' @export
align_and_generate_upgma <- function(cluster, sequence_list) {
# Use AlignSeqs function to align the sequences in the given cluster
alig <- AlignSeqs(sequence_list[[cluster]], verbose = FALSE)
alig <- DECIPHER::AlignSeqs(sequence_list[[cluster]], verbose = FALSE)
# Convert the aligned sequences to a matrix of DNA data and calculate the distance matrix using maximum likelihood
# Then use the upgma function to generate the tree
......
......@@ -7,19 +7,18 @@
#'
#' @return A matrix object containing the distance scores calculated based on the alignments of the input sequences.
#'
#' @import DECIPHER
#' @import Biostrings
#' @importFrom DECIPHER AlignSeqs DistanceMatrix
#'
#' @examples
#' data(smallexample)
#' dna_sequences <- DNAStringSet(smallexample)
#' dna_sequences <- DNAStringSet(c("AGACCACTCC", "GCATGTAGCT",
#' "GTGGTACGGC", "TCAAACGGCT"))
#' alignment_based_distance_matrix(dna_sequences, ncores = 2)
#'
#' @export
alignment_based_distance_matrix <- function(seqs = DNAStringSet,
                                            ncores = 1) {
  # Align the sequences exactly once, using the requested number of
  # processors. The call is namespace-qualified so it does not depend on
  # DECIPHER being attached to the search path.
  seqs_alig <- DECIPHER::AlignSeqs(seqs, processors = ncores, verbose = FALSE)
  # Build the distance matrix from the aligned sequences with the same number
  # of processors. As the last expression, it is the function's return value.
  DECIPHER::DistanceMatrix(seqs_alig, verbose = FALSE, processors = ncores)
}
......@@ -9,7 +9,7 @@
#' @param ... Additional arguments passed to the `metaMDS` function from the vegan package
#'
#' @return A list object with results including NMDS results and NMDS tibble
#' @import tidyverse
#' @import dplyr tibble tidyr
#' @export
#'
#' @examples
......
......@@ -8,7 +8,7 @@
#' @param output A logical value indicating whether to output a CSV file.
#' @return A tibble containing the cleaned sequence table.
#'
#' @import tidyverse
#' @import dplyr readr tibble tidyr
#'
#' @examples
#' clean_seqtab()
......
......@@ -12,8 +12,9 @@
#' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9))
#' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl)
#'
#' @import Biostrings
#' @import tidyverse
#' @import dplyr purrr tibble tidyr
#' @importFrom Biostrings DNAStringSetList
#'
cluster_longest_reading_frames <- function(
clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered
reading_frame_tbl = tbl) { # A variable that holds a table of reading frames
......
......@@ -11,8 +11,7 @@
#' @return A tibble that contains the cluster number, sequence name, cluster
#' name, sequence number within the cluster, and cluster size.
#'
#' @import tidyverse
#' @import Biostrings
#' @import dplyr purrr tibble tidyr
#' @export
cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){
# First: get names of each cluster
......
......@@ -11,7 +11,7 @@
#' @param w The width of the plot. Default is 'cm_width'.
#' @param h The height of the plot. Default is 'cm_height'.
#' @return combined plot
#' @import tidyverse
#' @import ggplot2
#' @export
combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path,
w = cm_width, h = cm_height) {
......
......@@ -18,7 +18,7 @@
#' )
#' count_clusters(clus_tbl_list)
#'
#' @import tidyverse
#' @import dplyr purrr tidyr
#' @export
count_clusters <- function(clus_tbl_list){
......
......@@ -12,7 +12,7 @@
#' @examples
#' define_plateau(cluster_counts = cluster_counts_df)
#'
#' @import tidyverse
#' @import dplyr
#' @export
define_plateau <- function(cluster_counts){
# "cluster_counts" is a tibble of cluster counts passed as a parameter to the function
......
......@@ -8,8 +8,8 @@
#'
#' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram.
#'
#' @import ggdendro
#' @import tidyverse
#' @importFrom ggdendro dendro_data
#' @importFrom magrittr %>%
#'
#' @examples
#' # Generate dendrogram with default parameters
......@@ -21,7 +21,6 @@
#'
#' @export
dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) {
require(ggdendro)
set.seed(seed)
# make hierarchical cluster from vegdist matrix and extract data for plotting
......
......@@ -12,8 +12,8 @@
#' @examples
#' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE)
#'
#' @import Biostrings
#' @import tidyverse
#' @importFrom Biostrings AAStringSet writeXStringSet
#' @import dplyr
#'
#' @export
export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table
......
......@@ -23,8 +23,8 @@
#'
#' # Expected output: c(2, 1)
#'
#' @import tidyverse
#' @import Biostrings
#' @import stringr
#' @importFrom Biostrings DNAStringSet
#'
find_contiguous_multi_repeats <- function(sequences = DNAStringSet,
repeat_sequence = 'string',
......
......@@ -10,7 +10,7 @@
#' @examples
#' find_longest_hrf(seqs)
#'
#' @import tidyverse
#' @import dplyr
#'
#' @export
find_longest_hrf <- function(seqs = DNAStringSet){
......
......@@ -6,10 +6,10 @@
#' @param seqs A DNAStringSet object containing the DNA sequences to search for ORFs.
#'
#' @return A tibble containing the start and end positions, strand, and length of the longest ORF in each sequence.
#'
#' @import Biostrings
#' @import GenomicRanges
#' @import tidyverse
#' @importFrom Biostrings DNAStringSet
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges ranges
#' @import dplyr tibble tidyr
#'
#' @examples
#' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC"))
......
......@@ -10,7 +10,7 @@
#' @return A data frame containing the longest reading frames for each sequence.
#' The data frame includes the sequence names, reading frame, and the width of the reading frame.
#'
#' @import tidyverse
#' @import dplyr tidyr
#'
#' @export
find_longest_reading_frames <- function(seqs = myDNAStringSet){
......
......@@ -6,7 +6,8 @@
#' @param repeat_sequence A string specifying the repeat sequence to search for.
#'
#' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity.
#' @import tidyverse
#' @import dplyr stringr tibble tidyr
#' @importFrom Biostrings DNAStringSet
#'
#' @examples
#' sequences <- DNAStringSet(c("AGTCAGT",
......
#' Convert to bin format and get distance matrix using kmers
#'
#' This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from the "kmer" package and returns it as a matrix.
#'
#' @param seqs DNAStringSet containing the DNA sequences
#'
#' @return A distance matrix in bin format.
#'
#' @import ape
#' @import Biostrings
#' @importFrom ape as.DNAbin
#' @importFrom kmer kdistance
#'
#' @export
kmer_based_distance_matrix <- function(seqs) {
  # Convert the input sequences to ape's DNAbin representation, which is the
  # object handed to kmer::kdistance() below.
  seqbins <- ape::as.DNAbin(seqs)
  # Compute the k-mer distances once (namespace-qualified, so kmer does not
  # need to be attached) and return them coerced to a plain matrix.
  as.matrix(kmer::kdistance(seqbins))
}
......@@ -2,7 +2,7 @@
#'
#' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
#' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
#' Meshclust has to be installed and executable via system2() to run this function.
#' Meshclust has to be installed and executable via system2() to run this function. This is hard to achieve on Windows; the function is intended for use on Linux.
#'
#' James, Benjamin T. et al. (2018),
#' MeShClust: an intelligent tool for clustering DNA sequences.
......@@ -17,9 +17,8 @@
#'
#' @examples
#' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path)
#' @import Biostrings
#' @import tidyverse
#' @import dplyr
#' @importFrom Biostrings writeXStringSet
#' @import dplyr readr stringr
#'
#' @export
meshclustR <- function(seqs = MyDNAStringSet,
......@@ -39,7 +38,7 @@ meshclustR <- function(seqs = MyDNAStringSet,
'-o', out_file))
#read output file and parse
stable_cluster <- read_delim(out_file, delim = '\t',
stable_cluster <- readr::read_delim(out_file, delim = '\t',
col_names = c('cluster', 'seqnames',
'identity_with_center', 'cluster_class'),
col_types = 'fcdc') %>%
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment