Skip to content
Snippets Groups Projects
Commit de14a9b4 authored by Simeon's avatar Simeon
Browse files

Better dependency handling

parent a71f6342
No related branches found
No related tags found
No related merge requests found
......@@ -6,7 +6,8 @@
#' @param cluster_tbl A data frame or tibble containing the cluster assignments. It should have two columns, 'cluster' and 'seqnames', where 'cluster' contains the cluster numbers and 'seqnames' contains the corresponding sequence names.
#' @param save_to_file Logical value indicating whether to save the resulting sequences to separate files for each cluster.
#' @return A list of sequence objects, where each list element corresponds to a cluster and contains the sequences in that cluster
#' @import tidyverse
#' @import dplyr purrr
#' @importFrom Biostrings writeXStringSet
#' @export
subset_by_clusters <- function(seqs, cluster_tbl, save_to_file = TRUE){
cluster_seqs <- cluster_tbl %>%
......
......@@ -9,7 +9,7 @@
#'
#' @return A tibble containing the subsetted variant table.
#'
#' @import tidyverse
#' @import dplyr
#'
#' @examples
#' subset_variant_table(mytibble, c("cluster1", "cluster2"), c("variant1", "variant2"), c("sample1", "sample2"))
......
......@@ -11,7 +11,8 @@
#' @return A list of clustering results, where each element in the list corresponds to a specific threshold value.
#'
#' @import DECIPHER
#' @import tidyverse
#' @import dplyr tibble tidyr
#' @importFrom Biostrings DNAStringSet
#'
#' @examples
#' # Create a DNAStringSet object
......
......@@ -17,8 +17,8 @@
#' seqs <- DNAStringSet("ATGTCGATAGCCTAGGTCAGTAA")
#' translate_and_count_stops(seqs)
#'
#' @import Biostrings
#' @import tidyverse
#' @import dplyr stringr tidyr
#' @importFrom Biostrings translate
#' @export
translate_and_count_stops <- function(seqs = DNAStringSet) {
# Make reading frames and translate to protein
......
......@@ -7,7 +7,7 @@
#' @param reference_informed Logical value indicating whether the classification should be reference informed (default: FALSE)
#'
#' @return A modified master table with variant classifications
#' @import tidyverse
#' @import dplyr purrr stringr tibble tidyr
#' @export
variant_classifier <- function(
......@@ -22,20 +22,20 @@ variant_classifier <- function(
mutate(sample_count_sum = sum(asv_counts)) %>%
ungroup() %>%
mutate(rel_abundance = asv_counts/sample_count_sum) %>%
filter(sample_count_sum > 0)
dplyr::filter(sample_count_sum > 0)
# Create a table for clustered sequences and filter out clusters with only one sequence
clustab_tbl <- cluster_tbl_named(clustered_sequences) %>%
left_join(tibble(seqnames = unlist(map(clustered_seqs, names)),
seqs = as.character(unlist(clustered_sequences))),
by = 'seqnames') %>%
filter(clus_size > 1)
dplyr::filter(clus_size > 1)
# If reference informed, add reference info and name variants
if(reference_informed){
# Join the clustered sequences table with the long seqtab table by sequence name
master_tbl <- left_join(clustab_tbl, seqtab_tbl_long, by = 'seqs') %>%
filter(seqnames != clus_name)
dplyr::filter(seqnames != clus_name)
# Calculate the similarity of each cluster to the reference sequence
sim_to_ref_tbl <- map(clustered_seqs, similiarity_to_reference,
......@@ -83,7 +83,7 @@ variant_classifier <- function(
# Call variant types using top 2 most abundant sequences per sample and cluster based on relative abundance
variant_types <- master_tbl %>%
filter(rel_abundance > 0) %>%
dplyr::filter(rel_abundance > 0) %>%
group_by(sample, clus_name) %>%
top_n(2, rel_abundance) %>%
ungroup() %>%
......@@ -114,7 +114,7 @@ variant_classifier <- function(
mutate(rel_var_abundance = rel_abundance/cluster_sum_rel_abu) %>%
top_n(2, rel_abundance) %>%
ungroup() %>%
filter(rel_abundance > 0)
dplyr::filter(rel_abundance > 0)
aa_info <- find_longest_reading_frames(unlist(unname(clustered_sequences))) %>%
mutate(longest_aa_seq = max_width/3) %>%
......
......@@ -5,7 +5,7 @@
#' @param cleaned_seqtab A cleaned sequence table.
#'
#' @return A vegan formatted count matrix.
#' @import tidyverse
#' @import dplyr
#' @export
veganify_asvcounts <- function(cleaned_seqtab = my_cleaned_seqtab){
out <- cleaned_seqtab %>%
......
......@@ -5,7 +5,7 @@
#' @param data A wide tibble with rownames in the first column and input data for vegdist in all other columns.
#'
#' @return A data.frame that is compatible with the vegdist function.
#' @import tidyverse
#' @import dplyr tibble tidyr
#'
#' @examples
#' library(tibble)
......
......@@ -18,8 +18,8 @@ A matrix object containing the distance scores calculated based on the alignment
This function uses the DECIPHER package to align a set of DNA sequences and generate a distance matrix based on the alignments. The alignments can be performed using multiple processors for faster execution. This function requires the DECIPHER package to be installed.
}
\examples{
data(smallexample)
dna_sequences <- DNAStringSet(smallexample)
dna_sequences <- DNAStringSet(c("AGACCACTCC", "GCATGTAGCT",
"GTGGTACGGC", "TCAAACGGCT"))
alignment_based_distance_matrix(dna_sequences, ncores = 2)
}
......@@ -13,5 +13,5 @@ kmer_based_distance_matrix(seqs)
A distance matrix in bin format.
}
\description{
This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from XXXX and returns it as a matrix.
This function takes in a DNAStringSet and converts it to a bin format using ape::as.DNAbin function. It then calculates the distance matrix using the kdistance function from the "kmer" package and returns it as a matrix.
}
......@@ -21,7 +21,7 @@ A data frame with information regarding the clustering analysis.
\description{
This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
Meshclust has to be installed and executable via system2() to run this function.
Meshclust has to be installed and executable via system2() to run this function. This is difficult to achieve on Windows; the function is intended for use on Linux.
}
\details{
James, Benjamin T. et al. (2018),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment