Skip to content
Snippets Groups Projects
Commit fdf13598 authored by Simeon's avatar Simeon
Browse files

Clean up, filled DESCRIPTION

parent 5f5c378c
Branches
No related tags found
No related merge requests found
Showing
with 77 additions and 83 deletions
Package: cAmpSeqR
Type: Package
Title: What the Package Does (Title Case)
Title: Custom Processing and Visualization for Multi-Amplicon Sequencing Projects
Version: 0.1.0
Author: Who wrote it
Maintainer: The package maintainer <yourself@somewhere.net>
Description: More about what it does (maybe more than one line)
Use four spaces when indenting paragraphs within the Description.
License: What license is it under?
Authors@R: person(
"Simeon", "Lim Rossmann",
email = "simeon.rossmann@nmbu.no",
role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0435-8221")
)
Description: This package provides a range of functions to process
and visualize next-generation sequencing data from multi-amplicon
sequencing projects. It may work with a range of input variants
consisting of a sequence list and count table for these sequences but
was initially designed for data generated by the DADA2 package.
The functions range from very simple utilities and parsers to complex
plotting functions and are packaged as they are here for the
convenience of the author and collaborators.
A commented pipeline incorporating most of these functions in the
intended sequence can be obtained from the author upon request.
Imports:
Biostrings,
DECIPHER,
ggdendro,
ggtree,
tidyverse,
vegan,
viridis,
ape,
cowplot,
phangorn
License: GPL (>= 3)
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
......@@ -3,6 +3,7 @@
export(align_and_generate_upgma)
export(alignment_based_distance_matrix)
export(calc_asv_nmds)
export(clean_seqtab)
export(cluster_longest_reading_frames)
export(cluster_tbl_named)
export(combine_cluster_plots_and_save)
......@@ -18,6 +19,7 @@ export(find_repeat_positions)
export(kmer_based_distance_matrix)
export(meshclustR)
export(pivot_cluster_tbl_wider)
export(plot_abundance_per_sample)
export(plot_abundance_sums_per_sequence)
export(plot_asv_nmds)
export(plot_cluster_dendrogram)
......@@ -45,24 +47,11 @@ export(veganify_asvcounts)
export(veganify_generic_wide_tbl)
import(Biostrings)
import(DECIPHER)
import(dplyr)
import(GenomicRanges)
import(ape)
import(cowplot)
import(ggdendro)
import(ggplot2)
import(ggtree)
import(magrittr)
import(purrr)
import(readr)
import(scales)
import(stats)
import(stringr)
import(tibble)
import(tidyr)
import(phangorn)
import(tidyverse)
import(utils)
import(vegan)
import(viridis)
importFrom(GenomicRanges,GRanges)
import(ape)
import(cowplot)
import(phangorn)
......@@ -5,14 +5,10 @@
#' @param cluster The name of the cluster to generate the UPGMA tree from
#' @param sequence_list A named list where each element is a \code{DNAStringSet} object containing DNA sequences
#' @return A UPGMA tree object
#' @import utils
#' @importFrom DECIPHER AlignSeqs
#' @importFrom phangorn phyDat
#' @importFrom phangorn dist.ml
#' @importFrom phangorn upgma
#' @import DECIPHER
#' @import phangorn
#' @import tidyverse
#' @export
# Define function to align a cluster of sequences and generate a UPGMA tree
align_and_generate_upgma <- function(cluster, sequence_list) {
# Use AlignSeqs function to align the sequences in the given cluster
......
......@@ -9,6 +9,7 @@
#' @param ... Additional arguments passed to the `metaMDS` function from the vegan package
#'
#' @return A list object with results including NMDS results and NMDS tibble
#' @import tidyverse
#' @export
#'
#' @examples
......
#' Clean Sequence Table
#'
#' Cleans a typical sequence table as output by our internal DADA2 pipeline by
#' transposing it, converting it to a tibble format, and adding a column of sequence names.
#'
#' @param file The path to the RDS file containing the sequence table.
#' @param ASV_sequences A character vector specifying the ASV sequences.
#' @param output A logical value indicating whether to output a CSV file.
#' @return A tibble containing the cleaned sequence table.
#'
#' @import tidyverse
#'
#' @examples
#' clean_seqtab()
#' clean_seqtab(file = 'seqtab.rds', output = FALSE)
#' clean_seqtab(ASV_sequences = asvstrings)
#' @export
clean_seqtab <- function(file = 'seqtab_nochim.rds',
ASV_sequences = asvstrings, # Specify a default value for 'ASV_sequences' if none given
output = TRUE){ # Specify a default value for 'output' if none given
......
......@@ -12,11 +12,8 @@
#' reading_frame_tbl <- data.frame(seqnames=c("seq1","seq2"), strand=c("+","-"), start=c(1,3), end=c(6,11), width=c(6,9))
#' cluster_longest_reading_frames(clustered_sequences=clustered_sequences, reading_frame_tbl=reading_frame_tbl)
#'
#' @import dplyr
#' @import Biostrings
#'
# Define a function that clusters DNA sequences and determines their longest reading frame
#' @import tidyverse
cluster_longest_reading_frames <- function(
clustered_sequences = DNAStringSetList, # A variable that holds a list of DNA sequences that have been clustered
reading_frame_tbl = tbl) { # A variable that holds a table of reading frames
......
......@@ -12,9 +12,8 @@
#' name, sequence number within the cluster, and cluster size.
#'
#' @import tidyverse
#' @import Biostrings DNAStringSet
#' @import Biostrings
#' @export
#'
cluster_tbl_named <- function(clustered_sequences = myDNAStringSetList){
# First: get names of each cluster
cluster_names <- tibble(clus_name = names(clustered_sequences),
......
......@@ -11,10 +11,8 @@
#' @param w The width of the plot. Default is 'cm_width'.
#' @param h The height of the plot. Default is 'cm_height'.
#' @return combined plot
#' @import tidyverse
#' @export
# plot list of three plots in three columns and save to "path" with filename
# "Cluster_overview_'cluster'.pdf"
combine_cluster_plots_and_save <- function(plot_list, cluster, out_path = path,
w = cm_width, h = cm_height) {
dir.create(out_path, showWarnings = FALSE)
......
......@@ -18,11 +18,8 @@
#' )
#' count_clusters(clus_tbl_list)
#'
#' @import dplyr
#' @import purrr
#' @import tidyr
#' @import tidyverse
#' @export
count_clusters <- function(clus_tbl_list){
# Remove the non-numeric column "seqnames" from each cluster table in the list.
......
......@@ -12,9 +12,8 @@
#' @examples
#' define_plateau(cluster_counts = cluster_counts_df)
#'
#' @import dplyr
#' @import tidyverse
#' @export
# The following code defines a function called "define_plateau"
define_plateau <- function(cluster_counts){
# "cluster_counts" is a tibble of cluster counts passed as a parameter to the function
# "clus_plateau" filters the cluster counts by selecting only those with cluster_number greater than or equal to 2
......
......@@ -9,8 +9,7 @@
#' @return A `ggdendro::dendro_data` object, containing data for plotting the dendrogram.
#'
#' @import ggdendro
#' @import vegan
#' @import stats
#' @import tidyverse
#'
#' @examples
#' # Generate dendrogram with default parameters
......@@ -21,7 +20,6 @@
#' dendrogram_hclust(daisy_dist)
#'
#' @export
dendrogram_hclust <- function(data = veganized_tibble, seed = 1, ...) {
require(ggdendro)
set.seed(seed)
......
......@@ -13,12 +13,9 @@
#' export_longest_reading_frame(clustered_reading_frames_tbl, myDNAStringSet, myDirPath, TRUE)
#'
#' @import Biostrings
#' @import dplyr
#' @import tidyr
#' @import utils
#' @import tidyverse
#'
#' @export
# Define a function that exports the longest reading frames
export_longest_reading_frame <- function(clustered_reading_frames_tbl = tbl, # function argument for clustered_reading_frame table
seqs = myDNAStringSet, # function argument for DNA sequence set
outpath = path, # function argument for output file path
......
......@@ -23,11 +23,9 @@
#'
#' # Expected output: c(2, 1)
#'
#' @import stringr
#' @import tidyverse
#' @import Biostrings
#'
#' @keywords sequence, repeats
#'
find_contiguous_multi_repeats <- function(sequences = DNAStringSet,
repeat_sequence = 'string',
singlet_count = 100) {
......
......@@ -10,7 +10,7 @@
#' @examples
#' find_longest_hrf(seqs)
#'
#' @import dplyr
#' @import tidyverse
#'
#' @export
find_longest_hrf <- function(seqs = DNAStringSet){
......
......@@ -8,18 +8,14 @@
#' @return A tibble containing the start and end positions, strand, and length of the longest ORF in each sequence.
#'
#' @import Biostrings
#' @importFrom GenomicRanges GRanges
#' @import tibble
#' @import dplyr
#' @export
#' @import GenomicRanges
#' @import tidyverse
#'
#' @examples
#' seqs <- DNAStringSet(c("ATGAGTTCGAAATGGCGTTGAA", "GGGGGCTCGAGCTAGC"))
#' find_longest_orf(seqs)
#'
#' @seealso \code{\link{findORFs}}
#'
#' @export
find_longest_orf <- function(seqs = DNAStringSet) {
# Find ORFs in the sequences, return longest ORF, and convert to a vector
orfs <- findORFs(seqs, longestORF = TRUE, startCodon = startDefinition(6)) %>%
......
......@@ -10,10 +10,9 @@
#' @return A data frame containing the longest reading frames for each sequence.
#' The data frame includes the sequence names, reading frame, and the width of the reading frame.
#'
#' @import dplyr, tidyr
#' @import tidyverse
#'
#' @export
## Reading frame finder (longest orf or hrf)
find_longest_reading_frames <- function(seqs = myDNAStringSet){
orfs <- find_longest_orf(seqs)
hrfs <- find_longest_hrf(seqs)
......
......@@ -6,18 +6,14 @@
#' @param repeat_sequence A string specifying the repeat sequence to search for.
#'
#' @return A data frame with columns: seqname, start, end, fragment, and plot_intensity.
#'
#' @import stringr
#' @import dplyr
#' @import tibble
#'
#' @export
#' @import tidyverse
#'
#' @examples
#' sequences <- DNAStringSet(c("AGTCAGT",
#' "ACGTAGT",
#' "AGTCGAT"))
#' find_repeat_positions(sequences, "AGT")
#' @export
find_repeat_positions <- function(sequences = DNAStringSet, repeat_sequence = 'string'){
repeat_positions <- str_locate_all(as.character(sequences), repeat_sequence)
......
......@@ -6,12 +6,10 @@
#'
#' @return A distance matrix in bin format.
#'
#' @importFrom ape as.DNAbin
#'
#' @importFrom Biostrings DNAStringSet
#' @import ape
#' @import Biostrings
#'
#' @export
#'
kmer_based_distance_matrix <- function (seqs) {
seqbins <- ape::as.DNAbin(seqs)
as.matrix(kdistance(seqbins))
......
......@@ -2,6 +2,7 @@
#'
#' This function writes a temporary file to perform a clustering analysis on a set of DNA sequences.
#' The clustering is done using the \href{https://github.com/BioinformaticsToolsmith/MeShClust}{Meshclust commandline} tool.
#' Meshclust has to be installed and executable via system2() to run this function.
#'
#' James, Benjamin T. et al. (2018),
#' MeShClust: an intelligent tool for clustering DNA sequences.
......@@ -17,12 +18,8 @@
#' @examples
#' meshclustR(seqs = MyDNAStringSet, meshclust_bin = meshclust, filepath = path)
#' @import Biostrings
#' @import readr
#' @import magrittr
#' @import stringr
#' @importFrom tools file_path_sans_ext
#' @import tidyverse
#' @export
meshclustR <- function(seqs = MyDNAStringSet,
meshclust_bin = meshclust,
filepath = path){
......
......@@ -4,10 +4,9 @@
#'
#' @return A wide table of clusters with all sequences in each cluster listed
#'
#' @import dplyr
#' @import tidyr
#' @export
#' @import tidyverse
#'
#' @export
pivot_cluster_tbl_wider <- function(cluster_tbl) {
# First: get names of each cluster
cluster_names <- cluster_tbl %>%
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment