Skip to content
Snippets Groups Projects
Commit e1761832 authored by Simeon's avatar Simeon
Browse files

clustering threshold test for meshclust

parent 2a6eb09c
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,11 @@
#' @param step_size The step size for the threshold values. Default is 0.01.
#' @param step_max The maximum threshold value. Default is 0.99.
#' @param ncores The number of cores to use for parallel processing. Default is 1.
#' @param method Method used for clustering: either "Clusterize" (default) or the
#' file path to the meshclust executable on the user's machine, e.g. as obtained by
#' running `which meshclust`; see also \code{\link{meshclustR}}.
#' @param meshclust_temp_dir Temporary directory for meshclust files.
#' Temporary files will be removed after running the function.
#'
#' @return A list of clustering results, where each element in the list corresponds to a specific threshold value.
#'
......@@ -22,28 +27,45 @@
#' results <- test_clustering_thresholds(my_sequences, step_size = 0.1, step_max = 0.9, ncores = 2)
#' @export
test_clustering_thresholds <- function(MyDNAstring, step_size,
                                       step_max = 0.99, ncores = 1,
                                       method = "Clusterize",
                                       meshclust_temp_dir = "tmp") {
  # Cluster `MyDNAstring` once per threshold in seq(0, step_max, step_size)
  # and return a list with one clustering result per threshold, named by the
  # threshold value.
  #
  # method             "Clusterize" (DECIPHER backend) or the path to the
  #                    meshclust binary (handed to meshclustR()).
  # meshclust_temp_dir working directory for meshclust files; it is created
  #                    when missing and, in that case only, removed again
  #                    when the function exits.
  #
  # Stops with an error when `method` is neither "Clusterize" nor an existing
  # path.

  # Validate `method` before doing any work so a bad value fails fast.
  is_scalar_string <- is.character(method) && length(method) == 1L &&
    !is.na(method)
  use_clusterize <- is_scalar_string && method == "Clusterize"
  # The meshclust binary is a file, so test with file.exists() rather than
  # dir.exists(); file.exists() is also TRUE for directories, which keeps any
  # previously accepted value working.
  use_meshclust <- is_scalar_string && !use_clusterize && file.exists(method)
  if (!use_clusterize && !use_meshclust) {
    stop("'method' argument needs to be 'Clusterize' or the file path to the meshclust bin ('which meshclust' in shell).",
         call. = FALSE)
  }

  thresholds <- seq(0, step_max, step_size)

  if (use_clusterize) {
    # DECIPHER removed the 'type' argument from IdClusters around v2.24.
    # Currently (Feb23), the function is renamed "Clusterize".
    # Compare against a version string, not a double.
    if (packageVersion("DECIPHER") < "2.24") {
      clus_tbl <- IdClusters(method = "inexact", myXStringSet = MyDNAstring,
                             cutoff = thresholds,
                             processors = ncores,
                             verbose = FALSE)
    } else {
      clus_tbl <- Clusterize(MyDNAstring,
                             cutoff = thresholds,
                             processors = ncores,
                             verbose = FALSE)
    }
    # One column per cutoff -> long format -> one tibble per cutoff.
    clus_tbl_list <- as_tibble(clus_tbl, rownames = "seqnames") %>%
      pivot_longer(cols = -seqnames, names_to = "cutoff",
                   values_to = "cluster") %>%
      group_by(cutoff) %>%
      group_split(.keep = FALSE)
  } else {
    # Only create (and later delete) the temp directory when it did not exist
    # before, so a user-owned directory is never removed.
    if (!dir.exists(meshclust_temp_dir)) {
      dir.create(meshclust_temp_dir)
      # recursive = TRUE is required for unlink() to delete a directory;
      # on.exit() guarantees cleanup even when meshclustR() fails.
      on.exit(unlink(meshclust_temp_dir, recursive = TRUE), add = TRUE)
    }
    clus_tbl_list <- lapply(thresholds,
                            meshclustR, seqs = MyDNAstring,
                            filepath = meshclust_temp_dir,
                            meshclust_bin = method)
  }

  names(clus_tbl_list) <- thresholds
  clus_tbl_list
}
......@@ -4,7 +4,14 @@
\alias{test_clustering_thresholds}
\title{Iterate over threshold values}
\usage{
test_clustering_thresholds(MyDNAstring, step_size, step_max = 0.99, ncores = 1)
test_clustering_thresholds(
MyDNAstring,
step_size,
step_max = 0.99,
ncores = 1,
method = "Clusterize",
meshclust_temp_dir = "tmp"
)
}
\arguments{
\item{MyDNAstring}{A DNAStringSet object containing the DNA sequences to be clustered.}
......@@ -14,6 +21,13 @@ test_clustering_thresholds(MyDNAstring, step_size, step_max = 0.99, ncores = 1)
\item{step_max}{The maximum threshold value. Default is 0.99.}
\item{ncores}{The number of cores to use for parallel processing. Default is 1.}
\item{method}{Method used for clustering: either "Clusterize" (default) or the
file path to the meshclust executable on the user's machine, e.g. as obtained by
running `which meshclust`; see also \code{\link{meshclustR}}.}
\item{meshclust_temp_dir}{Temporary directory for meshclust files.
Temporary files will be removed after running the function.}
}
\value{
A list of clustering results, where each element in the list corresponds to a specific threshold value.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment