#' Iterate over threshold values
#'
#' This function iterates over a range of threshold values and performs clustering
#' on a DNA string dataset.
#'
#' @param MyDNAstring A DNAStringSet object containing the DNA sequences to be clustered.
#' @param step_size The step size for the threshold values. Default is 0.01.
#' @param step_max The maximum threshold value. Default is 0.99.
#' @param ncores The number of cores to use for parallel processing. Default is 1.
#' @param method Method used for clustering, can be "Clusterize" (default) or a
#' file path to the location of meshclust on the users machine, e.g. obtained by
#' running `which meshclust`, see also \code{\link{meshclustR}}.
#' @param meshclust_temp_dir temporary directory for meshclust files.
#' Temporary files will be removed after running the function.
#' @param ... pass on any additional strings to the meshclust commandline tool.
#'
#' @return A list of clustering results, where each element in the list corresponds to a specific threshold value.
#'
#' @import DECIPHER
#' @import dplyr tibble tidyr
#' @importFrom Biostrings DNAStringSet
#'
#' @examples
#' # Create a DNAStringSet object
#' my_sequences <- DNAStringSet(c("ACGT", "AGCT", "ATGC"))
#'
#' # Run the clustering function
#' results <- test_clustering_thresholds(my_sequences, step_size = 0.1, step_max = 0.9, ncores = 2)
#' @export
test_clustering_thresholds <- function(MyDNAstring, step_size,
                                       step_max = 0.99, ncores = 1,
                                       method = "Clusterize",
                                       meshclust_temp_dir = "tmp", ...) {

  if(method == "Clusterize"){
    #DECIPHER removed the 'type' argument from IdClusters around v2.24. Currently (Feb23), the function is renamed "Clusterize"
    if(numeric_version(packageVersion("DECIPHER")) < 2.24){
      clus_tbl <- IdClusters(method = "inexact", myXStringSet = MyDNAstring,
                             cutoff = seq(0, step_max, step_size),
                             processors = ncores,
                             verbose = FALSE)
    }else{
      clus_tbl <- Clusterize(MyDNAstring,
                             cutoff = seq(0, step_max, step_size),
                             processors = ncores,
                             verbose = FALSE)
    }

    clus_tbl_list <- as_tibble(clus_tbl, rownames = "seqnames") %>%
      pivot_longer(cols = -seqnames, names_to = "cutoff", values_to = "cluster") %>%
      group_by(cutoff) %>%
      group_split(.keep = FALSE)

  }else if(file.exists(method)){
    preexisting <- dir.exists(meshclust_temp_dir)
    dir.create(meshclust_temp_dir)
    clus_tbl_list <- lapply(seq(0, step_max, step_size),
                             meshclustR, seqs = MyDNAstring,
                             filepath = meshclust_temp_dir,
                             meshclust_bin = method, ...)
    if(!preexisting){
      unlink(meshclust_temp_dir)
    }
  }else{
    stop("'method' argument needs to be 'Clusterize' or the absolute file path to the meshclust bin ('which meshclust' in shell).")
  }


  steps_temp <- seq(0, step_max, step_size)
  names(clus_tbl_list) <- steps_temp
  return(clus_tbl_list)
}