From 892f6670bda24f8652009071bc3c0f1a8ce0ba3a Mon Sep 17 00:00:00 2001
From: simeross_vm <simeon.rossmann@nibio.no>
Date: Mon, 2 May 2022 13:42:17 +0200
Subject: [PATCH] descriptors and analysis files

---
 Analysis_pipeline_v4-3 - 16S.Rmd         | 1506 +++++++++++++++++++++
 Analysis_pipeline_v4-3 - ITS2.Rmd        | 1509 +++++++++++++++++++++
 Analysis_pipeline_v4-3 - Sven+Trich.Rmd  | 1546 ++++++++++++++++++++++
 Analysis_pipeline_v4-3 - oomy.Rmd        | 1507 +++++++++++++++++++++
 Analysis_pipeline_v4-3 - oomy_Simeon.Rmd | 1508 +++++++++++++++++++++
 descriptors_16S.txt                      |   57 +
 descriptors_ITS2.txt                     |   54 +
 descriptors_S+T.txt                      |   85 ++
 oomy/descriptors.txt                     |   51 +
 9 files changed, 7823 insertions(+)
 create mode 100644 Analysis_pipeline_v4-3 - 16S.Rmd
 create mode 100644 Analysis_pipeline_v4-3 - ITS2.Rmd
 create mode 100644 Analysis_pipeline_v4-3 - Sven+Trich.Rmd
 create mode 100644 Analysis_pipeline_v4-3 - oomy.Rmd
 create mode 100644 Analysis_pipeline_v4-3 - oomy_Simeon.Rmd
 create mode 100644 descriptors_16S.txt
 create mode 100644 descriptors_ITS2.txt
 create mode 100644 descriptors_S+T.txt
 create mode 100644 oomy/descriptors.txt

diff --git a/Analysis_pipeline_v4-3 - 16S.Rmd b/Analysis_pipeline_v4-3 - 16S.Rmd
new file mode 100644
index 0000000..c7e1dcc
--- /dev/null
+++ b/Analysis_pipeline_v4-3 - 16S.Rmd	
@@ -0,0 +1,1506 @@
+---
+title: "Analysis template v4-3"
+author: "Simeon Rossmann"
+date: "10.12.2020"
+output:
+  html_document: default
+urlcolor: blue
+---
+
+## Introduction and initialization
+
+This is an R Markdown file containing code to parse the results of a dada2 analysis into phyloseq for further analysis. It is separated into chunks that may be run independently by pressing the _play_ button. You will need **3 files** in the same location in order to run this pipeline successfully:
+
+* A sequence table called **'seqtab_nochim.rds'** (automatically generated by the dada2 pipeline)
+* A taxonomy table called **'taxa.rds'** (automatically generated by the dada2 pipeline)
+* A table describing the sample properties called **'descriptors.txt'** (Has to be provided)
+* Optional (some plots require this): A phylogenetic tree file called **'phylotree.rds'** (can be calculated if it is not provided, takes a long time)
+* Optional: Instead of **'taxa.rds'**, a **custom taxonomy file** may be provided (tab-delimited text)
+
+**Recommended use:** Run and adjust the individual chunks until you are content with the output, then knit the whole document into a PDF/html, so you have a full record of a successful run.
+
+#### Optional custom Taxonomy file
+A custom taxonomy file may be provided instead of using the taxonomy output from dada2. This may be used to supply taxonomy derived e.g. from BLAST searches of the ASVs. Custom taxonomy files must be tab-delimited text with as many rows as the original and column headers for all columns except the first. For example:
+
+>Kingdom	Phylum	Class	Order	Family	Genus	Species  
+>ESV1	Kingdomx	Phylumx	Classx	Orderx	Familyx	Genusx	Speciesx  
+>ESV2	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy  
+>ESV3	Kingdomz	Phylumz	Classz	Orderz	Familyz	Genusz	Speciesz  
+> ...
+>ESVn	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy
+
+_Friendly warning: Parsing the results of a BLAST search into this format may require some effort._
+
+#### Descriptor table
+
+'descriptors.txt' should be a tab-delimited .txt table describing the samples. It must have the same length and order as the samples in seqtab_nochim.rds. To check the order and length of samples in seqtab_nochim.rds and generate a template to fill out, you may run the chunk below with "optional_sample_check" set to "TRUE". 
+
+Any number of descriptors is possible. The sample names may be retained as one descriptor, but this is not necessary, as they will be added during parsing. For example, if there are 4 samples (order: s1, s2, s3, s4), the txt file could look as follows:  
+
+>Subject	Species	Time  
+>Kar1	A.thaliana	24hpi  
+>Kar1	A.thaliana	72hpi  
+>Mec2	S.tuberosum	24hpi  
+>Mec3	S.tuberosum	24hpi  
+
+Finally, the file should end with an empty line, since it may throw an error otherwise. However, this is usually not a serious problem.
+
+**If you choose to use the blank file, you MUST retain the original order of the samples!**
+
+## Setup
+
+This chunk also loads required packages and defines the location of the input files. It requires the **correct path** as input, and allows **setting the pruning of control samples** and **choosing generation of a phylogenetic tree**.  **Beware:** The generation of a phylogenetic tree may take several days for >1000 sequences, it is therefore recommended to only use this feature for the final analysis or small sample sets. This script assumes the packages _Biostrings_, _dada2_, _DECIPHER_, _ggplot2_, _ggsci_, _phangorn_, _phyloseq_ and _stringr_ to be installed.
+
+```{r check-samples, message=FALSE, tidy=FALSE, warning=FALSE}
+# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
+path <- "16S/"
+
+# CHANGE ME to TRUE to list all samples and generate an empty metadata file
+optional_sample_check <- FALSE
+
+# CHANGE ME to TRUE to update cuphyr
+update_cuphyr <- TRUE
+
+# Initiate by loading packages and setting knit options
+################# NO CHANGES NECESSARY BELOW #################
+knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
+# NOTE(review): knitr documents 'root.dir' as an opts_knit option, so setting it
+# as a chunk option presumably has no effect; all paths below rely on 'path'.
+knitr::opts_chunk$set(root.dir = paste0(path))
+
+if (update_cuphyr) {
+  devtools::install_github("simeross/cuphyr")
+}
+
+# Sequence and microbiome specific libraries
+library(dada2)
+library(Biostrings)
+library(DECIPHER)
+library(cuphyr)
+# The export of phyloseq objects to a BIOM format and the generation of fancier
+# ordination plots require the phyloseq-extended package. The first command
+# installs the package that is currently on the dev branch of the author's
+# repository, the second command sources some extra functions, including the
+# better ordination plot implementation.
+remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
+source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R")
+
+library(phyloseq)
+library(SIAMCAT)
+
+# Phylogeny libraries
+library(phangorn)
+library(ape)
+
+# Plotting and figure export
+library(gridExtra)
+library(viridis)
+library(ggpubr)
+
+# Tidyverse
+library(tidyverse)
+library(stringr)
+
+# Create the output directory only if it does not exist yet, so that re-running
+# the chunk does not raise a warning about an already existing directory.
+outp <- paste0(path, "/analysis_output")
+if (!dir.exists(outp)) dir.create(outp)
+
+# Optionally list the samples and write an empty descriptor template
+if (optional_sample_check) {
+  seqtabcheck <- readRDS(paste0(path, "/seqtab_nochim.rds"))
+  samps <- rownames(seqtabcheck)
+  lensamps <- length(samps)
+  blankcol <- vector(mode = "character", length = lensamps)
+  blanktable <- data.frame(SampleIDs = samps, ExampleProperty1 = blankcol,
+                           ExampleProperty2 = blankcol,
+                           ExampleProperty3 = blankcol)
+  write.table(blanktable, file = paste0(path, "/descriptors_blank.txt"),
+              sep = "\t", row.names = FALSE)
+  cat("'seqtab_nochim.rds' contains samples in the following order:\n",
+      samps, "\nThe number of samples in the file is:", lensamps, sep = "\n")
+  rm(seqtabcheck, samps, lensamps, blankcol, blanktable)
+}
+rm(optional_sample_check, update_cuphyr)
+```
+
+#### Parameters
+
+This chunk allows the adjustment of several parameters, such as **setting the pruning of control samples** based on keywords, **requiring that a phylogenetic tree be provided or generated**, **defining a minimum ASV count** and **providing an alternative taxonomy**.
+
+```{r setup, message=FALSE, tidy=TRUE, warning=FALSE}
+# All global analysis settings are collected in one dedicated environment so
+# that they can be inspected and exported together.
+parameters <- new.env()
+
+# CHANGE ME to "TRUE" to remove control samples from the analysis or to
+# "FALSE" to analyse all samples.
+parameters$prune_controls <- "TRUE"
+# CHANGE ME to a list of unique identifiers that only occur in the names of
+# samples you do NOT want to analyse. Common examples are provided.
+parameters$controls <- c("Pos", "H2O", "Neg", "Kontr", "Contr", "POSK", "V1", "V2","V3","V4")
+
+# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
+# name. This is useful to exclude non-target organisms or noise from
+# organelles such as Chloroplasts and Mitochondria. It is recommended to first
+# look at all data before using this setting.
+parameters$prune_noise_taxgroups <- "FALSE"
+# CHANGE ME to define the taxonomic groups to be removed as noise.
+parameters$noise_taxgroups <- c("Chloroplast", "Mitochondria")
+
+# CHANGE ME to the number of ASV counts [~reads] that analyzed samples should
+# minimally have. Samples with lower ASV counts than this value will be
+# pruned. Set to 0 to not prune any samples.
+parameters$minASVcount <- 3000
+
+# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead
+# of using the default dada2 output ('taxa.rds').
+parameters$customTax <- "TRUE"
+# CHANGE ME to the location of the custom taxonomy file. This only matters if
+# parameters$customTax is "TRUE", otherwise it will be ignored.
+parameters$taxfile <- "16S/custom_BLAST_taxonomy_nt.txt"
+
+# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a
+# long time depending on the number of sequences (up to days for thousands).
+# If a tree is provided as 'phylotree.rds' in 'path', then it will be used
+# regardless of the value of 'parameters$maketree'.
+parameters$maketree <- "TRUE"
+
+# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on
+# the leaf with the longest branch (outgroup). This makes analyses that rely
+# on the phylogenetic tree reproducible instead of picking a random leaf as
+# root when calculating UNIFRAC distances. Implementation based on
+# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
+# in https://github.com/joey711/phyloseq/issues/597
+parameters$roottree <- "TRUE"
+
+# CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
+parameters$biom_export <- "FALSE"
+```
+
+#### Parsing input data
+
+This chunk loads the input data into a usable format. **This chunk does not require any user inputs**. If no phylogenetic tree with the name 'phylotree.rds' was provided and 'parameters$maketree="TRUE"', it will be calculated here. The phylogenetic tree is necessary for certain plots that incorporate 'true' taxonomic relationships beyond the annotations, such as PCoA.
+
+```{r parse input, tidy=TRUE}
+############### NO NEED FOR CHANGES BELOW ###############
+# Dedicated environments for temporary values, plots and per-chunk settings
+tmp <- new.env()
+plots <- new.env()
+set <- new.env()
+# Read the inputs. A custom taxonomy is matched to the ASVs by row position.
+tmp$seqtabp <- readRDS(paste0(path, "/seqtab_nochim.rds"))
+if (parameters$customTax == "TRUE") {
+  tmp$taxap <- as.matrix(read.delim(parameters$taxfile, header = TRUE, sep = "\t"))
+  if (nrow(tmp$taxap) != ncol(tmp$seqtabp)) {
+    stop("The custom taxonomy has ", nrow(tmp$taxap), " rows, but there are ",
+         ncol(tmp$seqtabp), " ASVs in 'seqtab_nochim.rds'.", call. = FALSE)}
+  rownames(tmp$taxap) <- colnames(tmp$seqtabp)
+} else {
+  tmp$taxap <- readRDS(paste0(path, "/taxa.rds"))}
+tmp$samp_table <- read.delim(paste0(path, "/descriptors.txt"))
+tmp$samp_list <- rownames(tmp$seqtabp)
+
+# Stop if the sample counts differ; warn if first column and sample names differ
+if (length(tmp$samp_table[,1]) != length(tmp$samp_list)) {
+  stop("There are ", length(tmp$samp_table[,1]), 
+    " samples in 'descriptors.txt', but ", length(tmp$samp_list), 
+    " samples in 'seqtab_nochim.rds'. Please make sure that the correct samples 
+    are contained in descriptors.txt.
+       
+    You may use 'optional_sample_check <- TRUE' in the first chunk to generate an 
+    empty template for 'descriptors.txt'" )
+} else if (!identical(tmp$samp_table[,1], tmp$samp_list)) {
+  warning("Warning: The samples in 'descriptors.txt' do not have the same names 
+          or order as the samples in 'seqtab_nochim.rds'. This may be fine if 
+          abbreviated names were used or the sample names are not contained in 
+          the first column of 'descriptors.txt'. Double-checking never hurts!")
+}
+
+
+# Build a likelihood-optimised tree (GTR) of all ASVs, but only if there is no
+# 'phylotree.rds' in 'path' and 'parameters$maketree' is "TRUE"
+if (!file.exists(paste0(path, "/phylotree.rds")) &&
+    parameters$maketree == "TRUE") {
+  tmp$ASVs <- dada2::getSequences(tmp$seqtabp)
+  names(tmp$ASVs) <- tmp$ASVs
+  tmp$ASV_align <- AlignSeqs(DNAStringSet(tmp$ASVs), anchor = NA)
+  tmp$ASV_phang <- phangorn::phyDat(as(tmp$ASV_align, "matrix"), type = "DNA")
+  tmp$dm <- phangorn::dist.ml(tmp$ASV_phang)
+  tmp$treeNJ <- phangorn::NJ(tmp$dm)
+  tmp$fit <- phangorn::pml(tmp$treeNJ, data = tmp$ASV_phang)
+  tmp$fitGTR <- phangorn::optim.pml(
+    update(tmp$fit, k = 4, inv = 0.2), model = "GTR", optInv = TRUE,
+    optGamma = TRUE, rearrangement = "stochastic",
+    control = phangorn::pml.control(trace = 0))
+  saveRDS(tmp$fitGTR, file = paste0(path, "/phylotree.rds"))}
+
+##parse into phyloseq object
+row.names(tmp$samp_table) <- tmp$samp_list
+if (file.exists(paste0(path, "/phylotree.rds"))) {
+  tmp$treep <- readRDS(paste0(path, "/phylotree.rds"))
+  # Accept the fitted model saved by this pipeline ('$tree') or a plain 'phylo'
+  tmp$tree <- if (inherits(tmp$treep, "phylo")) tmp$treep else tmp$treep$tree
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
+                sample_data(tmp$samp_table),
+                tax_table(tmp$taxap), phy_tree(tmp$tree))
+} else {
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
+                sample_data(tmp$samp_table), tax_table(tmp$taxap))}
+## Keep the sequences as reference data and rename the taxa to ASV1..ASVn
+tmp$ASV_sequences <- Biostrings::DNAStringSet(taxa_names(p))
+taxa_names(p) <- paste0("ASV", seq(ntaxa(p)))
+names(tmp$ASV_sequences) <- taxa_names(p)
+p <- merge_phyloseq(p, tmp$ASV_sequences)
+
+##optional pruning
+# 'ps' defaults to all samples so that it is defined on every path, also when
+# pruning is requested but 'parameters$controls' is empty.
+ps <- p
+if (parameters$prune_controls == "TRUE") {
+  if (!is.null(parameters$controls)) {
+    tmp$samp_clean <- tmp$samp_list[!grepl(
+      paste0(parameters$controls, collapse = "|"), tmp$samp_list)]
+    tmp$contr_pruned <- setdiff(tmp$samp_list, tmp$samp_clean)
+    ps <- prune_samples(tmp$samp_clean, p)
+    #Physeq object for Just controls
+    ps.contr <- prune_samples(tmp$contr_pruned, p)
+    ps.contr <- prune_taxa(taxa_sums(ps.contr) > 0, ps.contr)
+    ps.transcontr <- transform_sample_counts(
+      ps.contr, function(ASV) ASV/sum(ASV))
+    # Report on standard output; cat() returns NULL, so no message() around it
+    cat("\n",
+        "Number of control samples that were pruned and will not be analysed:\n",
+        length(tmp$samp_list) - length(tmp$samp_clean), "\n",
+        "The following controls were pruned:\n", tmp$contr_pruned,
+        "The controls are contained in a separate phyloseq object: ps.contr",
+        "\n", sep = "\n")
+  } else {
+    warning("parameters$prune_controls is TRUE but 'parameters$controls' is ",
+            "empty. No samples were pruned.", call. = FALSE)
+  }
+}
+
+# Remove ASVs that belong to the taxonomic groups defined as noise
+if (parameters$prune_noise_taxgroups == "TRUE") {
+  tmp$ps_taxlvls <- colnames(tax_table(ps))
+  tmp$noise_ASVs <- character(0)
+  # Look for the noise group names at every taxonomic level
+  for (lvl in tmp$ps_taxlvls) {
+    tmp$noise_ASVs <- c(
+      tmp$noise_ASVs,
+      cuphyr::list_subset_ASVs(physeq = ps, taxlvlsub = lvl,
+                               subv = parameters$noise_taxgroups))
+  }
+  tmp$noise_ASVs <- unique(tmp$noise_ASVs)
+  tmp$no_noise_ASVs <- setdiff(colnames(otu_table(ps)), tmp$noise_ASVs)
+  if (length(tmp$noise_ASVs) == 0) {
+    cat("No ASVs were recognized as belonging to the following taxonomic groups 
+        defined as noise:\n")
+  } else {
+    # Keep a copy of the noise-free object in 'tmp' next to the updated 'ps'
+    ps <- prune_taxa(tmp$no_noise_ASVs, ps)
+    tmp$no_noise_ps <- ps
+    cat(length(tmp$noise_ASVs), 
+        "ASVs were pruned because they belonged to the following 
+        taxonomic groups:\n")
+  }
+  cat(parameters$noise_taxgroups, "\n", sep = "\n")
+}
+
+# Drop samples whose total ASV count is below 'parameters$minASVcount'
+if (parameters$minASVcount > 0) {
+  tmp$samp_pruned <- names(which(parameters$minASVcount > sample_sums(ps)))
+  ps <- prune_samples(!(sample_sums(ps) < parameters$minASVcount), ps)
+  if (length(tmp$samp_pruned) > 0) {
+    cat("The following samples were pruned because ASV counts were lower than",
+        parameters$minASVcount,  ":\n")
+    cat(tmp$samp_pruned, "\n", sep = "\n")
+  }
+}
+
+# Drop ASVs that are left with a count of 0 (e.g. ASVs found only in controls)
+ps <- prune_taxa(taxa_sums(ps) > 0, ps)
+
+# Long-format tibble of the base object for phyloseq-independent analyses.
+# Melting takes some seconds, potentially up to minutes.
+ps_tbl <- psmelt(ps) %>% as_tibble()
+
+# Root the tree (if 'ps' has one) before deriving the per-sample relative
+# abundances, so that 'ps.trans' carries the same rooted tree as 'ps'.
+if (parameters$roottree == "TRUE" && !is.null(phy_tree(ps, errorIfNULL = FALSE))) {
+  phyloseq::phy_tree(ps) <- cuphyr::root_tree_in_outgroup(physeq = ps)}
+ps.trans <- transform_sample_counts(ps, function(ASV) ASV/sum(ASV))
+
+# Optional BIOM export of the phyloseq objects into 'path'. The file name of
+# each export is the name of the list element plus '.biom'.
+if (parameters$biom_export == "TRUE") {
+  tmp$biom <- list(all_samples = p, samples_without_controls = ps,
+                   samples_without_controls_rel_abundance = ps.trans)
+  # 'ps.contr' is only created when control samples were pruned
+  if (exists("ps.contr")) tmp$biom$just_controls <- ps.contr
+  invisible(lapply(names(tmp$biom), function(biom_name) {
+    suppressWarnings(phyloseq.extended::write_phyloseq(
+      tmp$biom[[biom_name]],
+      biom_file = file.path(path, paste0(biom_name, ".biom")),
+      biom_format = "standard"))
+  }))
+}
+
+# Print a summary of the final base object
+ps
+```
+
+## Output
+
+The chunks below will produce various plots and other output. Each chunk is headed by a description of the output and may contain some parameters to adjust the output. 
+
+#### Plot looks
+This chunk sets the background structure and color palette. Viridis was chosen because it is optimized for grey-scale printing and various types of color blindness. More info on the Viridis palette can be found on [the Viridis info page](https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html). It also establishes save_plot as a shorter variant of ggsave with customized date-time structure to save plots with the same name multiple times instead of overwriting them (overwriting can be triggered with overwrite=TRUE).
+
+```{r plot-design-global, tidy=FALSE, warning=FALSE}
+##### Optional settings (sensible defaults) #####
+# Output format of all saved plots. Default "pdf", possible alternatives are
+# "eps"/"ps", "tex" (pictex), "jpeg", "tiff", "png", "bmp" and "svg"
+parameters$output_format <- "pdf"
+
+# Preferred ggplot2 theme for all plots. Recommended: "theme_bw()".
+theme_set(theme_bw())
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+my_scale_col <- viridis::scale_color_viridis(discrete = TRUE)
+my_scale_fill <- viridis::scale_fill_viridis(discrete = TRUE)
+
+# Narrow color ranges cut out of the viridis palettes. The index vector
+# interleaves four blocks of five so that adjacent colors are distinct.
+tmp$n_col <- 20
+tmp$sort_colors <- as.vector(matrix(1:20, nrow = 4, byrow = TRUE))
+tmp$viridis_greens <- viridis(
+  tmp$n_col, option = "D", begin = 0.85, end = 0.7)[tmp$sort_colors]
+tmp$viridis_reds <- viridis(
+  tmp$n_col, option = "B", begin = 0.7, end = 0.5)[tmp$sort_colors]
+tmp$viridis_blues <- viridis(
+  tmp$n_col, option = "D", begin = 0.2, end = 0.4)[tmp$sort_colors]
+tmp$viridis_yellows <- viridis(
+  tmp$n_col, option = "D", begin = 1, end = 0.9)[tmp$sort_colors]
+tmp$viridis_dark <- viridis(
+  tmp$n_col, option = "A", begin = 0, end = 0.1)[tmp$sort_colors]
+tmp$viridis_light <- viridis(
+  tmp$n_col, option = "A", begin = 1, end = 0.9)[tmp$sort_colors]
+# Named list of the narrow palettes, available in the global environment
+sub_viridis <- list(
+  greens = tmp$viridis_greens, blues = tmp$viridis_blues,
+  yellows = tmp$viridis_yellows, lights = tmp$viridis_light,
+  reds = tmp$viridis_reds, darks = tmp$viridis_dark)
+
+# File extension (including the dot) handed to save_plot() as 'filetype'
+tmp$out <- paste0(".", parameters$output_format)
+
+#################### Function ############################
+
+# Save a plot into the output directory 'outp' via ggsave(). If the target file
+# already exists and overwrite = FALSE, a d-m-y_HMS time stamp is appended to
+# the file name instead of overwriting. Width/height (cm) and resolution (dpi)
+# are read from the 'set' environment, defaulting to 20 x 20 cm at 300 dpi.
+save_plot <- function(
+  pl, filetype = ".pdf", plot_name = "my_plot", overwrite = FALSE) {
+  wp <- if (!is.null(set$wp)) set$wp else 20
+  hp <- if (!is.null(set$hp)) set$hp else 20
+  res <- if (!is.null(set$res)) set$res else 300
+  target <- file.path(outp, paste0(plot_name, filetype))
+  # Scalar '&&' for the if() condition; isTRUE() guards against NA 'overwrite'
+  if (file.exists(target) && !isTRUE(overwrite)) {
+    target <- file.path(outp, paste0(
+      plot_name, "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), filetype))}
+  ggsave(target, pl, width = wp, height = hp, units = "cm", dpi = res)
+}
+
+################################################
+```
+
+#### Total ASV counts ranked
+This chunk plots the absolute abundance of all samples (including controls) and all samples without controls and other trimmed samples.
+```{r overview}
+# CHANGE ME to the sample group for color coding. Accepted values are the column
+# headers in the descriptor file.
+set$color_by <- "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp <- 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp <- 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Rank samples (cuphyr::make_ranked_sums): once for all samples in 'p' and
+# once for the final, pruned object 'ps'
+set$ranked <- cuphyr::make_ranked_sums(p, myset = tmp$subset_id)
+set$ranked_ps <- cuphyr::make_ranked_sums(ps, myset = tmp$subset_id)
+
+# Axis limits shared by all panels: the largest count in 'p' plus about 10 %
+# on the y-axis and one position more than the number of rows on the x-axis
+set$ymax <- max(set$ranked$Abundance)
+set$ymax <- set$ymax + round(set$ymax/10)
+set$xmax <- nrow(set$ranked) + 1
+
+# Stabilize colors: every value of 'color_by' is tied to a fixed viridis color
+set$color_vars <- set$ranked[,set$color_by] %>%
+  unlist() %>% as.character() %>% unique() %>% sort()
+set$color_varsPalette <- viridis(length(set$color_vars))
+names(set$color_varsPalette) <- set$color_vars
+set$my_scale_fill <- scale_fill_manual(values = set$color_varsPalette)
+
+# Helper that draws one ranked bar chart with the shared fill scale and axis
+# limits. It lives in 'set' and is therefore removed by the clean-up below.
+set$draw_ranked <- function(ranked, plot_title) {
+  # 'color_by' is a column name given as a string, hence aes_string()
+  ggplot(data = ranked, aes(x = Rank, y = Abundance)) +
+    aes_string(fill = set$color_by) +
+    geom_col() +
+    set$my_scale_fill +
+    ggtitle(plot_title) +
+    ylim(0, set$ymax) +
+    xlim(0, set$xmax) +
+    ylab("ASV counts ('reads')")
+}
+
+# plot
+# First plot, made in all cases: all samples. If there is a cutoff for ASV
+# counts, it is indicated as a dashed line and the titles are adapted.
+set$title2 <- "Samples (without controls)"
+plots$overview_all <- set$draw_ranked(set$ranked, "All samples")
+if (parameters$minASVcount > 0) {
+  plots$overview_all <- plots$overview_all +
+    geom_hline(yintercept = parameters$minASVcount, linetype = "dashed") +
+    ggtitle("All samples (ASV count cutoff indicated)")
+  set$title2 <- "Samples (without controls and low count samps)"
+}
+
+# Plot that is always on the bottom: the final object 'ps' without controls,
+# pruned samples and noise ASVs
+plots$overview_ps <- set$draw_ranked(set$ranked_ps, set$title2)
+
+# Combine first and last plot
+plots$combo_overview <- ggarrange(
+  plots$overview_all, plots$overview_ps, nrow = 2, align = "v",
+  common.legend = TRUE, legend = "right")
+
+# If noise ASVs were defined and found, add a middle plot with the counts
+# after their removal and overwrite the combined plot with all three panels
+# (before/raw, noise removed, after/ps).
+if (length(tmp$noise_ASVs) > 0) {
+  set$ranked_nonoise <- cuphyr::make_ranked_sums(
+    tmp$no_noise_ps, myset = tmp$subset_id)
+  plots$overview_noise <- set$draw_ranked(
+    set$ranked_nonoise, "Samples (without controls), noise ASVs removed")
+  plots$combo_overview <- ggarrange(
+    plots$overview_all, plots$overview_noise, plots$overview_ps,
+    nrow = 3, align = "v",
+    common.legend = TRUE, legend = "right")
+}
+
+#Save final plot
+save_plot(plots$combo_overview, plot_name = "Overview_all_and_pruned",
+          filetype = tmp$out)
+
+#Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+#Print final plots
+plots$combo_overview
+```
+
+#### Controls
+This chunk generates an overview over the controls (positive AND negative)
+
+```{r Positive controls}
+# CHANGE ME to the desired sample categories on the x-axis. In this case it
+# should be the Sample names.
+set$x_axis_value <- "SampleIDs"
+
+# CHANGE ME to the taxonomic level for color coding. Use "OTU" for ASVs,
+# "Genus", "Species" or "OTU" recommended to compare pos. controls.
+set$color_by_taxlvl <- "Genus"
+
+# CHANGE ME to the taxonomic level for labeling the tree tips (if phylogenetic
+# tree is available). Use "OTU" for ASVs.
+set$label_by_taxlvl <- "OTU"
+
+# CHANGE ME to a sample category to shape the tree tip labels by (if
+# phylogenetic tree is available).
+set$label_shape_by <- "SampleIDs"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to generate a tree for just the control sequences IF no
+# phylogenetic tree for all sequences is provided. This may slow down this
+# chunk when running it for the first time
+set$control_tree <- TRUE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp <- 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp <- 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+if (set$control_tree &&
+    inherits(try(phy_tree(ps.transcontr), silent = TRUE), "try-error")) {
+  # The controls have no tree yet: build one from the control ASVs unless a
+  # cached 'controls_phylotree.rds' exists in 'path' (it is reused as is).
+  if (!file.exists(paste0(path, "/controls_phylotree.rds"))) {
+    set$ASVs <- phyloseq::refseq(ps.transcontr)
+    set$ASV_align <- AlignSeqs(set$ASVs, anchor = NA)
+    set$ASV_phang <- phyDat(as(set$ASV_align, "matrix"), type = "DNA")
+    set$dm <- dist.ml(set$ASV_phang)
+    set$treeNJ <- NJ(set$dm)
+    set$fit <- pml(set$treeNJ, data = set$ASV_phang)
+    set$fitGTR <- update(set$fit, k = 4, inv = 0.2)
+    set$fitGTR <- optim.pml(set$fitGTR, model = "GTR",
+                            optInv = TRUE, optGamma = TRUE,
+                            rearrangement = "stochastic",
+                            control = pml.control(trace = 0))
+    saveRDS(set$fitGTR, file = paste0(path, "/controls_phylotree.rds"))}
+  set$fitGTR <- readRDS(paste0(path, "/controls_phylotree.rds"))
+  phyloseq::phy_tree(ps.transcontr) <- set$fitGTR$tree
+}
+
+plots$topnpplot <- plot_bar(ps.contr, x = set$x_axis_value,
+                            fill = set$color_by_taxlvl) + my_scale_fill +
+  theme(axis.title.x = element_blank(), legend.position = "none",
+        legend.key.size = unit(3, "mm")) +
+  ylab("ASV counts") + guides(col = guide_legend(ncol = 3))
+
+plots$topntplot <- plot_bar(ps.transcontr, x = set$x_axis_value,
+                            fill = set$color_by_taxlvl) + my_scale_fill +
+  theme(axis.title.x = element_blank(), legend.position = "none",
+        legend.key.size = unit(3, "mm")) +
+  ylab("Relative abundance") + guides(col = guide_legend(ncol = 3))
+
+plots$combo_contr <- ggarrange(plots$topnpplot, plots$topntplot, ncol = 2,
+                               labels = c("A", "B"), align = "hv",
+                               common.legend = TRUE, legend = "right")
+
+if (!inherits(try(phy_tree(ps.transcontr), silent = TRUE), "try-error")) {
+  plots$tre <- plot_tree(
+    ps.transcontr, ladderize = "left", label.tips = set$label_by_taxlvl,
+    color = "abundance", text.size = 2.5, shape = set$label_shape_by) +
+    scale_color_viridis_c(aesthetics = c("color","fill")) +
+    theme(legend.position = "left", panel.border = element_blank())
+  plots$combo_contr <- ggarrange(
+    plots$tre,
+    ggarrange(plots$topnpplot, plots$topntplot, ncol = 2,
+              labels = c("B", "C"), align = "hv",
+              common.legend = TRUE, legend = "right"),
+    nrow = 2, legend = "right", labels = c("A"))
+}
+
+# Save, then clean up the plot parameters like the other chunks do
+save_plot(plots$combo_contr, plot_name = "Controls", filetype = tmp$out)
+rm(list = ls(set), envir = set)
+plots$combo_contr
+```
+
+#### Richness plot
+
+This chunk plots the Alpha-Diversity according to the Shannon and Simpson indices. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Richness, tidy=TRUE, message=FALSE}
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are
+# the column headers in the descriptor file.
+set$x_axis_value <- "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the column
+# headers in the descriptor file.
+set$color_by <- "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp <- 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp <- 12
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Plot all diversity measures; an error is caught so the chunk can continue
+plots$richP <- try(
+  plot_richness(ps, x = set$x_axis_value, color = set$color_by) +
+    my_scale_col, silent = TRUE)
+# Just Shannon and Simpson
+plots$richShSi <- plot_richness(ps, x = set$x_axis_value,
+                                measures = c("Shannon", "Simpson"),
+                                color = set$color_by) + my_scale_col
+
+# Save. inherits() gives a single TRUE/FALSE even for multi-class ggplot objects
+if (!inherits(plots$richP, "try-error")) {
+  save_plot(plots$richP, plot_name = "Alpha_diversity_all", filetype = tmp$out)
+}
+save_plot(plots$richShSi, plot_name = "Alpha_diversity_all_ShSi",
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+# Print to standard out
+if (!inherits(plots$richP, "try-error")) {
+  plots$richP
+}
+plots$richShSi
+```
+
+#### Bray-Curtis NMDS plot
+
+This chunk generates a non-metric multidimensional scaling (NMDS) plot of the Bray-Curtis dissimilarity, giving a two-dimensional measure of community diversity. This is done for the primary parameter and the taxonomic level separately. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary, as well as the **max. number of taxa to be displayed at taxlvl**. _Friendly warning: This chunk may not perform for lower order taxlvl, such as 'species', if they are not sufficiently abundant in all samples_
+
+```{r Bray-Curtis NMDS, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the 
+# column headers in the descriptor file.
+set$shape_by = "Vekst"
+# CHANGE ME to the sample group for color coding. Accepted values are the 
+# column headers in the descriptor file.
+set$color_by = "Skifte"
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+# Can be changed to change the number of Top n taxa plotted at taxlvl in 
+# separate panels, a maximum of 9 is recommended for good readability.
+set$top_n = 9
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Ordinate the samples and, separately, the top-n taxa subset (Bray-Curtis)
+tmp$ord_nmds <- ordinate(ps.trans, method = "NMDS", distance = "bray")
+tmp$ps.topn <- cuphyr::abundant_tax_physeq(physeq = ps.trans, lvl = set$taxlvl, 
+                                           top = set$top_n)
+tmp$top_ord_nmds <- ordinate(tmp$ps.topn, method = "NMDS", distance = "bray")
+# Plot; legends are switched off with "none" (logical FALSE is deprecated)
+plots$nmds <- plot_samples(ps.trans, tmp$ord_nmds, color = set$color_by,
+                           shape = set$shape_by, 
+                           title = "Bray NMDS") + 
+  my_scale_col + guides(color = "none", shape = "none")
+
+plots$nmds_tax <- plot_ordination(tmp$ps.topn, tmp$top_ord_nmds, type = "taxa",
+                                  color = set$taxlvl, 
+                                  title = paste0("Bray NMDS ", set$taxlvl)) + 
+  my_scale_col
+
+plots$nmds_taxpanels <- plots$nmds_tax + 
+  facet_wrap(paste0("~", set$taxlvl), scales = "free_x") + 
+  my_scale_col
+
+# Save
+save_plot(plots$nmds, plot_name = paste0("NMDS_", set$shape_by, "_", 
+                                         set$color_by), filetype = tmp$out)
+save_plot(plots$nmds_tax, plot_name = paste0("NMDS_", set$taxlvl), 
+          filetype = tmp$out)
+save_plot(plots$nmds_taxpanels, 
+          plot_name = paste0("NMDS_top", set$top_n, "_", set$taxlvl), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$nmds
+plots$nmds_tax
+plots$nmds_taxpanels
+```
+
+#### PcoA (requires phylogenetic tree)
+
+This chunk generates an alternative common ordination plot, called 'PCoA', based on the primary variable, giving a two-dimensional measure of community diversity by considering the phylogenetic tree. **The chunk does not require any input**, although it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary. If the provided tree is not rooted, phyloseq will root it to a random ASV. Root the tree to a given ASV to get consistent plots here (implementation will follow; until then, see [this github issue](https://github.com/joey711/phyloseq/issues/235#issuecomment-26289761)).
+
+```{r PcoA, eval=FALSE, message=FALSE, warning=FALSE, tidy=TRUE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the 
+# column headers in the descriptor file.
+set$shape_by = "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the 
+# column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Skip the plot if ps has no phylogenetic tree (phy_tree() then fails in try())
+if (inherits(try(phy_tree(ps), silent = TRUE), "try-error")) {
+  # Message could be more informative
+  cat("This plot could not be generated because no Phylogenetic tree was 
+      provided.\n\n")
+} else {
+  # Transform and ordinate
+  tmp$ord_pcoa <- ordinate(ps.trans, "PCoA", "unifrac", weighted = TRUE)
+  # Plot
+  plots$pcoa <- plot_ordination(ps.trans, tmp$ord_pcoa, color = set$color_by, 
+                                shape = set$shape_by) + my_scale_col
+  # Save (file name: PCOA_<color_by>_<shape_by>)
+  save_plot(plots$pcoa, 
+            plot_name = paste0("PCOA_", set$color_by, "_", set$shape_by), 
+            filetype = tmp$out)
+
+  # Clean up plot parameters
+  rm(list = ls(set), envir = set)
+
+  # Print to standard out
+  plots$pcoa
+}
+
+
+```
+
+#### Get a list of Top N taxa at a given level
+This chunk lists the top n most abundant taxonomic terms at a given level. Change the function parameters to the desired values. For more info, check help page of the function with `?cuphyr::abundant_tax_physeq()`. Change 'ignore_na' to include/exclude NA values at the given level.
+```{r Toplist}
+#The character vector can later be accessed by calling 'tmp$tops'
+tmp$tops <- cuphyr::abundant_tax_physeq(physeq = ps, 
+                            lvl = "Genus",
+                            top = 20,
+                            output_format = "tops",
+                            ignore_na = TRUE,
+                            silent = FALSE)
+```
+
+#### Top N ASVs/taxa Bar plot
+
+This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 20, a larger n may lead to delay/skipping of the plot in standard out, but it should be saved as a PDF regardless for ASVs. For taxa, a large n  may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Bar-plot, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels. 
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Skifte"
+
+# CHANGE ME to the desired sample categories on the x-axis. 
+# Accepted values are the column headers in the descriptor file.
+set$x_axis_value = "Vekst"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20' 
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+
+# CHANGE ME to change the number of Top n taxa to be plotted at 
+# taxlvl.
+set$top_n = 10
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight. 
+# Comment out to not highlight anything.
+#set$highlight = "Streptomyces"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same 
+# color in both plots). Highlighting will unify colors even if unify_colors 
+# is FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = FALSE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This 
+# improves visual clarity when a bar segment appears black due to the border of
+# many small ASVs overlapping.
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 40
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relativ abundans"
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+# Top n taxa and top n ASVs; head() never pads with NA the way [1:n] would
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(
+  ps.trans, lvl = set$taxlvl, top = set$top_n, ignore_na = set$ignore_na)
+set$topnASVs <- head(names(sort(taxa_sums(ps), decreasing = TRUE)), 
+                     set$topASVs)
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.trans)
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  # One shared, name-sorted palette so a taxon has the same color in both plots
+  set$toptax <- sort(union(
+    phyloseq::tax_table(set$ps.topnTax)[, set$taxlvl], 
+    phyloseq::tax_table(set$ps.topnASVs)[, set$taxlvl]))
+  set$taxlvlPalette <- setNames(viridis(length(set$toptax)), set$toptax)
+  if (exists("highlight", envir = set)) {
+    # It is possible to change the highlight color here by substituting 
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+  set$my_scale_fill <- scale_fill_manual(values = set$taxlvlPalette, 
+                                         na.value = "grey")
+} else {
+  set$my_scale_fill <- my_scale_fill
+}
+
+# Plot. Each plot gets a fill scale limited to the taxa it actually shows.
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$my_scale_fill <- scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnTax)[, set$taxlvl]))],
+    na.value = "grey")
+}
+plots$topn_tax <- plot_bar(set$ps.topnTax, 
+                           x = set$x_axis_value, 
+                           fill = set$taxlvl, 
+                           title = paste0("Top", set$top_n, "_", set$taxlvl)) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+if (set$fuse_ASVs) { 
+  plots$topn_tax <- plots$topn_tax + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl), 
+    stat = "identity", position = "stack") + 
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$my_scale_fill <- scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnASVs)[, set$taxlvl]))],
+    na.value = "grey")
+}
+plots$topn_ASVs <- plot_bar(set$ps.topnASVs, 
+                            x = set$x_axis_value, 
+                            fill = set$taxlvl, 
+                            title = paste0("Top", set$topASVs, "_ASVs")) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax, plot_name = paste0("Top", set$top_n, "_", set$taxlvl), 
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs, plot_name = paste0("Top", set$topASVs, "_ASVs"), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax
+plots$topn_ASVs
+```
+
+#### Siamcat
+This chunk implements statistical testing of ASVs that are differentially abundant for a given biological trait (column in descriptors.txt). It can also test whether grouping variables other than the tested one are associated with the abundance data in a similar or different way than the chosen trait (confounders). The chunk is largely based on the [SIAMCAT "Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html).
+There are several options that can be chosen.
+
+```{r siamcat, warning=FALSE}
+# CHANGE ME to the sample category that will be used as the test group. ASVs 
+# that are differentially abundant according to this grouping will be detected. 
+# Accepted values are the column headers in your descriptor file.
+set$test_label = "Vekst"
+# CHANGE ME to the value in the test_label column that marks the case group 
+# (it is passed to siamcat() as 'case').
+set$case_value = "Bra"
+
+# CHANGE ME to the cutoff p-value for selecting significant ASVs (FDR-adjusted 
+# p-value)
+set$p_val_cutoff = 0.05
+
+# CHANGE ME to the taxonomic level of interest for more informative ASV 
+# annotation (format: taxlv-ASV)
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to filter low-abundance ASVs. Sequence variants with lower 
+# abundance will not be analysed to reduce artifacts
+set$filter_abundance = 0.001
+
+# Can be changed to include (TRUE) or exclude (FALSE) an output file where all 
+# possible confounders are checked. This will analyse the confounding effect of 
+# other factors in 'descriptors' over the chosen test group and produce a pdf 
+# file containing several plots.
+set$check_confounders = FALSE
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Make a copy of the transformed physeq object and parse taxonomic information 
+# for the chosen taxlvl into ASV names to give more informative plots.
+ps.siam <- ps.trans
+taxa_names(ps.siam) <- tax_table(ps.trans) %>% 
+  as.data.frame() %>% 
+  rownames_to_column(var = "OTU") %>% 
+  unite(col = OTU, set$taxlvl, OTU) %>% 
+  select(OTU) %>% 
+  unlist() %>% 
+  unname()
+
+# Read in transformed physeq object as SIAMCAT object and choose trait
+sc.trans <- siamcat(phyloseq = ps.siam, 
+                    label = set$test_label, 
+                    case = set$case_value)
+
+# print the generated Siamcat object to check for valid parsing
+show(sc.trans)
+
+# Filter ASVs using the abundance cutoff set$filter_abundance
+sc.filt <- filter.features(sc.trans,
+    filter.method = 'abundance',
+    cutoff = set$filter_abundance)
+
+# check confounders if the option is TRUE
+if (set$check_confounders) {  
+  sc.conf <- check.confounders(
+      sc.filt,
+      fn.plot = file.path(outp, 'confounder_plots.pdf'),
+      meta.in = NULL,
+      feature.type = 'filtered',
+      verbose = 1)
+  cat("Confounders checked, results stored in", 
+      file.path(outp, 'confounder_plots.pdf'))
+}
+
+# Plot associations and save the analysis to the siamcat object
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc',
+    alpha = set$p_val_cutoff,
+    mult.corr = "fdr",
+    detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    fn.plot = file.path(outp, 
+                  paste0("Differential_abundance_", set$test_label,
+                         "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), ".pdf")))
+
+# Plot associations again to standard out
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc',
+    alpha = set$p_val_cutoff,
+    mult.corr = "fdr",
+    detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    verbose = 0)
+
+# record plot from standard out
+plots$siam_assoc <- recordPlot()
+
+# Turn significant hits into a tbl. Row names are moved into a column before
+# filtering, and each "<taxlvl>_<ASV>" label is resolved through the original
+# ASV names, so taxon or ASV names that contain "_" are not split incorrectly.
+tbl_me_this <- associations(sc.filt) %>%
+  rownames_to_column("tax_ASV") %>%
+  filter(p.adj < set$p_val_cutoff) %>%
+  mutate(ASV = taxa_names(ps.trans)[match(tax_ASV, taxa_names(ps.siam))],
+         tax = substr(tax_ASV, 1, nchar(tax_ASV) - nchar(ASV) - 1)) %>%
+  select(tax, ASV, p.adj)
+
+if (nrow(tbl_me_this) > 0) {
+  significant_tax_groups <- unique(tbl_me_this$tax)
+  significant_ASVs <- unique(tbl_me_this$ASV)
+
+  cat(sep = "\n", "The following taxonomic groups were found to be differentially 
+     abundant and stored in 'significant_tax_groups':",
+     significant_tax_groups, 
+     "This object can be used to set a subgroup in the chunk below.")
+}
+  
+```
+
+
+## Subset the Phyloseq object by taxonomic group(s)
+
+This chunk gives the option to create a subset of the general Phyloseq object by providing a vector of search terms and a taxonomic level to search at. It requires **one or more search terms**, a **taxonomic level** to search at and a **description of the subset**. The description will only be used for the titles of plots generated from the subsets.
+
+```{r Subset by Taxonomic identity, tidy=TRUE}
+# Vector to subset on
+set$subv = c("Streptomyces", "Pectobacterium", "Dickeya", "Clavibacter", "Liberibacter", "Erwinia", "Pseudomonas", "Corynebacterium")
+# CHANGE ME to the taxonomic level at which you want to search for matching 
+# entries
+set$taxlvlsub = "Genus"
+# CHANGE ME to a descriptor for the subset
+tmp$subset_id = "Bacteria og interest"
+# CHANGE ME if you want to use the significant groups found by SIAMCAT. If TRUE, 
+# those groups will be used in addition to the groups specified in set$subv.
+set$use_siamcat_results = TRUE
+
+# CHANGE ME to the sample group for color coding in the summary plot. 
+# Accepted values are the column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$subv <- if (set$use_siamcat_results && exists("significant_tax_groups")) { 
+  unique(c(set$subv, significant_tax_groups))
+  }else{
+  set$subv}
+set$subASVs <- cuphyr::list_subset_ASVs(subv = set$subv, 
+                                        taxlvlsub = set$taxlvlsub)
+ps.subs <- prune_taxa(set$subASVs, ps)
+ps.subs.trans <- prune_taxa(set$subASVs, ps.trans)
+
+# plot the reads in the subset
+set$ranked <- cuphyr::make_ranked_sums(ps.subs, myset = tmp$subset_id)
+set$avg <- mean(set$ranked$Abundance)
+set$avg_round <- format(round(set$avg, 0), nsmall = 0)
+plots$subset <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) + 
+  aes_string(fill = set$color_by) + 
+  geom_col() + my_scale_fill + 
+  geom_hline(yintercept = set$avg, linetype = "dashed") + 
+  ylab("ASV counts ('reads')") +
+  ggtitle(paste0("Subset: ", tmp$subset_id, 
+                 " (average ASV count ",set$avg_round, ")"))
+
+# Save plot
+save_plot(plots$subset, plot_name = "Subset_overview", filetype = tmp$out)
+# Print plot
+plots$subset
+# print info on generated object
+cuphyr::summarise_physeq(ps, ASV_sublist = set$subASVs, 
+                         sublist_id = tmp$subset_id, samp_names = FALSE)
+
+# Optional export of the subset (ps.subs) as biom-file
+if (parameters$biom_export == "TRUE") {
+  tmp$subset_id <- tmp$subset_id %>% str_replace_all(" ", "_")
+  suppressWarnings(phyloseq.extended::write_phyloseq(
+    ps.subs, biom_format = "standard", 
+    biom_file = file.path(path, paste0("subset_", tmp$subset_id, ".biom"))))
+}
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+```
+
+#### Bar plots for subsets of taxonomic group(s)
+The chunk is very similar to the vanilla bar plot chunk above but takes the subset data instead of the complete Phyloseq object. This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 100 for subsets. The range of n for subsets is larger, since the taxonomic variety was already reduced by the subsetting, meaning the Top 100 ASVs likely belong to few species. For taxa, a large n may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+```{r Bar plot subset, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels. 
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Vekst"
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are 
+# the column headers in the descriptor file.
+set$x_axis_value = "pH"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20' 
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+# CHANGE ME to change the number of Top n taxa to be plotted at taxlvl.
+set$top_n = 20
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight. 
+# Comment out to not highlight anything.
+set$highlight <- "Streptomyces"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same 
+# color in both plots). Highlighting will unify colors even if unify_colors is 
+# FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = FALSE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This 
+# improves visual clarity when a bar segment appears black due to the border of 
+# many small ASVs overlapping.
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relative abundance"
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Top n taxa and top n ASVs of the subset. ASVs are ranked on the subset counts
+# (ps.subs); head() never pads with NA the way [1:n] would.
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(
+  ps.subs.trans, lvl = set$taxlvl, top = set$top_n, ignore_na = set$ignore_na)
+set$topnASVs <- head(names(sort(taxa_sums(ps.subs), decreasing = TRUE)), 
+                     set$topASVs)
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.subs.trans)
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  # One shared, name-sorted palette so a taxon has the same color in both plots
+  set$toptax <- sort(union(
+    phyloseq::tax_table(set$ps.topnTax)[, set$taxlvl], 
+    phyloseq::tax_table(set$ps.topnASVs)[, set$taxlvl]))
+  set$taxlvlPalette <- setNames(viridis(length(set$toptax)), set$toptax)
+  if (exists("highlight", envir = set)) {
+    # It is possible to change the highlight color here by substituting 
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+  set$my_scale_fill <- scale_fill_manual(values = set$taxlvlPalette, 
+                                         na.value = "grey")
+} else {
+  set$my_scale_fill <- my_scale_fill
+}
+
+# Plot. Each plot gets a fill scale limited to the taxa it actually shows.
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$my_scale_fill <- scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnTax)[, set$taxlvl]))],
+    na.value = "grey")
+}
+
+plots$topn_tax_subset <- plot_bar(set$ps.topnTax, x = set$x_axis_value, 
+                                  fill = set$taxlvl, title = paste0(
+                                    "Top", set$top_n, "_", set$taxlvl)) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+if (set$fuse_ASVs) { 
+  plots$topn_tax_subset <- plots$topn_tax_subset + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl), 
+    stat = "identity", position = "stack") + 
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$my_scale_fill <- scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnASVs)[, set$taxlvl]))],
+    na.value = "grey")
+}
+plots$topn_ASVs_subset <- plot_bar(set$ps.topnASVs, x = set$x_axis_value, 
+                                   fill = set$taxlvl, title = paste0(
+                                     "Top", set$topASVs, "_ASVs")) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax_subset, 
+          plot_name = paste0("Top", set$top_n, "_", set$taxlvl, "_subset"), 
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs_subset, 
+          plot_name = paste0("Top", set$topASVs, "_ASVs_subset"), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax_subset
+plots$topn_ASVs_subset
+```
+### Abundance tables
+```{r Abundance tables per genus, message=FALSE}
+ps_trans_tbl <- as_tibble(psmelt(ps.trans))
+
+# Genera of interest
+genera_of_interest <- c("Streptomyces", "Pseudomonas", "Sphingomonas")
+
+# Taking ps_tbl, grouping by Genus and summarizing (open heart surgery)
+# Will give a table with two columns: "Genus" "Genus_total_Abundance"
+genus_abundance_tbl <- ps_tbl %>%
+  group_by(Genus) %>%
+  summarise(Genus_total_Abundance = sum(Abundance))
+
+# If you want per genus and sample
+genus_abundance_tbl_per_sample <- ps_trans_tbl %>%
+  group_by(Genus, Sample) %>%
+  summarise(Genus_Sample_Abundance = sum(Abundance)) %>%
+  filter(Genus %in% genera_of_interest) %>%
+  group_by(Sample) %>%
+  mutate(sums_abu_sample = sum(Genus_Sample_Abundance)) %>%
+  ungroup() %>%
+  mutate(ratio = Genus_Sample_Abundance/sums_abu_sample)
+
+# To get back all the other info, one way is to fuse back to original table
+ps_tbl_with_genus_abundance <- left_join(ps_tbl, genus_abundance_tbl_per_sample,
+                                         by = c("Genus", "Sample"))
+
+# Can be made more compact by removing ASV+Species info and original abundance
+# values and then reducing to unique rows
+genus_abundance_tbl_full_info <- ps_tbl_with_genus_abundance %>%
+  select(-OTU, -Abundance, -Species) %>%
+  unique()
+view(genus_abundance_tbl)
+
+library(readxl)
+Morfologisk <- read_excel("16S/Bakteriedata.xlsx", sheet = "Ark1")
+
+morph_data <- tibble(Morfologisk)
+view(morph_data)
+data_mol_morph <- genus_abundance_tbl_per_sample %>%
+
+  right_join(morph_data, by = c("Sample"="SampleIDs"))
+
+view(data_mol_morph)
+
+# Scatter plot of AphAbu against NemSum with a linear fit, colored by Vekst
+plots$mel_plot <- ggplot(data_mol_morph, 
+                         aes(x = as.numeric(NemSum), y = as.numeric(AphAbu), 
+                             color = Vekst)) +
+  geom_point() +
+  geom_smooth(formula = y ~ x, method = "lm", se = FALSE) +
+  ggtitle("Aphanomyces og Nematoder") +
+  labs(y = "Aphanomyces", x = "Nematoder")
+
+# Print the stored plot (a bare 'mel_plot' object is never created here)
+plots$mel_plot
+save_plot(plots$mel_plot, plot_name = "Mel_plot", filetype = tmp$out)
+```
+
+### Other phylogenetic trees
+For these chunks, the ggtree library is required. If you are not sure whether it is installed, run the following chunk.
+
+```{r check-ggtree}
+if (!requireNamespace("BiocManager", quietly = TRUE)) {
+  install.packages("BiocManager")}
+if (!requireNamespace("ggtree", quietly = TRUE)) {
+  BiocManager::install("ggtree")}
+```
+
+#### Generic phylogenetic tree (from a subset)
+
+This chunk allows the generation of a generic phylogenetic tree for a given subset of the phyloseq object, even if none is provided for the whole set.
+```{r tree-auto}
+# Vector to subset on (the larger the subset, the longer the tree generation 
+# will take!)
+set$subv = c("Streptomyces")
+# CHANGE ME to the taxonomic level at which you want to search for matching 
+# entries
+set$taxlvlsub = "Genus"
+
+# CHANGE ME to the width of the tree shown (depends on its size, start large, 
+# make smaller until the best fit is achieved)
+set$tree_width = 11
+# CHANGE ME to the position you want the tree's scale to be shown. Must be 
+# smaller than set$tree_width, larger = more to the right.
+set$tree_scale_pos = 10
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$subASVs <- cuphyr::list_subset_ASVs(subv = set$subv, 
+                                        taxlvlsub = set$taxlvlsub)
+set$ps.treesubs <- prune_taxa(set$subASVs, ps)
+set$seqs <- phyloseq::refseq(set$ps.treesubs)
+set$align <- AlignSeqs(DNAStringSet(set$seqs), anchor = NA)
+set$seqs_phang <- phangorn::phyDat(as(set$align, "matrix"), type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- NJ(set$seqs_dm)
+set$seqs_fit = pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, k = 4, inv = 0.2)
+set$fitGTRseqs <- optim.pml(set$fitGTRseqs, model = "GTR", optInv = TRUE, 
+                            optGamma = TRUE,
+                            rearrangement = "stochastic", 
+                            control = pml.control(trace = 0))
+
+# Root an unrooted 'phylo' tree on the tip with the longest terminal branch.
+# Adapted from cuphyr's root-tree-in-outgroup helper for use on a plain tree.
+# tree.unrooted: 'phylo' object with edge lengths. Returns the rooted tree.
+root_generic_tree <- function(tree.unrooted) {
+  # requireNamespace() checks one package per call; only 'ape' is needed here
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  # Terminal branches are the rows of $edge whose child node is a tip; tips
+  # are numbered 1..n_tips in the order of $tip.label (ape 'phylo' format).
+  # Rows of $edge are not sorted tips-first, so they are selected explicitly.
+  terminal <- which(tree.unrooted$edge[, 2] <= n_tips)
+  longest <- terminal[which.max(tree.unrooted$edge.length[terminal])]
+  out_group <- tree.unrooted$tip.label[tree.unrooted$edge[longest, 2]]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group, 
+                        resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+set$fitGTRseqs$tree <- root_generic_tree(set$fitGTRseqs$tree)
+plots$subset_ASV_tree <- ggtree::ggtree(set$fitGTRseqs$tree) + 
+  ggtree::geom_tree() + 
+  ggtree::geom_treescale(x = set$tree_scale_pos) + 
+  ggtree::geom_tiplab() + 
+  xlim(0,set$tree_width)
+
+# save
+save_plot(plots$subset_ASV_tree, 
+          plot_name = "subset_phylogenetic_tree", 
+          filetype = tmp$out)
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+# plot
+plots$subset_ASV_tree
+```
+
+#### Generic phylogenetic tree (from any FASTA)
+
+This chunk allows the generation of a generic phylogenetic tree for any given fasta. This may be useful to compare the phylogeny of a given set of ASVs and some reference sequences.
+
+```{r tree-manual}
+# CHANGE ME to the path of the FASTA file you want to make a phylogenetic tree 
+# for (the larger the fasta, the longer the tree generation will take!)
+set$fasta = "16S/ASV_custom_BLAST_taxonomy.fasta"
+
+# CHANGE ME to the width of the tree shown (depends on its size, start large, 
+# make smaller until the best fit is achieved)
+set$tree_width = 11
+# CHANGE ME to the position you want the tree's scale to be shown. Must be 
+# smaller than set$tree_width, larger = more to the right.
+set$tree_scale_pos = 10
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$seqs <- readDNAStringSet(set$fasta)
+set$align <- AlignSeqs(DNAStringSet(set$seqs), anchor = NA)
+set$seqs_phang <- phangorn::phyDat(as(set$align, "matrix"), type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- NJ(set$seqs_dm)
+set$seqs_fit = pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, k = 4, inv = 0.2)
+set$fitGTRseqs <- optim.pml(set$fitGTRseqs, model = "GTR", 
+                            optInv = TRUE, optGamma = TRUE,
+                            rearrangement = "stochastic", 
+                            control = pml.control(trace = 0))
+
+# Root a 'phylo' tree on its longest terminal branch (the outgroup). Generic
+# counterpart of cuphyr::root_tree_in_outgroup, which takes a phyloseq object.
+# tree.unrooted: ape 'phylo' object with branch lengths; returns the rooted tree.
+root_generic_tree <- function(tree.unrooted) {
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  if (is.null(tree.unrooted$edge.length)) {
+    stop("The tree has no branch lengths; cannot pick an outgroup.",
+         call. = FALSE)
+  }
+  # Terminal edges are the rows whose child node (column 2) is a tip; they are
+  # not guaranteed to be the first Ntip rows of the edge matrix.
+  tip_edges <- which(tree.unrooted$edge[, 2] <= ape::Ntip(tree.unrooted))
+  longest <- tip_edges[which.max(tree.unrooted$edge.length[tip_edges])]
+  out_group <- tree.unrooted$tip.label[tree.unrooted$edge[longest, 2]]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group, resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+# Root the tree with root_generic_tree(), then draw it with scale and tip labels
+set$fitGTRseqs$tree <- root_generic_tree(tree.unrooted = set$fitGTRseqs$tree)
+plots$fasta_ASV_tree <- ggtree::ggtree(set$fitGTRseqs$tree) +
+  ggtree::geom_tree() +
+  ggtree::geom_treescale(x = set$tree_scale_pos) +
+  ggtree::geom_tiplab() +
+  xlim(0, set$tree_width)
+# Save the figure via save_plot()
+save_plot(pl = plots$fasta_ASV_tree, filetype = tmp$out,
+          plot_name = "phylogenetic_tree_from_fasta")
+# Empty the per-chunk settings environment
+rm(list = ls(set), envir = set)
+# Show the tree
+plots$fasta_ASV_tree
+```
+
+#### Machine learning with SIAMCAT
+This is an experimental chunk implementing the machine learning functions of SIAMCAT following the tutorial steps and settings from the ["Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html). There is no convenient way to change the settings yet, because the usefulness of these models and the different optimal ways to run them need to be tested further. The chunk can be run as is and will produce a result if the basic SIAMCAT chunk above was run. However, the result should be treated with skepticism and not given extraordinary weight if the user is not confident that they understand the method used.
+
+```{r experimental-machine-learning, warning=FALSE}
+# Count normalization by log-transforming and adding pseudocounts
+sc.norm <- normalize.features(
+  sc.filt,
+  norm.method = "log.unit",
+  norm.param = list(
+    log.n0 = 1e-06,
+    n.p = 2,
+    norm.margin = 1
+  )
+)
+# Split the data into training and test sets to validate the model
+sc.obj <- create.data.split(
+  sc.norm,
+  num.folds = 5,
+  num.resample = 2
+)
+
+# Train a model on the training set
+sc.obj <- train.model(
+  sc.obj,
+  method = "lasso"
+)
+
+# Store the models in a separate object and check the first entry
+models <- models(sc.obj)
+models[[1]]
+
+# Run the model on the data and check the output prediction matrix
+sc.obj <- make.predictions(sc.obj)
+pred_matrix <- pred_matrix(sc.obj)
+head(pred_matrix)
+
+# Save the model results as a plot (no trailing comma: no empty argument)
+model.interpretation.plot(
+  sc.obj,
+  fn.plot = file.path(outp, "model_interpretation.pdf"),
+  consens.thres = 0.5,
+  limits = c(-3, 3),
+  heatmap.type = "zscore"
+)
+
+cat("Model results stored to:", file.path(outp, "model_interpretation.pdf"))
+```
+
+##### Credit
+This script is based on ideas and code from the [dada2 Tutorial](https://benjjneb.github.io/dada2/tutorial.html) by Benjamin Callahan, the publication "Bioconductor Workflow for Microbiome Data Analysis: from raw reads to community analyses" by [Callahan et al. (2016)](https://f1000research.com/articles/5-1492) and various pages of the official [phyloseq website](https://joey711.github.io/phyloseq) by Paul J. McMurdie.
\ No newline at end of file
diff --git a/Analysis_pipeline_v4-3 - ITS2.Rmd b/Analysis_pipeline_v4-3 - ITS2.Rmd
new file mode 100644
index 0000000..dbc4810
--- /dev/null
+++ b/Analysis_pipeline_v4-3 - ITS2.Rmd	
@@ -0,0 +1,1509 @@
+---
+title: "Analysis template v4-3"
+author: "Simeon Rossmann"
+date: "10.12.2020"
+output:
+  html_document: default
+urlcolor: blue
+---
+
+## Introduction and initialization
+
+This is an R Markdown file containing code to parse the results of a dada2 analysis into phyloseq for further analysis. It is separated into chunks that may be run independently by pressing the _play_ button. You will need **3 files** in the same location in order to run this pipeline successfully:
+
+* A sequence table called **'seqtab_nochim.rds'** (automatically generated by the dada2 pipeline)
+* A taxonomy table called **'taxa.rds'** (automatically generated by the dada2 pipeline)
+* A table describing the sample properties called **'descriptors.txt'** (Has to be provided)
+* Optional (some plots require this): A phylogenetic tree file called **'phylotree.rds'** (can be calculated if it is not provided, takes a long time)
+* Optional: Instead of **'taxa.rds'**, a **custom taxonomy file** may be provided (tab-delimited text)
+
+**Recommended use:** Set the individual chunks until you are content with the output, then knit the whole document into a PDF/html, so you have a full record of a successful run.
+
+#### Optional custom Taxonomy file
+A custom taxonomy file may be provided instead of using the taxonomy output from dada2. This may be used to supply taxonomy derived e.g. from BLAST searches of the ASVs. Custom taxonomy files must be tab-delimited text with as many rows as the original and column headers for all columns except the first. For example:
+
+>Kingdom	Phylum	Class	Order	Family	Genus	Species  
+>ESV1	Kingdomx	Phylumx	Classx	Orderx	Familyx	Genusx	Speciesx  
+>ESV2	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy  
+>ESV3	Kingdomz	Phylumz	Classz	Orderz	Familyz	Genusz	Speciesz  
+> ...
+>ESVn	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy
+
+_Friendly warning: Parsing the results of a BLAST search into this format may require some effort._
+
+#### Descriptor table
+
+'descriptors.txt' should be a tab-delimited .txt table describing the samples. It must have the same length and order as the samples in seqtab_nochim.rds. To check the order and length of samples in seqtab_nochim.rds and generate a template to fill out, you may run the chunk below with "optional_sample_check" set to "TRUE". 
+
+Any number of descriptors is possible. The sample names may be retained as one descriptor, but this is not necessary, as they will be added during parsing. For example, if there are 4 samples (order: s1, s2, s3, s4), the txt file could look as follows:  
+
+>Subject	Species	Time  
+>Kar1	A.thaliana	24hpi  
+>Kar1	A.thaliana	72hpi  
+>Mec2	S.tuberosum	24hpi  
+>Mec3	S.tuberosum	24hpi  
+
+Finally, the file should end with an empty line, since it may throw an error otherwise. However, this is usually not a serious problem.
+
+**If you choose to use the blank file, you MUST retain the original order of the samples!**
+
+## Setup
+
+This chunk loads the required packages and defines the location of the input files. It requires the **correct path** as input, and allows **setting the pruning of control samples** and **choosing generation of a phylogenetic tree**.  **Beware:** The generation of a phylogenetic tree may take several days for >1000 sequences; it is therefore recommended to only use this feature for the final analysis or small sample sets. This script assumes the packages _Biostrings_, _dada2_, _DECIPHER_, _ggplot2_, _ggsci_, _phangorn_, _phyloseq_ and _stringr_ to be installed.
+
+```{r check-samples, message=FALSE, tidy=FALSE, warning=FALSE}
+# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
+path <- "ITS2/"
+
+# CHANGE ME to TRUE to list all samples and generate an empty metadata file
+optional_sample_check <- FALSE
+
+# CHANGE ME to TRUE to update cuphyr
+update_cuphyr <- TRUE
+
+################# NO CHANGES NECESSARY BELOW #################
+# NOTE(review): 'root.dir' is documented as a package option
+# (knitr::opts_knit); confirm it has any effect as a chunk option.
+knitr::opts_chunk$set(
+  echo = TRUE, root.dir = paste0(path), message = FALSE, warning = FALSE
+)
+
+if (update_cuphyr) {
+  devtools::install_github("simeross/cuphyr")
+}
+
+# Sequence and microbiome specific libraries
+library(dada2)
+library(Biostrings)
+library(DECIPHER)
+library(cuphyr)
+# phyloseq-extended (dev branch) provides the BIOM export and the fancier
+# ordination plots. Install it only when missing instead of on every run, then
+# source the author's extra helper functions.
+if (!requireNamespace("phyloseq.extended", quietly = TRUE)) {
+  remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
+}
+source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
+
+library(phyloseq)
+library(SIAMCAT)
+
+# Phylogeny libraries
+library(phangorn)
+library(ape)
+
+# Plotting and figure export
+library(gridExtra)
+library(viridis)
+library(ggpubr)
+
+# Tidyverse
+library(tidyverse)
+library(stringr)
+
+
+# Create the output directory unless it already exists (avoids the warning that
+# dir.create() raises when the chunk is re-run).
+outp <- paste0(path,"/analysis_output")
+if (!dir.exists(outp)) dir.create(outp)
+
+# Optionally list the samples and write an empty descriptor template for them
+if (optional_sample_check) {
+  samps <- rownames(readRDS(paste0(path, "/seqtab_nochim.rds")))
+  lensamps <- length(samps)
+  blankcol <- character(lensamps)
+  blanktable <- data.frame(SampleIDs = samps, ExampleProperty1 = blankcol,
+                           ExampleProperty2 = blankcol,
+                           ExampleProperty3 = blankcol)
+  write.table(blanktable, file = paste0(path, "/descriptors_blank.txt"),
+              sep = "\t", row.names = FALSE)
+  cat("'seqtab_nochim.rds' contains samples in the following order:\n",
+      samps, "\nThe number of samples in the file is:", lensamps, sep = "\n")
+  rm(samps, lensamps, blankcol, blanktable)
+}
+rm(optional_sample_check, update_cuphyr)
+```
+
+#### Parameters
+
+This chunk allows the adjustment of several parameters, such as **setting the pruning of control samples** based on keywords, **requiring that a phylogenetic tree be provided or generated**, **defining a minimum ASV count** and **providing an alternative taxonomy**.
+
+```{r setup, message=FALSE, tidy=TRUE, warning=FALSE}
+# Dedicated environment containing all global analysis settings for better 
+# overview and collected export of settings
+parameters <- new.env()
+
+# CHANGE ME to "TRUE" to remove control samples from the analysis or "FALSE" to 
+# analyse all samples.
+parameters$prune_controls = "TRUE"
+  # CHANGE ME to a list of unique identifiers that only occur in the names of 
+  # samples you do NOT want to analyse. Common examples are provided.
+  parameters$controls = c("Pos", "H2O", "Neg", "Kontr", "Contr", "POSK", "V1", "V2","V3","V4")
+
+# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by 
+# name. This is useful to exclude non-target organisms or noise from organelles 
+# such as Chloroplasts and Mitochondria. It is recommended to first look at all 
+# data before using this setting.
+parameters$prune_noise_taxgroups = "FALSE"
+  # CHANGE ME to define the taxonomic groups to be removed as noise.
+  parameters$noise_taxgroups = c("Chloroplast", "Mitochondria")
+
+# CHANGE ME to a number of ASV counts [~reads] that analyzed samples should 
+# minimally have. Samples with lower ASV counts than 'parameters$minASVcount' 
+# will be pruned. Set to 0 to not prune any samples.
+parameters$minASVcount = 3000
+
+# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead of 
+# using the default dada2 output ('taxa.rds').
+parameters$customTax = "TRUE"
+  # CHANGE ME to the location of the custom taxonomy file. This only matters if 
+  # parameters$customTax="TRUE", otherwise it will be ignored.
+  parameters$taxfile = "ITS2/custom_BLAST_taxonomy_nt.txt"
+
+# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a 
+# long time depending on the number of sequences (up to days for thousands). 
+# If a tree is provided as 'phylotree.rds' in 'path', then it will be used 
+# regardless of the value of 'parameters$maketree'
+parameters$maketree = "TRUE"
+
+# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on the 
+# leaf with the longest branch (outgroup). This makes analyses that rely on the 
+# phylogenetic tree reproducible instead of picking a random leaf as root when 
+# calculating UNIFRAC distances. Implementation based on 
+# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers 
+# in https://github.com/joey711/phyloseq/issues/597
+parameters$roottree = "TRUE"
+
+## CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
+parameters$biom_export = "FALSE"
+```
+
+#### Parsing input data
+
+This chunk loads the input data into a usable format. **This chunk does not require any user inputs**. If no phylogenetic tree with the name 'phylotree.rds' was provided and 'parameters$maketree="TRUE"', it will be calculated here. The phylogenetic tree is necessary for certain plots that incorporate 'true' taxonomic relationships beyond the annotations, such as PCoA.
+
+```{r parse input, tidy=TRUE}
+############### NO NEED FOR CHANGES BELOW ###############
+# Dedicated environments: temporary values (tmp), plots and per-chunk settings
+tmp <- new.env()
+plots <- new.env()
+set <- new.env()
+
+# Read in variables
+tmp$seqtabp <- readRDS(file.path(path, "seqtab_nochim.rds"))
+if (parameters$customTax != "TRUE") {
+  tmp$taxap <- readRDS(file.path(path, "taxa.rds"))
+} else {
+  tmp$taxap <- read.delim(parameters$taxfile, header = TRUE, sep = "\t")
+  rownames(tmp$taxap) <- colnames(tmp$seqtabp)
+  tmp$taxap <- as.matrix(tmp$taxap)
+}
+tmp$samp_table <- read.delim(file.path(path, "descriptors.txt"))
+tmp$samp_list <- rownames(tmp$seqtabp)
+
+# Check if descriptors has the same samples as seqtabp
+if (nrow(tmp$samp_table) != length(tmp$samp_list)) {
+  stop("There are ", nrow(tmp$samp_table), 
+    " samples in 'descriptors.txt', but ", length(tmp$samp_list), 
+    " samples in 'seqtab_nochim.rds'. Please make sure that the correct samples 
+    are contained in descriptors.txt.
+       
+    You may use 'optional_sample_check <- TRUE' in the first chunk to generate an 
+    empty template for 'descriptors.txt'" )
+} else if (!identical(tmp$samp_table[,1], tmp$samp_list)) {
+  warning("Warning: The samples in 'descriptors.txt' do not have the same names 
+          or order as the samples in 'seqtab_nochim.rds'. This may be fine if 
+          abbreviated names were used or the sample names are not contained in 
+          the first column of 'descriptors.txt'. Double-checking never hurts!")
+}
+
+
+# Build an ASV tree only if 'path' has no 'phylotree.rds' and maketree is "TRUE"
+if (!file.exists(file.path(path, "phylotree.rds")) &&
+    parameters$maketree == "TRUE") {
+  tmp$ASVs <- dada2::getSequences(tmp$seqtabp)
+  names(tmp$ASVs) <- tmp$ASVs
+  tmp$ASV_align <- AlignSeqs(DNAStringSet(tmp$ASVs), anchor = NA)
+  tmp$ASV_phang <- phangorn::phyDat(as(tmp$ASV_align, "matrix"), type = "DNA")
+  tmp$dm <- phangorn::dist.ml(tmp$ASV_phang)
+  tmp$treeNJ <- phangorn::NJ(tmp$dm)
+  tmp$fit <- phangorn::pml(tmp$treeNJ, data = tmp$ASV_phang)
+  tmp$fitGTR <- phangorn::optim.pml(
+    update(tmp$fit, k = 4, inv = 0.2), model = "GTR", optInv = TRUE,
+    optGamma = TRUE, rearrangement = "stochastic",
+    control = phangorn::pml.control(trace = 0))
+  saveRDS(tmp$fitGTR, file = file.path(path, "phylotree.rds"))
+}
+
+## Parse into a phyloseq object; the tree is included only if a tree file exists
+row.names(tmp$samp_table) <- tmp$samp_list
+if (!file.exists(file.path(path, "phylotree.rds"))) {
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
+                sample_data(tmp$samp_table), tax_table(tmp$taxap))
+} else {
+  tmp$treep <- readRDS(file.path(path, "phylotree.rds"))
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
+                sample_data(tmp$samp_table), tax_table(tmp$taxap),
+                phy_tree(tmp$treep$tree))
+}
+
+## Keep the sequences as reference data and rename the taxa to ASV1..ASVn
+tmp$ASV_sequences <- Biostrings::DNAStringSet(taxa_names(p))
+names(tmp$ASV_sequences) <- paste0("ASV", seq(ntaxa(p)))
+taxa_names(p) <- names(tmp$ASV_sequences)
+p <- merge_phyloseq(p, tmp$ASV_sequences)
+
+## Optional pruning of control samples. 'ps' is always assigned, even when
+## 'parameters$controls' is empty and nothing can be pruned.
+ps <- p
+if (parameters$prune_controls == "TRUE") {
+  if (!is.null(parameters$controls)) {
+    tmp$samp_clean <- tmp$samp_list[!tmp$samp_list %in% grep(paste0(
+      parameters$controls, collapse = "|"), tmp$samp_list, value = TRUE)]
+    tmp$contr_pruned <- setdiff(tmp$samp_list, tmp$samp_clean)
+    ps <- prune_samples(tmp$samp_clean, p)
+    # Physeq objects for just the controls (counts and relative abundance)
+    ps.contr <- prune_samples(tmp$contr_pruned, p)
+    ps.contr <- prune_taxa(taxa_sums(ps.contr) > 0, ps.contr)
+    ps.transcontr <- transform_sample_counts(
+      ps.contr, function(ASV) ASV/sum(ASV))
+    # Report via cat() like the other pruning steps; message(cat()) only passed
+    # cat()'s NULL return value on to message().
+    cat("\n",
+        "Number of control samples that were pruned and will not be analysed:\n",
+        length(tmp$samp_list) - length(tmp$samp_clean), "\n",
+        "The following controls were pruned:\n", tmp$contr_pruned,
+        "The controls are contained in a separate phyloseq object: ps.contr",
+        "\n", sep = "\n")
+  } else {
+    warning("parameters$prune_controls is TRUE but 'parameters$controls' is ",
+            "empty. No samples were pruned.", call. = FALSE)
+  }
+}
+
+# Prune ASVs defined as noise: every taxonomic level of the tax table is
+# queried for 'parameters$noise_taxgroups' via cuphyr::list_subset_ASVs()
+if (parameters$prune_noise_taxgroups == "TRUE") {
+  tmp$ps_taxlvls <- colnames(tax_table(ps))
+  # Accumulate the reported ASVs level by level, starting from an empty vector
+  tmp$noise_ASVs <- unique(Reduce(
+    function(found, taxlvl) {
+      c(found, cuphyr::list_subset_ASVs(
+        physeq = ps, subv = parameters$noise_taxgroups, taxlvlsub = taxlvl))
+    },
+    tmp$ps_taxlvls, character(0)))
+  tmp$no_noise_ASVs <- setdiff(colnames(otu_table(ps)), tmp$noise_ASVs)
+  if (length(tmp$noise_ASVs) == 0) {
+    cat("No ASVs were recognized as belonging to the following taxonomic groups 
+        defined as noise:\n")
+  } else {
+    ps <- prune_taxa(tmp$no_noise_ASVs, ps)
+    # Keep a copy of the noise-free object
+    tmp$no_noise_ps <- ps
+    cat(length(tmp$noise_ASVs), 
+        "ASVs were pruned because they belonged to the following 
+        taxonomic groups:\n")
+  }
+  # Either way, list the groups that were configured as noise
+  cat(parameters$noise_taxgroups, "\n", sep = "\n")
+}
+
+# Prune samples with fewer reads than 'parameters$minASVcount'
+if (parameters$minASVcount > 0) {
+  tmp$samp_pruned <- sample_names(ps)[sample_sums(ps) < parameters$minASVcount]
+  ps <- prune_samples(sample_sums(ps) >= parameters$minASVcount, ps)
+  if (length(tmp$samp_pruned) != 0) {
+    cat("The following samples were pruned because ASV counts were lower than", 
+        parameters$minASVcount,  ":\n")
+    cat(tmp$samp_pruned, "\n", sep = "\n")
+  }
+}
+
+# Remove 0 count ASVs (e.g. control ASVs that remain) from the base object
+ps <- prune_taxa(taxa_sums(ps) > 0, ps)
+
+# Root the tree whenever 'ps' carries one (generated or read from
+# 'phylotree.rds'), and do so before deriving 'ps.trans' so both objects share
+# the rooted tree.
+if (parameters$roottree == "TRUE" &&
+    !is.null(phyloseq::phy_tree(ps, errorIfNULL = FALSE))) {
+  phyloseq::phy_tree(ps) <- cuphyr::root_tree_in_outgroup(physeq = ps)
+}
+
+# Tibble of the base object for phyloseq-independent analyses (may take minutes)
+ps_tbl <- as_tibble(psmelt(ps))
+
+# Transformed per sample (per-sample relative abundance)
+ps.trans <- transform_sample_counts(ps, function(ASV) ASV/sum(ASV))
+
+if (parameters$biom_export == "TRUE") {
+  # 'ps.contr' only exists if control samples were pruned, so export it only then
+  tmp$biom_out <- list(all_samples = p, samples_without_controls = ps,
+                       samples_without_controls_rel_abundance = ps.trans)
+  if (exists("ps.contr")) tmp$biom_out$just_controls <- ps.contr
+  for (biom_name in names(tmp$biom_out)) {
+    suppressWarnings(phyloseq.extended::write_phyloseq(
+      tmp$biom_out[[biom_name]], biom_format = "standard",
+      biom_file = file.path(path, paste0(biom_name, ".biom"))))
+  }
+}
+
+ps
+```
+
+## Output
+
+The chunks below will produce various plots and other output. Each chunk is headed by a description of the output and may contain some parameters to adjust the output. 
+
+#### Plot looks
+This chunk sets the background structure and color palette. Viridis was chosen because it is optimized for grey-scale printing and various types of color blindness. More info on the Viridis palette can be found on [the Viridis info page](https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html). It also establishes save_plot as a shorter variant of ggsave with customized date-time structure to save plots with the same name multiple times instead of overwriting them (overwriting can be triggered with overwrite=TRUE).
+
+```{r plot-design-global, tidy=FALSE, warning=FALSE}
+##### Optional settings (sensible defaults) #####
+# Output format for all plots. Default "pdf"; also possible: "eps"/"ps",
+# "tex" (pictex), "jpeg", "tiff", "png", "bmp" and "svg"
+parameters$output_format <- "pdf"
+
+# Can be changed to preferred ggplot2 theme. Recommended: "theme_bw()".
+theme_set(theme_bw())
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+my_scale_col <- scale_color_viridis(discrete = TRUE)
+my_scale_fill <- scale_fill_viridis(discrete = TRUE)
+
+# Custom, narrower color ranges based on viridis: 20 colors per range
+tmp$n_col <- 20
+# Interleaved order (1, 6, 11, 16, 2, 7, ...) keeps adjacent colors distinct
+tmp$sort_colors <- c(matrix(1:20, nrow = 4, byrow = TRUE))
+
+tmp$viridis_greens <- viridis(
+  tmp$n_col, option = "D", begin = 0.85, end = 0.7)[tmp$sort_colors]
+tmp$viridis_reds <- viridis(
+  tmp$n_col, option = "B", begin = 0.7, end = 0.5)[tmp$sort_colors]
+tmp$viridis_blues <- viridis(
+  tmp$n_col, option = "D", begin = 0.2, end = 0.4)[tmp$sort_colors]
+tmp$viridis_yellows <- viridis(
+  tmp$n_col, option = "D", begin = 1, end = 0.9)[tmp$sort_colors]
+tmp$viridis_dark <- viridis(
+  tmp$n_col, option = "A", begin = 0, end = 0.1)[tmp$sort_colors]
+tmp$viridis_light <- viridis(
+  tmp$n_col, option = "A", begin = 1, end = 0.9)[tmp$sort_colors]
+# Named list of the ranges, available in the global environment
+sub_viridis <- list(
+  greens = tmp$viridis_greens, blues = tmp$viridis_blues,
+  yellows = tmp$viridis_yellows, lights = tmp$viridis_light,
+  reds = tmp$viridis_reds, darks = tmp$viridis_dark)
+
+tmp$out <- paste0(".", parameters$output_format)
+
+#################### Function ############################
+
+# Generic save function for plots. If the target file already exists in 'outp'
+# and overwrite is FALSE, a d-m-y + time stamp is appended to the file name.
+# Width/height (cm) and resolution (dpi) are taken from 'set$wp', 'set$hp' and
+# 'set$res', or default to 20 x 20 cm at 300 dpi.
+save_plot <- function(
+  pl, filetype = ".pdf", plot_name = "my_plot", overwrite=FALSE){
+  wp <- if (!is.null(set$wp)) set$wp else 20
+  hp <- if (!is.null(set$hp)) set$hp else 20
+  res <- if (!is.null(set$res)) set$res else 300
+  name <- paste0(plot_name, filetype)
+  if (file.exists(file.path(outp, name)) && !overwrite) {
+    name <- paste0(plot_name, "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), filetype)
+  }
+  ggsave(file.path(outp, name), pl,
+         width = wp, height = hp, units = "cm", dpi = res)
+}
+
+################################################
+```
+
+#### Total ASV counts ranked
+This chunk plots the absolute abundance of all samples (including controls) and all samples without controls and other trimmed samples.
+```{r overview}
+# CHANGE ME to the sample group for color coding. Accepted values are the column
+# headers in the descriptor file.
+set$color_by <- "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Width (in cm) of the saved plot.
+set$wp <- 17
+# Height (in cm) of the saved plot.
+set$hp <- 20
+# Resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Ranked sums for all samples (p) and for the pruned object (ps)
+set$ranked <- cuphyr::make_ranked_sums(p, myset = tmp$subset_id)
+set$ranked_ps <- cuphyr::make_ranked_sums(ps, myset = tmp$subset_id)
+# Shared axis limits: 10 % headroom above the largest abundance, one extra rank
+set$ymax <- max(set$ranked$Abundance)
+set$ymax <- set$ymax + round(set$ymax / 10)
+set$xmax <- nrow(set$ranked) + 1
+set$title2 <- "Samples (without controls)"
+
+# Stabilize colors: one fixed viridis color per (sorted) value of 'color_by'
+set$color_vars <- sort(unique(as.character(unlist(
+  set$ranked[, set$color_by]))))
+set$color_varsPalette <- setNames(
+  viridis(length(set$color_vars)), set$color_vars)
+set$my_scale_fill <- scale_fill_manual(values = set$color_varsPalette)
+
+# plot
+# Every panel uses the same layout: ranked columns filled by 'set$color_by',
+# the stabilized fill scale and common axis limits.
+set$overview_panel <- function(ranked, title) {
+  ggplot(data = ranked, aes(x = Rank, y = Abundance)) +
+    aes_string(fill = set$color_by) +
+    geom_col() +
+    set$my_scale_fill +
+    ggtitle(title) +
+    ylim(0, set$ymax) +
+    xlim(0, set$xmax) +
+    ylab("ASV counts ('reads')")
+}
+# TRUE if noise ASVs were defined and found during parsing
+set$noise_found <- length(tmp$noise_ASVs) > 0
+
+# First panel, made in all cases: all samples
+plots$overview_all <- set$overview_panel(set$ranked, "All samples")
+
+# Second of three panels, only made if noise ASVs were found: counts after
+# removal of those
+if (set$noise_found) {
+  set$ranked_nonoise <- cuphyr::make_ranked_sums(
+    tmp$no_noise_ps, myset = tmp$subset_id)
+  plots$overview_noise <- set$overview_panel(
+    set$ranked_nonoise, "Samples (without controls), noise ASVs removed")
+}
+
+# If there is a cutoff for ASV counts, mark it in the first panel and adjust
+# the titles
+if (parameters$minASVcount > 0) {
+  plots$overview_all <- plots$overview_all +
+    geom_hline(yintercept = parameters$minASVcount, linetype = "dashed") +
+    ggtitle("All samples (ASV count cutoff indicated)")
+  set$title2 <- "Samples (without controls and low count samps)"
+}
+
+# Bottom panel: the final object ps without controls, pruned samples and
+# noise ASVs
+plots$overview_ps <- set$overview_panel(set$ranked_ps, set$title2)
+
+# Stack the before/raw panel and the after/ps panel; the noise-removed panel
+# goes in between if it was made
+set$panels <- list(plots$overview_all, plots$overview_ps)
+if (set$noise_found) {
+  set$panels <- append(set$panels, list(plots$overview_noise), after = 1)
+}
+plots$combo_overview <- ggarrange(
+  plotlist = set$panels, nrow = length(set$panels), align = "v",
+  common.legend = TRUE, legend = "right")
+
+# Save final plot
+save_plot(plots$combo_overview, plot_name = "Overview_all_and_pruned",
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print final plots
+plots$combo_overview
+```
+
+#### Controls
+This chunk generates an overview over the controls (positive AND negative)
+
+```{r Positive controls}
+# CHANGE ME to the desired sample categories on the x-axis. In this case it 
+# should be the Sample names.
+set$x_axis_value = "SampleIDs"
+
+# CHANGE ME to the taxonomic level for color coding. Use "OTU" for ASVs, 
+# "Genus", "Species" or "OTU" recommended to compare pos. controls.
+set$color_by_taxlvl = "Genus"
+
+# CHANGE ME to the taxonomic level for labeling the tree tips (if phylogenetic 
+# tree is available). Use "OTU" for ASVs.
+set$label_by_taxlvl = "OTU"
+
+# CHANGE ME to a sample category to shape the tree tip labels by (if 
+# phylogenetic tree is available).
+set$label_shape_by = "SampleIDs"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to generate a tree for just the control sequences IF no 
+# phylogenetic tree for all sequences is provided. This may slow down this 
+# chunk when running it for the first time.
+set$control_tree = TRUE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Give the control object its own tree when none is attached yet. The tree is
+# cached in 'controls_phylotree.rds' and only calculated if that file is missing.
+if (isTRUE(set$control_tree) &&
+    is.null(phyloseq::phy_tree(ps.transcontr, errorIfNULL = FALSE))) {
+  if (!file.exists(paste0(path, "/controls_phylotree.rds"))) {
+    set$ASVs <- phyloseq::refseq(ps.transcontr)
+    set$ASV_align <- AlignSeqs(set$ASVs, anchor = NA)
+    set$ASV_phang <- phyDat(as(set$ASV_align, "matrix"), type = "DNA")
+    set$dm <- dist.ml(set$ASV_phang)
+    set$treeNJ <- NJ(set$dm)
+    set$fit <- pml(set$treeNJ, data = set$ASV_phang)
+    set$fitGTR <- optim.pml(update(set$fit, k = 4, inv = 0.2), model = "GTR",
+                            optInv = TRUE, optGamma = TRUE,
+                            rearrangement = "stochastic",
+                            control = pml.control(trace = 0))
+    saveRDS(set$fitGTR, file = paste0(path, "/controls_phylotree.rds"))
+  }
+  set$fitGTR <- readRDS(paste0(path, "/controls_phylotree.rds"))
+  phyloseq::phy_tree(ps.transcontr) <- set$fitGTR$tree
+}
+
+# Shared look of the two bar plots: viridis fill, no x title, legend hidden
+set$bar_style <- list(
+  my_scale_fill,
+  theme(axis.title.x = element_blank(), legend.position = "none",
+        legend.key.size = unit(3, "mm")),
+  guides(col = guide_legend(ncol = 3)))
+plots$topnpplot <- plot_bar(
+  ps.contr, x = set$x_axis_value, fill = set$color_by_taxlvl) +
+  set$bar_style + ylab("ASV counts")
+plots$topntplot <- plot_bar(
+  ps.transcontr, x = set$x_axis_value, fill = set$color_by_taxlvl) +
+  set$bar_style + ylab("Relative abundance")
+plots$combo_contr <- ggarrange(
+  plots$topnpplot, plots$topntplot, ncol = 2, labels = c("A", "B"),
+  align = "hv", common.legend = TRUE, legend = "right")
+
+if (!is.null(phyloseq::phy_tree(ps.transcontr, errorIfNULL = FALSE))) {
+  plots$tre <- plot_tree(
+    ps.transcontr, ladderize = "left", label.tips = set$label_by_taxlvl,
+    color = "abundance", text.size = 2.5, shape = set$label_shape_by) +
+    scale_color_viridis_c(aesthetics = c("color","fill")) +
+    theme(legend.position = "left", panel.border = element_blank())
+  plots$combo_contr <- ggarrange(
+    plots$tre,
+    ggarrange(plots$topnpplot, plots$topntplot, ncol = 2, labels = c("B", "C"),
+              align = "hv", common.legend = TRUE, legend = "right"),
+    nrow = 2, legend = "right", labels = c("A"))
+}
+
+# save, then clean up the plot parameters like the other plotting chunks do
+save_plot(plots$combo_contr, plot_name = "Controls", filetype = tmp$out)
+rm(list = ls(set), envir = set)
+plots$combo_contr
+```
+
+#### Richness plot
+
+This chunk plots the Alpha-Diversity according to the Shannon and Simpson indices. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Richness, tidy=TRUE, message=FALSE}
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are
+# the column headers in the descriptor file.
+set$x_axis_value = "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the column
+# headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Plot all diversity measures; try() keeps the chunk running if this fails.
+plots$richP <- try(
+  plot_richness(ps, x = set$x_axis_value, color = set$color_by) +
+    my_scale_col, silent = TRUE)
+# Just Shannon and Simpson
+plots$richShSi <- plot_richness(ps, x = set$x_axis_value,
+                                measures = c("Shannon", "Simpson"),
+                                color = set$color_by) + my_scale_col
+
+# Save. inherits() yields one TRUE/FALSE even for multi-class ggplot objects.
+if (!inherits(plots$richP, "try-error")) {
+  save_plot(plots$richP, plot_name = "Alpha_diversity_all", filetype = tmp$out)
+}
+save_plot(plots$richShSi, plot_name = "Alpha_diversity_all_ShSi",
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+# Print to standard out
+if (!inherits(plots$richP, "try-error")) {
+  plots$richP
+}
+plots$richShSi
+```
+
+#### Bray-Curtis NMDS plot
+
+This chunk generates a non-metric multidimensional scaling (NMDS) plot of the Bray-Curtis dissimilarity, giving a two-dimensional measure of community diversity. This is done for the primary parameter and the taxonomic level separately. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary, as well as the **max. number of taxa to be displayed at taxlvl**. _Friendly warning: This chunk may not perform for lower order taxlvl, such as 'species', if they are not sufficiently abundant in all samples._
+
+```{r Bray-Curtis NMDS, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the
+# column headers in the descriptor file.
+set$shape_by = "Vekst"
+# CHANGE ME to the sample group for color coding. Accepted values are the
+# column headers in the descriptor file.
+set$color_by = "Skifte"
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+# Can be changed to change the number of Top n taxa plotted at taxlvl in
+# separate panels, a maximum of 9 is recommended for good readability.
+set$top_n = 9
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# NMDS ordination on Bray-Curtis distances: all taxa, then the top n at taxlvl
+tmp$ord_nmds <- ordinate(ps.trans, method = "NMDS", distance = "bray")
+tmp$ps.topn <- cuphyr::abundant_tax_physeq(physeq = ps.trans, lvl = set$taxlvl,
+                                           top = set$top_n)
+tmp$top_ord_nmds <- ordinate(tmp$ps.topn, method = "NMDS", distance = "bray")
+# Plot
+plots$nmds <- plot_samples(ps.trans, tmp$ord_nmds, color = set$color_by,
+                           shape = set$shape_by, title = "Bray NMDS") +
+  # "none" replaces the deprecated 'FALSE' for switching a legend off
+  my_scale_col + guides(color = "none", shape = "none")
+
+plots$nmds_tax <- plot_ordination(tmp$ps.topn, tmp$top_ord_nmds, type = "taxa",
+                                  color = set$taxlvl,
+                                  title = paste0("Bray NMDS ", set$taxlvl)) +
+  my_scale_col
+
+plots$nmds_taxpanels <- plots$nmds_tax +
+  facet_wrap(paste0("~", set$taxlvl), scales = "free_x") +
+  my_scale_col
+
+# Save
+save_plot(plots$nmds, plot_name = paste0("NMDS_", set$shape_by, "_",
+                                         set$color_by), filetype = tmp$out)
+save_plot(plots$nmds_tax, plot_name = paste0("NMDS_", set$taxlvl),
+          filetype = tmp$out)
+save_plot(plots$nmds_taxpanels,
+          plot_name = paste0("NMDS_top", set$top_n, "_", set$taxlvl),
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$nmds
+plots$nmds_tax
+plots$nmds_taxpanels
+```
+
+#### PcoA (requires phylogenetic tree)
+
+This chunk generates an alternative common ordination plot, called 'PCoA', based on the primary variable, giving a two-dimensional measure of community diversity by considering the phylogenetic tree. **The chunk does not require any input**, although it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary. If the provided tree is not rooted, Phyloseq will root it to a random ASV. Root the tree to a given ASV to get consistent plots here (implementation will follow; until then, see [this github issue](https://github.com/joey711/phyloseq/issues/235#issuecomment-26289761)).
+
+```{r PcoA, eval=FALSE, message=FALSE, warning=FALSE, tidy=TRUE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the
+# column headers in the descriptor file.
+set$shape_by = "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the
+# column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Skip the plot if the object that gets ordinated carries no phylogenetic tree
+if (inherits(try(phy_tree(ps.trans), silent = TRUE), "try-error")) {
+  # Message could be more informative
+  cat("This plot could not be generated because no Phylogenetic tree was",
+      "provided.\n\n")
+} else {
+# Ordinate on weighted UniFrac distances
+  tmp$ord_pcoa <- ordinate(ps.trans, "PCoA", "unifrac", weighted = TRUE)
+# Plot
+  plots$pcoa <- plot_ordination(ps.trans, tmp$ord_pcoa, color = set$color_by,
+                                shape = set$shape_by) + my_scale_col
+# Save
+  save_plot(plots$pcoa, plot_name = paste0("PCOA_", set$color_by, "_",
+                                           set$shape_by),
+            filetype = tmp$out)
+
+# Clean up plot parameters
+  rm(list = ls(set), envir = set)
+
+# Print to standard out
+  plots$pcoa
+}
+
+
+```
+
+#### Get a list of Top N taxa at a given level
+This chunk lists the top n most abundant taxonomic terms at a given level. Change the function parameters to the desired values. For more info, check help page of the function with `?cuphyr::abundant_tax_physeq()`. Change 'ignore_na' to include/exclude NA values at the given level.
+```{r Toplist}
+# The resulting character vector stays available afterwards as 'tmp$tops'
+tmp$tops <- cuphyr::abundant_tax_physeq(
+  physeq = ps,
+  lvl = "Genus", top = 20,
+  ignore_na = TRUE,
+  output_format = "tops",
+  silent = FALSE)
+```
+
+#### Top N ASVs/taxa Bar plot
+
+This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 20, a larger n may lead to delay/skipping of the plot in standard out, but it should be saved as a PDF regardless for ASVs. For taxa, a large n  may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Bar-plot, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels.
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Skifte"
+
+# CHANGE ME to the desired sample categories on the x-axis.
+# Accepted values are the column headers in the descriptor file.
+set$x_axis_value = "Vekst"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20'
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+
+# CHANGE ME to change the number of Top n taxa to be plotted at
+# taxlvl.
+set$top_n = 10
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight.
+# Comment out to not highlight anything.
+#set$highlight = "Rhizoctonia"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same
+# color in both plots). Highlighting will unify colors even if unify_colors
+# is FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = FALSE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This
+# improves visual clarity when a bar segment appears black due to the border of
+# many small ASVs overlapping.
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 40
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relativ abundans"
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+# Make physeq objects of top n taxa and top n ASVs (head() never pads with NA)
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(ps.trans, lvl = set$taxlvl,
+                                              top = set$top_n,
+                                              ignore_na = set$ignore_na)
+set$topnASVs <- names(head(sort(taxa_sums(ps), decreasing = TRUE), set$topASVs))
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.trans)
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl],
+                      phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
+  set$toptax <- sort(set$toptax)
+  set$taxlvlPalette <- viridis(length(set$toptax))
+  names(set$taxlvlPalette) <- set$toptax
+  if (exists("highlight", envir = set)) {
+    # It is possible to change the highlight color here by substituting
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f"'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+  set$my_scale_fill <- scale_fill_manual(values = set$taxlvlPalette,
+                                         na.value = "grey")
+}else{
+  set$my_scale_fill <- my_scale_fill
+}
+
+# Plot; with unified colors each plot gets only the palette entries it shows
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_tax <- plot_bar(set$ps.topnTax,
+                           x = set$x_axis_value,
+                           fill = set$taxlvl,
+                           title = paste0("Top", set$top_n, "_", set$taxlvl)) +
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") +
+  set$my_scale_fill +
+  ylab(set$y_axis_label)
+
+if (set$fuse_ASVs) {
+  plots$topn_tax <- plots$topn_tax + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl),
+    stat = "identity", position = "stack") +
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_ASVs <- plot_bar(set$ps.topnASVs,
+                            x = set$x_axis_value,
+                            fill = set$taxlvl,
+                            title = paste0("Top", set$topASVs, "_ASVs")) +
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") +
+  set$my_scale_fill +
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax, plot_name = paste0("Top", set$top_n, "_", set$taxlvl),
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs, plot_name = paste0("Top", set$topASVs, "_ASVs"),
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax
+plots$topn_ASVs
+```
+
+#### Siamcat
+This chunk implements statistical testing of ASVs that are differentially abundant for a given biological trait (column in descriptors.txt). It can also test whether grouping variables other than the tested one are associated with the abundance data in a similar or different way than the chosen trait (confounders). The chunk is largely based on the [SIAMCAT "Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html).
+There are several options that can be chosen.
+
+```{r siamcat, warning=FALSE}
+# CHANGE ME to the sample category that will be used as the test group. ASVs
+# that are differentially abundant according to this grouping will be detected.
+# Accepted values are the column headers in your descriptor file.
+set$test_label = "Vekst"
+# CHANGE ME to the value of the test group that marks the 'case' samples (it
+# is handed to siamcat() as 'case').
+set$case_value = "Bra"
+
+# CHANGE ME to the cutoff p-value for selecting significant ASVs (FDR-adjusted
+# p-value)
+set$p_val_cutoff = 0.05
+
+# CHANGE ME to the taxonomic level of interest for more informative ASV
+# annotation (format: taxlvl_ASV)
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to filter low-abundance ASVs. Sequence variants with lower
+# abundance will not be analysed to reduce artifacts
+set$filter_abundance = 0.001
+
+# Can be changed to include (TRUE) or exclude (FALSE) an output file where all
+# possible confounders are checked. This will analyse the confounding effect of
+# other factors in 'descriptors' over the chosen test group and produce a pdf
+# file containing several plots.
+set$check_confounders = TRUE
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Drop the hits of an earlier run so that no stale result is picked up later.
+rm(list = intersect(c("significant_tax_groups", "significant_ASVs"), ls()))
+
+# Copy the transformed physeq object and rename every ASV to '<taxlvl>_<ASV>'
+# for more informative plots. Both name parts are kept in a lookup table so
+# hits can be mapped back even if a name itself contains an underscore.
+ps.siam <- ps.trans
+set$tax_mat <- as(tax_table(ps.trans), "matrix")
+set$siam_names <- data.frame(
+  tax = as.character(set$tax_mat[, set$taxlvl]), ASV = rownames(set$tax_mat),
+  stringsAsFactors = FALSE)
+set$siam_names$tax[is.na(set$siam_names$tax)] <- "NA" # same label as in names
+set$siam_names$tax_ASV <- paste(set$siam_names$tax, set$siam_names$ASV,
+                                sep = "_")
+taxa_names(ps.siam) <- set$siam_names$tax_ASV
+
+# Read in transformed physeq object as SIAMCAT object and choose trait
+sc.trans <- siamcat(phyloseq = ps.siam,
+                    label = set$test_label,
+                    case = set$case_value)
+
+# print the generated Siamcat object to check for valid parsing
+show(sc.trans)
+
+# Filter ASVs with an abundance below set$filter_abundance
+sc.filt <- filter.features(sc.trans,
+    filter.method = 'abundance',
+    cutoff = set$filter_abundance)
+
+# check confounders if the option is TRUE
+if (set$check_confounders) {
+  sc.conf <- check.confounders(
+      sc.filt,
+      fn.plot = file.path(outp, 'confounder_plots.pdf'),
+      meta.in = NULL,
+      feature.type = 'filtered',
+      verbose = 1)
+  cat("Confounders checked, results stored in",
+      file.path(outp, 'confounder_plots.pdf'))
+}
+
+# Plot associations to a PDF and save the analysis to the siamcat object
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc', alpha = set$p_val_cutoff,
+    mult.corr = "fdr", detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    fn.plot = file.path(outp,
+                  paste0("Differential_abundance_", set$test_label,
+                         "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), ".pdf")))
+
+# Plot associations again to standard out (no 'fn.plot' given)
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc', alpha = set$p_val_cutoff,
+    mult.corr = "fdr", detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    verbose = 0)
+
+# record plot from standard out
+plots$siam_assoc <- recordPlot()
+
+# Turn significant hits into tbl, if there are any, generate a vector containing
+# significant tax groups at taxlvl and a vector containing significant ASVs
+tbl_me_this <- associations(sc.filt) %>%
+  rownames_to_column("tax_ASV") %>%
+  filter(p.adj < set$p_val_cutoff) %>%
+  left_join(set$siam_names, by = "tax_ASV") %>%
+  select(tax, ASV, p.adj)
+
+if (nrow(tbl_me_this) > 0) {
+ significant_tax_groups <- select(tbl_me_this, tax) %>%
+   unique() %>% unlist() %>% unname()
+ significant_ASVs <- select(tbl_me_this, ASV) %>%
+   unique() %>% unlist() %>% unname()
+ cat(sep = "\n", paste("The following taxonomic groups were found to be",
+     "differentially abundant and stored in 'significant_tax_groups':"),
+     significant_tax_groups,
+     "This object can be used to set a subgroup in the chunk below.")
+}
+  
+```
+
+
+## Subset the Phyloseq object by taxonomic group(s)
+
+This chunk gives the option to create a subset of the general Phyloseq object by providing a vector of search terms and a taxonomic level to search at. It requires **one or more search terms**, a **taxonomic level** to search at and a **description of the subset**. The description will only be used for the titles of plots generated from the subsets.
+
+```{r Subset by Taxonomic identity, tidy=TRUE}
+# CHANGE ME to the search term(s) to subset on
+set$subv = c("Fusarium")
+# CHANGE ME to the taxonomic level at which you want to search for matching
+# entries
+set$taxlvlsub = "Genus"
+# CHANGE ME to a descriptor for the subset
+tmp$subset_id = "Fungi of interest"
+# CHANGE ME if you want to use the significant groups found by SIAMCAT. If TRUE,
+# those groups will be used in addition to the groups specified in set$subv.
+set$use_siamcat_results = TRUE
+
+# CHANGE ME to the sample group for color coding in the summary plot.
+# Accepted values are the column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$subv <- if (set$use_siamcat_results && exists("significant_tax_groups")) {
+  unique(c(set$subv, significant_tax_groups))
+  }else{
+  set$subv}
+set$subASVs <- cuphyr::list_subset_ASVs(subv = set$subv,
+                                        taxlvlsub = set$taxlvlsub)
+ps.subs <- prune_taxa(set$subASVs, ps)
+ps.subs.trans <- prune_taxa(set$subASVs, ps.trans)
+
+# plot the reads in the subset
+set$ranked <- cuphyr::make_ranked_sums(ps.subs, myset = tmp$subset_id)
+set$avg <- mean(set$ranked$Abundance)
+set$avg_round <- format(round(set$avg, 0), nsmall = 0)
+plots$subset <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) +
+  aes_string(fill = set$color_by) +
+  geom_col() + my_scale_fill +
+  geom_hline(yintercept = set$avg, linetype = "dashed") +
+  ylab("ASV counts ('reads')") +
+  ggtitle(paste0("Subset: ", tmp$subset_id,
+                 " (average ASV count ",set$avg_round, ")"))
+
+# Save plot
+save_plot(plots$subset, plot_name = "Subset_overview", filetype = tmp$out)
+# Print plot
+plots$subset
+# print info on generated object
+cuphyr::summarise_physeq(ps, ASV_sublist = set$subASVs,
+                         sublist_id = tmp$subset_id, samp_names = FALSE)
+
+# Optional export of the subset as biom-file; tmp$subset_id is left untouched
+if (parameters$biom_export == "TRUE") {
+  set$biom_id <- str_replace_all(tmp$subset_id, " ", "_")
+  suppressWarnings(phyloseq.extended::write_phyloseq(
+    ps.subs, biom_file = file.path(path, paste0("subset_", set$biom_id, ".biom")),
+    biom_format = "standard"))
+}
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+```
+
+#### Bar plots for subsets of taxonomic group(s)
+The chunk is very similar to the vanilla bar plot chunk above but takes the subset data instead of the complete Phyloseq object. This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 100 for subsets. The range of n for subsets is larger, since the taxonomic variety was already reduced by the subsetting, meaning the Top 100 ASVs likely belong to few species. For taxa, a large n may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+```{r Bar plot subset, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels.
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Skifte"
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are
+# the column headers in the descriptor file.
+set$x_axis_value = "Vekst"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20'
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+# CHANGE ME to change the number of Top n taxa to be plotted at taxlvl.
+set$top_n = 10
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight.
+# Comment out to not highlight anything.
+set$highlight <- "Rhizoctonia"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same
+# color in both plots). Highlighting will unify colors even if unify_colors is
+# FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = FALSE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This
+# improves visual clarity when a bar segment appears black due to the border of
+# many small ASVs overlapping.
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relative abundance"
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Make physeq objects of top n taxa and top n ASVs, both ranked in the subset
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(ps.subs.trans,
+                                              lvl = set$taxlvl,
+                                              top = set$top_n,
+                                              ignore_na = set$ignore_na)
+set$topnASVs <- names(head(sort(taxa_sums(ps.subs), decreasing = TRUE),
+                           set$topASVs))
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.subs.trans)
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl],
+                      phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
+  set$toptax <- sort(set$toptax)
+  set$taxlvlPalette <- viridis(length(set$toptax))
+  names(set$taxlvlPalette) <- set$toptax
+  if (exists("highlight", envir = set)) {
+    # It is possible to change the highlight color here by substituting
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f"'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+  set$my_scale_fill <- scale_fill_manual(values = set$taxlvlPalette,
+                                         na.value = "grey")
+}else{
+  set$my_scale_fill <- my_scale_fill
+}
+
+# Plot; with unified colors each plot gets only the palette entries it shows
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_tax_subset <- plot_bar(set$ps.topnTax, x = set$x_axis_value,
+                                  fill = set$taxlvl, title = paste0(
+                                    "Top", set$top_n, "_", set$taxlvl)) +
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") +
+  set$my_scale_fill +
+  ylab(set$y_axis_label)
+
+if (set$fuse_ASVs) {
+  plots$topn_tax_subset <- plots$topn_tax_subset + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl),
+    stat = "identity", position = "stack") +
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_ASVs_subset <- plot_bar(set$ps.topnASVs, x = set$x_axis_value,
+                                   fill = set$taxlvl, title = paste0(
+                                     "Top", set$topASVs, "_ASVs")) +
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") +
+  set$my_scale_fill +
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax_subset,
+          plot_name = paste0("Top", set$top_n, "_", set$taxlvl, "_subset"),
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs_subset,
+          plot_name = paste0("Top", set$topASVs, "_ASVs_subset"),
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax_subset
+plots$topn_ASVs_subset
+```
+### Abundance tables
+```{r Abundance tables per genus, message=FALSE}
+ps_trans_tbl <- as_tibble(psmelt(ps.trans))
+# 'ps_tbl' is used below but not created in this chunk: derive it from the
+# untransformed 'ps' unless an earlier chunk already provides it.
+if (!exists("ps_tbl")) ps_tbl <- as_tibble(psmelt(ps))
+
+# Genera of interest
+genera_of_interest <- c("Paratrichodorus")
+
+# Taking ps_tbl, grouping by Genus and summarizing (open heart surgery)
+# Will give a table with two columns: "Genus" "Genus_total_Abundance"
+genus_abundance_tbl <- ps_tbl %>%
+  group_by(Genus) %>%
+  summarise(Genus_total_Abundance = sum(Abundance))
+
+# If you want per genus and sample
+genus_abundance_tbl_per_sample <- ps_trans_tbl %>%
+  group_by(Genus, Sample) %>%
+  summarise(Genus_Sample_Abundance = sum(Abundance)) %>%
+  filter(Genus %in% genera_of_interest) %>%
+  group_by(Sample) %>%
+  mutate(sums_abu_sample = sum(Genus_Sample_Abundance)) %>%
+  ungroup() %>%
+  mutate(ratio = Genus_Sample_Abundance/sums_abu_sample)
+
+# To get back all the other info, one way is to fuse back to original table
+ps_tbl_with_genus_abundance <- left_join(ps_tbl, genus_abundance_tbl_per_sample,
+                                         by = c("Genus", "Sample"))
+
+# Can be made more compact by removing ASV+Species info and original abundance
+# values and then reducing to unique rows
+genus_abundance_tbl_full_info <- ps_tbl_with_genus_abundance %>%
+  select(-OTU, -Abundance, -Species) %>%
+  unique()
+view(genus_abundance_tbl_per_sample)
+
+library(readxl)
+Morfologisk <- read_excel("ITS2/data_morph.xlsx", sheet = "Sheet1")
+
+morph_data <- tibble(Morfologisk)
+view(morph_data)
+data_mol_morph <- genus_abundance_tbl_per_sample %>%
+  right_join(morph_data, by = c("Sample"="Sample"))
+view(data_mol_morph)
+
+# Scatter plot
+plots$ITS2_plot <- ggplot(data_mol_morph, aes(x = as.numeric(pH.tall),
+                                              y = as.numeric(StrepAbu))) +
+  geom_point() +
+  geom_smooth(formula = y ~ x, method = "lm") +
+  ggtitle("Streptomyces") +
+  labs(y = "Relativ abundans", x = "pH")
+
+plots$ITS2_plot
+
+save_plot(plots$ITS2_plot, plot_name = "ITS2_plot", filetype = tmp$out)
+
+library(xlsx)
+# Own file name: writing to 'data_morph.xlsx' would overwrite the input above.
+write.xlsx(as.data.frame(data_mol_morph), "ITS2/data_mol_morph.xlsx")
+```
+### Other phylogenetic trees
+For these chunks, the ggtree library is required. If you are not sure whether it is installed, run the following chunk.
+
+```{r check-ggtree}
+invisible(lapply(c("BiocManager", "ggtree"), function(pkg) {
+  if (requireNamespace(pkg, quietly = TRUE)) return(NULL)
+  if (pkg == "ggtree") BiocManager::install(pkg) else install.packages(pkg)
+}))
+```
+
+#### Generic phylogenetic tree (from a subset)
+
+This chunk allows the generation of a generic phylogenetic tree for a given subset of the phyloseq object, even if none is provided for the whole set.
+```{r tree-auto}
+# CHANGE ME to the search term(s) used for subsetting (the larger the subset,
+# the longer the tree generation will take!)
+set$subv <- c("Rhizoctonia")
+# CHANGE ME to the taxonomic level at which matching entries are searched
+# for
+set$taxlvlsub <- "Genus"
+
+# CHANGE ME to the width of the tree shown (depends on its size: start large,
+# then decrease until the best fit is achieved)
+set$tree_width <- 11
+# CHANGE ME to the position at which the tree's scale is shown. Must be
+# smaller than set$tree_width, larger = more to the right.
+set$tree_scale_pos <- 10
+
+##### Optional settings (sensible defaults) #####
+
+# Width (in cm) of the saved plot.
+set$wp <- 20
+# Height (in cm) of the saved plot.
+set$hp <- 20
+# Resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Align the sequences of the matching ASVs, then fit and optimise a GTR tree
+set$subASVs <- cuphyr::list_subset_ASVs(taxlvlsub = set$taxlvlsub,
+                                        subv = set$subv)
+set$ps.treesubs <- prune_taxa(set$subASVs, ps)
+set$seqs <- phyloseq::refseq(set$ps.treesubs)
+set$align <- set$seqs %>% DNAStringSet() %>% AlignSeqs(anchor = NA)
+set$seqs_phang <- phangorn::phyDat(as(set$align, "matrix"), type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- NJ(set$seqs_dm)
+set$seqs_fit <- pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- optim.pml(
+  update(set$seqs_fit, k = 4, inv = 0.2), model = "GTR", optInv = TRUE,
+  optGamma = TRUE, rearrangement = "stochastic",
+  control = pml.control(trace = 0))
+
+# Part of cuphyr::root_tree in outgroup, consider implementing a generic tree
+# version instead of just physeq.
+# Roots an unrooted tree at the tip with the longest terminal branch.
+#   tree.unrooted: 'phylo'-style tree with 'edge', 'edge.length', 'tip.label'.
+# Returns the rooted tree produced by ape::root(); stops if 'ape' is missing.
+root_generic_tree <- function(tree.unrooted) {
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  # Terminal branches are the edges whose child node (column 2) is a tip, i.e.
+  # has an index <= the number of tips; tip i carries the label tip.label[i].
+  is_terminal <- tree.unrooted$edge[, 2] <= n_tips
+  tip_ids <- tree.unrooted$edge[is_terminal, 2]
+  longest <- which.max(tree.unrooted$edge.length[is_terminal])
+  out_group <- tree.unrooted$tip.label[tip_ids[longest]]
+  rooted <- ape::root(tree.unrooted, outgroup = out_group, resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  rooted
+}
+
+set$fitGTRseqs$tree <- root_generic_tree(set$fitGTRseqs$tree)
+set$tree_base <- ggtree::ggtree(set$fitGTRseqs$tree) + ggtree::geom_tree()
+plots$subset_ASV_tree <- set$tree_base +
+  ggtree::geom_treescale(x = set$tree_scale_pos) +
+  ggtree::geom_tiplab() +
+  xlim(0, set$tree_width)
+
+# Save the figure
+save_plot(plots$subset_ASV_tree, filetype = tmp$out,
+          plot_name = "subset_phylogenetic_tree")
+
+# Remove every setting stored in 'set'
+rm(list = ls(set), envir = set)
+# Display the tree
+plots$subset_ASV_tree
+```
+
+#### Generic phylogenetic tree (from any FASTA)
+
+This chunk allows the generation of a generic phylogenetic tree for any given fasta. This may be useful to compare the phylogeny of a given set of ASVs and some reference sequences.
+
+```{r tree-manual}
+# CHANGE ME to the path of the FASTA file to build the phylogenetic tree from
+# (the larger the FASTA, the longer the tree generation will take!)
+set$fasta <- "ITS2/ASV_custom_BLAST_taxonomy.fasta"
+
+# CHANGE ME to the width of the tree shown (depends on its size; start large
+# and reduce until the best fit is achieved)
+set$tree_width <- 11
+# CHANGE ME to the x position of the tree's scale. Must be smaller than
+# set$tree_width; larger values move it further to the right.
+set$tree_scale_pos <- 10
+
+##### Optional settings (sensible defaults) #####
+
+# Width (in cm) of the saved plot.
+set$wp <- 20
+# Height (in cm) of the saved plot.
+set$hp <- 20
+# Resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Read the sequences from the FASTA file, align them and fit the tree
+set$seqs <- readDNAStringSet(set$fasta)
+set$align <- AlignSeqs(DNAStringSet(set$seqs), anchor = NA)
+set$seqs_phang <- phangorn::phyDat(as(set$align, "matrix"), type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- NJ(set$seqs_dm)
+set$seqs_fit <- pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, k = 4, inv = 0.2)
+set$fitGTRseqs <- optim.pml(
+  set$fitGTRseqs, model = "GTR", optInv = TRUE, optGamma = TRUE,
+  rearrangement = "stochastic", control = pml.control(trace = 0))
+
+# Generic-tree counterpart of cuphyr::root_tree_in_outgroup (consider moving
+# it into cuphyr): roots a 'phylo' tree on the tip with the longest terminal
+# branch and returns the rooted tree. Requires the package 'ape'.
+root_generic_tree <- function(tree.unrooted){
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure that package can be loaded.")
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  # Terminal edges are the rows of 'edge' whose child node (column 2) is a tip
+  # index (<= number of tips); that index is the tip's position in tip.label.
+  # The first n_tips rows of 'edge' are NOT guaranteed to be terminal edges.
+  is_tip_edge <- tree.unrooted$edge[, 2] <= n_tips
+  tip_ids <- tree.unrooted$tip.label[tree.unrooted$edge[is_tip_edge, 2]]
+  tip_lengths <- tree.unrooted$edge.length[is_tip_edge]
+  out_group <- tip_ids[which.max(tip_lengths)]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group,
+                        resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+set$fitGTRseqs$tree <- root_generic_tree(set$fitGTRseqs$tree)
+set$tree_base <- ggtree::ggtree(set$fitGTRseqs$tree) + ggtree::geom_tree()
+plots$fasta_ASV_tree <- set$tree_base +
+  ggtree::geom_treescale(x = set$tree_scale_pos) +
+  ggtree::geom_tiplab() +
+  xlim(0, set$tree_width)
+
+# Save the figure
+save_plot(plots$fasta_ASV_tree, filetype = tmp$out,
+          plot_name = "phylogenetic_tree_from_fasta")
+# Remove every setting stored in 'set'
+rm(list = ls(set), envir = set)
+# Display the tree
+plots$fasta_ASV_tree
+```
+
+#### Machine learning with SIAMCAT
+This is an experimental chunk implementing the machine learning functions of SIAMCAT following the tutorial steps and settings from the ["Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html). There is no convenient way to change the settings yet, because the usefulness of these models and the optimal ways to run them need to be tested further. The chunk can be run as is and will produce a result if the basic SIAMCAT chunk above was run. However, the results should be treated with skepticism and not given extraordinary weight unless the user is confident that they understand the method used.
+
+```{r experimental-machine-learning, warning=FALSE}
+# Count normalization by log-transforming and adding pseudocounts
+sc.norm <- normalize.features(
+  sc.filt,
+  norm.method = "log.unit",
+  norm.param = list(
+    log.n0 = 1e-06,
+    n.p = 2,
+    norm.margin = 1
+  )
+)
+# Split the data into training and test sets to validate the model
+sc.obj <- create.data.split(
+  sc.norm,
+  num.folds = 5,
+  num.resample = 2
+)
+
+# Train a model on the training sets
+sc.obj <- train.model(sc.obj, method = "lasso")
+
+# Store the models in a separate object and show the first entry. The
+# variable shares its name with the accessor; calls still find the function.
+models <- models(sc.obj)
+models[[1]]
+
+# Run the model on the data and show the head of the prediction matrix
+sc.obj <- make.predictions(sc.obj)
+pred_matrix <- pred_matrix(sc.obj)
+head(pred_matrix)
+
+# Save the model interpretation plot. The path is built once so that the
+# plot call and the message below always name the same file.
+model_plot_file <- file.path(outp, "model_interpretation.pdf")
+model.interpretation.plot(
+  sc.obj,
+  fn.plot = model_plot_file,
+  consens.thres = 0.5,
+  limits = c(-3, 3),
+  heatmap.type = "zscore"
+)
+
+cat("Model results stored to:", model_plot_file)
+```
+
+##### Credit
+This script is based on ideas and code from the [dada2 Tutorial](https://benjjneb.github.io/dada2/tutorial.html) by Benjamin Callahan, the publication "Bioconductor Workflow for Microbiome Data Analysis: from raw reads to community analyses" by [Callahan et al. (2016)](https://f1000research.com/articles/5-1492) and various pages of the official [phyloseq website](https://joey711.github.io/phyloseq) by Paul J. McMurdie.
\ No newline at end of file
diff --git a/Analysis_pipeline_v4-3 - Sven+Trich.Rmd b/Analysis_pipeline_v4-3 - Sven+Trich.Rmd
new file mode 100644
index 0000000..ef092ae
--- /dev/null
+++ b/Analysis_pipeline_v4-3 - Sven+Trich.Rmd	
@@ -0,0 +1,1546 @@
+---
+title: "Analysis template v4-3"
+author: "Simeon Rossmann"
+date: "10.12.2020"
+output:
+  html_document: default
+urlcolor: blue
+---
+
+```{r}
+# Packages installed from the Bioconductor package repository
+
+BiocManager::install(pkgs = "Biostrings")
+
+BiocManager::install(pkgs = "DECIPHER")
+
+BiocManager::install(pkgs = "phyloseq")
+
+BiocManager::install(pkgs = "SIAMCAT")
+
+
+
+# Packages installed from CRAN (R's official package repository)
+
+install.packages(pkgs = "remotes")
+
+install.packages(pkgs = "ape")
+
+install.packages(pkgs = "viridis")
+
+install.packages(pkgs = "gridExtra")
+
+install.packages(pkgs = "phangorn")
+
+install.packages(pkgs = "ggpubr")
+
+install.packages(pkgs = "stringr")
+
+install.packages(pkgs = "dplyr")
+
+install.packages(pkgs = "rlang")
+
+setwd(dir = "~/Metabarcoding_analysis")
+```
+
+
+
+## Introduction and initialization
+
+This is an R Markdown file containing code to parse the results of a dada2 analysis into phyloseq for further analysis. It is separated into chunks that may be run independently by pressing the _play_ button. You will need **3 files** in the same location in order to run this pipeline successfully:
+
+* A sequence table called **'seqtab_nochim.rds'** (automatically generated by the dada2 pipeline)
+* A taxonomy table called **'taxa.rds'** (automatically generated by the dada2 pipeline)
+* A table describing the sample properties called **'descriptors.txt'** (Has to be provided)
+* Optional (some plots require this): A phylogenetic tree file called **'phylotree.rds'** (can be calculated if it is not provided, takes a long time)
+* Optional: Instead of **'taxa.rds'**, a **custom taxonomy file** may be provided (tab-delimited text)
+
+**Recommended use:** Adjust the individual chunks until you are content with the output, then knit the whole document into a PDF/html, so you have a full record of a successful run.
+
+#### Optional custom Taxonomy file
+A custom taxonomy file may be provided instead of using the taxonomy output from dada2. This may be used to supply taxonomy derived e.g. from BLAST searches of the ASVs. Custom taxonomy files must be tab-delimited text with as many rows as the original and column headers for all columns except the first. For example:
+
+>Kingdom	Phylum	Class	Order	Family	Genus	Species  
+>ESV1	Kingdomx	Phylumx	Classx	Orderx	Familyx	Genusx	Speciesx  
+>ESV2	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy  
+>ESV3	Kingdomz	Phylumz	Classz	Orderz	Familyz	Genusz	Speciesz  
+> ...
+>ESVn	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy
+
+_Friendly warning: Parsing the results of a BLAST search into this format may require some effort._
+
+#### Descriptor table
+
+'descriptors.txt' should be a tab-delimited .txt table describing the samples. It must have the same length and order as the samples in seqtab_nochim.rds. To check the order and length of samples in seqtab_nochim.rds and generate a template to fill out, you may run the chunk below with "optional_sample_check" set to "TRUE". 
+
+Any number of descriptors is possible. The sample names may be retained as one descriptor, but this is not necessary, as they will be added during parsing. For example, if there are 4 samples (order: s1, s2, s3, s4), the txt file could look as follows:  
+
+>Subject	Species	Time  
+>Kar1	A.thaliana	24hpi  
+>Kar1	A.thaliana	72hpi  
+>Mec2	S.tuberosum	24hpi  
+>Mec3	S.tuberosum	24hpi  
+
+Finally, the file should end with an empty line, since it may throw an error otherwise. However, this is usually not a serious problem.
+
+**If you choose to use the blank file, you MUST retain the original order of the samples!**
+
+## Setup
+
+This chunk also loads required packages and defines the location of the input files. It requires the **correct path** as input, and allows **setting the pruning of control samples** and **choosing generation of a phylogenetic tree**. **Beware:** The generation of a phylogenetic tree may take several days for >1000 sequences; it is therefore recommended to only use this feature for the final analysis or small sample sets. This script assumes the packages _Biostrings_, _dada2_, _DECIPHER_, _ggplot2_, _ggsci_, _phangorn_, _phyloseq_ and _stringr_ to be installed.
+
+```{r check-samples, message=FALSE, tidy=FALSE, warning=FALSE}
+# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
+path <- "Sven+Trich/"
+
+# CHANGE ME to TRUE to list all samples and generate an empty metadata file
+optional_sample_check <- FALSE
+
+# Set to TRUE to (re)install cuphyr from GitHub, FALSE to skip
+update_cuphyr <- TRUE
+
+################# NO CHANGES NECESSARY BELOW #################
+# NOTE(review): knitr documents 'root.dir' as a package option (opts_knit),
+# so setting it via opts_chunk presumably has no effect - confirm.
+knitr::opts_chunk$set(echo = TRUE, root.dir = paste0(path),
+                      message = FALSE, warning = FALSE)
+
+# remotes is the installer that is installed and used elsewhere in this file
+if (update_cuphyr) {
+  remotes::install_github("simeross/cuphyr")
+}
+
+# Sequence and microbiome specific libraries
+library(dada2)
+library(Biostrings)
+library(DECIPHER)
+library(cuphyr)
+# The export of phyloseq objects to a BIOM format and the generation of fancier 
+# ordination plots require the phyloseq-extended package. The first command 
+# installs the package from the 'dev' branch of the author's repository, the 
+# second command sources extra functions from the 'master' branch, including 
+# the better ordination plot implementation.
+remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
+source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
+
+library(phyloseq)
+library(SIAMCAT)
+
+# Phylogeny libraries
+library(phangorn)
+library(ape)
+
+# Plotting and figure export
+library(gridExtra)
+library(viridis)
+library(ggpubr)
+
+# Tidyverse
+library(tidyverse)
+library(stringr)
+
+
+# Create the output directory inside 'path' unless it already exists
+outp <- paste0(path,"/analysis_output")
+if (!dir.exists(outp)) dir.create(outp)
+
+# Optional: list the samples and write a blank descriptor template
+if (optional_sample_check) {
+  seqtabcheck <- readRDS(paste0(path, "/seqtab_nochim.rds"))
+  samps <- rownames(seqtabcheck)
+  lensamps <- length(samps)
+  blankcol <- character(lensamps)
+  blanktable <- data.frame(SampleIDs = samps, ExampleProperty1 = blankcol,
+                           ExampleProperty2 = blankcol,
+                           ExampleProperty3 = blankcol)
+  write.table(blanktable, file = paste0(path, "/descriptors_blank.txt"),
+              sep = "\t", row.names = FALSE)
+  cat("'seqtab_nochim.rds' contains samples in the following order:\n", 
+      samps, "\nThe number of samples in the file is:", lensamps, sep = "\n")
+  rm(seqtabcheck, samps, lensamps, blankcol, blanktable)
+}
+rm(optional_sample_check, update_cuphyr)
+```
+
+#### Parameters
+
+This chunk allows the adjustment of several parameters, such as **setting the pruning of control samples** based on keywords, **requiring that a phylogenetic tree be provided or generated**, **defining a minimum ASV count** and **providing an alternative taxonomy**.
+
+```{r setup, message=FALSE, tidy=TRUE, warning=FALSE}
+# Dedicated environment that collects all global analysis settings in one
+# place, for a better overview and a collected export of the settings
+parameters <- new.env()
+
+# CHANGE ME to "TRUE" to remove control samples from the analysis or to
+# "FALSE" to analyse all samples.
+parameters$prune_controls <- "TRUE"
+# CHANGE ME to the unique identifiers that only occur in the names of samples
+# you do NOT want to analyse. Common examples are provided.
+parameters$controls <- c("Pos", "H2O", "Neg", "Kontr", "Contr", "POSK", "V1", "V2")
+
+# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
+# name, e.g. to exclude non-target organisms or noise from organelles such as
+# Chloroplasts and Mitochondria. It is recommended to look at all data first
+# before using this setting.
+parameters$prune_noise_taxgroups <- "FALSE"
+# CHANGE ME to the taxonomic groups that are removed as noise.
+parameters$noise_taxgroups <- c("Chloroplast", "Mitochondria")
+
+# CHANGE ME to the number of ASV counts [~reads] that analyzed samples should
+# minimally have. Samples with fewer ASV counts than this value will be
+# pruned. Set to 0 to not prune any samples.
+parameters$minASVcount <- 10
+
+# CHANGE ME to "TRUE" to provide a custom taxonomy table instead of using the
+# default dada2 output ('taxa.rds').
+parameters$customTax <- "TRUE"
+# CHANGE ME to the location of the custom taxonomy file. It is only read if
+# parameters$customTax is "TRUE".
+parameters$taxfile <- "Sven+Trich/custom_BLAST_taxonomy_nt.txt"
+
+# CHANGE ME to "TRUE" to generate a phylogenetic tree. This takes a long time
+# depending on the number of sequences (up to days for thousands). If a tree
+# is provided as 'phylotree.rds' in 'path', it is used regardless of the
+# value of 'parameters$maketree'.
+parameters$maketree <- "TRUE"
+
+# CHANGE ME to "TRUE" to root the phylogenetic tree on the leaf with the
+# longest branch (outgroup). This makes analyses that rely on the
+# phylogenetic tree reproducible instead of picking a random leaf as root when
+# calculating UNIFRAC distances. Implementation based on
+# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
+# in https://github.com/joey711/phyloseq/issues/597
+parameters$roottree <- "TRUE"
+
+# CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
+parameters$biom_export <- "FALSE"
+```
+
+#### Parsing input data
+
+This chunk loads the input data into a usable format. **This chunk does not require any user inputs**. If no phylogenetic tree with the name 'phylotree.rds' was provided and 'parameters$maketree="TRUE"', it will be calculated here. The phylogenetic tree is necessary for certain plots that incorporate 'true' taxonomic relationships beyond the annotations, such as PCoA.
+
+```{r parse input, tidy=TRUE}
+############### NO NEED FOR CHANGES BELOW ###############
+# Dedicated environments for temporary values, plots and per-chunk settings
+tmp <- new.env()
+plots <- new.env()
+set <- new.env()
+
+# Read the sequence table, the taxonomy and the sample descriptors
+tmp$seqtabp <- readRDS(paste0(path, "/seqtab_nochim.rds"))
+if (parameters$customTax != "TRUE") {
+  tmp$taxap <- readRDS(paste0(path, "/taxa.rds"))
+} else {
+  tmp$taxap <- read.delim(parameters$taxfile, header = TRUE, sep = "\t")
+  rownames(tmp$taxap) <- colnames(tmp$seqtabp)
+  tmp$taxap <- as.matrix(tmp$taxap)
+}
+tmp$samp_table <- read.delim(
+  paste0(path, "/descriptors.txt"), header = TRUE, sep = "\t")
+tmp$samp_list <- rownames(tmp$seqtabp)
+# Compare the descriptors with the samples of the sequence table
+if (nrow(tmp$samp_table) != length(tmp$samp_list)) {
+  stop("There are ", nrow(tmp$samp_table), 
+    " samples in 'descriptors.txt', but ", length(tmp$samp_list), 
+    " samples in 'seqtab_nochim.rds'. Please make sure that the correct samples 
+    are contained in descriptors.txt.
+       
+    You may use 'optional_sample_check <- TRUE' in the first chunk to generate an 
+    empty template for 'descriptors.txt'" )
+} else if (!identical(tmp$samp_table[,1], tmp$samp_list)) {
+  warning("Warning: The samples in 'descriptors.txt' do not have the same names 
+          or order as the samples in 'seqtab_nochim.rds'. This may be fine if 
+          abbreviated names were used or the sample names are not contained in 
+          the first column of 'descriptors.txt'. Double-checking never hurts!")
+}
+
+
+# Build and save the ASV tree unless 'phylotree.rds' already exists in 'path'
+if (!file.exists(paste0(path, "/phylotree.rds")) &&
+    parameters$maketree == "TRUE") {
+  tmp$ASVs <- getSequences(tmp$seqtabp)
+  names(tmp$ASVs) <- tmp$ASVs
+  tmp$ASV_align <- AlignSeqs(DNAStringSet(tmp$ASVs), anchor = NA)
+  tmp$ASV_phang <- phyDat(as(tmp$ASV_align, "matrix"), type = "DNA")
+  tmp$dm <- dist.ml(tmp$ASV_phang)
+  tmp$treeNJ <- NJ(tmp$dm)
+  tmp$fit <- pml(tmp$treeNJ, data = tmp$ASV_phang)
+  tmp$fitGTR <- update(tmp$fit, k = 4, inv = 0.2)
+  tmp$fitGTR <- optim.pml(
+    tmp$fitGTR, model = "GTR", optInv = TRUE, optGamma = TRUE,
+    rearrangement = "stochastic", control = pml.control(trace = 0))
+  saveRDS(tmp$fitGTR, file = paste0(path, "/phylotree.rds"))
+}
+
+## Parse into a phyloseq object; a tree is added if 'phylotree.rds' exists
+row.names(tmp$samp_table) <- tmp$samp_list
+if (!file.exists(paste0(path, "/phylotree.rds"))) {
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
+                sample_data(tmp$samp_table), tax_table(tmp$taxap))
+} else {
+  tmp$treep <- readRDS(paste0(path, "/phylotree.rds"))
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
+                sample_data(tmp$samp_table), tax_table(tmp$taxap),
+                phy_tree(tmp$treep$tree))
+}
+
+## Keep the sequences as nucleotide info and rename the taxa to ASV1..ASVn
+tmp$ASV_sequences <- Biostrings::DNAStringSet(taxa_names(p))
+names(tmp$ASV_sequences) <- paste0("ASV", seq(ntaxa(p)))
+taxa_names(p) <- names(tmp$ASV_sequences)
+p <- merge_phyloseq(p, tmp$ASV_sequences)
+
+## Optional pruning of control samples. 'ps' is always defined afterwards;
+## 'ps.contr' and 'ps.transcontr' are only created when controls are pruned.
+ps <- p
+if (parameters$prune_controls == "TRUE") {
+  if (!is.null(parameters$controls)) {
+    tmp$is_control <- grepl(paste0(parameters$controls, collapse = "|"),
+                            tmp$samp_list)
+    tmp$samp_clean <- tmp$samp_list[!tmp$is_control]
+    tmp$contr_pruned <- setdiff(tmp$samp_list, tmp$samp_clean)
+    ps <- prune_samples(tmp$samp_clean, p)
+    # Phyloseq objects for just the controls (counts and relative abundance)
+    ps.contr <- prune_samples(tmp$contr_pruned, p)
+    ps.contr <- prune_taxa(taxa_sums(ps.contr) > 0, ps.contr)
+    ps.transcontr <- transform_sample_counts(
+      ps.contr, function(ASV) ASV/sum(ASV))
+    # cat() prints the report itself; it returns NULL, so it is not wrapped
+    cat("\n",
+        "Number of control samples that were pruned and will not be analysed:\n",
+        length(tmp$samp_list) - length(tmp$samp_clean), "\n",
+        "The following controls were pruned:\n", tmp$contr_pruned,
+        "The controls are contained in a separate phyloseq object: ps.contr",
+        "\n", sep = "\n")
+  } else {
+    warning("parameters$prune_controls is TRUE but 'parameters$controls' is ",
+            "empty. No samples were pruned.", call. = FALSE)
+  }
+}
+
+# Remove ASVs that belong to the taxonomic groups defined as noise
+if (parameters$prune_noise_taxgroups == "TRUE") {
+  tmp$ps_taxlvls <- colnames(tax_table(ps))
+  tmp$noise_ASVs <- character(0)
+  # Search every taxonomic level of 'ps' for the noise group names
+  for (lvl in tmp$ps_taxlvls) {
+    tmp$lvl_hits <- cuphyr::list_subset_ASVs(
+      physeq = ps, subv = parameters$noise_taxgroups, taxlvlsub = lvl)
+    tmp$noise_ASVs <- c(tmp$noise_ASVs, tmp$lvl_hits)
+  }
+  tmp$noise_ASVs <- unique(tmp$noise_ASVs)
+  tmp$no_noise_ASVs <- setdiff(colnames(otu_table(ps)), tmp$noise_ASVs)
+  # Report the outcome; 'ps' is only changed when noise ASVs were found, and
+  # the pruned object is then also kept as 'tmp$no_noise_ps'
+  if (length(tmp$noise_ASVs) == 0) {
+    cat("No ASVs were recognized as belonging to the following taxonomic groups 
+        defined as noise:\n")
+  } else {
+    ps <- prune_taxa(tmp$no_noise_ASVs, ps)
+    tmp$no_noise_ps <- ps
+    cat(length(tmp$noise_ASVs), 
+        "ASVs were pruned because they belonged to the following 
+        taxonomic groups:\n")
+  }
+  cat(parameters$noise_taxgroups, "\n", sep = "\n")
+}
+
+# Prune samples with fewer ASV counts than parameters$minASVcount
+if (parameters$minASVcount > 0) {
+  tmp$samp_pruned <- names(which(sample_sums(ps) < parameters$minASVcount))
+  ps <- prune_samples(sample_sums(ps) >= parameters$minASVcount, ps)
+  if (length(tmp$samp_pruned) > 0) {
+    cat("The following samples were pruned because ASV counts were lower than", 
+        parameters$minASVcount,  ":\n")
+    cat(tmp$samp_pruned, "\n", sep = "\n")
+  }
+}
+
+# Remove 0 count ASVs (e.g. control ASVs that remain) from the base object
+ps <- prune_taxa(taxa_sums(ps) > 0, ps)
+
+# Root the tree of 'ps' (if any) before ps_tbl and ps.trans are derived from it
+if (parameters$roottree == "TRUE" &&
+    !is.null(phyloseq::phy_tree(ps, errorIfNULL = FALSE))) {
+  phyloseq::phy_tree(ps) <- cuphyr::root_tree_in_outgroup(physeq = ps)
+}
+# Tibble of the base object for phyloseq-independent analyses (may take a while)
+ps_tbl <- as_tibble(psmelt(ps))
+# Transformed per sample (per-sample relative abundance)
+ps.trans <- transform_sample_counts(ps, function(ASV) ASV/sum(ASV))
+
+# Optional BIOM export. All files are written into 'path' via file.path();
+# the controls are only exported when the object 'ps.contr' exists.
+if (parameters$biom_export == "TRUE") {
+  tmp$biom_objects <- list(
+    all_samples = p, samples_without_controls = ps,
+    samples_without_controls_rel_abundance = ps.trans)
+  if (exists("ps.contr")) tmp$biom_objects$just_controls <- ps.contr
+  # One '<name>.biom' file per list entry
+  for (biom_name in names(tmp$biom_objects)) {
+    suppressWarnings(phyloseq.extended::write_phyloseq(
+      tmp$biom_objects[[biom_name]],
+      biom_file = file.path(path, paste0(biom_name, ".biom")),
+      biom_format = "standard"))
+  }
+}
+
+ps
+```
+
+## Output
+
+The chunks below will produce various plots and other output. Each chunk is headed by a description of the output and may contain some parameters to adjust the output. 
+
+#### Plot looks
+This chunk sets the background structure and color palette. Viridis was chosen because it is optimized for grey-scale printing and various types of color blindness. More info on the Viridis palette can be found on [the Viridis info page](https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html). It also establishes save_plot as a shorter variant of ggsave with customized date-time structure to save plots with the same name multiple times instead of overwriting them (overwriting can be triggered with overwrite=TRUE).
+
+```{r plot-design-global, tidy=FALSE, warning=FALSE}
+##### Optional settings (sensible defaults) #####
+# Output format used for all saved plots. Default "pdf"; also possible:
+# "eps"/"ps", "tex" (pictex), "jpeg", "tiff", "png", "bmp" and "svg"
+parameters$output_format <- "pdf"
+
+# ggplot2 theme used for all plots. Recommended: "theme_bw()".
+theme_set(theme_bw())
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+my_scale_col <- scale_color_viridis(discrete = TRUE)
+my_scale_fill <- scale_fill_viridis(discrete = TRUE)
+
+# Narrower color ranges based on viridis. The 20 colors of each range are
+# reordered (1, 6, 11, 16, 2, 7, ...) so that adjacent colors are distinct.
+tmp$n_col <- 20
+tmp$sort_colors <- c(matrix(1:20, nrow = 4, byrow = TRUE))
+
+# Helper: one reordered viridis range for the given option and limits
+tmp$sub_palette <- function(option, begin, end) {
+  viridis(tmp$n_col, option = option, begin = begin, end = end)[tmp$sort_colors]
+}
+tmp$viridis_greens <- tmp$sub_palette("D", begin = 0.85, end = 0.7)
+tmp$viridis_reds <- tmp$sub_palette("B", begin = 0.7, end = 0.5)
+tmp$viridis_blues <- tmp$sub_palette("D", begin = 0.2, end = 0.4)
+tmp$viridis_yellows <- tmp$sub_palette("D", begin = 1, end = 0.9)
+tmp$viridis_dark <- tmp$sub_palette("A", begin = 0, end = 0.1)
+tmp$viridis_light <- tmp$sub_palette("A", begin = 1, end = 0.9)
+
+# Collected list that is available in the global environment
+sub_viridis <- list(
+  greens = tmp$viridis_greens, blues = tmp$viridis_blues,
+  yellows = tmp$viridis_yellows, lights = tmp$viridis_light,
+  reds = tmp$viridis_reds, darks = tmp$viridis_dark)
+
+# File extension for saved plots: a dot followed by the output format
+tmp$out <- paste0(".", parameters$output_format)
+
+#################### Function ############################
+
+# Saves plot 'pl' as '<plot_name><filetype>' in 'outp' via ggsave(). If that
+# file exists and overwrite is FALSE, a d-m-y_HMS time stamp is appended. Size
+# (cm) and dpi come from set$wp/set$hp/set$res, else 20 x 20 cm at 300 dpi.
+save_plot <- function(
+  pl, filetype = ".pdf", plot_name = "my_plot", overwrite = FALSE){
+  wp <- if (!is.null(set$wp)) set$wp else 20
+  hp <- if (!is.null(set$hp)) set$hp else 20
+  res <- if (!is.null(set$res)) set$res else 300
+  name <- paste0(plot_name, filetype)
+  if (file.exists(file.path(outp, name)) && !overwrite) {
+    name <- paste0(plot_name, "_",
+                   format(Sys.time(), "%d-%m-%y_%H%M%S"), filetype)
+  }
+  ggsave(file.path(outp, name), pl,
+         width = wp, height = hp, units = "cm", dpi = res)
+}
+
+################################################
+```
+
+#### Total ASV counts ranked
+This chunk plots the absolute abundance of all samples (including controls) and all samples without controls and other trimmed samples.
+```{r overview}
+# CHANGE ME to the sample group used for color coding. Accepted values are the
+# column headers in the descriptor file.
+set$color_by <- "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Width (in cm) of the saved plot.
+set$wp <- 17
+# Height (in cm) of the saved plot.
+set$hp <- 20
+# Resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Rank the samples of the unpruned object 'p' and of the final object 'ps'
+set$ranked <- cuphyr::make_ranked_sums(p, myset = tmp$subset_id)
+set$ranked_ps <- cuphyr::make_ranked_sums(ps, myset = tmp$subset_id)
+
+# Axis limits shared by all panels, derived from the unpruned object
+set$ymax <- max(set$ranked$Abundance)
+set$ymax <- set$ymax + round(set$ymax/10)
+set$xmax <- nrow(set$ranked) + 1
+
+# Stabilize colors: one fixed viridis color per sorted value of 'color_by'
+set$color_vars <- sort(unique(as.character(unlist(
+  set$ranked[,set$color_by]))))
+set$color_varsPalette <- viridis(length(set$color_vars))
+names(set$color_varsPalette) <- set$color_vars
+set$my_scale_fill <- scale_fill_manual(values = set$color_varsPalette)
+
+# Top panel: all samples. If a minimum ASV count is set, the cutoff is drawn
+# as a dashed line and the panel titles are adjusted.
+plots$overview_all <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) +
+  aes_string(fill = set$color_by) +
+  geom_col() +
+  set$my_scale_fill +
+  ggtitle("All samples") +
+  ylim(0, set$ymax) +
+  xlim(0, set$xmax) +
+  ylab("ASV counts ('reads')")
+set$title2 <- "Samples (without controls)"
+if (parameters$minASVcount > 0) {
+  plots$overview_all <- plots$overview_all +
+    geom_hline(yintercept = parameters$minASVcount, linetype = "dashed") +
+    ggtitle("All samples (ASV count cutoff indicated)")
+  set$title2 <- "Samples (without controls and low count samps)"
+}
+
+# Bottom panel: the final object 'ps'
+plots$overview_ps <- ggplot(data = set$ranked_ps, aes(x = Rank, y = Abundance)) +
+  aes_string(fill = set$color_by) +
+  geom_col() +
+  set$my_scale_fill +
+  ggtitle(set$title2) +
+  ylim(0, set$ymax) +
+  xlim(0, set$xmax) +
+  ylab("ASV counts ('reads')")
+
+# If noise ASVs were defined and found, a middle panel shows the counts after
+# their removal; otherwise only the top and bottom panels are combined.
+if (length(tmp$noise_ASVs) > 0) {
+  set$ranked_nonoise <- cuphyr::make_ranked_sums(
+    tmp$no_noise_ps, myset = tmp$subset_id)
+  plots$overview_noise <- ggplot(
+    data = set$ranked_nonoise, aes(x = Rank, y = Abundance)) +
+    aes_string(fill = set$color_by) +
+    geom_col() +
+    set$my_scale_fill +
+    ggtitle("Samples (without controls), noise ASVs removed") +
+    ylim(0, set$ymax) +
+    xlim(0, set$xmax) +
+    ylab("ASV counts ('reads')")
+  plots$combo_overview <- ggarrange(
+    plots$overview_all, plots$overview_noise, plots$overview_ps,
+    nrow = 3, align = "v", common.legend = TRUE, legend = "right")
+} else {
+  plots$combo_overview <- ggarrange(
+    plots$overview_all, plots$overview_ps, nrow = 2, align = "v",
+    common.legend = TRUE, legend = "right")
+}
+
+# Save the combined plot
+save_plot(plots$combo_overview, plot_name = "Overview_all_and_pruned",
+          filetype = tmp$out)
+
+# Remove every setting stored in 'set'
+rm(list = ls(set), envir = set)
+
+# Print the combined plot
+plots$combo_overview
+```
+
+#### Controls
+This chunk generates an overview over the controls (positive AND negative)
+
+```{r Positive controls}
+# CHANGE ME to the desired sample categories on the x-axis. In this case it a
+
+# should be the Sample names.
+set$x_axis_value = "SampleIDs"
+
+# CHANGE ME to the taxonomic level for color coding. Use "OTU" for ASVs, 
+# "Genus", "Species" or "OTU" recommended to compare pos. controls.
+set$color_by_taxlvl = "Genus"
+
+# CHANGE ME to the taxonomic level for labeling the tree tips (if phylogenetic 
+# tree is available). Use "OTU" for ASVs.
+set$label_by_taxlvl = "OTU"
+
+# CHANGE ME to a sample category to shape the tree tip labels by (if 
+# phylogenetic tree is available).
+set$label_shape_by = "SampleIDs"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to generate a tree for just the control sequences IF no 
+# phylogenetic tree for all seuquences is provided. This may slow down this 
+# chunk when running it for the first time
+set$control_tree = TRUE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Build a phylogenetic tree for the control ASVs, but only if 
+# 'set$control_tree' is TRUE and 'ps.transcontr' does not carry a tree yet 
+# (detected by phy_tree() failing inside try()). The fitted tree is cached as 
+# 'controls_phylotree.rds' in 'path', so the slow alignment and optimisation 
+# only run when that file does not exist.
+if (set$control_tree && 
+    inherits(try(phy_tree(ps.transcontr), silent = TRUE), "try-error")) {
+  set$tree_file <- paste0(path, "/controls_phylotree.rds")
+  if (!file.exists(set$tree_file)) {
+    # Align the reference sequences of the control ASVs
+    set$ASVs <- phyloseq::refseq(ps.transcontr)
+    set$ASV_align <- AlignSeqs(set$ASVs, anchor = NA)
+    set$ASV_phang <- phyDat(as(set$ASV_align, "matrix"), type = "DNA")
+    # Neighbour-joining tree from the distance matrix as starting point
+    set$dm <- dist.ml(set$ASV_phang)
+    set$treeNJ <- NJ(set$dm)
+    # Fit and optimise a GTR model (invariant sites and gamma optimised)
+    set$fit <- pml(set$treeNJ, data = set$ASV_phang)
+    set$fitGTR <- update(set$fit, k = 4, inv = 0.2)
+    set$fitGTR <- optim.pml(set$fitGTR, model = "GTR", 
+                            optInv = TRUE, optGamma = TRUE,
+                            rearrangement = "stochastic", 
+                            control = pml.control(trace = 0))
+    saveRDS(set$fitGTR, file = set$tree_file)
+  }
+  # Always read the cached fit back and attach its tree to the controls object
+  set$fitGTR <- readRDS(set$tree_file)
+  phyloseq::phy_tree(ps.transcontr) <- set$fitGTR$tree
+}
+
+# Bar plot of the controls object 'ps.contr' (y-axis labelled as ASV counts)
+plots$topnpplot <- plot_bar(ps.contr, x = set$x_axis_value, 
+                            fill = set$color_by_taxlvl) + my_scale_fill + 
+  theme(axis.title.x = element_blank(), legend.position = "none", 
+        legend.key.size = unit(3, "mm")) + 
+  ylab("ASV counts") + guides(col = guide_legend(ncol = 3))
+
+# Same plot for 'ps.transcontr' (y-axis labelled as relative abundance)
+plots$topntplot <- plot_bar(ps.transcontr, x = set$x_axis_value, 
+                            fill = set$color_by_taxlvl) + my_scale_fill + 
+  theme(axis.title.x = element_blank(), legend.position = "none", 
+        legend.key.size = unit(3, "mm")) + 
+  ylab("Relative abundance") + guides(col = guide_legend(ncol = 3))
+
+# Default layout: both bar plots side by side (A, B) with one shared legend
+plots$combo_contr <- ggarrange(plots$topnpplot, plots$topntplot, ncol = 2, 
+                               labels = c("A", "B"), align = "hv", 
+                               common.legend = TRUE, legend = "right")
+
+# If 'ps.transcontr' carries a tree, overwrite the layout: tree on top (A), 
+# bar plots below (B, C). inherits() is used because it stays a single 
+# TRUE/FALSE no matter how many classes the object returned by try() has.
+if (!inherits(try(phy_tree(ps.transcontr), silent = TRUE), "try-error")) {
+  plots$tre <- plot_tree(
+    ps.transcontr, ladderize = "left", label.tips = set$label_by_taxlvl, 
+    color = "abundance", text.size = 2.5, shape = set$label_shape_by) + 
+    scale_color_viridis_c(aesthetics = c("color","fill")) + 
+    theme(legend.position = "left", panel.border = element_blank())
+  plots$combo_contr <- ggarrange(
+    plots$tre, 
+    ggarrange(plots$topnpplot, plots$topntplot, ncol = 2, 
+              labels = c("B", "C"), align = "hv", 
+              common.legend = TRUE, legend = "right"), 
+    nrow = 2, legend = "right", labels = c("A"))
+}
+
+# save
+save_plot(plots$combo_contr, plot_name = "Controls", filetype = tmp$out)
+
+plots$combo_contr
+```
+
+#### Richness plot
+
+This chunk plots the Alpha-Diversity according to the Shannon and Simpson indices. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Richness, tidy=TRUE, message=FALSE}
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are 
+# the column headers in the descriptor file.
+set$x_axis_value = "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the column 
+# headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Plot all diversity measures. The call is wrapped in try() so that a failure 
+# here does not abort the chunk; in that case plots$richP holds the 
+# "try-error" object instead of a plot.
+plots$richP <- try(
+  plot_richness(ps, x = set$x_axis_value, color = set$color_by) + 
+    my_scale_col, 
+  silent = TRUE)
+# Just Shannon and Simpson
+plots$richShSi <- plot_richness(ps, x = set$x_axis_value, 
+                                measures = c("Shannon", "Simpson"), 
+                                color = set$color_by) + my_scale_col
+
+# Save. The success check uses inherits(): a ggplot object has more than one 
+# class, so 'class(x) == "try-error"' yields a condition of length > 1, which 
+# is an error inside if() in R >= 4.2.
+if (!inherits(plots$richP, "try-error")) {
+  save_plot(plots$richP, plot_name = "Alpha_diversity_all", filetype = tmp$out)
+}
+save_plot(plots$richShSi, plot_name = "Alpha_diversity_all_ShSi", 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+# Print to standard out
+if (!inherits(plots$richP, "try-error")) {
+  plots$richP
+}
+plots$richShSi
+```
+
+#### Bray-Curtis NMDS plot
+
+This chunk generates a non-metric multidimensional scaling (NMDS) plot of the Bray-Curtis dissimilarity, giving a two-dimensional measure of community diversity. This is done for the primary parameter and the taxonomic level separately. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary, as well as the **max. number of taxa to be displayed at taxlvl**. _Friendly warning: This chunk may not perform for lower order taxlvl, such as 'species', if they are not sufficiently abundant in all samples_
+
+```{r Bray-Curtis NMDS, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the 
+# column headers in the descriptor file.
+set$shape_by = "Vekst"
+# CHANGE ME to the sample group for color coding. Accepted values are the 
+# column headers in the descriptor file.
+set$color_by = "Skifte"
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the taxonomic ranks (column names) of the taxonomy table, e.g. "Genus".
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+# Can be changed to change the number of Top n taxa plotted at taxlvl in 
+# separate panels, a maximum of 9 is recommended for good readability.
+set$top_n = 9
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Ordinate the transformed data (NMDS on Bray-Curtis distances)
+tmp$ord_nmds <- ordinate(ps.trans, method = "NMDS", distance = "bray")
+# Object restricted to the top n taxa at taxlvl; within this chunk it is only 
+# referenced by the disabled (commented-out) taxa plots below
+tmp$ps.topn <- cuphyr::abundant_tax_physeq(physeq = ps.trans, lvl = set$taxlvl, 
+                                           top = set$top_n)
+#tmp$top_ord_nmds <- ordinate(tmp$ps.topn, method = "NMDS", distance = "bray")
+# Plot. The color and shape legends are switched off with "none"; the logical 
+# form (color = FALSE) is deprecated in ggplot2.
+plots$nmds <- plot_samples(ps.trans, tmp$ord_nmds, color = set$color_by,
+                           shape = set$shape_by, 
+                           title = "Bray NMDS") + 
+  my_scale_col + guides(color = "none", shape = "none")
+
+#plots$nmds_tax <- plot_ordination(tmp$ps.topn, tmp$top_ord_nmds, type = "taxa",
+#                                  color = set$taxlvl, 
+#                                  title = paste0("Bray NMDS ", set$taxlvl)) + 
+#  my_scale_col
+
+#plots$nmds_taxpanels <- plots$nmds_tax + 
+##  facet_wrap(paste0("~", set$taxlvl), scales = "free_x") + 
+#  my_scale_col
+
+# Save
+save_plot(plots$nmds, plot_name = paste0("NMDS_", set$shape_by, "_", 
+                                         set$color_by), filetype = tmp$out)
+#save_plot(plots$nmds_tax, plot_name = paste0("NMDS_", set$taxlvl), 
+#          filetype = tmp$out)
+#save_plot(plots$nmds_taxpanels, 
+#          plot_name = paste0("NMDS_top", set$top_n, "_", set$taxlvl), 
+#          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$nmds
+#plots$nmds_tax
+#plots$nmds_taxpanels
+```
+
+#### PcoA (requires phylogenetic tree)
+
+This chunk generates an alternative common ordination plot, called 'PCoA', based on the primary variable, giving a two-dimensional measure of community diversity by considering the phylogenetic tree. **The chunk does not require any input**, although it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary. If the provided tree is not rooted, phyloseq will root it to a random ASV. Root the tree to a given ASV to get consistent plots here (implementation will follow; until then, see [this github issue](https://github.com/joey711/phyloseq/issues/235#issuecomment-26289761)).
+
+```{r PcoA, eval=FALSE, message=FALSE, warning=FALSE, tidy=TRUE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the 
+# column headers in the descriptor file.
+set$shape_by = "Vekst"
+# CHANGE ME to the sample group for color coding. Accepted values are the 
+# column headers in the descriptor file.
+set$color_by = "Skifte"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# The ordination below uses UniFrac distances, which need a phylogenetic tree. 
+# If 'ps' has none (phy_tree() fails), only print a message and skip the plot.
+if (inherits(try(phy_tree(ps), silent = TRUE), "try-error")) {
+  cat("This plot could not be generated because no Phylogenetic tree was", 
+      "provided.\n\n")
+} else {
+  # Transform and ordinate (PCoA on weighted UniFrac)
+  tmp$ord_pcoa <- ordinate(ps.trans, "PCoA", "unifrac", weighted = TRUE)
+  # Plot
+  plots$pcoa <- plot_ordination(ps.trans, tmp$ord_pcoa, color = set$color_by, 
+                                shape = set$shape_by) + my_scale_col
+  # Save; the file name is built from the color and shape variables
+  save_plot(plots$pcoa, 
+            plot_name = paste0("PCOA_", set$color_by, "_", set$shape_by), 
+            filetype = tmp$out)
+
+  # Clean up plot parameters
+  rm(list = ls(set), envir = set)
+
+  # Print to standard out
+  plots$pcoa
+}
+
+
+```
+
+#### Get a list of Top N taxa at a given level
+This chunk lists the top n most abundant taxonomic terms at a given level. Change the function parameters to the desired values. For more info, check help page of the function with `?cuphyr::abundant_tax_physeq()`. Change 'ignore_na' to include/exclude NA values at the given level.
+```{r Toplist}
+# List the 20 most abundant terms at Genus level in 'ps' and store the result 
+# in 'tmp$tops' (with output_format = "tops" this is presumably a character 
+# vector of the term names -- see ?cuphyr::abundant_tax_physeq to confirm).
+# NA entries at the chosen level are ignored (ignore_na = TRUE).
+tmp$tops <- cuphyr::abundant_tax_physeq(physeq = ps, 
+                            lvl = "Genus",
+                            top = 20,
+                            output_format = "tops",
+                            ignore_na = TRUE,
+                            silent = FALSE)
+```
+
+#### Top N ASVs/taxa Bar plot
+
+This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 20, a larger n may lead to delay/skipping of the plot in standard out, but it should be saved as a PDF regardless for ASVs. For taxa, a large n  may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Bar-plot, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels. 
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Skifte"
+
+# CHANGE ME to the desired sample categories on the x-axis. 
+# Accepted values are the column headers in the descriptor file.
+set$x_axis_value = "Vekst"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20' 
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the taxonomic ranks (column names) of the taxonomy table, e.g. "Genus".
+set$taxlvl = "Genus"
+
+# CHANGE ME to change the number of Top n taxa to be plotted at 
+# taxlvl.
+set$top_n = 10
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight. 
+# Comment out to not highlight anything.
+#set$highlight = "Pratylenchus"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same 
+# color in both plots). Highlighting will unify colors even if unify_colors 
+# is FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = FALSE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This 
+# improves visual clarity when a bar segment appears black due to the border of
+# many small ASVs overlapping.
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 40
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relativ abundans"
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+# Make physeq objects of top n taxa and top n ASVs
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(ps.trans, lvl = set$taxlvl, 
+                                              top = set$top_n, 
+                                              ignore_na = set$ignore_na)
+# head() rather than '[1:n]': if fewer than 'set$topASVs' ASVs exist, the 
+# selection is simply shorter instead of being padded with NA names.
+set$topnASVs <- head(names(sort(taxa_sums(ps), decreasing = TRUE)), 
+                     set$topASVs)
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.trans)
+
+# A shared palette is built when colors should be unified, a highlight is set 
+# or ASVs are fused. 'inherits = FALSE' makes sure only a 'highlight' stored in 
+# 'set' itself counts, not an object of that name found elsewhere.
+set$custom_palette <- set$unify_colors || 
+  exists("highlight", envir = set, inherits = FALSE) || 
+  set$fuse_ASVs
+
+if (set$custom_palette) {
+  # One color per taxonomy term occurring in either of the two objects
+  set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl], 
+                      phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
+  set$toptax <- sort(set$toptax)
+  set$taxlvlPalette <- viridis(length(set$toptax))
+  names(set$taxlvlPalette) <- set$toptax
+  if (exists("highlight", envir = set, inherits = FALSE)) {
+    # It is possible to change the highlight color here by substituting 
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f"'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+}
+
+# Plot top n taxa. With a shared palette the fill scale is restricted to the 
+# terms present in this object; otherwise the global default scale is used.
+set$my_scale_fill <- if (set$custom_palette) {
+  scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
+    na.value = "grey")
+} else {
+  my_scale_fill
+}
+plots$topn_tax <- plot_bar(set$ps.topnTax, 
+                           x = set$x_axis_value, 
+                           fill = set$taxlvl, 
+                           title = paste0("Top", set$top_n, "_", set$taxlvl)) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+# Optionally draw the bars again with the outline in the fill color, which 
+# hides the borders between the individual ASV segments
+if (set$fuse_ASVs) { 
+  plots$topn_tax <- plots$topn_tax + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl), 
+    stat = "identity", position = "stack") + 
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+# Plot top n ASVs, with the fill scale restricted to the terms in that object
+if (set$custom_palette) {
+  set$my_scale_fill <- scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
+    na.value = "grey")
+}
+plots$topn_ASVs <- plot_bar(set$ps.topnASVs, 
+                            x = set$x_axis_value, 
+                            fill = set$taxlvl, 
+                            title = paste0("Top", set$topASVs, "_ASVs")) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax, plot_name = paste0("Top", set$top_n, "_", set$taxlvl), 
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs, plot_name = paste0("Top", set$topASVs, "_ASVs"), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax
+plots$topn_ASVs
+```
+
+#### Siamcat
+This chunk implements statistical testing of ASVs that are differentially abundant for a given biological trait (column in descriptors.txt). It can also test whether grouping variables other than the tested one are associated with the abundance data in a similar or different way than the chosen trait (confounders). The chunk is largely based on the [SIAMCAT "Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html).
+There are several options that can be chosen.
+
+```{r siamcat, warning=FALSE}
+# CHANGE ME to the sample category that will be used as the test group. ASVs 
+# that are differentially abundant according to this grouping will be detected. 
+# Accepted values are the column headers in your descriptor file.
+set$test_label = "Vekst"
+# CHANGE ME to the value of the test group that is treated as the 'case' group 
+# (passed as 'case' to siamcat() below), i.e. an entry of the column chosen above.
+set$case_value = "Forsinket"
+
+# CHANGE ME to the cutoff p-value for selecting significant ASVs (FDR-adjusted 
+# p-value)
+set$p_val_cutoff = 0.05
+
+# CHANGE ME to the taxonomic level of interest for more informative ASV 
+# annotation (format: taxlv-ASV)
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to filter low-abundance ASVs. Sequence variants with lower 
+# abundance will not be analysed to reduce artifacts
+set$filter_abundance = 0.001
+
+# Can be changed to include (TRUE) or exclude (FALSE) an output file where all 
+# possible confounders are checked. This will analyse the confounding effect of 
+# other factors in 'descriptors' over the chosen test group and produce a pdf 
+# file containing several plots.
+set$check_confounders = FALSE
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Make a copy of the transformed physeq object and parse taxonomic information 
+# for the chosen taxlvl into ASV names ("<taxlvl entry>_<ASV>") to give more 
+# informative plots.
+ps.siam <- ps.trans
+taxa_names(ps.siam) <- tax_table(ps.trans) %>% 
+  as.data.frame() %>% 
+  rownames_to_column(var = "OTU") %>% 
+  # all_of() selects the column named in set$taxlvl; handing a bare character 
+  # vector to a tidyselect verb is deprecated
+  unite(col = OTU, all_of(set$taxlvl), OTU) %>% 
+  pull(OTU)
+
+# Read in transformed physeq object as SIAMCAT object and choose trait
+sc.trans <- siamcat(phyloseq = ps.siam, 
+                    label = set$test_label, 
+                    case = set$case_value)
+
+# print the generated Siamcat object to check for valid parsing
+show(sc.trans)
+
+# Filter ASVs with an abundance below set$filter_abundance
+sc.filt <- filter.features(sc.trans,
+    filter.method = 'abundance',
+    cutoff = set$filter_abundance)
+
+# check confounders if the option is TRUE
+if (set$check_confounders) {  
+  sc.conf <- check.confounders(
+      sc.filt,
+      fn.plot = file.path(outp, 'confounder_plots.pdf'),
+      meta.in = NULL,
+      feature.type = 'filtered',
+      verbose = 1)
+  cat("Confounders checked, results stored in", 
+      file.path(outp, 'confounder_plots.pdf'))
+}
+
+
+# Plot associations to a time-stamped PDF and save the analysis to the siamcat 
+# object
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc',
+    alpha = set$p_val_cutoff,
+    mult.corr = "fdr",
+    detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    fn.plot = file.path(outp, 
+                  paste0("Differential_abundance_", set$test_label,
+                         "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), ".pdf")))
+
+# Plot associations again to standard out (no fn.plot given)
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc',
+    alpha = set$p_val_cutoff,
+    mult.corr = "fdr",
+    detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    verbose = 0)
+
+# record plot from standard out
+plots$siam_assoc <- recordPlot()
+
+# Turn significant hits into tbl, if there are any, generate a vector containing 
+# significant tax groups at taxlvl and a vector containing significant ASVs.
+# The row names are moved into a column BEFORE filtering so that they cannot be 
+# lost by filter(). The name is split at the first "_"; extra = "merge" keeps 
+# everything after it in 'ASV' instead of silently dropping further pieces.
+# NOTE(review): a taxlvl entry that itself contains "_" is still split in the 
+# wrong place -- confirm that the taxonomy in use has no such names.
+tbl_me_this <- associations(sc.filt) %>%
+  rownames_to_column("tax_ASV") %>%
+  filter(p.adj < set$p_val_cutoff) %>%
+  separate(col = "tax_ASV", into = c("tax", "ASV"), sep = "_", 
+           extra = "merge") %>%
+  select(tax, ASV, p.adj)
+
+if (nrow(tbl_me_this) > 0) {
+  significant_tax_groups <- select(tbl_me_this, tax) %>%
+    unique() %>% unlist() %>% unname()
+  significant_ASVs <- select(tbl_me_this, ASV) %>%
+    unique() %>% unlist() %>% unname()
+ 
+  cat(sep = "\n", "The following taxonomic groups were found to be differentially 
+     abundant and stored in 'significant_tax_groups':",
+      significant_tax_groups, 
+      "This object can be used to set a subgroup in the chunk below.")
+}
+  
+```
+
+
+## Subset the Phyloseq object by taxonomic group(s)
+
+This chunk gives the option to create a subset of the general Phyloseq object by providing a vector of search terms and a taxonomic level to search at. It requires **one or more search terms**, a **taxonomic level** to search at and a **description of the subset**. The description will only be used for the titles of plots generated from the subsets.
+
+```{r Subset by Taxonomic identity, tidy=TRUE}
+# Vector to subset on
+set$subv = c("Meloidogyne", "Paratrichodorus", "Pratylenchus", "Ditylenchus", "Tylenchus", "Rotylenchus", "Globodera")
+# CHANGE ME to the taxonomic level at which you want to search for matching 
+# entries
+set$taxlvlsub = "Genus"
+# CHANGE ME to a descriptor for the subset
+tmp$subset_id = "Interessante nematoder"
+# CHANGE ME if you want to use the significant groups found by SIAMCAT. If TRUE, 
+# those groups will be used in addition to the groups specified in set$subv.
+set$use_siamcat_results = TRUE
+
+# CHANGE ME to the sample group for color coding in the summary plot. 
+# Accepted values are the column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Add the groups found by SIAMCAT to the search terms, if requested and if the 
+# object 'significant_tax_groups' exists
+set$subv <- if (set$use_siamcat_results && exists("significant_tax_groups")) { 
+  unique(c(set$subv, significant_tax_groups))
+} else {
+  set$subv
+}
+# ASVs matching the search terms, and the two objects pruned to those ASVs
+set$subASVs <- cuphyr::list_subset_ASVs(subv = set$subv, 
+                                        taxlvlsub = set$taxlvlsub)
+ps.subs <- prune_taxa(set$subASVs, ps)
+ps.subs.trans <- prune_taxa(set$subASVs, ps.trans)
+
+# plot the reads in the subset; the dashed line marks the mean abundance
+set$ranked <- cuphyr::make_ranked_sums(ps.subs, myset = tmp$subset_id)
+set$avg <- mean(set$ranked$Abundance)
+set$avg_round <- format(round(set$avg, 0), nsmall = 0)
+plots$subset <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) + 
+  aes_string(fill = set$color_by) + 
+  geom_col() + my_scale_fill + 
+  geom_hline(yintercept = set$avg, linetype = "dashed") + 
+  ylab("ASV counts ('reads')") +
+  ggtitle(paste0("Subset: ", tmp$subset_id, 
+                 " (average ASV count ",set$avg_round, ")"))
+
+# Save plot
+save_plot(plots$subset, plot_name = "Subset_overview", filetype = tmp$out)
+# Print plot
+plots$subset
+# print info on generated object
+cuphyr::summarise_physeq(ps, ASV_sublist = set$subASVs, 
+                         sublist_id = tmp$subset_id, samp_names = FALSE)
+
+# Optional export as biom-file. The subset object 'ps.subs' is what gets 
+# written to 'subset_<id>.biom'; spaces in the id are replaced by underscores 
+# (note that this also changes 'tmp$subset_id' for later chunks).
+if (parameters$biom_export == "TRUE") {
+  tmp$subset_id <- tmp$subset_id %>% str_replace_all(" ", "_")
+  suppressWarnings(phyloseq.extended::write_phyloseq(
+    ps.subs, 
+    biom_file = file.path(path, paste0("subset_",tmp$subset_id, ".biom")), 
+    biom_format = "standard"))
+}
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+#### Bar plots for subsets of taxonomic group(s)
+The chunk is very similar to the vanilla bar plot chunk above but takes the subset data instead of the complete Phyloseq object. This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 100 for subsets. The range of n is larger for subsets, since the taxonomic variety was already reduced by the subsetting, meaning the Top 100 ASVs likely belong to few species. For taxa, a large n may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+```{r Bar plot subset, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels. 
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Skifte"
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are 
+# the column headers in the descriptor file.
+set$x_axis_value = "Vekst"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20' 
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the taxonomic ranks (column names) of the taxonomy table, e.g. "Genus".
+set$taxlvl = "Genus"
+# CHANGE ME to change the number of Top n taxa to be plotted at taxlvl.
+set$top_n = 20
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight. 
+# Comment out to not highlight anything.
+set$highlight <- "Paratrichodorus"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same 
+# color in both plots). Highlighting will unify colors even if unify_colors is 
+# FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = FALSE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This 
+# improves visual clarity when a bar segment appears black due to the border of 
+# many small ASVs overlapping.
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relativ abundans"
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Make physeq objects of top n taxa and top n ASVs of the subset
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(ps.subs.trans, 
+                                              lvl = set$taxlvl, 
+                                              top = set$top_n, 
+                                              ignore_na = set$ignore_na)
+# The ASVs are ranked within the subset ('ps.subs'), not within the complete 
+# object: the top ASVs of the whole data set need not be part of the subset at 
+# all. head() rather than '[1:n]' keeps the selection free of NA names when the 
+# subset holds fewer than 'set$topASVs' ASVs.
+set$topnASVs <- head(names(sort(taxa_sums(ps.subs), decreasing = TRUE)), 
+                     set$topASVs)
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.subs.trans)
+
+# A shared palette is built when colors should be unified, a highlight is set 
+# or ASVs are fused. 'inherits = FALSE' makes sure only a 'highlight' stored in 
+# 'set' itself counts, not an object of that name found elsewhere.
+set$custom_palette <- set$unify_colors || 
+  exists("highlight", envir = set, inherits = FALSE) || 
+  set$fuse_ASVs
+
+if (set$custom_palette) {
+  # One color per taxonomy term occurring in either of the two objects
+  set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl], 
+                      phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
+  set$toptax <- sort(set$toptax)
+  set$taxlvlPalette <- viridis(length(set$toptax))
+  names(set$taxlvlPalette) <- set$toptax
+  if (exists("highlight", envir = set, inherits = FALSE)) {
+    # It is possible to change the highlight color here by substituting 
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f"'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+}
+
+# Plot top n taxa. With a shared palette the fill scale is restricted to the 
+# terms present in this object; otherwise the global default scale is used.
+set$my_scale_fill <- if (set$custom_palette) {
+  scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
+    na.value = "grey")
+} else {
+  my_scale_fill
+}
+
+plots$topn_tax_subset <- plot_bar(set$ps.topnTax, x = set$x_axis_value, 
+                                  fill = set$taxlvl, title = paste0(
+                                    "Top", set$top_n, "_", set$taxlvl)) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+# Optionally draw the bars again with the outline in the fill color, which 
+# hides the borders between the individual ASV segments
+if (set$fuse_ASVs) { 
+  plots$topn_tax_subset <- plots$topn_tax_subset + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl), 
+    stat = "identity", position = "stack") + 
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+# Plot top n ASVs, with the fill scale restricted to the terms in that object
+if (set$custom_palette) {
+  set$my_scale_fill <- scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
+    na.value = "grey")
+}
+plots$topn_ASVs_subset <- plot_bar(set$ps.topnASVs, x = set$x_axis_value, 
+                                   fill = set$taxlvl, title = paste0(
+                                     "Top", set$topASVs, "_ASVs")) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax_subset, 
+          plot_name = paste0("Top", set$top_n, "_", set$taxlvl, "_subset"), 
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs_subset, 
+          plot_name = paste0("Top", set$topASVs, "_ASVs_subset"), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax_subset
+plots$topn_ASVs_subset
+```
+### Abundance tables
+```{r Abundance tables per genus, message=FALSE}
+ps_trans_tbl <- ps.trans %>% psmelt() %>% as_tibble()
+
+# Genera whose abundances are extracted below
+genera_of_interest <- "Paratrichodorus"
+
+# Total abundance per genus over all samples, computed from ps_tbl.
+# Yields a table with two columns: "Genus" and "Genus_total_Abundance"
+genus_abundance_tbl <- summarise(
+  group_by(ps_tbl, Genus),
+  Genus_total_Abundance = sum(Abundance))
+
+# Per genus and sample, restricted to the genera of interest
+genus_abundance_tbl_per_sample <- ps_trans_tbl %>%
+  group_by(Genus, Sample) %>%
+  summarise(Genus_Sample_Abundance = sum(Abundance)) %>%
+  filter(Genus %in% genera_of_interest) %>%
+  group_by(Sample) %>%
+  mutate(sums_abu_sample = sum(Genus_Sample_Abundance),
+         ratio = Genus_Sample_Abundance / sums_abu_sample) %>%
+  ungroup()
+
+# To get back all the other info, join the result onto the original table
+ps_tbl_with_genus_abundance <- ps_tbl %>%
+  left_join(genus_abundance_tbl_per_sample, by = c("Genus", "Sample"))
+
+# Made more compact by dropping the ASV + Species info and the original
+# abundance values, then reducing to unique rows
+genus_abundance_tbl_full_info <- unique(
+  select(ps_tbl_with_genus_abundance, -OTU, -Abundance, -Species)
+)
+view(genus_abundance_tbl_per_sample)
+
+library(readxl)
+Morfologisk <- read_excel(path = "Sven+Trich/Morfologisk.xlsx")
+# Spreadsheet data as a tibble, joined to the abundances by sample ID below
+Sven_data <- tibble(Morfologisk)
+view(Sven_data)
+data_Sven <- right_join(
+  genus_abundance_tbl_per_sample, Sven_data,
+  by = c("Sample" = "SampleIDs"))
+
+view(data_Sven)
+# Scatter plot. NOTE(review): x maps 'Veksy' but is labelled "Relativ abundans",
+# and y is a factor, so the lm smooth may not be drawn - confirm the intent.
+plots$nem_plot <- ggplot(data = data_Sven) +
+  aes(x = as.numeric(Veksy), y = as.factor(sums_abu_sample)) +
+  geom_point() + geom_smooth(method = "lm", formula = y ~ x) +
+  labs(x = "Relativ abundans", y = "Vekst") +
+  ggtitle(label = "Paratrichodorus")
+
+plots$nem_plot
+
+save_plot(plots$nem_plot,
+          filetype = tmp$out,
+          plot_name = "Nem_plot")
+```
+
+### Other phylogenetic trees
+For these chunks, the ggtree library is required. If you are not sure whether it is installed, run the following chunk.
+
+```{r check-ggtree}
+# BiocManager (installed from CRAN if absent) is used to install ggtree.
+if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
+# ggtree is only installed when its namespace cannot be loaded.
+if (!requireNamespace("ggtree", quietly = TRUE)) BiocManager::install("ggtree")
+```
+
+#### Generic phylogenetic tree (from a subset)
+
+This chunk allows the generation of a generic phylogenetic tree for a given subset of the phyloseq object, even if none is provided for the whole set.
+```{r tree-auto}
+# Taxa to build the tree for (the larger the subset, the longer the tree
+# generation will take!)
+set$subv <- c("Pratylenchus", "Tylenchus", "Rotylenchus", "Tylenchorhynchus", "Trichodorus", "Paratrichodorus", "Ditylenchus", "Helicotylenchus", "Xiphinema", "Globodera", "Meloidogyne")
+# CHANGE ME to the taxonomic level at which you want to search for matching
+# entries
+set$taxlvlsub <- "Genus"
+
+# CHANGE ME to the width of the tree shown (depends on its size; start large,
+# then make it smaller until the best fit is achieved)
+set$tree_width <- 11
+# CHANGE ME to the position at which the tree's scale is shown. Must be
+# smaller than set$tree_width, larger = more to the right.
+set$tree_scale_pos <- 10
+
+##### Optional settings (sensible defaults) #####
+
+# Width (in cm) of the saved plot.
+set$wp <- 20
+# Height (in cm) of the saved plot.
+set$hp <- 20
+# Resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$subASVs <- cuphyr::list_subset_ASVs(taxlvlsub = set$taxlvlsub,
+                                        subv = set$subv)
+set$ps.treesubs <- prune_taxa(set$subASVs, ps)
+set$seqs <- phyloseq::refseq(set$ps.treesubs)
+set$align <- DNAStringSet(set$seqs) %>% AlignSeqs(anchor = NA)
+set$seqs_phang <- as(set$align, "matrix") %>% phangorn::phyDat(type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- NJ(set$seqs_dm)
+set$seqs_fit <- pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, inv = 0.2, k = 4)
+set$fitGTRseqs <- optim.pml(set$fitGTRseqs, model = "GTR",
+                            rearrangement = "stochastic",
+                            optGamma = TRUE, optInv = TRUE,
+                            control = pml.control(trace = 0))
+
+# Roots a tree on its longest terminal branch (outgroup). Written for plain
+# 'phylo' objects; cuphyr::root_tree_in_outgroup() works on phyloseq objects.
+root_generic_tree <- function(tree.unrooted){
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  edges <- tree.unrooted$edge
+  # Terminal branches are the edge rows whose child node (column 2) is a tip,
+  # i.e. has an index <= n_tips. They are not necessarily the first n_tips
+  # rows, and tip.label is indexed by that node number, not by row position.
+  is_terminal <- edges[, 2] <= n_tips
+  tip_nodes <- edges[is_terminal, 2]
+  longest <- which.max(tree.unrooted$edge.length[is_terminal])
+  out_group <- tree.unrooted$tip.label[tip_nodes[longest]]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group,
+                        resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+set$fitGTRseqs$tree <- root_generic_tree(set$fitGTRseqs$tree)
+plots$subset_ASV_tree <- ggtree::ggtree(set$fitGTRseqs$tree) +
+  xlim(c(0, set$tree_width)) +
+  ggtree::geom_tree() +
+  ggtree::geom_treescale(x = set$tree_scale_pos) +
+  ggtree::geom_tiplab()
+
+# Write the tree plot to the output folder
+save_plot(plots$subset_ASV_tree,
+          filetype = tmp$out,
+          plot_name = "subset_phylogenetic_tree")
+# Empty the 'set' environment so the settings do not leak into other chunks
+rm(list = ls(envir = set), envir = set)
+# Show the plot
+plots$subset_ASV_tree
+```
+
+#### Generic phylogenetic tree (from any FASTA)
+
+This chunk allows the generation of a generic phylogenetic tree for any given fasta. This may be useful to compare the phylogeny of a given set of ASVs and some reference sequences.
+
+```{r tree-manual}
+# CHANGE ME to the path of the FASTA file to build a phylogenetic tree for
+# (the larger the FASTA, the longer the tree generation will take!)
+set$fasta <- "Sven+Trich/ASV_custom_BLAST_taxonomy.fasta"
+
+# CHANGE ME to the width of the tree shown (depends on its size; start large,
+# then make it smaller until the best fit is achieved)
+set$tree_width <- 11
+# CHANGE ME to the position at which the tree's scale is shown. Must be
+# smaller than set$tree_width, larger = more to the right.
+set$tree_scale_pos <- 10
+
+##### Optional settings (sensible defaults) #####
+
+# Width (in cm) of the saved plot.
+set$wp <- 20
+# Height (in cm) of the saved plot.
+set$hp <- 20
+# Resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$seqs <- readDNAStringSet(set$fasta)
+set$align <- DNAStringSet(set$seqs) %>% AlignSeqs(anchor = NA)
+set$seqs_phang <- as(set$align, "matrix") %>% phangorn::phyDat(type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- NJ(set$seqs_dm)
+set$seqs_fit <- pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, inv = 0.2, k = 4)
+set$fitGTRseqs <- optim.pml(set$fitGTRseqs, model = "GTR",
+                            rearrangement = "stochastic",
+                            optGamma = TRUE, optInv = TRUE,
+                            control = pml.control(trace = 0))
+
+# Roots a tree on its longest terminal branch (outgroup). Written for plain
+# 'phylo' objects; cuphyr's rooting helper is aimed at phyloseq objects.
+root_generic_tree <- function(tree.unrooted){
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  edges <- tree.unrooted$edge
+  # Terminal branches are the edge rows whose child node (column 2) is a tip,
+  # i.e. has an index <= n_tips. They are not necessarily the first n_tips
+  # rows, and tip.label is indexed by that node number, not by row position.
+  is_terminal <- edges[, 2] <= n_tips
+  tip_nodes <- edges[is_terminal, 2]
+  longest <- which.max(tree.unrooted$edge.length[is_terminal])
+  out_group <- tree.unrooted$tip.label[tip_nodes[longest]]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group,
+                        resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+set$fitGTRseqs$tree <- root_generic_tree(set$fitGTRseqs$tree)
+plots$fasta_ASV_tree <- ggtree::ggtree(set$fitGTRseqs$tree) +
+  xlim(c(0, set$tree_width)) +
+  ggtree::geom_tree() +
+  ggtree::geom_treescale(x = set$tree_scale_pos) + ggtree::geom_tiplab()
+
+# Write the tree plot to the output folder
+save_plot(plots$fasta_ASV_tree,
+          filetype = tmp$out,
+          plot_name = "phylogenetic_tree_from_fasta")
+# Empty the 'set' environment so the settings do not leak into other chunks
+rm(list = ls(envir = set), envir = set)
+# Show the plot
+plots$fasta_ASV_tree
+```
+
+#### Machine learning with SIAMCAT
+This is an experimental chunk implementing the machine learning functions of SIAMCAT following the tutorial steps and settings from the ["Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html). There is no convenient way to change the settings yet, because the usefulness of these models and the different optimal ways to run them need to be tested further. The chunk can be run as is and will produce a result if the basic SIAMCAT chunk above was run. However, the result should be treated with skepticism and not given extraordinary weight if the user is not confident that they understand the method used.
+
+```{r experimental-machine-learning, warning=FALSE}
+# Normalize the filtered counts: log-transformation with an added pseudocount
+sc.norm <- normalize.features(
+  sc.filt,
+  norm.param = list(
+    log.n0 = 1e-06, n.p = 2, norm.margin = 1
+  ),
+  norm.method = "log.unit"
+)
+
+# Split the data into training and test sets so that the model can be
+# validated (5 folds, resampled twice)
+sc.obj <- create.data.split(
+  sc.norm,
+  num.resample = 2,
+  num.folds = 5
+)
+
+# Train a model ("lasso" method) on the training sets
+sc.obj <- train.model(
+  sc.obj,
+  method = "lasso"
+)
+
+# Keep the models in a separate object and show the first entry
+models <- models(sc.obj)
+models[[1L]]
+
+# Run the models on the data and show the head of the prediction matrix
+sc.obj <- make.predictions(sc.obj)
+pred_matrix <- pred_matrix(sc.obj)
+head(pred_matrix)
+
+# Save the model interpretation plot as a PDF in the output folder
+model.interpretation.plot(
+  sc.obj,
+  fn.plot = file.path(outp, "model_interpretation.pdf"),
+  heatmap.type = "zscore",
+  limits = c(-3, 3),
+  consens.thres = 0.5
+)
+
+cat("Model results stored to:", file.path(outp, "model_interpretation.pdf"))
+```
+
+##### Credit
+This script is based on ideas and code from the [dada2 Tutorial](https://benjjneb.github.io/dada2/tutorial.html) by Benjamin Callahan, the publication "Bioconductor Workflow for Microbiome Data Analysis: from raw reads to community analyses" by [Callahan et al. (2016)](https://f1000research.com/articles/5-1492) and various pages of the official [phyloseq website](https://joey711.github.io/phyloseq) by Paul J. McMurdie.
\ No newline at end of file
diff --git a/Analysis_pipeline_v4-3 - oomy.Rmd b/Analysis_pipeline_v4-3 - oomy.Rmd
new file mode 100644
index 0000000..2565e96
--- /dev/null
+++ b/Analysis_pipeline_v4-3 - oomy.Rmd	
@@ -0,0 +1,1507 @@
+---
+title: "Analysis template v4-3"
+author: "Simeon Rossmann"
+date: "10.12.2020"
+output:
+  html_document: default
+urlcolor: blue
+---
+
+## Introduction and initialization
+
+This is an R Markdown file containing code to parse the results of a dada2 analysis into phyloseq for further analysis. It is separated into chunks that may be run independently by pressing the _play_ button. You will need **3 files** in the same location in order to run this pipeline successfully:
+
+* A sequence table called **'seqtab_nochim.rds'** (automatically generated by the dada2 pipeline)
+* A taxonomy table called **'taxa.rds'** (automatically generated by the dada2 pipeline)
+* A table describing the sample properties called **'descriptors.txt'** (Has to be provided)
+* Optional (some plots require this): A phylogenetic tree file called **'phylotree.rds'** (can be calculated if it is not provided, takes a long time)
+* Optional: Instead of **'taxa.rds'**, a **custom taxonomy file** may be provided (tab-delimited text)
+
+**Recommended use:** Set the individual chunks until you are content with the output, then knit the whole document into a PDF/html, so you have a full record of a successful run.
+
+#### Optional custom Taxonomy file
+A custom taxonomy file may be provided instead of using the taxonomy output from dada2. This may be used to supply taxonomy derived e.g. from BLAST searches of the ASVs. Custom taxonomy files must be tab-delimited text with as many rows as the original and column headers for all columns except the first. For example:
+
+>Kingdom	Phylum	Class	Order	Family	Genus	Species  
+>ESV1	Kingdomx	Phylumx	Classx	Orderx	Familyx	Genusx	Speciesx  
+>ESV2	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy  
+>ESV3	Kingdomz	Phylumz	Classz	Orderz	Familyz	Genusz	Speciesz  
+> ...
+>ESVn	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy
+
+_Friendly warning: Parsing the results of a BLAST search into this format may require some effort._
+
+#### Descriptor table
+
+'descriptors.txt' should be a tab-delimited .txt table describing the samples. It must have the same length and order as the samples in seqtab_nochim.rds. To check the order and length of samples in seqtab_nochim.rds and generate a template to fill out, you may run the chunk below with "optional_sample_check" set to "TRUE". 
+
+Any number of descriptors is possible. The sample names may be retained as one descriptor, but this is not necessary, as they will be added during parsing. For example, if there are 4 samples (order: s1, s2, s3, s4), the txt file could look as follows:  
+
+>Subject	Species	Time  
+>Kar1	A.thaliana	24hpi  
+>Kar1	A.thaliana	72hpi  
+>Mec2	S.tuberosum	24hpi  
+>Mec3	S.tuberosum	24hpi  
+
+Finally, the file should end with an empty line, since it may throw an error otherwise. However, this is usually not a serious problem.
+
+**If you choose to use the blank file, you MUST retain the original order of the samples!**
+
+## Setup
+
+This chunk loads required packages and defines the location of the input files. It requires the **correct path** as input, and allows **setting the pruning of control samples** and **choosing generation of a phylogenetic tree**.  **Beware:** The generation of a phylogenetic tree may take several days for >1000 sequences, it is therefore recommended to only use this feature for the final analysis or small sample sets. This script assumes the packages _Biostrings_, _dada2_, _DECIPHER_, _ggplot2_, _ggsci_, _phangorn_, _phyloseq_ and _stringr_ to be installed.
+
+```{r check-samples, message=FALSE, tidy=FALSE, warning=FALSE}
+# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
+path <- "oomy/"
+
+# CHANGE ME to TRUE to list all samples and write an empty descriptor template
+optional_sample_check <- FALSE
+
+# CHANGE ME to TRUE to (re)install cuphyr from GitHub
+update_cuphyr <- TRUE
+
+# Knit options that apply to all chunks
+################# NO CHANGES NECESSARY BELOW #################
+# NOTE(review): knitr documents root.dir as a package option (opts_knit), so
+# it may have no effect as a chunk option here - confirm before relying on it.
+knitr::opts_chunk$set(
+  echo = TRUE, root.dir = paste0(path), message = FALSE, warning = FALSE)
+
+if (update_cuphyr) {
+  devtools::install_github("simeross/cuphyr")
+}
+
+# Sequence and microbiome specific libraries
+library(dada2)
+library(Biostrings)
+library(DECIPHER)
+library(cuphyr)
+# The export of phyloseq objects to a BIOM format and the generation of fancier 
+# ordination plots require the phyloseq-extended package. The first command 
+# installs the package that is currently on the dev branch of the author's 
+# repository, the second command sources some extra functions, including the 
+# better ordination plot implementation.
+remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
+source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
+
+library(phyloseq)
+library(SIAMCAT)
+
+# Phylogeny libraries
+library(phangorn)
+library(ape)
+
+# Plotting and figure export
+library(gridExtra)
+library(viridis)
+library(ggpubr)
+
+# Tidyverse
+library(tidyverse)
+library(stringr)
+
+
+# Output folder inside 'path'; dir.create() throws a warning if it already
+# exists. Plots are saved there.
+outp <- paste0(path, "/analysis_output")
+dir.create(outp)
+
+if (optional_sample_check) {
+  # Temporary objects live inside local(), so nothing has to be removed later
+  local({
+    sample_ids <- rownames(readRDS(paste0(path, "/seqtab_nochim.rds")))
+    n_samples <- length(sample_ids)
+    empty <- character(n_samples)
+    template <- data.frame(SampleIDs = sample_ids, ExampleProperty1 = empty,
+                           ExampleProperty2 = empty, ExampleProperty3 = empty)
+    write.table(template, file = paste0(path, "/descriptors_blank.txt"),
+                row.names = FALSE, sep = "\t")
+    cat("'seqtab_nochim.rds' contains samples in the following order:\n",
+        sample_ids, "\nThe number of samples in the file is:", n_samples,
+        sep = "\n")
+  })}
+rm(optional_sample_check, update_cuphyr)
+```
+
+#### Parameters
+
+This chunk allows the adjustment of several parameters, such as **setting the pruning of control samples** based on keywords, **requiring that a phylogenetic tree be provided or generated**, **defining a minimum ASV count** and **providing an alternative taxonomy**.
+
+```{r setup, message=FALSE, tidy=TRUE, warning=FALSE}
+# All global analysis settings are collected in one dedicated environment,
+# which gives a better overview and allows exporting them together
+parameters <- new.env()
+
+# CHANGE ME to "TRUE" to remove control samples from the analysis or "FALSE" to
+# analyse all samples.
+parameters$prune_controls <- "TRUE"
+# CHANGE ME to a list of unique identifiers that only occur in the names of
+# samples you do NOT want to analyse. Common examples are provided.
+parameters$controls <- c("Pos", "H2O", "Neg", "Kontr", "Contr", "POSK", "V1", "V2", "V3", "V4")
+
+# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
+# name. This is useful to exclude non-target organisms or noise from organelles
+# such as Chloroplasts and Mitochondria. It is recommended to first look at all
+# data before using this setting.
+parameters$prune_noise_taxgroups <- "FALSE"
+# CHANGE ME to define the taxonomic groups to be removed as noise.
+parameters$noise_taxgroups <- c("Chloroplast", "Mitochondria")
+
+# CHANGE ME to the number of ASV counts [~reads] that analyzed samples should
+# minimally have. Samples with fewer ASV counts than 'minASVcount' are pruned.
+# Set to 0 to not prune any samples.
+parameters$minASVcount <- 0
+
+# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead of
+# using the default dada2 output ('taxa.rds').
+parameters$customTax <- "TRUE"
+# CHANGE ME to the location of the custom taxonomy file. This only matters if
+# parameters$customTax="TRUE", otherwise it will be ignored.
+parameters$taxfile <- "oomy/custom_BLAST_taxonomy_nt.txt"
+
+# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a
+# long time depending on the number of sequences (up to days for thousands).
+# If a tree is provided as 'phylotree.rds' in 'path', then it will be used
+# regardless of the value of 'parameters$maketree'
+parameters$maketree <- "TRUE"
+
+# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on the
+# leaf with the longest branch (outgroup). This makes analyses that rely on the
+# phylogenetic tree reproducible instead of picking a random leaf as root when
+# calculating UNIFRAC distances. Implementation based on
+# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
+# in https://github.com/joey711/phyloseq/issues/597
+parameters$roottree <- "TRUE"
+
+## CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
+parameters$biom_export <- "FALSE"
+```
+
+#### Parsing input data
+
+This chunk loads the input data into a usable format. **This chunk does not require any user inputs**. If no phylogenetic tree with the name 'phylotree.rds' was provided and 'parameters$maketree="TRUE"', it will be calculated here. The phylogenetic tree is necessary for certain plots that incorporate 'true' taxonomic relationships beyond the annotations, such as PCoA.
+
+```{r parse input, tidy=TRUE}
+############### NO NEED FOR CHANGES BELOW ###############
+# Dedicated environments for temporary values, plots and chunk settings
+tmp <- new.env()
+plots <- new.env()
+set <- new.env()
+# Read the sequence table, the taxonomy and the sample descriptors
+tmp$seqtabp <- readRDS(paste0(path, "/seqtab_nochim.rds"))
+tmp$taxap <- if (parameters$customTax == "TRUE") {
+  # Custom taxonomy: rows are labelled with the column names of the seq table
+  as.matrix(`rownames<-`(
+    read.delim(parameters$taxfile, header = TRUE, sep = "\t"),
+    colnames(tmp$seqtabp)))
+} else {
+  readRDS(paste0(path, "/taxa.rds"))}
+tmp$samp_table <- read.delim(paste0(path, "/descriptors.txt"), sep = "\t",
+                             header = TRUE)
+tmp$samp_list <- rownames(tmp$seqtabp)
+
+# Descriptor check: the sample count must match (error), names should (warning)
+if (length(tmp$samp_table[,1]) != length(tmp$samp_list)) {  # rows vs. samples
+  stop("There are ", length(tmp$samp_table[,1]), 
+    " samples in 'descriptors.txt', but ", length(tmp$samp_list), 
+    " samples in 'seqtab_nochim.rds'. Please make sure that the correct samples 
+    are contained in descriptors.txt.
+       
+    You may use 'optional_sample_check <- TRUE' in the first chunk to generate an 
+    empty template for 'descriptors.txt'" )
+} else if (!identical(tmp$samp_table[,1], tmp$samp_list)) {  # first column vs. names
+  warning("Warning: The samples in 'descriptors.txt' do not have the same names 
+          or order as the samples in 'seqtab_nochim.rds'. This may be fine if 
+          abbreviated names were used or the sample names are not contained in 
+          the first column of 'descriptors.txt'. Double-checking never hurts!")
+}
+
+
+# Build a phylogenetic tree of the ASVs, but only when 'parameters$maketree'
+# is "TRUE" and 'path' holds no 'phylotree.rds' yet (the result is saved there)
+if (!file.exists(paste0(path, "/phylotree.rds")) &&
+    parameters$maketree == "TRUE") {
+  tmp$ASVs <- getSequences(tmp$seqtabp)
+  names(tmp$ASVs) <- tmp$ASVs
+  tmp$ASV_align <- DNAStringSet(tmp$ASVs) %>% AlignSeqs(anchor = NA)
+  tmp$ASV_phang <- as(tmp$ASV_align, "matrix") %>% phyDat(type = "DNA")
+  tmp$dm <- dist.ml(tmp$ASV_phang)
+  tmp$treeNJ <- NJ(tmp$dm)
+  tmp$fit <- pml(tmp$treeNJ, data = tmp$ASV_phang)
+  tmp$fitGTR <- update(tmp$fit, inv = 0.2, k = 4)
+  tmp$fitGTR <- optim.pml(tmp$fitGTR, model = "GTR", optGamma = TRUE,
+                          optInv = TRUE, rearrangement = "stochastic",
+                          control = pml.control(trace = 0))
+  saveRDS(tmp$fitGTR, file = paste0(path, "/phylotree.rds"))}
+
+## Parse into the phyloseq object 'p'; the tree is added if phylotree.rds exists
+row.names(tmp$samp_table) <- tmp$samp_list
+if (file.exists(paste0(path, "/phylotree.rds"))) {
+  tmp$treep <- readRDS(paste0(path, "/phylotree.rds"))
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE), 
+                sample_data(tmp$samp_table), 
+                tax_table(tmp$taxap), 
+                phy_tree(tmp$treep$tree))
+}else{
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE), 
+                sample_data(tmp$samp_table), tax_table(tmp$taxap))}
+
+## Store the taxa names as DNA sequences, then rename the taxa to ASV1..ASVn
+tmp$ASV_sequences <- Biostrings::DNAStringSet(taxa_names(p))  # before renaming
+taxa_names(p) <- paste0("ASV", seq(ntaxa(p)))
+names(tmp$ASV_sequences) <- taxa_names(p)
+p <- merge_phyloseq(p, tmp$ASV_sequences)
+
+## Optional pruning of control samples ('ps' defaults to all samples so that
+## it is defined on every path, also when no control keywords are given)
+ps <- p
+if (parameters$prune_controls == "TRUE") {
+  if (!is.null(parameters$controls)) {
+    tmp$samp_clean <- tmp$samp_list[!tmp$samp_list %in% grep(paste0(
+      parameters$controls, collapse = "|"), tmp$samp_list, value = TRUE)]
+    tmp$contr_pruned <- setdiff(tmp$samp_list, tmp$samp_clean)
+    ps <- prune_samples(tmp$samp_clean, p)
+    # Phyloseq objects for just the controls (counts and relative abundance)
+    ps.contr <- prune_samples(tmp$contr_pruned, p)
+    ps.contr <- prune_taxa(taxa_sums(ps.contr) > 0, ps.contr)
+    ps.transcontr <- transform_sample_counts(
+      ps.contr, function(ASV) ASV/sum(ASV))
+    # cat() prints by itself and returns NULL, so it is not wrapped in message()
+    cat("\n",
+        "Number of control samples that were pruned and will not be analysed:\n",
+        length(tmp$samp_list) - length(tmp$samp_clean), "\n",
+        "The following controls were pruned:\n", tmp$contr_pruned,
+        "The controls are contained in a separate phyloseq object: ps.contr",
+        "\n", sep = "\n")
+  } else {
+    cat("\n\nparameters$prune_controls is TRUE but 'parameters$controls' is",
+        "empty.\nNo samples were pruned.\n\n")
+    warning("'parameters$controls' is empty: no samples were pruned.")
+  }
+}
+
+# Prune ASVs defined as noise (only if parameters$prune_noise_taxgroups is "TRUE")
+if (parameters$prune_noise_taxgroups == "TRUE") {
+  tmp$ps_taxlvls <- colnames(tax_table(ps))
+  tmp$noise_ASVs <- character(0)
+  for (lvl in tmp$ps_taxlvls) {  # one lookup per column of the taxonomy table
+    tmp$noise_ASVs <- c(tmp$noise_ASVs, 
+                        cuphyr::list_subset_ASVs(
+                          physeq = ps, subv = parameters$noise_taxgroups, 
+                          taxlvlsub = lvl))
+  }
+  tmp$noise_ASVs <- unique(tmp$noise_ASVs)
+  tmp$no_noise_ASVs <- colnames(otu_table(ps))
+  tmp$no_noise_ASVs <- setdiff(tmp$no_noise_ASVs, tmp$noise_ASVs)  # ASVs to keep
+  if (length(tmp$noise_ASVs) > 0) {
+    ps <- prune_taxa(tmp$no_noise_ASVs, ps)
+    tmp$no_noise_ps <- ps  # copy of ps taken right after the noise removal
+    cat(length(tmp$noise_ASVs), 
+        "ASVs were pruned because they belonged to the following 
+        taxonomic groups:\n")
+    cat(parameters$noise_taxgroups, "\n", sep = "\n")}
+  else{
+    cat("No ASVs were recognized as belonging to the following taxonomic groups 
+        defined as noise:\n")
+    cat(parameters$noise_taxgroups, "\n", sep = "\n")
+  }
+}
+
+# Prune samples with fewer reads than minASVcount
+if (parameters$minASVcount > 0) {
+  tmp$samp_pruned <- names(which(sample_sums(ps) < parameters$minASVcount))
+  ps <- prune_samples(sample_sums(ps) >= parameters$minASVcount, ps)
+  if (length(tmp$samp_pruned) > 0) {
+    cat("The following samples were pruned because ASV counts were lower than",
+        parameters$minASVcount,  ":\n")
+    cat(tmp$samp_pruned, "\n", sep = "\n")
+  }
+}
+
+# Remove 0 count ASVs (e.g. control ASVs that remain) from the base object
+ps <- prune_taxa(taxa_sums(ps) > 0, ps)
+
+# Root the tree whenever the object has one (provided or generated), and do it
+# BEFORE ps.trans is derived so that ps.trans is built from the rooted object.
+if (parameters$roottree == "TRUE" &&
+    !is.null(phyloseq::phy_tree(ps, errorIfNULL = FALSE))) {
+  phyloseq::phy_tree(ps) <- cuphyr::root_tree_in_outgroup(physeq = ps)}
+# Tibble of the base object for phyloseq-independent analyses (can take minutes)
+ps_tbl <- as_tibble(psmelt(ps))
+# Transformed per sample (per-sample relative abundance)
+ps.trans <- transform_sample_counts(ps, function(ASV) ASV/sum(ASV))
+
+if (parameters$biom_export == "TRUE") {
+  # Objects to export, named after their target file. ps.contr only exists
+  # when control samples were pruned, so it is added conditionally.
+  tmp$biom <- list(all_samples = p,
+                   samples_without_controls = ps,
+                   samples_without_controls_rel_abundance = ps.trans)
+  if (exists("ps.contr")) tmp$biom$just_controls <- ps.contr
+  for (biom_name in names(tmp$biom)) {
+    # file.path() for all files: paste0(path, "all_samples.biom") lost the
+    # separator whenever 'path' had no trailing slash
+    suppressWarnings(phyloseq.extended::write_phyloseq(
+      tmp$biom[[biom_name]], biom_format = "standard",
+      biom_file = file.path(path, paste0(biom_name, ".biom"))))
+  }
+}
+
+ps
+```
+
+## Output
+
+The chunks below will produce various plots and other output. Each chunk is headed by a description of the output and may contain some parameters to adjust the output. 
+
+#### Plot looks
+This chunk sets the background structure and color palette. Viridis was chosen because it is optimized for grey-scale printing and various types of color blindness. More info on the Viridis palette can be found on [the Viridis info page](https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html). It also establishes save_plot as a shorter variant of ggsave with customized date-time structure to save plots with the same name multiple times instead of overwriting them (overwriting can be triggered with overwrite=TRUE).
+
+```{r plot-design-global, tidy=FALSE, warning=FALSE}
+##### Optional settings (sensible defaults) #####
+# Output format for all plots. Default "pdf"; also possible: "eps"/"ps",
+# "tex" (pictex), "jpeg", "tiff", "png", "bmp" and "svg"
+parameters$output_format <- "pdf"
+
+# Preferred ggplot2 theme. Recommended: "theme_bw()".
+theme_set(theme_bw())
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+my_scale_col <- viridis::scale_color_viridis(discrete = TRUE)
+my_scale_fill <- viridis::scale_fill_viridis(discrete = TRUE)
+
+# Narrower custom color ranges based on viridis. The 20 colors of each range
+# are interleaved (1, 6, 11, 16, 2, ...) so that adjacent colors are distinct
+tmp$sort_colors <- as.vector(t(matrix(1:20, nrow = 5)))
+
+# Customized vectors
+tmp$n_col <- 20
+tmp$viridis_greens <- viridis(
+  n = tmp$n_col, begin = 0.85, end = 0.7, option = "D")[tmp$sort_colors]
+tmp$viridis_reds <- viridis(
+  n = tmp$n_col, begin = 0.7, end = 0.5, option = "B")[tmp$sort_colors]
+tmp$viridis_blues <- viridis(
+  n = tmp$n_col, begin = 0.2, end = 0.4, option = "D")[tmp$sort_colors]
+tmp$viridis_yellows <- viridis(
+  n = tmp$n_col, begin = 1, end = 0.9, option = "D")[tmp$sort_colors]
+tmp$viridis_dark <- viridis(
+  n = tmp$n_col, begin = 0, end = 0.1, option = "A")[tmp$sort_colors]
+tmp$viridis_light <- viridis(
+  n = tmp$n_col, begin = 1, end = 0.9, option = "A")[tmp$sort_colors]
+# Named list of all ranges, available in the global environment
+sub_viridis <- list(greens = tmp$viridis_greens, blues = tmp$viridis_blues,
+                    yellows = tmp$viridis_yellows, lights = tmp$viridis_light,
+                    reds = tmp$viridis_reds, darks = tmp$viridis_dark)
+
+tmp$out <- paste0(".", parameters$output_format)
+
+#################### Function ############################
+# save_plot(): thin wrapper around ggsave() that writes plot 'pl' into 'outp'.
+#   filetype   file extension including the dot, e.g. ".pdf"
+#   plot_name  file name without extension
+#   overwrite  FALSE (default): if the file exists, a d-m-y_HMS time stamp is
+#              appended to the name; TRUE: the existing file is replaced
+# Width/height (cm) and dpi come from set$wp, set$hp, set$res, else 20/20/300.
+save_plot <- function(
+  pl, filetype = ".pdf", plot_name = "my_plot", overwrite=FALSE){
+  wp <- if (!is.null(set$wp)) set$wp else 20
+  hp <- if (!is.null(set$hp)) set$hp else 20
+  res <- if (!is.null(set$res)) set$res else 300
+  target <- file.path(outp, paste0(plot_name, filetype))
+  if (file.exists(target) && !overwrite) {
+    target <- file.path(outp, paste0(
+      plot_name, "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), filetype))}
+  ggsave(target, pl, width = wp, height = hp, units = "cm", dpi = res)
+}
+
+################################################
+```
+
+#### Total ASV counts ranked
+This chunk plots the absolute abundance of all samples (including controls) and all samples without controls and other trimmed samples.
+```{r overview}
+# CHANGE ME to the sample group for color coding. Accepted values are the column 
+# headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Rank samples
+set$ranked <- cuphyr::make_ranked_sums(p, myset = tmp$subset_id)
+set$ranked_ps <- cuphyr::make_ranked_sums(ps, myset = tmp$subset_id)
+set$ymax <- max(set$ranked$Abundance)
+set$ymax <- set$ymax + round(set$ymax/10)
+set$xmax <- nrow(set$ranked) + 1
+set$title2 <- "Samples (without controls)"
+
+# Stabilize colors
+set$color_vars <- set$ranked[,set$color_by]  %>% 
+  unlist() %>% as.character() %>% unique()
+set$color_vars <- sort(set$color_vars)
+set$color_varsPalette <- viridis(length(set$color_vars))
+names(set$color_varsPalette) <- set$color_vars
+set$my_scale_fill <- scale_fill_manual(values = set$color_varsPalette)
+
+# plot
+# This makes the first plot that should be made in all cases
+plots$overview_all <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) + 
+  aes_string(fill = set$color_by) + 
+  geom_col() + set$my_scale_fill + ggtitle("All samples") + ylim(0, set$ymax) + 
+  xlim(0,set$xmax) + ylab("ASV counts ('reads')")
+
+# If there are noise ASVs defined and found, make a plot indicating counts after
+# removal of those. This plot should be second of three.
+if (length(tmp$noise_ASVs) > 0) {
+  set$ranked_nonoise <- cuphyr::make_ranked_sums(
+    tmp$no_noise_ps, myset = tmp$subset_id)
+  plots$overview_noise <- ggplot(
+    data = set$ranked_nonoise, aes(x = Rank, y = Abundance)) + 
+  aes_string(fill = set$color_by) + 
+  geom_col() + set$my_scale_fill + 
+    ggtitle("Samples (without controls), noise ASVs removed") + 
+    ylim(0, set$ymax) + 
+    xlim(0,set$xmax) + ylab("ASV counts ('reads')")
+}
+
+# If there is a cutoff for ASV counts, overwrite the first plot to include the
+# cutoff
+if (parameters$minASVcount > 0) {
+plots$overview_all <- plots$overview_all + 
+  geom_hline(yintercept = parameters$minASVcount, linetype = "dashed") + 
+    ggtitle("All samples (ASV count cutoff indicated)")
+set$title2 <- "Samples (without controls and low count samps)"
+}
+
+# Make a plot that should always be on the bottom, showing the final object ps
+# without controls, pruned samples and noise ASVs
+plots$overview_ps <- ggplot(data = set$ranked_ps, aes(x = Rank, y = Abundance)) + 
+  aes_string(fill = set$color_by) + 
+  geom_col() + set$my_scale_fill + ggtitle(set$title2) + ylim(0, set$ymax) + 
+  xlim(0,set$xmax) + ylab("ASV counts ('reads')")
+
+# Combine first and last plot
+plots$combo_overview <- ggarrange(
+  plots$overview_all, plots$overview_ps, nrow = 2, align = "v", 
+  common.legend = TRUE, legend = "right")
+
+# If noise ASVs were defined and found overwrite the combined plot to include
+# Before/raw plot, noise removed plot and after/ps plot.
+if (length(tmp$noise_ASVs) > 0) {
+plots$combo_overview <- ggarrange(
+  plots$overview_all, plots$overview_noise, plots$overview_ps,
+  nrow = 3, align = "v", 
+  common.legend = TRUE, legend = "right")
+}
+
+#Save final plot
+save_plot(plots$combo_overview, plot_name = "Overview_all_and_pruned", 
+          filetype = tmp$out)
+
+#Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+#Print final plots
+plots$combo_overview
+```
+
+#### Controls
+This chunk generates an overview over the controls (positive AND negative)
+
+```{r Positive controls}
+# CHANGE ME to the desired sample categories on the x-axis. In this case it 
+# should be the Sample names.
+set$x_axis_value = "SampleIDs"
+
+# CHANGE ME to the taxonomic level for color coding. Use "OTU" for ASVs, 
+# "Genus", "Species" or "OTU" recommended to compare pos. controls.
+set$color_by_taxlvl = "Genus"
+
+# CHANGE ME to the taxonomic level for labeling the tree tips (if phylogenetic 
+# tree is available). Use "OTU" for ASVs.
+set$label_by_taxlvl = "OTU"
+
+# CHANGE ME to a sample category to shape the tree tip labels by (if 
+# phylogenetic tree is available).
+set$label_shape_by = "SampleIDs"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to generate a tree for just the control sequences IF no 
+# phylogenetic tree for all sequences is provided. This may slow down this 
+# chunk when running it for the first time
+set$control_tree = TRUE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+if (set$control_tree & class(try(phy_tree(ps.transcontr), 
+                                 silent = TRUE)) == "try-error") {
+  # generate a phylogenetic tree of the control ASVs only if there is no file 
+  # called 'controls_phylotree.rds' in 'path'; the saved fit is read back below
+  if (!file.exists(paste0(path, "/controls_phylotree.rds"))) {
+    set$ASVs <- phyloseq::refseq(ps.transcontr)
+    set$ASV_align <- AlignSeqs(set$ASVs, anchor = NA)
+    set$ASV_phang <- phyDat(as(set$ASV_align, "matrix"), type = "DNA")
+    set$dm <- dist.ml(set$ASV_phang)
+    set$treeNJ <- NJ(set$dm)
+    set$fit <- pml(set$treeNJ, data = set$ASV_phang)
+    set$fitGTR <- update(set$fit, k = 4, inv = 0.2)
+    set$fitGTR <- optim.pml(set$fitGTR, model = "GTR", 
+                            optInv = TRUE, optGamma = TRUE,
+                            rearrangement = "stochastic", 
+                            control = pml.control(trace = 0))
+    saveRDS(set$fitGTR, file = paste0(path, "/controls_phylotree.rds"))}
+  set$fitGTR <- readRDS(paste0(path, "/controls_phylotree.rds"))
+  phyloseq::phy_tree(ps.transcontr) <- set$fitGTR$tree
+}
+
+plots$topnpplot <- plot_bar(ps.contr, x = set$x_axis_value, 
+                            fill = set$color_by_taxlvl) + my_scale_fill + 
+  theme(axis.title.x = element_blank(), legend.position = "none", 
+        legend.key.size = unit(3, "mm")) + 
+  ylab("ASV counts") + guides(col = guide_legend(ncol = 3))
+
+plots$topntplot <- plot_bar(ps.transcontr, x = set$x_axis_value, 
+                            fill = set$color_by_taxlvl) + my_scale_fill + 
+  theme(axis.title.x = element_blank(), legend.position = "none", 
+        legend.key.size = unit(3, "mm")) + 
+  ylab("Relative abundance") + guides(col = guide_legend(ncol = 3))
+
+plots$combo_contr <- ggarrange(plots$topnpplot, plots$topntplot, ncol = 2, 
+                               labels = c("A", "B"), align = "hv", 
+                               common.legend = TRUE, legend = "right")
+
+if (class(try(phy_tree(ps.transcontr), silent = TRUE)) != "try-error") {
+plots$tre <- plot_tree(
+          ps.transcontr, ladderize = "left", label.tips = set$label_by_taxlvl, 
+          color = "abundance", text.size = 2.5, shape = set$label_shape_by) + 
+          scale_color_viridis_c(aesthetics = c("color","fill")) + 
+          theme(legend.position = "left", panel.border = element_blank())
+plots$combo_contr <- ggarrange(plots$tre, ggarrange(plots$topnpplot, 
+                                                    plots$topntplot, ncol = 2, 
+                               labels = c("B", "C"), align = "hv", 
+                               common.legend = TRUE, legend = "right"), 
+                               nrow = 2, legend = "right", labels = c("A")) 
+}
+
+# Save, then clear 'set' (like the overview and richness chunks) and print
+save_plot(plots$combo_contr, plot_name = "Controls", filetype = tmp$out)
+rm(list = ls(set), envir = set)
+plots$combo_contr
+```
+
+#### Richness plot
+
+This chunk plots the Alpha-Diversity according to the Shannon and Simpson indices. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Richness, tidy=TRUE, message=FALSE}
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are 
+# the column headers in the descriptor file.
+set$x_axis_value = "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the column 
+# headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Plot all diversity measures (kept as a "try-error" object if the call fails)
+plots$richP <- try(
+  plot_richness(ps, x = set$x_axis_value, color = set$color_by) + 
+    my_scale_col, silent = TRUE)
+# Just Shannon and Simpson
+plots$richShSi <- plot_richness(ps, x = set$x_axis_value, 
+                                measures = c("Shannon", "Simpson"), 
+                                color = set$color_by) + my_scale_col
+
+# Save; inherits() is scalar, class(x) == "try-error" is not for gg objects
+if (!inherits(plots$richP, "try-error")) {
+  save_plot(plots$richP, plot_name = "Alpha_diversity_all", filetype = tmp$out)
+}
+save_plot(plots$richShSi, plot_name = "Alpha_diversity_all_ShSi", 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+# Print to standard out
+if (!inherits(plots$richP, "try-error")) {
+  plots$richP
+}
+plots$richShSi
+```
+
+#### Bray-Curtis NMDS plot
+
+This chunk generates a non-metric multidimensional scaling (NMDS) plot of the Bray-Curtis dissimilarity, giving a two-dimensional measure of community diversity. This is done for the primary parameter and the taxonomic level separately. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary, as well as the **max. number of taxa to be displayed at taxlvl**. _Friendly warning: This chunk may not perform for lower order taxlvl, such as 'species', if they are not sufficiently abundant in all samples._
+
+```{r Bray-Curtis NMDS, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the 
+# column headers in the descriptor file.
+set$shape_by = "Vekst"
+# CHANGE ME to the sample group for color coding. Accepted values are the 
+# column headers in the descriptor file.
+set$color_by = "Skifte"
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+# Can be changed to change the number of Top n taxa plotted at taxlvl in 
+# separate panels, a maximum of 9 is recommended for good readability.
+set$top_n = 9
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Transform data for Bray-Curtis distance 
+tmp$ord_nmds <- ordinate(ps.trans, method = "NMDS", distance = "bray")
+tmp$ps.topn <- cuphyr::abundant_tax_physeq(physeq = ps.trans, lvl = set$taxlvl, 
+                                           top = set$top_n)
+tmp$top_ord_nmds <- ordinate(tmp$ps.topn, method = "NMDS", distance = "bray")
+# Plot the sample ordination; legends for color and shape are switched off
+plots$nmds <- plot_samples(ps.trans, tmp$ord_nmds, color = set$color_by,
+                           shape = set$shape_by, 
+                           title = "Bray NMDS") + 
+  my_scale_col + guides(color = "none", shape = "none")
+
+plots$nmds_tax <- plot_ordination(tmp$ps.topn, tmp$top_ord_nmds, type = "taxa",
+                                  color = set$taxlvl, 
+                                  title = paste0("Bray NMDS ", set$taxlvl)) + 
+  my_scale_col
+
+plots$nmds_taxpanels <- plots$nmds_tax + 
+  facet_wrap(paste0("~", set$taxlvl), scales = "free_x") + 
+  my_scale_col
+
+# Save
+save_plot(plots$nmds, plot_name = paste0("NMDS_", set$shape_by, "_", 
+                                         set$color_by), filetype = tmp$out)
+save_plot(plots$nmds_tax, plot_name = paste0("NMDS_", set$taxlvl), 
+          filetype = tmp$out)
+save_plot(plots$nmds_taxpanels, 
+          plot_name = paste0("NMDS_top", set$top_n, "_", set$taxlvl), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$nmds
+plots$nmds_tax
+plots$nmds_taxpanels
+```
+
+#### PcoA (requires phylogenetic tree)
+
+This chunk generates an alternative common ordination plot, called 'PCoA' (principal coordinates analysis), based on the primary variable, giving a two-dimensional measure of community diversity by considering the phylogenetic tree. **The chunk does not require any input**, although it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary. If the provided tree is not rooted, Phyloseq will root it to a random ASV. Root the tree to a given ASV to get consistent plots here (implementation will follow; until then, see [this github issue](https://github.com/joey711/phyloseq/issues/235#issuecomment-26289761)).
+
+```{r PcoA, eval=FALSE, message=FALSE, warning=FALSE, tidy=TRUE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the 
+# column headers in the descriptor file.
+set$shape_by = "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the 
+# column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Skip the ordination if the phyloseq object carries no phylogenetic tree
+if (inherits(try(phy_tree(ps), silent = TRUE), "try-error")) {
+  # Message could be more informative
+  cat("This plot could not be generated because no phylogenetic tree was", 
+      "provided.\n\n")
+} else {
+  # Ordinate with method "PCoA" on the "unifrac" distance (weighted = TRUE)
+  tmp$ord_pcoa <- ordinate(ps.trans, "PCoA", "unifrac", weighted = TRUE)
+  # Plot
+  plots$pcoa <- plot_ordination(ps.trans, tmp$ord_pcoa, color = set$color_by, 
+                                shape = set$shape_by) + my_scale_col
+  # Save (file name: PCOA_<color_by>_<shape_by>)
+  save_plot(plots$pcoa, plot_name = paste0("PCOA_", set$color_by, "_", 
+                                           set$shape_by), 
+            filetype = tmp$out)
+
+  # Clean up plot parameters
+  rm(list = ls(set), envir = set)
+
+  # Print to standard out
+  plots$pcoa
+}
+
+
+```
+
+#### Get a list of Top N taxa at a given level
+This chunk lists the top n most abundant taxonomic terms at a given level. Change the function parameters to the desired values. For more info, check help page of the function with `?cuphyr::abundant_tax_physeq()`. Change 'ignore_na' to include/exclude NA values at the given level.
+```{r Toplist}
+#The character vector can later be accessed by calling 'tmp$tops'
+tmp$tops <- cuphyr::abundant_tax_physeq(physeq = ps, 
+                            lvl = "Genus",
+                            top = 20,
+                            output_format = "tops",
+                            ignore_na = TRUE,
+                            silent = FALSE)
+```
+
+#### Top N ASVs/taxa Bar plot
+
+This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 20, a larger n may lead to delay/skipping of the plot in standard out, but it should be saved as a PDF regardless for ASVs. For taxa, a large n  may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Bar-plot, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels. 
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Skifte"
+
+# CHANGE ME to the desired sample categories on the x-axis. 
+# Accepted values are the column headers in the descriptor file.
+set$x_axis_value = "Vekst"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20' 
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+
+# CHANGE ME to change the number of Top n taxa to be plotted at 
+# taxlvl.
+set$top_n = 10
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight. 
+# Comment out to not highlight anything.
+#set$highlight = "Globisporangium"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same 
+# color in both plots). Highlighting will unify colors even if unify_colors 
+# is FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = FALSE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This 
+# improves visual clarity when a bar segment appears black due to the border of
+# many small ASVs overlapping.
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 40
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relativ abundans"
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+# Make physeq objects of top n taxa and top n ASVs (head() never pads with NA)
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(
+  ps.trans, lvl = set$taxlvl, top = set$top_n, ignore_na = set$ignore_na)
+set$topnASVs <- head(names(sort(taxa_sums(ps), decreasing = TRUE)), 
+                     set$topASVs)
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.trans)
+
+if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
+  set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl], 
+                      phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
+  set$toptax <- sort(set$toptax)
+  set$taxlvlPalette <- viridis(length(set$toptax))
+  names(set$taxlvlPalette) <- set$toptax
+  if (exists("highlight", envir = set)) {
+    # It is possible to change the highlight color here by substituting 
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f"'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+  set$my_scale_fill <- scale_fill_manual(values = set$taxlvlPalette, 
+                                         na.value = "grey")
+}else{
+  set$my_scale_fill <- my_scale_fill
+}
+
+# Plot
+if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_tax <- plot_bar(set$ps.topnTax, 
+                           x = set$x_axis_value, 
+                           fill = set$taxlvl, 
+                           title = paste0("Top", set$top_n, "_", set$taxlvl)) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+if (set$fuse_ASVs) { 
+  plots$topn_tax <- plots$topn_tax + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl), 
+    stat = "identity", position = "stack") + 
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_ASVs <- plot_bar(set$ps.topnASVs, 
+                            x = set$x_axis_value, 
+                            fill = set$taxlvl, 
+                            title = paste0("Top", set$topASVs, "_ASVs")) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax, plot_name = paste0("Top", set$top_n, "_", set$taxlvl), 
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs, plot_name = paste0("Top", set$topASVs, "_ASVs"), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax
+plots$topn_ASVs
+```
+
+#### Siamcat
+This chunk implements statistical testing of ASVs that are differentially abundant for a given biological trait (column in descriptors.txt). It can also test whether grouping variables other than the tested one are associated with the abundance data in a similar or different way than the chosen trait (confounders). The chunk is largely based on the [SIAMCAT "Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html).
+There are several options that can be chosen.
+
+```{r siamcat, warning=FALSE}
+# CHANGE ME to the sample category that will be used as the test group. ASVs 
+# that are differentially abundant according to this grouping will be detected. 
+# Accepted values are the column headers in your descriptor file.
+set$test_label = "Dato"
+# CHANGE ME to the value in the test group column that marks the 'case' 
+# samples; it is passed to siamcat() as the 'case' argument.
+set$case_value = "J21"
+
+# CHANGE ME to the cutoff p-value for selecting significant ASVs (FDR-adjusted 
+# p-value)
+set$p_val_cutoff = 0.05
+
+# CHANGE ME to the taxonomic level of interest for more informative ASV 
+# annotation (format: taxlv-ASV)
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to filter low-abundance ASVs. Sequence variants with lower 
+# abundance will not be analysed to reduce artifacts
+set$filter_abundance = 0.001
+
+# Can be changed to include (TRUE) or exclude (FALSE) an output file where all 
+# possible confounders are checked. This will analyse the confounding effect of 
+# other factors in 'descriptors' over the chosen test group and produce a pdf 
+# file containing several plots.
+set$check_confounders = TRUE
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Make a copy of the transformed physeq object and parse taxonomic information 
+# for the chosen taxlvl into ASV names to give more informative plots.
+ps.siam <- ps.trans
+taxa_names(ps.siam) <- tax_table(ps.trans) %>% 
+  as.data.frame() %>% 
+  rownames_to_column(var = "OTU") %>% 
+  unite(col = OTU, set$taxlvl, OTU) %>% 
+  select(OTU) %>% 
+  unlist() %>% 
+  unname()
+
+# Read in transformed physeq object as SIAMCAT object and choose trait
+sc.trans <- siamcat(phyloseq = ps.siam, 
+                    label = set$test_label, 
+                    case = set$case_value)
+
+# print the generated Siamcat object to check for valid parsing
+show(sc.trans)
+
+# Filter ASVs with less than set$filter_abundance
+sc.filt <- filter.features(sc.trans,
+    filter.method = 'abundance',
+    cutoff = set$filter_abundance)
+
+# check confounders if the option is TRUE
+if (set$check_confounders) {  
+  sc.conf <- check.confounders(
+      sc.filt,
+      fn.plot = file.path(outp, 'confounder_plots.pdf'),
+      meta.in = NULL,
+      feature.type = 'filtered',
+      verbose = 1)
+  cat("Confounders checked, results stored in", 
+      file.path(outp, 'confounder_plots.pdf'))
+}
+
+# Plot associations and save the analysis to the siamcat object
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc',
+    alpha = set$p_val_cutoff,
+    mult.corr = "fdr",
+    detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    fn.plot = file.path(outp, 
+                  paste0("Differential_abundance_", set$test_label,
+                         "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), ".pdf")))
+
+# Plot associations again to standard out
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc',
+    alpha = set$p_val_cutoff,
+    mult.corr = "fdr",
+    detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    verbose = 0)
+
+# record plot from standard out
+plots$siam_assoc <- recordPlot()
+
+# Turn significant hits into tbl, if there are any, generate a vector containing 
+# significant tax groups at taxlvl and a vector containing significant ASVs
+tbl_me_this <- associations(sc.filt) %>%
+  filter(p.adj < set$p_val_cutoff) %>%
+  rownames_to_column("tax_ASV") %>%
+  separate(col = "tax_ASV", into = c("tax", "ASV"), sep = "_") %>%
+  select(tax, ASV, p.adj)
+
+# Always (re)define both vectors so that results from an earlier run cannot 
+# leak into the subset chunk when the current run finds no significant hits.
+significant_tax_groups <- unique(tbl_me_this$tax)
+significant_ASVs <- unique(tbl_me_this$ASV)
+
+if (nrow(tbl_me_this) > 0) {
+ cat(sep = "\n", paste("The following taxonomic groups were found to be", 
+     "differentially abundant and stored in 'significant_tax_groups':"),
+     significant_tax_groups, 
+     "This object can be used to set a subgroup in the chunk below.")
+}
+  
+```
+
+
+## Subset the Phyloseq object by taxonomic group(s)
+
+This chunk gives the option to create a subset of the general Phyloseq object by providing a vector of search terms and a taxonomic level to search at. It requires **one or more search terms**, a **taxonomic level** to search at and a **description of the subset**. The description will only be used for the titles of plots generated from the subsets.
+
+```{r Subset by Taxonomic identity, tidy=TRUE}
+# Vector to subset on
+set$subv = c("Phytophthora")
+# CHANGE ME to the taxonomic level at which you want to search for matching 
+# entries
+set$taxlvlsub = "Genus"
+# CHANGE ME to a descriptor for the subset
+tmp$subset_id = "Oomycetes of interest"
+# CHANGE ME if you want to use the significant groups found by SIAMCAT. If TRUE, 
+# those groups will be used in addition to the groups specified in set$subv.
+set$use_siamcat_results = TRUE
+
+# CHANGE ME to the sample group for color coding in the summary plot. 
+# Accepted values are the column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$subv <- if (set$use_siamcat_results && exists("significant_tax_groups")) { 
+  unique(c(set$subv, significant_tax_groups))
+  }else{
+  set$subv}
+set$subASVs <- cuphyr::list_subset_ASVs(subv = set$subv, 
+                                        taxlvlsub = set$taxlvlsub)
+ps.subs <- prune_taxa(set$subASVs, ps)
+ps.subs.trans <- prune_taxa(set$subASVs, ps.trans)
+
+# plot the reads in the subset
+set$ranked <- cuphyr::make_ranked_sums(ps.subs, myset = tmp$subset_id)
+set$avg <- mean(set$ranked$Abundance)
+set$avg_round <- format(round(set$avg, 0), nsmall = 0)
+plots$subset <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) + 
+  aes_string(fill = set$color_by) + 
+  geom_col() + my_scale_fill + 
+  geom_hline(yintercept = set$avg, linetype = "dashed") + 
+  ylab("ASV counts ('reads')") +
+  ggtitle(paste0("Subset: ", tmp$subset_id, 
+                 " (average ASV count ",set$avg_round, ")"))
+
+# Save plot
+save_plot(plots$subset, plot_name = "Subset_overview", filetype = tmp$out)
+# Print plot
+plots$subset
+# print info on generated object
+cuphyr::summarise_physeq(ps, ASV_sublist = set$subASVs, 
+                         sublist_id = tmp$subset_id, samp_names = FALSE)
+
+# Optional export as biom-file (writes the subset 'ps.subs', not the raw 'p')
+if (parameters$biom_export == "TRUE") {
+  tmp$subset_id <- tmp$subset_id %>% str_replace_all(" ", "_")
+  tmp$biom_file <- file.path(path, paste0("subset_", tmp$subset_id, ".biom"))
+  suppressWarnings(phyloseq.extended::write_phyloseq(
+    ps.subs, biom_file = tmp$biom_file, biom_format = "standard"))
+}
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+```
+
+#### Bar plots for subsets of taxonomic group(s)
+The chunk is very similar to the vanilla bar plot chunk above but takes the subset data instead of the complete Phyloseq object. This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the top n ASVs and most common taxa for the primary and secondary parameters. The default for n is set at 100 for subsets. The range of n for subsets is larger, since the taxonomic variety was already reduced by the subsetting, meaning the Top 100 ASVs likely belong to few species. For taxa, a large n may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+```{r Bar plot subset, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels. 
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Skifte"
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are 
+# the column headers in the descriptor file.
+set$x_axis_value = "SampleIDs"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20' 
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+# CHANGE ME to change the number of Top n taxa to be plotted at taxlvl.
+set$top_n = 10
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight. 
+# Comment out to not highlight anything.
+#set$highlight <- "Pythium"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same 
+# color in both plots). Highlighting will unify colors even if unify_colors is 
+# FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = FALSE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This 
+# improves visual clarity when a bar segment appears black due to the border of 
+# many small ASVs overlapping.
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relative abundance"
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Make physeq objects of top n taxa and top n ASVs. ASVs are ranked by their 
+# counts within the subset (ps.subs), not within the complete object (ps).
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(
+  ps.subs.trans, lvl = set$taxlvl, top = set$top_n, ignore_na = set$ignore_na)
+set$topnASVs <- head(names(sort(taxa_sums(ps.subs), decreasing = TRUE)), 
+                     set$topASVs)
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.subs.trans)
+
+if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
+  set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl], 
+                      phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
+  set$toptax <- sort(set$toptax)
+  set$taxlvlPalette <- viridis(length(set$toptax))
+  names(set$taxlvlPalette) <- set$toptax
+  if (exists("highlight", envir = set)) {
+    # It is possible to change the highlight color here by substituting 
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f"'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+  set$my_scale_fill <- scale_fill_manual(values = set$taxlvlPalette, 
+                                         na.value = "grey")
+}else{
+  set$my_scale_fill <- my_scale_fill
+}
+
+# Plot
+if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
+  na.value = "grey")
+}
+
+plots$topn_tax_subset <- plot_bar(set$ps.topnTax, x = set$x_axis_value, 
+                                  fill = set$taxlvl, title = paste0(
+                                    "Top", set$top_n, "_", set$taxlvl)) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+if (set$fuse_ASVs) { 
+  plots$topn_tax_subset <- plots$topn_tax_subset + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl), 
+    stat = "identity", position = "stack") + 
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_ASVs_subset <- plot_bar(set$ps.topnASVs, x = set$x_axis_value, 
+                                   fill = set$taxlvl, title = paste0(
+                                     "Top", set$topASVs, "_ASVs")) + 
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax_subset, 
+          plot_name = paste0("Top", set$top_n, "_", set$taxlvl, "_subset"), 
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs_subset, 
+          plot_name = paste0("Top", set$topASVs, "_ASVs_subset"), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax_subset
+plots$topn_ASVs_subset
+```
+### Abundance tables
+```{r Abundance tables per genus, message=FALSE}
+ps_trans_tbl <- as_tibble(psmelt(ps.trans))
+
+# Genera of interest
+genera_of_interest <- c("Aphanomyces", "Pythium", "Globisporangium", "Phytophthora")
+
+# Taking ps_tbl, grouping by Genus and summarizing (open heart surgery)
+# Will give a table with two columns: "Genus" "Genus_total_Abundance"
+genus_abundance_tbl <- ps_tbl %>%
+  group_by(Genus) %>%
+  summarise(Genus_total_Abundance = sum(Abundance))
+
+# Relative abundance per genus and sample, restricted to the genera of interest
+genus_abundance_tbl_per_sample <- ps_trans_tbl %>%
+  group_by(Genus, Sample) %>%
+  summarise(Genus_Sample_Abundance = sum(Abundance)) %>%
+  filter(Genus %in% genera_of_interest) %>%
+  group_by(Sample) %>%
+  mutate(sums_abu_sample = sum(Genus_Sample_Abundance)) %>%
+  ungroup() %>%
+  mutate(ratio = Genus_Sample_Abundance / sums_abu_sample)
+
+# To get back all the other info, one way is to fuse back to original table
+ps_tbl_with_genus_abundance <- left_join(ps_tbl, genus_abundance_tbl_per_sample,
+                                         by = c("Genus", "Sample"))
+
+# Can be made more compact by removing ASV+Species info and original abundance
+# values and then reducing to unique rows
+genus_abundance_tbl_full_info <- ps_tbl_with_genus_abundance %>%
+  select(-OTU, -Abundance, -Species) %>%
+  unique()
+view(genus_abundance_tbl_per_sample)
+
+# Sample descriptors; "oomy" is lower case so case-sensitive systems find it
+library(readxl)
+Morfologisk <- read.delim("oomy/descriptors.txt")
+morph_data <- tibble(Morfologisk)
+view(morph_data)
+data_mol_morph <- genus_abundance_tbl_per_sample %>%
+  full_join(morph_data, by = c("Sample" = "SampleIDs"))
+view(data_mol_morph)
+
+# Scatter plot. NOTE(review): 'sums_abu_sample.x' only exists if
+# descriptors.txt also has a 'sums_abu_sample' column -- confirm.
+mel_plot <- ggplot(data_mol_morph, aes(x = sums_abu_sample.x, y = pH_tall)) +
+  geom_point() +
+  geom_smooth(formula = y ~ x, method = "lm") +
+  ggtitle("Oomyceter") +
+  labs(y = "pH", x = "Relative abundance")
+mel_plot
+
+# Install 'xlsx' only when it is missing instead of on every run/knit
+if (!requireNamespace("xlsx", quietly = TRUE)) {
+  install.packages("xlsx")}
+library(xlsx)
+write.xlsx(data_mol_morph, "oomy/morph_data.xlsx")
+```
+
+
+### Other phylogenetic trees
+For these chunks, the ggtree library is required. If you are not sure whether it is installed, run the following chunk.
+
+```{r check-ggtree}
+# Install BiocManager and then ggtree (via BiocManager) when they are missing.
+if (!requireNamespace("BiocManager", quietly = TRUE))
+  install.packages("BiocManager")
+if (!requireNamespace("ggtree", quietly = TRUE)) BiocManager::install("ggtree")
+```
+
+#### Generic phylogenetic (from a subset)
+
+This chunk allows the generation of a generic phylogenetic tree for a given subset of the phyloseq object, even if none is provided for the whole set.
+```{r tree-auto}
+# Vector to subset on (the larger the subset, the longer the tree generation 
+# will take!)
+set$subv <- c("Phytophthora", "Pythium", "Globisporangium")
+# CHANGE ME to the taxonomic level at which you want to search for matching 
+# entries
+set$taxlvlsub <- "Genus"
+
+# CHANGE ME to the width of the tree shown (depends on its size, start large, 
+# make smaller until best fit is achieved)
+set$tree_width <- 11
+# CHANGE ME to the position you want the tree's scale to be shown. Must be 
+# smaller than set$tree_width, larger = more to the right.
+set$tree_scale_pos <- 10
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp <- 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp <- 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Collect the subset's ASVs, align them and fit a GTR tree from an NJ start
+set$subASVs <- cuphyr::list_subset_ASVs(taxlvlsub = set$taxlvlsub,
+                                        subv = set$subv)
+set$ps.treesubs <- phyloseq::prune_taxa(set$subASVs, ps)
+set$seqs <- phyloseq::refseq(set$ps.treesubs)
+set$align <- DECIPHER::AlignSeqs(DNAStringSet(set$seqs), anchor = NA)
+set$seqs_phang <- phangorn::phyDat(as(set$align, "matrix"), type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- phangorn::NJ(set$seqs_dm)
+set$seqs_fit <- phangorn::pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, k = 4, inv = 0.2)
+set$fitGTRseqs <- phangorn::optim.pml(
+  set$fitGTRseqs, model = "GTR", optInv = TRUE, optGamma = TRUE,
+  rearrangement = "stochastic", control = phangorn::pml.control(trace = 0))
+
+# Part of cuphyr::root_tree in outgroup, consider implementing a generic tree 
+# version instead of just physeq.
+root_generic_tree <- function(tree.unrooted){
+  # Roots 'tree.unrooted' (an ape 'phylo' object with branch lengths) on the tip
+  # with the longest terminal branch and returns the rooted tree.
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  # Terminal branches are the edges whose child node (column 2) is a tip. They
+  # are not always the first n_tips rows, so select them explicitly.
+  is_terminal <- tree.unrooted$edge[, 2] <= n_tips
+  tip_nodes <- tree.unrooted$edge[is_terminal, 2]
+  longest <- which.max(tree.unrooted$edge.length[is_terminal])
+  out_group <- tree.unrooted$tip.label[tip_nodes[longest]]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group,
+                        resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+# Root the fitted tree with root_generic_tree(), then draw it with ggtree
+set$fitGTRseqs$tree <- root_generic_tree(tree.unrooted = set$fitGTRseqs$tree)
+plots$subset_ASV_tree <- ggtree::ggtree(set$fitGTRseqs$tree) +
+  ggtree::geom_tree() +
+  ggtree::geom_treescale(x = set$tree_scale_pos) +
+  ggtree::geom_tiplab() +
+  xlim(0, set$tree_width)
+# Save the figure via save_plot()
+save_plot(plots$subset_ASV_tree, filetype = tmp$out,
+          plot_name = "subset_phylogenetic_tree")
+# Empty the 'set' environment again
+rm(list = ls(envir = set), envir = set)
+
+# Print the tree
+plots$subset_ASV_tree
+```
+
+#### Generic phylogenetic tree (from any FASTA)
+
+This chunk allows the generation of a generic phylogenetic tree for any given fasta. This may be useful to compare the phylogeny of a given set of ASVs and some reference sequences.
+
+```{r tree-manual}
+# CHANGE ME: path of the FASTA file to build the tree from. Larger files mean
+# longer tree generation!
+set$fasta <- "oomy/for_phylogeny.fasta"
+
+# CHANGE ME: x-axis width of the plotted tree. Depends on the tree size; start
+# large and decrease until the tree fits best.
+set$tree_width <- 11
+# CHANGE ME: x position of the tree's scale. Must be smaller than
+# set$tree_width; larger values move it further to the right.
+set$tree_scale_pos <- 10
+
+##### Optional settings (sensible defaults) #####
+
+# Width of the saved plot (in cm).
+set$wp <- 20
+# Height of the saved plot (in cm).
+set$hp <- 20
+# Resolution of the saved plot (in dpi).
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Read and align the sequences, then fit a GTR tree starting from an NJ tree
+set$seqs <- Biostrings::readDNAStringSet(set$fasta)
+set$align <- DECIPHER::AlignSeqs(DNAStringSet(set$seqs), anchor = NA)
+set$seqs_phang <- phangorn::phyDat(as(set$align, "matrix"), type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- phangorn::NJ(set$seqs_dm)
+set$seqs_fit <- phangorn::pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, k = 4, inv = 0.2)
+set$fitGTRseqs <- phangorn::optim.pml(
+  set$fitGTRseqs, model = "GTR", optInv = TRUE, optGamma = TRUE,
+  rearrangement = "stochastic", control = phangorn::pml.control(trace = 0))
+
+# Part of cuphyr::root_tree in outgroup, consider implementing a generic tree 
+# version instead of just physeq.
+root_generic_tree <- function(tree.unrooted){
+  # Roots 'tree.unrooted' (an ape 'phylo' object with branch lengths) on the tip
+  # with the longest terminal branch and returns the rooted tree.
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  # Terminal branches are the edges whose child node (column 2) is a tip. They
+  # are not always the first n_tips rows, so select them explicitly.
+  is_terminal <- tree.unrooted$edge[, 2] <= n_tips
+  tip_nodes <- tree.unrooted$edge[is_terminal, 2]
+  longest <- which.max(tree.unrooted$edge.length[is_terminal])
+  out_group <- tree.unrooted$tip.label[tip_nodes[longest]]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group,
+                        resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+# Root the fitted tree with root_generic_tree(), then draw it with ggtree
+set$fitGTRseqs$tree <- root_generic_tree(tree.unrooted = set$fitGTRseqs$tree)
+plots$fasta_ASV_tree <- ggtree::ggtree(set$fitGTRseqs$tree) +
+  ggtree::geom_tree() +
+  ggtree::geom_treescale(x = set$tree_scale_pos) +
+  ggtree::geom_tiplab() +
+  xlim(0, set$tree_width)
+# Save the figure via save_plot()
+save_plot(plots$fasta_ASV_tree, filetype = tmp$out,
+          plot_name = "phylogenetic_tree_from_fasta")
+# Empty the 'set' environment again
+rm(list = ls(envir = set), envir = set)
+# Print the tree
+plots$fasta_ASV_tree
+```
+
+#### Machine learning with SIAMCAT
+This is an experimental chunk implementing the machine learning functions of SIAMCAT following the tutorial steps and settings from the ["Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html). There is no convenient way to change the settings yet, because the usefulness of these models and the different optimal ways to run them need to be tested further. The chunk can be run as is and will produce a result if the basic SIAMCAT chunk above was run. However, the results should be treated with skepticism and not given extraordinary weight if the user is not confident that they understand the method used.
+
+```{r experimental-machine-learning, warning=FALSE}
+# Normalize the filtered features ("log.unit" with pseudocount log.n0 = 1e-06)
+sc.norm <- normalize.features(
+    sc.filt,
+    norm.method = "log.unit",
+    norm.param = list(
+        log.n0 = 1e-06,
+        n.p = 2,
+        norm.margin = 1
+    )
+)
+# Split the data into training and test sets (5 folds, 2 resampling rounds)
+sc.obj <-  create.data.split(
+    sc.norm,
+    num.folds = 5,
+    num.resample = 2
+)
+
+# Train a model (method "lasso") on the training data
+sc.obj <- train.model(
+     sc.obj,
+     method = "lasso"
+)
+
+# Store the trained models in a separate object and show the first entry
+models <- models(sc.obj)
+models[[1]]
+
+# Apply the models to the data and show the head of the prediction matrix
+sc.obj <- make.predictions(sc.obj)
+pred_matrix <- pred_matrix(sc.obj)
+head(pred_matrix)
+
+# Write the model interpretation plot to a PDF in the output folder 'outp'
+model.interpretation.plot(
+     sc.obj,
+     fn.plot = file.path(outp, 'model_interpretation.pdf'),
+     consens.thres = 0.5,
+     limits = c(-3, 3),
+     heatmap.type = 'zscore',
+ )
+
+cat("Model results stored to:", file.path(outp, 'model_interpretation.pdf'))
+```
+
+##### Credit
+This script is based on ideas and code from the [dada2 Tutorial](https://benjjneb.github.io/dada2/tutorial.html) by Benjamin Callahan, the publication "Bioconductor Workflow for Microbiome Data Analysis: from raw reads to community analyses" by [Callahan et al. (2016)](https://f1000research.com/articles/5-1492) and various pages of the official [phyloseq website](https://joey711.github.io/phyloseq) by Paul J. McMurdie.
\ No newline at end of file
diff --git a/Analysis_pipeline_v4-3 - oomy_Simeon.Rmd b/Analysis_pipeline_v4-3 - oomy_Simeon.Rmd
new file mode 100644
index 0000000..3a4ee7a
--- /dev/null
+++ b/Analysis_pipeline_v4-3 - oomy_Simeon.Rmd	
@@ -0,0 +1,1508 @@
+---
+title: "Analysis template v4-3"
+author: "Simeon Rossmann"
+date: "10.12.2020"
+output:
+  html_document: default
+urlcolor: blue
+---
+
+## Introduction and initialization
+
+This is an R Markdown file containing code to parse the results of a dada2 analysis into phyloseq for further analysis. It is separated into chunks that may be run independently by pressing the _play_ button. You will need **3 files** in the same location in order to run this pipeline successfully:
+
+* A sequence table called **'seqtab_nochim.rds'** (automatically generated by the dada2 pipeline)
+* A taxonomy table called **'taxa.rds'** (automatically generated by the dada2 pipeline)
+* A table describing the sample properties called **'descriptors.txt'** (Has to be provided)
+* Optional (some plots require this): A phylogenetic tree file called **'phylotree.rds'** (can be calculated if it is not provided, takes a long time)
+* Optional: Instead of **'taxa.rds'**, a **custom taxonomy file** may be provided (tab-delimited text)
+
+**Recommended use:** Adjust the individual chunks until you are content with the output, then knit the whole document into a PDF/html, so you have a full record of a successful run.
+
+#### Optional custom Taxonomy file
+A custom taxonomy file may be provided instead of using the taxonomy output from dada2. This may be used to supply taxonomy derived e.g. from BLAST searches of the ASVs. Custom taxonomy files must be tab-delimited text with as many rows as the original and column headers (for all columns except the first column). For example:
+
+>Kingdom	Phylum	Class	Order	Family	Genus	Species  
+>ESV1	Kingdomx	Phylumx	Classx	Orderx	Familyx	Genusx	Speciesx  
+>ESV2	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy  
+>ESV3	Kingdomz	Phylumz	Classz	Orderz	Familyz	Genusz	Speciesz  
+> ...
+>ESVn	Kingdomy	Phylumy	Classy	Ordery	Familyy	Genusy	Speciesy
+
+_Friendly warning: Parsing the results of a BLAST search into this format may require some effort._
+
+#### Descriptor table
+
+'descriptors.txt' should be a tab-delimited .txt table describing the samples. It must have the same length and order as the samples in seqtab_nochim.rds. To check the order and length of samples in seqtab_nochim.rds and generate a template to fill out, you may run the chunk below with "optional_sample_check" set to "TRUE". 
+
+Any number of descriptors is possible. The sample names may be retained as one descriptor, but this is not necessary, as they will be added during parsing. For example, if there are 4 samples (order: s1, s2, s3, s4), the txt file could look as follows:  
+
+>Subject	Species	Time  
+>Kar1	A.thaliana	24hpi  
+>Kar1	A.thaliana	72hpi  
+>Mec2	S.tuberosum	24hpi  
+>Mec3	S.tuberosum	24hpi  
+
+Finally, the file should end with an empty line, since it may throw an error otherwise. However, this is usually not a serious problem.
+
+**If you choose to use the blank file, you MUST retain the original order of the samples!**
+
+## Setup
+
+This chunk also loads required packages and defines the location of the input files. It requires the **correct path** as input, and allows **setting the pruning of control samples** and **choosing generation of a phylogenetic tree**.  **Beware:** The generation of a phylogenetic tree may take several days for >1000 sequences; it is therefore recommended to only use this feature for the final analysis or small sample sets. This script assumes the packages _Biostrings_, _dada2_, _DECIPHER_, _ggplot2_, _ggsci_, _phangorn_, _phyloseq_ and _stringr_ to be installed.
+
+```{r check-samples, message=FALSE, tidy=FALSE, warning=FALSE}
+# CHANGE ME: directory that contains 'seqtab_nochim.rds'
+path <- "oomy/"
+
+# CHANGE ME: TRUE lists all samples and writes an empty metadata template
+optional_sample_check <- FALSE
+
+# CHANGE ME: TRUE (re)installs cuphyr from GitHub
+update_cuphyr <- TRUE
+
+# Initiate by loading packages and setting knit options
+################# NO CHANGES NECESSARY BELOW #################
+knitr::opts_chunk$set(
+  echo = TRUE, root.dir = paste0(path),
+  message = FALSE, warning = FALSE
+)
+
+if (update_cuphyr) {
+  devtools::install_github("simeross/cuphyr")
+}
+
+# Sequence and microbiome specific libraries
+library(dada2)
+library(Biostrings)
+library(DECIPHER)
+library(cuphyr)
+# The export of phyloseq objects to a BIOM format and the generation of fancier 
+# ordination plots require the phyloseq-extended package. The first command 
+# installs the package that is currently on the dev branch of the author's 
+# repository, the second command sources some extra functions, including the 
+# better ordination plot implementation.
+remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
+source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
+
+library(phyloseq)
+library(SIAMCAT)
+
+# Phylogeny libraries
+library(phangorn)
+library(ape)
+
+# Plotting and figure export
+library(gridExtra)
+library(viridis)
+library(ggpubr)
+
+# Tidyverse
+library(tidyverse)
+library(stringr)
+
+
+# Output folder for all results; dir.create() warns if it already exists.
+outp <- paste0(path, "/analysis_output")
+dir.create(file.path(outp))
+
+if (!optional_sample_check) {
+  rm(optional_sample_check, update_cuphyr)
+} else {
+  seqtab_chk <- readRDS(paste0(path, "/seqtab_nochim.rds"))
+  sample_ids <- rownames(seqtab_chk)
+  n_samples <- length(sample_ids)
+  blank <- character(n_samples)
+  template <- data.frame(SampleIDs = sample_ids, ExampleProperty1 = blank,
+                         ExampleProperty2 = blank, ExampleProperty3 = blank)
+  write.table(template, file = paste0(path, "/descriptors_blank.txt"),
+              sep = "\t", row.names = FALSE)
+  cat("'seqtab_nochim.rds' contains samples in the following order:\n",
+      sample_ids, "\nThe number of samples in the file is:", n_samples, sep = "\n")
+  rm(optional_sample_check, seqtab_chk, sample_ids, n_samples, blank,
+     template, update_cuphyr)
+}
+```
+
+#### Parameters
+
+This chunk allows the adjustment of several parameters, such as **setting the pruning of control samples** based on keywords, **requiring that a phylogenetic tree be provided or generated**, **defining a minimum ASV count** and **providing an alternative taxonomy**.
+
+```{r setup, message=FALSE, tidy=TRUE, warning=FALSE}
+# All global analysis settings live in one dedicated environment, which keeps
+# them in one place and allows exporting them together
+parameters <- new.env()
+
+# CHANGE ME: "TRUE" removes control samples from the analysis, "FALSE" keeps
+# all samples.
+parameters$prune_controls <- "TRUE"
+  # CHANGE ME: identifiers that only occur in the names of samples you do NOT
+  # want to analyse. Common examples are provided.
+  parameters$controls <- c("Pos", "H2O", "Neg", "Kontr", "Contr", "POSK", "V1", "V2", "V3", "V4")
+
+# CHANGE ME: "TRUE" removes certain taxonomic groups from the analysis by
+# name, e.g. to exclude non-target organisms or organelle noise such as
+# Chloroplasts and Mitochondria. It is recommended to look at all data first
+# before using this setting.
+parameters$prune_noise_taxgroups <- "FALSE"
+  # CHANGE ME: the taxonomic groups to be removed as noise.
+  parameters$noise_taxgroups <- c("Chloroplast", "Mitochondria")
+
+# CHANGE ME: minimum number of ASV counts [~reads] a sample must have. Samples
+# with fewer counts than 'parameters$minASVcount' will be pruned. Set to 0 to
+# not prune any samples.
+parameters$minASVcount <- 0
+
+# CHANGE ME: "TRUE" uses a custom taxonomy table instead of the default dada2
+# output ('taxa.rds').
+parameters$customTax <- "TRUE"
+  # CHANGE ME: location of the custom taxonomy file. Only used if
+  # parameters$customTax is "TRUE", otherwise ignored.
+  parameters$taxfile <- "oomy/custom_BLAST_taxonomy_nt.txt"
+
+# CHANGE ME: "TRUE" generates a phylogenetic tree. Depending on the number of
+# sequences this takes a long time (up to days for thousands). A tree provided
+# as 'phylotree.rds' in 'path' is used regardless of the value of
+# 'parameters$maketree'.
+parameters$maketree <- "FALSE"
+
+# CHANGE ME: "TRUE" roots the used phylogenetic tree (if one exists) on the
+# leaf with the longest branch (outgroup). Analyses that rely on the tree then
+# become reproducible, because no random leaf is picked as root when
+# calculating UNIFRAC distances. Implementation based on
+# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
+# in https://github.com/joey711/phyloseq/issues/597
+parameters$roottree <- "TRUE"
+
+## CHANGE ME: "TRUE" exports all generated phyloseq objects as .biom objects
+parameters$biom_export <- "FALSE"
+```
+
+#### Parsing input data
+
+This chunk loads the input data into a usable format. **This chunk does not require any user inputs**. If no phylogenetic tree with the name 'phylotree.rds' was provided and 'parameters$maketree="TRUE"', it will be calculated here. The phylogenetic tree is necessary for certain plots that incorporate 'true' taxonomic relationships beyond the annotations, such as PCoA.
+
+```{r parse input, tidy=TRUE}
+############### NO NEED FOR CHANGES BELOW ###############
+# Environments for temporary values (tmp), plots (plots) and chunk settings (set)
+tmp <- new.env()
+plots <- new.env()
+set <- new.env()
+
+# Read the sequence table, taxonomy (custom file or 'taxa.rds') and descriptors
+tmp$seqtabp <- readRDS(paste0(path,"/seqtab_nochim.rds"))
+if (parameters$customTax == "TRUE") {
+  tmp$taxap <- read.delim(parameters$taxfile, header = TRUE, sep = "\t")
+  rownames(tmp$taxap) <- colnames(tmp$seqtabp)
+  tmp$taxap <- as.matrix(tmp$taxap)
+}else{
+  tmp$taxap <- readRDS(paste0(path,"/taxa.rds"))}
+tmp$samp_table <- read.delim(paste0(path, "/descriptors.txt"), 
+                             header = TRUE, sep = "\t")
+tmp$samp_list <- rownames(tmp$seqtabp)
+
+# Stop if the number of samples differs; warn if column 1 is not the sample names
+if (length(tmp$samp_table[,1]) != length(tmp$samp_list)) {
+  stop("There are ", length(tmp$samp_table[,1]), 
+    " samples in 'descriptors.txt', but ", length(tmp$samp_list), 
+    " samples in 'seqtab_nochim.rds'. Please make sure that the correct samples 
+    are contained in descriptors.txt.
+       
+    You may use 'optional_sample_check <- TRUE' in the first chunk to generate an 
+    empty template for 'descriptors.txt'" )
+} else if (!identical(tmp$samp_table[,1], tmp$samp_list)) {
+  warning("Warning: The samples in 'descriptors.txt' do not have the same names 
+          or order as the samples in 'seqtab_nochim.rds'. This may be fine if 
+          abbreviated names were used or the sample names are not contained in 
+          the first column of 'descriptors.txt'. Double-checking never hurts!")
+}
+
+
+# Build a phylogenetic tree of all ASVs and save it as 'phylotree.rds' in 'path',
+# but only if that file does not exist yet and 'parameters$maketree' is "TRUE"
+if (!file.exists(paste0(path, "/phylotree.rds"))) {
+  if (parameters$maketree == "TRUE") {
+    tmp$ASVs <- getSequences(tmp$seqtabp)
+    names(tmp$ASVs) <- tmp$ASVs
+    tmp$ASV_align <- AlignSeqs(DNAStringSet(tmp$ASVs), anchor = NA)
+    tmp$ASV_phang <- phyDat(as(tmp$ASV_align, "matrix"), type = "DNA")
+    tmp$dm <- dist.ml(tmp$ASV_phang)
+    tmp$treeNJ <- NJ(tmp$dm)
+    tmp$fit <- pml(tmp$treeNJ, data = tmp$ASV_phang)
+    tmp$fitGTR <- update(tmp$fit, k = 4, inv = 0.2)
+    tmp$fitGTR <- optim.pml(tmp$fitGTR, model = "GTR", optInv = TRUE, 
+                            optGamma = TRUE, rearrangement = "stochastic", 
+                            control = pml.control(trace = 0))
+    saveRDS(tmp$fitGTR, file = paste0(path, "/phylotree.rds"))}}
+
+## Parse into a phyloseq object; the tree is included if 'phylotree.rds' exists
+row.names(tmp$samp_table) <- tmp$samp_list
+if (file.exists(paste0(path, "/phylotree.rds"))) {
+  tmp$treep <- readRDS(paste0(path, "/phylotree.rds"))
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE), 
+                sample_data(tmp$samp_table), 
+                tax_table(tmp$taxap), 
+                phy_tree(tmp$treep$tree))
+}else{
+  p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE), 
+                sample_data(tmp$samp_table), tax_table(tmp$taxap))}
+
+## Keep the ASV sequences in the object and rename the taxa to ASV1, ASV2, ...
+tmp$ASV_sequences <- Biostrings::DNAStringSet(taxa_names(p))
+taxa_names(p) <- paste0("ASV", seq(ntaxa(p)))
+names(tmp$ASV_sequences) <- taxa_names(p)
+p <- merge_phyloseq(p, tmp$ASV_sequences)
+
+## Optional pruning: samples whose names match 'parameters$controls' leave 'ps'
+if (parameters$prune_controls == "TRUE" && length(parameters$controls) > 0) {
+  tmp$samp_clean <- tmp$samp_list[!grepl(
+    paste0(parameters$controls, collapse = "|"), tmp$samp_list)]
+  tmp$contr_pruned <- setdiff(tmp$samp_list, tmp$samp_clean)
+  ps <- prune_samples(tmp$samp_clean, p)
+  # Physeq object for just the controls (and its relative-abundance version)
+  ps.contr <- prune_samples(tmp$contr_pruned, p)
+  ps.contr <- prune_taxa(taxa_sums(ps.contr) > 0, ps.contr)
+  ps.transcontr <- transform_sample_counts(
+    ps.contr, function(ASV) ASV/sum(ASV))
+  # Report the pruned controls on standard output
+  cat("\n",
+      "Number of control samples that were pruned and will not be analysed:\n",
+      length(tmp$samp_list) - length(tmp$samp_clean),
+      "\n",
+      "The following controls were pruned:\n",
+      tmp$contr_pruned,
+      "The controls are contained in a separate phyloseq object: ps.contr",
+      "\n",
+      sep = "\n")
+} else {
+  if (parameters$prune_controls == "TRUE") warning(
+    "parameters$prune_controls is TRUE but 'parameters$controls' is empty. ",
+    "No samples were pruned.", call. = FALSE)
+  ps <- p
+}
+
+# Prune ASVs that match 'parameters$noise_taxgroups' at any taxonomic level
+if (parameters$prune_noise_taxgroups == "TRUE") {
+  tmp$ps_taxlvls <- colnames(tax_table(ps))
+  tmp$noise_ASVs <- character(0)
+  for (lvl in tmp$ps_taxlvls) {
+    tmp$noise_ASVs <- c(tmp$noise_ASVs, 
+                        cuphyr::list_subset_ASVs(
+                          physeq = ps, subv = parameters$noise_taxgroups, 
+                          taxlvlsub = lvl))
+  }
+  tmp$noise_ASVs <- unique(tmp$noise_ASVs)
+  tmp$no_noise_ASVs <- colnames(otu_table(ps))
+  tmp$no_noise_ASVs <- setdiff(tmp$no_noise_ASVs, tmp$noise_ASVs)
+  if (length(tmp$noise_ASVs) > 0) {
+    ps <- prune_taxa(tmp$no_noise_ASVs, ps)
+    tmp$no_noise_ps <- ps
+    cat(length(tmp$noise_ASVs), 
+        "ASVs were pruned because they belonged to the following 
+        taxonomic groups:\n")
+    cat(parameters$noise_taxgroups, "\n", sep = "\n")}
+  else{
+    cat("No ASVs were recognized as belonging to the following taxonomic groups 
+        defined as noise:\n")
+    cat(parameters$noise_taxgroups, "\n", sep = "\n")
+  }
+}
+
+# Prune samples with fewer reads (ASV counts) than parameters$minASVcount
+if (parameters$minASVcount > 0) {
+  tmp$samp_pruned <- names(which(sample_sums(ps) < parameters$minASVcount))
+  ps <- prune_samples(sample_sums(ps) >= parameters$minASVcount, ps)
+  if (length(tmp$samp_pruned) > 0) {
+    cat("The following samples were pruned because ASV counts were lower than", 
+        parameters$minASVcount,  ":\n")
+    cat(tmp$samp_pruned, "\n", sep = "\n")
+  }
+}
+
+# Remove 0 count ASVs (e.g. control ASVs that remain) from the base object
+ps <- prune_taxa(taxa_sums(ps) > 0, ps)
+
+# Root the tree (if the object has one) on its longest branch *before* deriving
+# other objects, so that 'ps.trans' carries the same rooted tree as 'ps'.
+if (parameters$roottree == "TRUE" &&
+    !is.null(phyloseq::phy_tree(ps, errorIfNULL = FALSE))) {
+  phyloseq::phy_tree(ps) <- cuphyr::root_tree_in_outgroup(physeq = ps)
+}
+# Long-format tibble and per-sample relative abundances of the base object
+ps_tbl <- as_tibble(psmelt(ps))
+ps.trans <- transform_sample_counts(ps, function(ASV) ASV/sum(ASV))
+
+# Optional export of the phyloseq objects as .biom files into 'path'. The
+# control object is only exported if it exists ('ps.contr' is created only in
+# the control-pruning branch).
+if (parameters$biom_export == "TRUE") {
+  tmp$biom_objects <- list(
+    all_samples = p, samples_without_controls = ps,
+    samples_without_controls_rel_abundance = ps.trans)
+  if (exists("ps.contr")) tmp$biom_objects$just_controls <- ps.contr
+  for (nm in names(tmp$biom_objects)) {
+    suppressWarnings(phyloseq.extended::write_phyloseq(
+      tmp$biom_objects[[nm]],
+      biom_file = file.path(path, paste0(nm, ".biom")),
+      biom_format = "standard"))
+  }
+}
+
+ps
+```
+
+## Output
+
+The chunks below will produce various plots and other output. Each chunk is headed by a description of the output and may contain some parameters to adjust the output. 
+
+#### Plot looks
+This chunk sets the background structure and color palette. Viridis was chosen because it is optimized for grey-scale printing and various types of color blindness. More info on the Viridis palette can be found on [the Viridis info page](https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html). It also establishes save_plot as a shorter variant of ggsave with customized date-time structure to save plots with the same name multiple times instead of overwriting them (overwriting can be triggered with overwrite=TRUE).
+
+```{r plot-design-global, tidy=FALSE, warning=FALSE}
+##### Optional settings (sensible defaults) #####
+# Output format shared by all saved plots. Default "pdf"; also possible are
+# "eps"/"ps", "tex" (pictex), "jpeg", "tiff", "png", "bmp" and "svg"
+parameters$output_format <- "pdf"
+
+# ggplot2 theme applied to all plots. Recommended: "theme_bw()".
+theme_set(theme_bw())
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+my_scale_col <- scale_color_viridis(discrete = TRUE)
+my_scale_fill <- scale_fill_viridis(discrete = TRUE)
+
+# Narrower colour ranges cut out of the viridis palettes (20 colours each).
+# The index order 1, 6, 11, 16, 2, 7, ... keeps adjacent colours distinct.
+tmp$sort_colors <- c(matrix(1:20, nrow = 4, byrow = TRUE))
+tmp$n_col <- 20
+# One vector per colour family
+tmp$viridis_greens <- viridis(
+  tmp$n_col, option = "D", begin = 0.85, end = 0.7)[tmp$sort_colors]
+tmp$viridis_reds <- viridis(
+  tmp$n_col, option = "B", begin = 0.7, end = 0.5)[tmp$sort_colors]
+tmp$viridis_blues <- viridis(
+  tmp$n_col, option = "D", begin = 0.2, end = 0.4)[tmp$sort_colors]
+tmp$viridis_yellows <- viridis(
+  tmp$n_col, option = "D", begin = 1, end = 0.9)[tmp$sort_colors]
+tmp$viridis_dark <- viridis(
+  tmp$n_col, option = "A", begin = 0, end = 0.1)[tmp$sort_colors]
+tmp$viridis_light <- viridis(
+  tmp$n_col, option = "A", begin = 1, end = 0.9)[tmp$sort_colors]
+# Named list of the colour families, available in the global environment
+sub_viridis <- list(
+  greens = tmp$viridis_greens, blues = tmp$viridis_blues,
+  yellows = tmp$viridis_yellows, lights = tmp$viridis_light,
+  reds = tmp$viridis_reds, darks = tmp$viridis_dark)
+
+tmp$out <- paste0(".", parameters$output_format)
+
+#################### Function ############################
+
+# Generic save function for plots (thin wrapper around ggsave). If the target 
+# file exists and overwrite is FALSE, a d-m-y_HMS time stamp is appended to the 
+# name instead of overwriting it. Width, height (cm) and resolution (dpi) are 
+# taken from set$wp, set$hp and set$res or default to 20x20 cm with 300dpi.
+save_plot <- function(
+  pl, filetype = ".pdf", plot_name = "my_plot", overwrite = FALSE){
+  wp <- if (!is.null(set$wp)) set$wp else 20
+  hp <- if (!is.null(set$hp)) set$hp else 20
+  res <- if (!is.null(set$res)) set$res else 300
+  target <- file.path(outp, paste0(plot_name, filetype))
+  if (file.exists(target) && !overwrite) {
+    target <- file.path(outp, paste0(
+      plot_name, "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), filetype))}
+  ggsave(target, pl,
+         width = wp, height = hp, units = "cm", dpi = res)
+}
+
+################################################
+```
+
+#### Total ASV counts ranked
+This chunk plots the absolute abundance of all samples (including controls) and all samples without controls and other trimmed samples.
+```{r overview}
+# CHANGE ME to the sample group for color coding. Accepted values are the column 
+# headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Rank samples by their total ASV count
+set$ranked <- cuphyr::make_ranked_sums(p, myset = tmp$subset_id)
+set$ranked_ps <- cuphyr::make_ranked_sums(ps, myset = tmp$subset_id)
+# Axis limits shared by all panels (y: max + 10 %, x: one rank extra)
+set$ymax <- max(set$ranked$Abundance)
+set$ymax <- set$ymax + round(set$ymax/10)
+set$xmax <- nrow(set$ranked) + 1
+# Noise ASVs defined and found? Then a third (middle) panel is made below.
+set$has_noise <- length(tmp$noise_ASVs) > 0
+
+# Stabilize colors: one fixed color per sorted value of the color_by column
+set$color_vars <- sort(unique(as.character(unlist(
+  set$ranked[,set$color_by]))))
+set$color_varsPalette <- viridis(length(set$color_vars))
+names(set$color_varsPalette) <- set$color_vars
+set$my_scale_fill <- scale_fill_manual(values = set$color_varsPalette)
+
+# Layers that are identical in all panels, collected once and added as a list
+set$bar_layers <- list(
+  aes_string(fill = set$color_by), geom_col(), set$my_scale_fill,
+  ylim(0, set$ymax), xlim(0,set$xmax), ylab("ASV counts ('reads')"))
+
+# plot
+# Top panel, made in all cases: ranked counts of object 'p' (presumably the
+# data before pruning - see the chunk that creates 'p')
+plots$overview_all <- ggplot(
+  data = set$ranked, aes(x = Rank, y = Abundance)) +
+  set$bar_layers + ggtitle("All samples")
+set$title2 <- "Samples (without controls)"
+
+# If there is a cutoff for ASV counts, indicate it in the top panel and adapt
+# the title of the bottom panel
+if (parameters$minASVcount > 0) {
+  plots$overview_all <- plots$overview_all +
+    geom_hline(yintercept = parameters$minASVcount, linetype = "dashed") +
+    ggtitle("All samples (ASV count cutoff indicated)")
+  set$title2 <- "Samples (without controls and low count samps)"
+}
+
+# Middle panel, only with noise ASVs: counts after removal of those
+if (set$has_noise) {
+  set$ranked_nonoise <- cuphyr::make_ranked_sums(
+    tmp$no_noise_ps, myset = tmp$subset_id)
+  plots$overview_noise <- ggplot(
+    data = set$ranked_nonoise, aes(x = Rank, y = Abundance)) +
+    set$bar_layers +
+    ggtitle("Samples (without controls), noise ASVs removed")
+}
+
+# Bottom panel, made in all cases: the final object ps without controls,
+# pruned samples and noise ASVs
+plots$overview_ps <- ggplot(
+  data = set$ranked_ps, aes(x = Rank, y = Abundance)) +
+  set$bar_layers + ggtitle(set$title2)
+
+# Combine the panels: before/raw, (noise removed,) after/ps
+plots$combo_overview <- if (set$has_noise) {
+  ggarrange(
+    plots$overview_all, plots$overview_noise, plots$overview_ps,
+    nrow = 3, align = "v",
+    common.legend = TRUE, legend = "right")
+} else {
+  ggarrange(
+    plots$overview_all, plots$overview_ps, nrow = 2, align = "v",
+    common.legend = TRUE, legend = "right")
+}
+
+#Save final plot
+save_plot(plots$combo_overview, plot_name = "Overview_all_and_pruned",
+          filetype = tmp$out)
+
+#Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+#Print final plots
+plots$combo_overview
+```
+
+#### Controls
+This chunk generates an overview over the controls (positive AND negative)
+
+```{r Positive controls}
+# CHANGE ME to the desired sample categories on the x-axis. In this case it 
+# should be the Sample names.
+set$x_axis_value = "SampleIDs"
+
+# CHANGE ME to the taxonomic level for color coding. Use "OTU" for ASVs, 
+# "Genus", "Species" or "OTU" recommended to compare pos. controls.
+set$color_by_taxlvl = "Genus"
+
+# CHANGE ME to the taxonomic level for labeling the tree tips (if phylogenetic 
+# tree is available). Use "OTU" for ASVs.
+set$label_by_taxlvl = "OTU"
+
+# CHANGE ME to a sample category to shape the tree tip labels by (if 
+# phylogenetic tree is available).
+set$label_shape_by = "SampleIDs"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to generate a tree for just the control sequences IF no 
+# phylogenetic tree for all sequences is provided. This may slow down this 
+# chunk when running it for the first time
+set$control_tree = TRUE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# If no tree is attached to the controls and 'set$control_tree' is TRUE, fit
+# one for the control ASVs, cached as 'controls_phylotree.rds' in 'path'.
+set$tree_file <- file.path(path, "controls_phylotree.rds")
+if (set$control_tree &&
+    inherits(try(phy_tree(ps.transcontr), silent = TRUE), "try-error")) {
+  if (!file.exists(set$tree_file)) {
+    set$ASVs <- phyloseq::refseq(ps.transcontr)
+    set$ASV_align <- AlignSeqs(set$ASVs, anchor = NA)
+    set$ASV_phang <- phyDat(as(set$ASV_align, "matrix"), type = "DNA")
+    set$dm <- dist.ml(set$ASV_phang)
+    set$treeNJ <- NJ(set$dm)
+    set$fit <- pml(set$treeNJ, data = set$ASV_phang)
+    set$fitGTR <- update(set$fit, k = 4, inv = 0.2)
+    set$fitGTR <- optim.pml(set$fitGTR, model = "GTR", 
+                            optInv = TRUE, optGamma = TRUE,
+                            rearrangement = "stochastic", 
+                            control = pml.control(trace = 0))
+    saveRDS(set$fitGTR, file = set$tree_file)}
+  set$fitGTR <- readRDS(set$tree_file)
+  phyloseq::phy_tree(ps.transcontr) <- set$fitGTR$tree
+}
+
+plots$topnpplot <- plot_bar(ps.contr, x = set$x_axis_value, 
+                            fill = set$color_by_taxlvl) + my_scale_fill + 
+  theme(axis.title.x = element_blank(), legend.position = "none", 
+        legend.key.size = unit(3, "mm")) + 
+  ylab("ASV counts") + guides(col = guide_legend(ncol = 3))
+
+plots$topntplot <- plot_bar(ps.transcontr, x = set$x_axis_value, 
+                            fill = set$color_by_taxlvl) + my_scale_fill + 
+  theme(axis.title.x = element_blank(), legend.position = "none", 
+        legend.key.size = unit(3, "mm")) + 
+  ylab("Relative abundance") + guides(col = guide_legend(ncol = 3))
+
+plots$combo_contr <- ggarrange(plots$topnpplot, plots$topntplot, ncol = 2, 
+                               labels = c("A", "B"), align = "hv", 
+                               common.legend = TRUE, legend = "right")
+
+if (!inherits(try(phy_tree(ps.transcontr), silent = TRUE), "try-error")) {
+plots$tre <- plot_tree(
+          ps.transcontr, ladderize = "left", label.tips = set$label_by_taxlvl, 
+          color = "abundance", text.size = 2.5, shape = set$label_shape_by) + 
+          scale_color_viridis_c(aesthetics = c("color","fill")) + 
+          theme(legend.position = "left", panel.border = element_blank())
+plots$combo_contr <- ggarrange(plots$tre, ggarrange(plots$topnpplot, 
+                                                    plots$topntplot, ncol = 2, 
+                               labels = c("B", "C"), align = "hv", 
+                               common.legend = TRUE, legend = "right"), 
+                               nrow = 2, legend = "right", labels = c("A")) 
+}
+# Save, then clean up plot parameters like the other chunks do, and print
+save_plot(plots$combo_contr, plot_name = "Controls", filetype = tmp$out)
+rm(list = ls(set), envir = set)
+plots$combo_contr
+```
+
+#### Richness plot
+
+This chunk plots the Alpha-Diversity according to the Shannon and Simpson indices. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Richness, tidy=TRUE, message=FALSE}
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are 
+# the column headers in the descriptor file.
+set$x_axis_value = "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the column 
+# headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Plot all diversity measures (stored as a 'try-error' object if this fails)
+plots$richP <- try(
+  plot_richness(ps, x = set$x_axis_value, color = set$color_by) + 
+                     my_scale_col, silent = TRUE)
+# Just Shannon and Simpson
+plots$richShSi <- plot_richness(ps, x = set$x_axis_value, 
+                                measures = c("Shannon", "Simpson"), 
+                                color = set$color_by) + my_scale_col
+
+# Save. inherits() is used because a ggplot object has more than one class, so
+# 'class(x) == "try-error"' is not a valid (length-one) condition for if().
+if (!inherits(plots$richP, "try-error")) {
+  save_plot(plots$richP, plot_name = "Alpha_diversity_all", filetype = tmp$out)
+}
+save_plot(plots$richShSi, plot_name = "Alpha_diversity_all_ShSi", 
+          filetype = tmp$out)
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+# Print to standard out
+if (!inherits(plots$richP, "try-error")) {
+  plots$richP
+}
+plots$richShSi
+```
+
+#### Bray-Curtis NMDS plot
+
+This chunk generates a non-metric multidimensional scaling (NMDS) plot of the Bray-Curtis dissimilarity, giving a two-dimensional measure of community diversity. This is done for the primary parameter and the taxonomic level separately. **The chunk does not require any input**, but it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary, as well as the **max. number of taxa to be displayed at taxlvl**. _Friendly warning: This chunk may not perform for lower order taxlvl, such as 'species', if they are not sufficiently abundant in all samples_
+
+```{r Bray-Curtis NMDS, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the 
+# column headers in the descriptor file.
+set$shape_by = "Vekst"
+# CHANGE ME to the sample group for color coding. Accepted values are the 
+# column headers in the descriptor file.
+set$color_by = "Skifte"
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+# Can be changed to change the number of Top n taxa plotted at taxlvl in 
+# separate panels, a maximum of 9 is recommended for good readability.
+set$top_n = 9
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Ordinate all taxa and the top n taxa at taxlvl (NMDS on Bray-Curtis distance)
+tmp$ord_nmds <- ordinate(ps.trans, method = "NMDS", distance = "bray")
+tmp$ps.topn <- cuphyr::abundant_tax_physeq(physeq = ps.trans, lvl = set$taxlvl, 
+                                           top = set$top_n)
+tmp$top_ord_nmds <- ordinate(tmp$ps.topn, method = "NMDS", distance = "bray")
+# Plot
+# Samples; legends are switched off with "none" (FALSE is deprecated in ggplot2)
+plots$nmds <- plot_samples(ps.trans, tmp$ord_nmds, color = set$color_by,
+                           shape = set$shape_by, title = "Bray NMDS") + 
+  my_scale_col + guides(color = "none", shape = "none")
+
+plots$nmds_tax <- plot_ordination(tmp$ps.topn, tmp$top_ord_nmds, type = "taxa",
+                                  color = set$taxlvl, 
+                                  title = paste0("Bray NMDS ", set$taxlvl)) + 
+  my_scale_col
+
+# One panel per taxon; the color scale is already part of plots$nmds_tax
+plots$nmds_taxpanels <- plots$nmds_tax + 
+  facet_wrap(paste0("~", set$taxlvl), scales = "free_x")
+
+# Save
+save_plot(plots$nmds, plot_name = paste0("NMDS_", set$shape_by, "_", 
+                                         set$color_by), filetype = tmp$out)
+save_plot(plots$nmds_tax, plot_name = paste0("NMDS_", set$taxlvl), 
+          filetype = tmp$out)
+save_plot(plots$nmds_taxpanels, 
+          plot_name = paste0("NMDS_top", set$top_n, "_", set$taxlvl), 
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$nmds
+plots$nmds_tax
+plots$nmds_taxpanels
+```
+
+#### PcoA (requires phylogenetic tree)
+
+This chunk generates an alternative common ordination plot, called 'PcoA', based on the primary variable, giving a two-dimensional measure of community diversity by considering the phylogenetic tree. **The chunk does not require any input**, although it is possible to adjust the **width**, **height** and **resolution** of the PDF-output if necessary. If the provided tree is not rooted, Phyloseq will root it to a random ASV. Root the tree to a given ASV to get consistent plots here (implementation will follow, until then, see: [this github issue](https://github.com/joey711/phyloseq/issues/235#issuecomment-26289761)).
+
+```{r PcoA, eval=FALSE, message=FALSE, warning=FALSE, tidy=TRUE}
+# CHANGE ME to the sample group for shape coding. Accepted values are the 
+# column headers in the descriptor file.
+set$shape_by = "Skifte"
+# CHANGE ME to the sample group for color coding. Accepted values are the 
+# column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# This plot needs a phylogenetic tree: skip it with a message if 'ps' has none
+if (inherits(try(phy_tree(ps), silent = TRUE), "try-error")) {
+  # Message could be more informative
+  cat("This plot could not be generated because no Phylogenetic tree was",
+      "provided.\n\n")
+}else{
+# Transform and ordinate
+  tmp$ord_pcoa <- ordinate(ps.trans, "PCoA", "unifrac", weighted = TRUE)
+# Plot
+  plots$pcoa <- plot_ordination(ps.trans, tmp$ord_pcoa, color = set$color_by, 
+                                shape=set$shape_by) + my_scale_col
+# Save
+  save_plot(plots$pcoa, plot_name = paste0("PCOA_", set$color_by, "_", 
+                                           set$shape_by), 
+            filetype = tmp$out)
+# Print to standard out (explicitly, as more code follows the if/else)
+  print(plots$pcoa)
+}
+
+# Clean up plot parameters, also when the plot was skipped, so that no
+# settings of this chunk leak into the next one
+rm(list = ls(set), envir = set)
+
+
+```
+
+#### Get a list of Top N taxa at a given level
+This chunk lists the top n most abundant taxonomic terms at a given level. Change the function parameters to the desired values. For more info, check help page of the function with `?cuphyr::abundant_tax_physeq()`. Change 'ignore_na' to include/exclude NA values at the given level.
+```{r Toplist}
+# Most abundant taxa of 'ps' at the given level; the resulting character
+# vector can later be accessed by calling 'tmp$tops'
+tmp$tops <- cuphyr::abundant_tax_physeq(
+  physeq = ps, lvl = "Genus", top = 20,
+  output_format = "tops",
+  ignore_na = TRUE,
+  silent = FALSE)
+```
+
+#### Top N ASVs/taxa Bar plot
+
+This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 20, a larger n may lead to delay/skipping of the plot in standard out, but it should be saved as a PDF regardless for ASVs. For taxa, a large n  may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+
+```{r Bar-plot, tidy=TRUE, message=FALSE}
+# CHANGE ME to the sample category that will be shown in separate panels. 
+# Accepted values are the column headers in your descriptor file.
+set$panel_by = "Skifte"
+set$panel_y_by = "Vekst"
+# CHANGE ME to the desired sample categories on the x-axis. Accepted values are 
+# the column headers in the descriptor file.
+set$x_axis_value = "SampleIDs"
+
+# CHANGE ME to the count of top ASVs you want to plot (e.g. 'set$topASVs = 20' 
+# plots the 20 most abundant ASVs)
+set$topASVs = 200
+
+# CHANGE ME to the taxonomic level of interest (color coding). Accepted values 
+# are the column headers in your descriptor file.
+set$taxlvl = "Genus"
+
+# CHANGE ME to change the number of Top n taxa to be plotted at 
+# taxlvl.
+set$top_n = 20
+
+# CHANGE ME to an entry at the chosen taxonomic level you want to highlight. 
+# Comment out to not highlight anything.
+set$highlight = "Globisporangium"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to turn unified coloring on or off (same taxonomy term = same 
+# color in both plots). Highlighting will unify colors even if unify_colors 
+# is FALSE.
+set$unify_colors = TRUE
+
+# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
+set$ignore_na = TRUE
+
+# Can be changed to remove ASV segmentation in the top n taxlvl plot. This 
+# improves visual clarity when a bar segment appears black due to the border of
+# many small ASVs overlapping. (This is broken)
+set$fuse_ASVs = FALSE
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 19
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 15
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+# Can be changed to change the y-axis label
+set$y_axis_label = "Relativ abundans"
+
+############### NO NEED FOR CHANGES BELOW ###############
+
+# Make physeq objects of top n taxa and top n ASVs
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(ps.trans, lvl = set$taxlvl, 
+                                              top = set$top_n, 
+                                              ignore_na = set$ignore_na)
+# head() instead of [1:n] avoids NA names when there are fewer than n ASVs
+set$topnASVs <- head(names(sort(taxa_sums(ps), decreasing = TRUE)), 
+                     set$topASVs)
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.trans)
+
+# inherits = FALSE: only a 'highlight' stored in 'set' itself counts, not an
+# object of that name found elsewhere on the search path
+set$has_highlight <- exists("highlight", envir = set, inherits = FALSE)
+set$shared_colors <- set$unify_colors || set$has_highlight || set$fuse_ASVs
+if (set$shared_colors) {
+  set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl], 
+                      phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
+  set$toptax <- sort(set$toptax)
+  set$taxlvlPalette <- viridis(length(set$toptax))
+  names(set$taxlvlPalette) <- set$toptax
+  if (set$has_highlight) {
+    # It is possible to change the highlight color here by substituting 
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+}else{
+  set$my_scale_fill <- my_scale_fill
+}
+
+# Plot
+if (set$shared_colors) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_tax <- plot_bar(set$ps.topnTax, 
+                           x = set$x_axis_value, 
+                           fill = set$taxlvl, 
+                           title = paste0("Top", set$top_n, "_", set$taxlvl)) + 
+  facet_grid(paste0(set$panel_y_by, "~", set$panel_by), scales = "free_x") +
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+
+if (set$fuse_ASVs) { 
+  plots$topn_tax <- plots$topn_tax + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl), 
+    stat = "identity", position = "stack") + 
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+if (set$shared_colors) {
+set$my_scale_fill <- scale_fill_manual(
+  values = set$taxlvlPalette[
+    sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
+  na.value = "grey")
+}
+plots$topn_ASVs <- plot_bar(set$ps.topnASVs, 
+                            x = set$x_axis_value, 
+                            fill = set$taxlvl, 
+                            title = paste0("Top", set$topASVs, "_ASVs")) + 
+  facet_grid(paste0("~", set$panel_by), scales = "free_x", space = "free") + 
+  set$my_scale_fill + 
+  ylab(set$y_axis_label)
+# save
+save_plot(plots$topn_tax, plot_name = paste0("Top", set$top_n, "_", set$taxlvl), 
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs, plot_name = paste0("Top", set$topASVs, "_ASVs"), 
+          filetype = tmp$out)
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+# Print to standard out
+plots$topn_tax
+plots$topn_ASVs
+```
+
+#### Siamcat
+This chunk implements statistical testing of ASVs that are differentially abundant for a given biological trait (column in descriptors.txt). It can also test whether grouping variables other than the tested one are associated with the abundance data in a similar or different way than the chosen trait (confounders). The chunk is largely based on the [SIAMCAT "Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html).
+There are several options that can be chosen.
+
+```{r siamcat, warning=FALSE}
+# CHANGE ME to the sample category that will be used as the test group. ASVs 
+# that are differentially abundant according to this grouping will be detected. 
+# Accepted values are the column headers in your descriptor file.
+set$test_label = "Dato"
+# CHANGE ME to the value in the test group column that marks the case group. 
+# It is passed to SIAMCAT as 'case' (see the siamcat() call below).
+set$case_value = "J21"
+
+# CHANGE ME to the cutoff p-value for selecting significant ASVs (FDR-adjusted 
+# p-value)
+set$p_val_cutoff = 0.05
+
+# CHANGE ME to the taxonomic level of interest for more informative ASV 
+# annotation (format: taxlv-ASV)
+set$taxlvl = "Genus"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to filter low-abundance ASVs. Sequence variants with lower 
+# abundance will not be analysed to reduce artifacts
+set$filter_abundance = 0.001
+
+# Can be changed to include (TRUE) or exclude (FALSE) an output file where all 
+# possible confounders are checked. This will analyse the confounding effect of 
+# other factors in 'descriptors' over the chosen test group and produce a pdf 
+# file containing several plots.
+set$check_confounders = TRUE
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Make a copy of the transformed physeq object and parse taxonomic information 
+# for the chosen taxlvl into ASV names (taxlvl_ASV) to give more informative plots.
+ps.siam <- ps.trans
+taxa_names(ps.siam) <- tax_table(ps.trans) %>% 
+  as.data.frame() %>% 
+  rownames_to_column(var = "OTU") %>% 
+  unite(col = OTU, set$taxlvl, OTU) %>% 
+  select(OTU) %>% 
+  unlist() %>% 
+  unname()
+
+# Read in transformed physeq object as SIAMCAT object and choose trait
+sc.trans <- siamcat(phyloseq = ps.siam, 
+                    label = set$test_label, 
+                    case = set$case_value)
+
+# print the generated Siamcat object to check for valid parsing
+show(sc.trans)
+# Filter ASVs with an abundance below set$filter_abundance
+sc.filt <- filter.features(sc.trans,
+    filter.method = 'abundance',
+    cutoff = set$filter_abundance)
+# check confounders if the option is TRUE
+if (set$check_confounders) {  
+  sc.conf <- check.confounders(
+      sc.filt,
+      fn.plot = file.path(outp, 'confounder_plots.pdf'),
+      meta.in = NULL,
+      feature.type = 'filtered',
+      verbose = 1)
+  cat("Confounders checked, results stored in", 
+      file.path(outp, 'confounder_plots.pdf'))
+}
+
+# Plot associations and save the analysis to the siamcat object
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc',
+    alpha = set$p_val_cutoff,
+    mult.corr = "fdr",
+    detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    fn.plot = file.path(outp, 
+                  paste0("Differential_abundance_", set$test_label,
+                         "_", format(Sys.time(), "%d-%m-%y_%H%M%S"), ".pdf")))
+# Plot associations again to standard out
+sc.filt <- check.associations(
+    sc.filt,
+    sort.by = 'fc',
+    alpha = set$p_val_cutoff,
+    mult.corr = "fdr",
+    detect.lim = 10^-6,
+    plot.type = "quantile.box",
+    panels = c("fc", "prevalence", "auroc"),
+    prompt = FALSE,
+    verbose = 0)
+
+# record plot from standard out
+plots$siam_assoc <- recordPlot()
+
+# Turn significant hits into tbl, if there are any, generate a vector containing 
+# significant tax groups at taxlvl and a vector containing significant ASVs.
+# extra = "merge" keeps ASV names that contain "_" themselves in one piece.
+tbl_me_this <- associations(sc.filt) %>%
+  filter(p.adj < set$p_val_cutoff) %>%
+  rownames_to_column("tax_ASV") %>%
+  separate(col = "tax_ASV", into = c("tax", "ASV"), sep = "_", 
+           extra = "merge") %>%
+  select(tax, ASV, p.adj)
+
+if (nrow(tbl_me_this) > 0) {
+  significant_tax_groups <- select(tbl_me_this, tax) %>%
+    unique() %>% unlist() %>% unname()
+  significant_ASVs <- select(tbl_me_this, ASV) %>%
+    unique() %>% unlist() %>% unname()
+  cat(sep = "\n", 
+      paste("The following taxonomic groups were found to be differentially",
+            "abundant and stored in 'significant_tax_groups':"),
+      significant_tax_groups, 
+      "This object can be used to set a subgroup in the chunk below.")
+} else {
+  # No hits: drop results of an earlier run so they cannot be reused silently
+  rm(list = intersect(c("significant_tax_groups", "significant_ASVs"), ls()))
+}
+  
+```
+
+
+## Subset the Phyloseq object by taxonomic group(s)
+
+This chunk gives the option to create a subset of the general Phyloseq object by providing a vector of search terms and a taxonomic level to search at. It requires **one or more search terms**, a **taxonomic level** to search at and a **description of the subset**. The description will only be used for the titles of plots generated from the subsets.
+
+```{r Subset by Taxonomic identity, tidy=TRUE}
+# Vector to subset on
+set$subv = c("Globisporangium irregulare")
+# CHANGE ME to the taxonomic level at which you want to search for matching 
+# entries
+set$taxlvlsub = "Species"
+# CHANGE ME to a descriptor for the subset
+tmp$subset_id = "Glob of interest"
+# CHANGE ME if you want to use the significant groups found by SIAMCAT. If TRUE, 
+# those groups will be used in addition to the groups specified in set$subv.
+set$use_siamcat_results = TRUE
+
+# CHANGE ME to the sample group for color coding in the summary plot. 
+# Accepted values are the column headers in the descriptor file.
+set$color_by = "Vekst"
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp = 17
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp = 12  
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res = 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Add the taxonomic groups found by SIAMCAT (if requested and available)
+if (set$use_siamcat_results && exists("significant_tax_groups")) { 
+  set$subv <- unique(c(set$subv, significant_tax_groups))
+}
+set$subASVs <- cuphyr::list_subset_ASVs(subv = set$subv, 
+                                        taxlvlsub = set$taxlvlsub)
+ps.subs <- prune_taxa(set$subASVs, ps)
+ps.subs.trans <- prune_taxa(set$subASVs, ps.trans)
+
+# plot the reads in the subset
+set$ranked <- cuphyr::make_ranked_sums(ps.subs, myset = tmp$subset_id)
+set$avg <- mean(set$ranked$Abundance)
+set$avg_round <- format(round(set$avg, 0), nsmall = 0)
+plots$subset <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) + 
+  aes_string(fill = set$color_by) + 
+  geom_col() + my_scale_fill + 
+  geom_hline(yintercept = set$avg, linetype = "dashed") + 
+  ylab("ASV counts ('reads')") +
+  ggtitle(paste0("Subset: ", tmp$subset_id, 
+                 " (average ASV count ",set$avg_round, ")"))
+# Save plot
+save_plot(plots$subset, plot_name = "Subset_overview", filetype = tmp$out)
+# Print plot
+plots$subset
+# print info on generated object
+cuphyr::summarise_physeq(ps, ASV_sublist = set$subASVs, 
+                         sublist_id = tmp$subset_id, samp_names = FALSE)
+
+# Optional export of the subset (ps.subs) as biom-file. The file name is built
+# from a local copy, so 'tmp$subset_id' (also used for titles) keeps its spaces.
+if (parameters$biom_export == "TRUE") {
+  set$biom_id <- str_replace_all(tmp$subset_id, " ", "_")
+  suppressWarnings(phyloseq.extended::write_phyloseq(
+    ps.subs, biom_format = "standard",
+    biom_file = file.path(path, paste0("subset_", set$biom_id, ".biom"))))
+}
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+```
+
+#### Bar plots for subsets of taxonomic group(s)
+The chunk is very similar to the vanilla bar plot chunk above but takes the subset data instead of the complete Phyloseq object. This chunk plots abundance of the Top n ASVs or taxa at a given level as a bar plot, giving an insight into the presence of the n ASV and most common taxa for the primary and secondary parameters. The default for n is set at 100 for subsets. The range of n for subsets is larger, since the taxonomic variety was already reduced by the subsetting, meaning the Top 100 ASVs likely belong to few species. For taxa, a large n may lead to unreadable plots. **The chunk does not require any input**, but it is possible to adjust the **default 'n'**, and to change **width**, **height** and **resolution** of the PDF-output if necessary.
+```{r Bar plot subset, tidy=TRUE, message=FALSE}
+# CHANGE ME: sample categories used to split the plots into panels. 'panel_by'
+# gives the panel columns, 'panel_y_by' the panel rows of the top-taxa plot.
+set$panel_by <- "Vekst"
+set$panel_y_by <- "Skifte"
+# CHANGE ME: sample category shown on the x-axis. Accepted values are the
+# column headers in the descriptor file.
+set$x_axis_value <- "SampleIDs"
+
+# CHANGE ME: number of top ASVs to plot (e.g. 'set$topASVs <- 20' plots the 20
+# most abundant ASVs).
+set$topASVs <- 200
+
+# CHANGE ME: taxonomic level used for the color coding. It is looked up as a
+# column of the taxonomy table (e.g. "Genus" or "Species").
+set$taxlvl <- "Species"
+# CHANGE ME: number of top taxa to plot at 'taxlvl'.
+set$top_n <- 10
+
+# CHANGE ME: entry at the chosen taxonomic level that should be highlighted.
+# Leave commented out to not highlight anything.
+#set$highlight <- "Pythium"
+
+##### Optional settings (sensible defaults) #####
+
+# Unified coloring on/off (same taxonomy term = same color in both plots).
+# Highlighting or fusing ASVs unifies the colors even if unify_colors is
+# FALSE.
+set$unify_colors <- TRUE
+
+# Include (FALSE) or exclude (TRUE) NA values in the bar plot.
+set$ignore_na <- FALSE
+
+# Remove the ASV segmentation in the top n taxlvl plot. This improves visual
+# clarity when a bar segment appears black because the borders of many small
+# ASVs overlap.
+set$fuse_ASVs <- FALSE
+
+# Width (in cm) of the saved plot.
+set$wp <- 19
+# Height (in cm) of the saved plot.
+set$hp <- 15
+# Resolution (in dpi) of the saved plot.
+set$res <- 300
+
+# Label of the y-axis.
+set$y_axis_label <- "Relative abundance"
+
+############### NO NEED FOR CHANGES BELOW ###############
+# Make physeq objects of top n taxa and top n ASVs
+set$ps.topnTax <- cuphyr::abundant_tax_physeq(
+  ps.subs.trans, lvl = set$taxlvl, top = set$top_n, ignore_na = set$ignore_na)
+# Rank only ASVs of the subset (by counts in 'ps'); head() avoids NA padding.
+set$subs_sums <- taxa_sums(ps)[taxa_names(ps.subs.trans)]
+set$topnASVs <- head(names(sort(set$subs_sums, decreasing = TRUE)), set$topASVs)
+set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.subs.trans)
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl],
+                      phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
+  set$toptax <- sort(set$toptax)
+  set$taxlvlPalette <- viridis(length(set$toptax))
+  names(set$taxlvlPalette) <- set$toptax
+  if (exists("highlight", envir = set)) {
+    # It is possible to change the highlight color here by substituting
+    # 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f'
+    set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
+  }
+  set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
+  set$my_scale_fill <- scale_fill_manual(values = set$taxlvlPalette,
+                                         na.value = "grey")
+}else{
+  set$my_scale_fill <- my_scale_fill
+}
+
+# Top n taxa plot: restrict the shared palette to the taxa present in it
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$my_scale_fill <- scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
+    na.value = "grey")
+}
+
+plots$topn_tax_subset <- plot_bar(set$ps.topnTax, x = set$x_axis_value,
+                                  fill = set$taxlvl, title = paste0(
+                                    "Top", set$top_n, "_", set$taxlvl)) +
+  facet_grid(paste0(set$panel_y_by, "~", set$panel_by), scales = "free_x", space = "free") +
+  set$my_scale_fill +
+  ylab(set$y_axis_label)
+
+if (set$fuse_ASVs) {
+  plots$topn_tax_subset <- plots$topn_tax_subset + geom_bar(
+    aes_string(color = set$taxlvl, fill = set$taxlvl),
+    stat = "identity", position = "stack") +
+    scale_color_manual(values = set$taxlvlPalette, na.value = NA)
+}
+
+if (set$unify_colors || exists("highlight", envir = set) || set$fuse_ASVs) {
+  set$my_scale_fill <- scale_fill_manual(
+    values = set$taxlvlPalette[
+      sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
+    na.value = "grey")
+}
+plots$topn_ASVs_subset <- plot_bar(set$ps.topnASVs, x = set$x_axis_value,
+                                   fill = set$taxlvl, title = paste0(
+                                     "Top", set$topASVs, "_ASVs")) +
+  facet_wrap(paste0("~", set$panel_by), scales = "free_x") +
+  set$my_scale_fill +
+  ylab(set$y_axis_label)
+
+# save
+save_plot(plots$topn_tax_subset,
+          plot_name = paste0("Top", set$top_n, "_", set$taxlvl, "_subset"),
+          filetype = tmp$out)
+save_plot(plots$topn_ASVs_subset,
+          plot_name = paste0("Top", set$topASVs, "_ASVs_subset"),
+          filetype = tmp$out)
+
+# Clean up plot parameters
+rm(list = ls(set), envir = set)
+
+# Print to standard out
+plots$topn_tax_subset
+plots$topn_ASVs_subset
+```
+### Abundance tables
+```{r Abundance tables per genus, message=FALSE}
+ps_trans_tbl <- as_tibble(psmelt(ps.trans))
+
+# Genera of interest
+genera_of_interest <- c("Aphanomyces", "Pythium", "Globisporangium", "Phytophthora")
+
+# Total abundance per genus -> columns "Genus" and "Genus_total_Abundance".
+# NOTE(review): ps_tbl is not created in this chunk (only ps_trans_tbl) - confirm.
+genus_abundance_tbl <- ps_tbl %>%
+  group_by(Genus) %>%
+  summarise(Genus_total_Abundance = sum(Abundance))
+
+# Per genus and sample, restricted to the genera of interest ('ratio' = share)
+genus_abundance_tbl_per_sample <- ps_trans_tbl %>%
+  group_by(Genus, Sample) %>%
+  summarise(Genus_Sample_Abundance = sum(Abundance)) %>%
+  filter(Genus %in% genera_of_interest) %>%
+  group_by(Sample) %>%
+  mutate(sums_abu_sample = sum(Genus_Sample_Abundance)) %>%
+  ungroup() %>%
+  mutate(ratio = Genus_Sample_Abundance / sums_abu_sample)
+
+# To get back all the other info, one way is to fuse back to original table
+ps_tbl_with_genus_abundance <- left_join(ps_tbl, genus_abundance_tbl_per_sample,
+                                         by = c("Genus", "Sample"))
+
+# Can be made more compact by removing ASV+Species info and original abundance
+# values and then reducing to unique rows
+genus_abundance_tbl_full_info <- ps_tbl_with_genus_abundance %>%
+  select(-OTU, -Abundance, -Species) %>%
+  unique()
+view(genus_abundance_tbl_per_sample)
+
+library(readxl)
+# Lower-case folder name, as in the repository (case-sensitive file systems).
+Morfologisk <- read.delim("oomy/descriptors.txt")
+morph_data <- tibble(Morfologisk)
+view(morph_data)
+# Join the per-sample abundances with the descriptor table on the sample ID.
+data_mol_morph <- genus_abundance_tbl_per_sample %>%
+  full_join(morph_data, by = c("Sample" = "SampleIDs"))
+
+view(data_mol_morph)
+
+# Scatter plot
+mel_plot <- ggplot(data_mol_morph, aes(x = sums_abu_sample.x, y = pH_tall)) +
+  geom_point() +
+  geom_smooth(formula = y ~ x, method = "lm") +
+  ggtitle("Oomyceter") +
+  labs(y = "pH", x = "Relative abundance")
+
+mel_plot
+# Install 'xlsx' only when missing; write.xlsx() gets a plain data.frame.
+if (!requireNamespace("xlsx", quietly = TRUE)) install.packages("xlsx")
+library(xlsx)
+write.xlsx(as.data.frame(data_mol_morph), "oomy/morph_data.xlsx")
+```
+
+
+### Other phylogenetic trees
+For these chunks, the ggtree library is required. If you are not sure whether it is installed, run the following chunk.
+
+```{r check-ggtree}
+# BiocManager is installed first, because ggtree is installed through it.
+if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
+# ggtree is only installed when its namespace cannot be loaded yet.
+if (!requireNamespace("ggtree", quietly = TRUE)) BiocManager::install("ggtree")
+```
+
+#### Generic phylogenetic tree (from a subset)
+
+This chunk allows the generation of a generic phylogenetic tree for a given subset of the phyloseq object, even if none is provided for the whole set.
+```{r tree-auto}
+# CHANGE ME to the vector of taxa to subset on (the larger the subset, the
+# longer the tree generation will take!)
+set$subv <- c("Phytophthora", "Pythium", "Globisporangium")
+# CHANGE ME to the taxonomic level at which you want to search for matching
+# entries
+set$taxlvlsub <- "Genus"
+
+# CHANGE ME to the width of the tree shown (depends on its size, start large,
+# make smaller until best fit is achieved)
+set$tree_width <- 11
+# CHANGE ME to the position you want the tree's scale to be shown. Must be
+# smaller than set$tree_width, larger = more to the right.
+set$tree_scale_pos <- 10
+
+##### Optional settings (sensible defaults) #####
+
+# Can be changed to change the width (in cm) of the saved plot.
+set$wp <- 20
+# Can be changed to change the height (in cm) of the saved plot.
+set$hp <- 20
+# Can be changed to change the resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$subASVs <- cuphyr::list_subset_ASVs(subv = set$subv,
+                                        taxlvlsub = set$taxlvlsub)
+set$ps.treesubs <- prune_taxa(set$subASVs, ps)
+set$seqs <- phyloseq::refseq(set$ps.treesubs)
+# Align the sequences, then fit a GTR model starting from an NJ tree.
+set$align <- AlignSeqs(DNAStringSet(set$seqs), anchor = NA)
+set$seqs_phang <- phangorn::phyDat(as(set$align, "matrix"), type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- NJ(set$seqs_dm)
+set$seqs_fit <- pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, k = 4, inv = 0.2)
+set$fitGTRseqs <- optim.pml(set$fitGTRseqs, model = "GTR", optInv = TRUE,
+                            optGamma = TRUE, rearrangement = "stochastic",
+                            control = pml.control(trace = 0))
+
+# Root a 'phylo' tree at the tip with the longest terminal branch. Generic
+# stand-in for the outgroup rooting in cuphyr (physeq only, per the original
+# note); consider implementing a generic tree version there.
+root_generic_tree <- function(tree.unrooted) {
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  # Tips are nodes 1..n_tips, so an edge is terminal when its child node
+  # (column 2 of $edge) is a tip. Terminal edges are not necessarily the first
+  # n_tips rows of $edge, so they are selected explicitly.
+  is_terminal <- tree.unrooted$edge[, 2] <= n_tips
+  tip_ids <- tree.unrooted$edge[is_terminal, 2]
+  tip_lengths <- tree.unrooted$edge.length[is_terminal]
+  out_group <- tree.unrooted$tip.label[tip_ids[which.max(tip_lengths)]]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group,
+                        resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+# Root the optimised tree, then draw it with a scale bar and tip labels.
+set$fitGTRseqs$tree <- root_generic_tree(set$fitGTRseqs$tree)
+plots$subset_ASV_tree <- ggtree::ggtree(set$fitGTRseqs$tree) + list(
+  ggtree::geom_tree(),
+  ggtree::geom_treescale(x = set$tree_scale_pos),
+  ggtree::geom_tiplab(),
+  xlim(0, set$tree_width))
+
+# Save the plot through save_plot() (file type taken from tmp$out).
+save_plot(plots$subset_ASV_tree, plot_name = "subset_phylogenetic_tree",
+          filetype = tmp$out)
+# Empty the settings environment again.
+rm(list = ls(set), envir = set)
+# Print the tree.
+plots$subset_ASV_tree
+```
+
+#### Generic phylogenetic tree (from any FASTA)
+
+This chunk allows the generation of a generic phylogenetic tree for any given fasta. This may be useful to compare the phylogeny of a given set of ASVs and some reference sequences.
+
+```{r tree-manual}
+# CHANGE ME: path of the FASTA file to build a phylogenetic tree for (the
+# larger the FASTA, the longer the tree generation will take!)
+set$fasta <- "oomy/for_phylogeny.fasta"
+
+# CHANGE ME: width of the tree shown (depends on its size; start large and
+# make it smaller until the best fit is achieved)
+set$tree_width <- 11
+# CHANGE ME: position at which the tree's scale is shown. Must be smaller
+# than set$tree_width, larger = more to the right.
+set$tree_scale_pos <- 10
+
+##### Optional settings (sensible defaults) #####
+
+# Width (in cm) of the saved plot.
+set$wp <- 20
+# Height (in cm) of the saved plot.
+set$hp <- 20
+# Resolution (in dpi) of the saved plot.
+set$res <- 300
+
+############### NO NEED FOR CHANGES BELOW ###############
+set$seqs <- readDNAStringSet(set$fasta)
+# Align the sequences, then fit a GTR model starting from an NJ tree.
+set$align <- AlignSeqs(DNAStringSet(set$seqs), anchor = NA)
+set$seqs_phang <- phangorn::phyDat(as(set$align, "matrix"), type = "DNA")
+set$seqs_dm <- phangorn::dist.ml(set$seqs_phang)
+set$seqs_treeNJ <- NJ(set$seqs_dm)
+set$seqs_fit <- pml(set$seqs_treeNJ, data = set$seqs_phang)
+set$fitGTRseqs <- update(set$seqs_fit, k = 4, inv = 0.2)
+set$fitGTRseqs <- optim.pml(set$fitGTRseqs, model = "GTR", optInv = TRUE,
+                            optGamma = TRUE, rearrangement = "stochastic",
+                            control = pml.control(trace = 0))
+
+# Root a 'phylo' tree at the tip with the longest terminal branch. Generic
+# stand-in for the outgroup rooting in cuphyr (physeq only, per the original
+# note); consider implementing a generic tree version there.
+root_generic_tree <- function(tree.unrooted) {
+  if (!requireNamespace("ape", quietly = TRUE)) {
+    stop("The function 'root_generic_tree' requires the package 'ape' to be ",
+         "installed. Please make sure it can be loaded.", call. = FALSE)
+  }
+  n_tips <- ape::Ntip(tree.unrooted)
+  # Tips are nodes 1..n_tips, so an edge is terminal when its child node
+  # (column 2 of $edge) is a tip. Terminal edges are not necessarily the first
+  # n_tips rows of $edge, so they are selected explicitly.
+  is_terminal <- tree.unrooted$edge[, 2] <= n_tips
+  tip_ids <- tree.unrooted$edge[is_terminal, 2]
+  tip_lengths <- tree.unrooted$edge.length[is_terminal]
+  out_group <- tree.unrooted$tip.label[tip_ids[which.max(tip_lengths)]]
+  new_tree <- ape::root(tree.unrooted, outgroup = out_group,
+                        resolve.root = TRUE)
+  message("Tree successfully rooted.")
+  new_tree
+}
+
+# Root the optimised tree, then draw it with a scale bar and tip labels.
+set$fitGTRseqs$tree <- root_generic_tree(set$fitGTRseqs$tree)
+plots$fasta_ASV_tree <- ggtree::ggtree(set$fitGTRseqs$tree) + list(
+  ggtree::geom_tree(),
+  ggtree::geom_treescale(x = set$tree_scale_pos),
+  ggtree::geom_tiplab(), xlim(0, set$tree_width))
+
+# Save the plot through save_plot() (file type taken from tmp$out).
+save_plot(plots$fasta_ASV_tree, plot_name = "phylogenetic_tree_from_fasta",
+          filetype = tmp$out)
+# Empty the settings environment again.
+rm(list = ls(set), envir = set)
+# Print the tree.
+plots$fasta_ASV_tree
+```
+
+#### Machine learning with SIAMCAT
+This is an experimental chunk implementing the machine learning functions of SIAMCAT following the tutorial steps and settings from the ["Get started" vignette](https://siamcat.embl.de/articles/SIAMCAT_vignette.html). There is no convenient way to change the settings yet, because the usefulness of these models and the different optimal ways to run them need to be tested further. The chunk can be run as is and will produce a result if the basic SIAMCAT chunk above was run. However, the results should be treated with skepticism and not given extraordinary weight if the user is not confident that they understand the method used.
+
+```{r experimental-machine-learning, warning=FALSE}
+##### Normalization #####
+# Log-transform the filtered features ("log.unit") with the pseudocount and
+# norm settings given in norm.param.
+sc.norm <- normalize.features(
+  sc.filt,
+  norm.method = "log.unit",
+  norm.param = list(
+    log.n0 = 1e-06, n.p = 2, norm.margin = 1
+  )
+)
+
+##### Data split #####
+# Split the data into training and test sets (5 folds, 2 resampling rounds)
+# to validate the model.
+sc.obj <- create.data.split(sc.norm, num.folds = 5, num.resample = 2)
+
+##### Training #####
+# Train a "lasso" model on the training set.
+sc.obj <- train.model(sc.obj, method = "lasso")
+
+##### Trained models #####
+# Keep the models in a separate object and show the first entry.
+models <- models(sc.obj)
+models[[1]]
+
+##### Predictions #####
+# Run the model on the data and show the top of the prediction matrix.
+sc.obj <- make.predictions(sc.obj)
+pred_matrix <- pred_matrix(sc.obj)
+head(pred_matrix)
+
+##### Interpretation plot #####
+# Write the model interpretation plot to file.path(outp, ...).
+model.interpretation.plot(
+  sc.obj,
+  fn.plot = file.path(outp, 'model_interpretation.pdf'),
+  consens.thres = 0.5,
+  limits = c(-3, 3),
+  heatmap.type = 'zscore'
+)
+
+cat("Model results stored to:", file.path(outp, 'model_interpretation.pdf'))
+```
+
+##### Credit
+This script is based on ideas and code from the [dada2 Tutorial](https://benjjneb.github.io/dada2/tutorial.html) by Benjamin Callahan, the publication "Bioconductor Workflow for Microbiome Data Analysis: from raw reads to community analyses" by [Callahan et al. (2016)](https://f1000research.com/articles/5-1492) and various pages of the official [phyloseq website](https://joey711.github.io/phyloseq) by Paul J. McMurdie.
\ No newline at end of file
diff --git a/descriptors_16S.txt b/descriptors_16S.txt
new file mode 100644
index 0000000..4a0446f
--- /dev/null
+++ b/descriptors_16S.txt
@@ -0,0 +1,57 @@
+SampleIDs	Skifte	Vekst	Dato	pH	pH tall	Moldklasse	NemMorf	PratAbuMorf	AphAbu	AphRatio	GloAbu	GloRatio	PhyAbu	PhyRatio	PytAbu	PytRatio	OomySum	PseuAbu	PseuRatio	SphiAbu	SphiRatio	StrepAbu	StrepRatio	16SSum	PhoAbu	PhoRatio	RhiAbu	RhiRatio	ITS2Sum	DitAbu	ParaAbu	PratAbu	RotAbu	TylAbu	NemSum
+1-16S	Lysmyra	Bra	J20	Ukjent	Ukjent	Ukjent	Ukjent	50	0.08353145	0.154298474	0.143369464	0.264830666	0.002901237	0.005359137	0.311560624	0.575511723	0.541362776	0	0	0.004555809	0.678571429	0.002158015	0.321428571	0.006713823	0	0	0.000349089	1	0.000349089	0.00896861	0	0.051818635	0	0	0.060787245
+10-16S	Trollmyra	Forsinket	J20	Ukjent	Ukjent	Ukjent	Liten	6	0.000405577	0.000408101	0.970443599	0.976483191	0.000253485	0.000255063	0.022712294	0.022853645	0.993814956	0.00191501	1	0	0	0	0	0.00191501	0.000319565	1	0	0	0.000319565	0	0	0	0	0	0
+11-16S	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.007337894	0.012840188	0.416535111	0.728872519	0.000789335	0.001381215	0.146816348	0.256906077	0.571478688	0.00302694	0.098039216	0.020280496	0.656862745	0.007567349	0.245098039	0.030874786	0.009083045	0.471910112	0.01016436	0.528089888	0.019247405	0.01376673	0.106807512	0.066539197	0.008030593	0	0.195144032
+12-16S	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.016973309	0.030446235	0.349788373	0.627440347	0.002418589	0.004338395	0.188304397	0.337775023	0.557484668	NA	NA	NA	NA	NA	NA	NA	0.019534884	0.677419355	0.009302326	0.322580645	0.028837209	0	0.024253731	0.124317238	0.001092419	0	0.149663388
+13-16S	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.012942002	0.018845024	0.4706063	0.685256165	0	0	0.203211371	0.295898812	0.686759674	0.001326951	0.05204461	0.016634283	0.652416357	0.007535188	0.295539033	0.025496422	0.017447199	0.406417112	0.025482094	0.593582888	0.042929293	0.01793722	0.125162973	0.02864169	0	0	0.171741882
+14-16S	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.006530991	0.010298319	0.487801795	0.769184772	0.003412969	0.005381702	0.1364345	0.215135207	0.634180255	0.001690953	0.069815195	0.016163525	0.667351129	0.006365942	0.262833676	0.024220421	0.019107231	0.682352941	0.008894746	0.317647059	0.028001977	0.01740644	0	0.011314186	0	0	0.028720627
+15-16S	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0.006613261	0.013629338	0.346207602	0.71350288	0.000170445	0.000351272	0.132231123	0.27251651	0.485222431	0.004289721	0.188405797	0.00445471	0.195652174	0.014024088	0.615942029	0.02276852	0.005603287	1	0	0	0.005603287	0.005132592	0	0.007413744	0	0	0.012546336
+16-16S	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0	0	0.25496593	0.613473768	0.001651869	0.003974563	0.15899236	0.382551669	0.415610159	0	0	0.013019892	0.72	0.005063291	0.28	0.018083183	0	0	0.009823183	1	0.009823183	0.033616769	0	0.033616769	0	0	0.067233538
+17-16S	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0.028027442	0.054730502	0.311120566	0.607539724	0.00194118	0.003790632	0.171009945	0.333939142	0.512099133	0.002587136	0.107142857	0.014588573	0.604166667	0.006970895	0.288690476	0.024146604	0.027608607	0.877419355	0.003857085	0.122580645	0.031465692	0.070199275	0	0.017889493	0	0	0.088088768
+18-16S	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0	0	0.394276215	0.845342092	0.001531993	0.003284646	0.07060204	0.151373262	0.466410248	0.00247977	0.080508475	0.022709475	0.737288136	0.005612112	0.18220339	0.030801357	0.015297272	0.335640138	0.030279136	0.664359862	0.045576408	0.045103093	0	0.208505155	0	0	0.253608247
+19-16S	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.016490486	0.031424321	0.426963734	0.813623404	0	0	0.081314035	0.154952275	0.524768255	0	0	0	0	0.002528154	1	0.002528154	0	0	0.005896735	1	0.005896735	0	0	0.022277228	0	0	0.022277228
+2-16S	Lysmyra	Forsinket	J20	Ukjent	Ukjent	Ukjent	Ukjent	22	0.02631246	0.075036075	0.144296859	0.411495911	0.001433692	0.004088504	0.178621126	0.509379509	0.350664137	0.003113417	0.313432836	0.006819867	0.686567164	0	0	0.009933284	0	0	0.000883392	1	0.000883392	0	0.02991453	0.005439005	0	0.006604507	0.041958042
+20-16S	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.025420096	0.040767221	0.439814815	0.705348549	0.000514403	0.000824969	0.15779321	0.25305926	0.623542524	0.000641766	0.135135135	0.004107303	0.864864865	0	0	0.004749069	0.001734305	0.25	0.005202914	0.75	0.006937218	0	0	0.015678255	0	0.021813224	0.037491479
+21-16S	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.030772723	0.052933958	0.438405915	0.754127622	0	0	0.11216317	0.19293842	0.581341807	0	NA	0	NA	0	NA	0	0	0	0.003703704	1	0.003703704	0	0	0.008860759	0	0	0.008860759
+22-16S	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.031985516	0.07326882	0.296867285	0.680030162	0	0	0.107697372	0.246701018	0.436550173	0	0	0.008905711	1	0	0	0.008905711	0.001264542	0.4	0.001896813	0.6	0.003161356	0.03465982	0	0.028883184	0	0	0.063543004
+23-16S	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.094016217	0.298274178	0.029303089	0.09296646	0	0	0.191881351	0.608759362	0.315200657	0	0	0.001772687	0.298850575	0.004158996	0.701149425	0.005931683	0	NA	0	NA	0	0.135207101	0	0.367159764	0	0	0.502366864
+24-16S	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.153793215	0.362603799	0.124726743	0.294072731	0	0	0.14561574	0.34332347	0.424135698	0.000900379	0.157303371	0	0	0.004823461	0.842696629	0.005723841	0.004990926	1	0	0	0.004990926	0.00309119	0	0.523163273	0	0	0.526254464
+25-16S	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.128550039	0.493327155	0.048541345	0.186283596	0	0	0.083486283	0.320389249	0.260577666	0	0	0.0023338	0.343137255	0.00446756	0.656862745	0.00680136	0	0	0.000470884	1	0.000470884	0.020306966	0	0.127508855	0	0	0.147815821
+26-16S	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.385976382	0.701991677	0.046091611	0.083828775	0	0	0.117762432	0.214179548	0.549830425	0	NA	0	NA	0	NA	0	0.001460707	1	0	0	0.001460707	0.019444444	0	0.296301606	0	0	0.31574605
+27-16S	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.22163806	0.606359726	0.061543219	0.168370585	0	0	0.082341116	0.225269689	0.365522396	0	0	0	0	0.004395604	1	0.004395604	0	NA	0	NA	0	0.019685039	0	0.116985377	0	0	0.136670416
+28-16S	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.15636577	0.371503268	0.199317757	0.473551198	0	0	0.065216594	0.154945534	0.420900121	0.001273538	0.179487179	0.005821887	0.820512821	0	0	0.007095424	0.00109569	1	0	0	0.00109569	0.064370412	0	0.206662902	0	0	0.271033315
+29-16S	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.165265174	0.400969599	0.139830156	0.339258661	0	0	0.107068521	0.25977174	0.41216385	0	0	0.006475597	0.486486486	0.006835352	0.513513514	0.013310949	0.001188354	1	0	0	0.001188354	0.018762677	0	0.273833671	0	0	0.292596349
+3-16S	Beita	Bra	J20	Ukjent	Ukjent	Ukjent	Ukjent	20	0.031965359	0.174102449	0.077058856	0.419708577	0.004330079	0.023584197	0.070246566	0.382604777	0.18360086	0.002268799	0.136363636	0.010587727	0.636363636	0.003781331	0.227272727	0.016637857	0	0	0.001781037	1	0.001781037	NA	NA	NA	NA	NA	NA
+30-16S	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.180688919	0.394181214	0.157597927	0.343807149	0.000990703	0.002161264	0.11911294	0.259850374	0.458390489	0	0	0.004026731	0.398305085	0.006082934	0.601694915	0.010109664	0.001576624	1	0	0	0.001576624	0.012096774	0	0.185819892	0	0	0.197916667
+31-16S	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.087822115	0.157544836	0.324226493	0.581632653	0.001034215	0.001855288	0.144359217	0.258967223	0.557442041	0.002501014	0.308333333	0.001689874	0.208333333	0.003920508	0.483333333	0.008111397	0	NA	0	NA	0	0.264983442	0	0.127670397	0	0	0.392653839
+32-16S	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.054384916	0.186315303	0.145688342	0.499108381	0.000481965	0.001651146	0.091341983	0.31292517	0.291897207	0.001655107	0.190909091	0.007014502	0.809090909	0	0	0.008669609	0.005667182	0.897959184	0.000643998	0.102040816	0.00631118	0.01214128	0	0.09638046	0	0	0.10852174
+33-16S	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.04802367	0.10034081	0.277748274	0.580328129	0	0	0.152833624	0.319331061	0.478605569	0.000635654	0.053140097	0.00768564	0.642512077	0.003640566	0.304347826	0.011961861	0	0	0.003305974	1	0.003305974	0.001372213	0	0.35780446	0	0	0.359176672
+34-16S	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.07283219	0.189329441	0.179756706	0.4672829	0.004273238	0.011108408	0.127822832	0.332279251	0.384684966	0	NA	0	NA	0	NA	0	0	NA	0	NA	0	0.075972264	0	0.18450407	0	0	0.260476334
+35-16S	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0.021054804	0.027367856	0.584270823	0.759458009	0	0	0.164000413	0.213174135	0.76932604	0	0	0.003970004	0.3	0.009263344	0.7	0.013233348	0	NA	0	NA	0	0.047764228	0.285714286	0.058943089	0	0	0.392421603
+36-16S	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0.101013124	0.361646771	0.029025777	0.103917968	0.007609114	0.027242118	0.14166631	0.507193144	0.279314325	0.001526032	0.165048544	0	0	0.007719928	0.834951456	0.009245961	0	NA	0	NA	0	0	0.709677419	0.023550725	0	0.003623188	0.736851332
+37-16S	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0.031756417	0.106175332	0.028032209	0.093723709	0.004479114	0.014975602	0.234826371	0.785125358	0.299094112	0.001243008	0.102189781	0.008434698	0.693430657	0.002486016	0.204379562	0.012163722	0	0	0.000630358	1	0.000630358	0.010293557	0	0.016774685	0	0.005718643	0.032786885
+38-16S	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0.064994735	0.242355741	0.070036055	0.261154075	0.004435085	0.016537775	0.128713187	0.479952409	0.268179063	0	0	0.005923293	0.661157025	0.003035688	0.338842975	0.008958981	0	0	0.002937337	1	0.002937337	0.005334428	0	0.066885515	0.001641362	0	0.073861305
+39-16S	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0.026336231	0.088733799	0.049195487	0.165752742	0.001849454	0.006231306	0.219419271	0.739282154	0.296800444	0.000648789	0.391304348	0.001009227	0.608695652	0	0	0.001658016	0	NA	0	NA	0	0	0	0.024871982	0	0	0.024871982
+4-16S	Beita	Forsinket	J20	Ukjent	Ukjent	Ukjent	Ukjent	4	0.018070059	0.133918771	0.01584833	0.117453348	0.003036362	0.022502744	0.097978227	0.726125137	0.134932978	0	0	0	0	0.010712566	1	0.010712566	0	0	0.000608828	1	0.000608828	0	0.040728832	0.032154341	0	0.002143623	0.075026795
+40-16S	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0.192534701	0.367065032	0.026132617	0.049821512	0.004762486	0.009079621	0.301094965	0.574033835	0.524524769	0	0	0.000749545	1	0	0	0.000749545	0	0	0.001653405	1	0.001653405	0.012142857	0.25	0.13	0	0	0.392142857
+41-16S	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0.075093914	0.23380117	0.043350864	0.13497076	0	0	0.202742299	0.63122807	0.321187077	0.001394612	0.097777778	0.012868463	0.902222222	0	0	0.014263074	NA	NA	NA	NA	NA	0.002133207	0	0.003792368	0	0	0.005925575
+42-16S	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0.040783104	0.105356352	0.085464593	0.220783533	0.003212955	0.008300133	0.257636122	0.665559982	0.387096774	0	0	0.00338787	0.322834646	0.007106263	0.677165354	0.010494133	0	NA	0	NA	0	0.00982686	0.277777778	0.011230697	0	0	0.298835335
+43-16S	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0.024420126	0.075518815	0.048895943	0.151209851	0.005485479	0.016963748	0.24456325	0.756307586	0.323364798	0	0	0.019988739	1	0	0	0.019988739	0	NA	0	NA	0	0.007736156	0.269230769	0	0	0	0.276966925
+44-16S	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0.051534428	0.136467107	0.127138313	0.336671978	0.001934153	0.005121785	0.197025703	0.52173913	0.377632597	0	0	0.003062613	0.675	0.001474592	0.325	0.004537205	0	NA	0	NA	0	0.038968167	0	0.086717892	0	0.019758507	0.145444566
+45-16S	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0.024978194	0.1010371	0.069410303	0.28076553	0.012608041	0.050999679	0.1402215	0.567197691	0.247218037	0.001418085	0.109677419	0.007173841	0.55483871	0.004337671	0.335483871	0.012929596	0	NA	0	NA	0	0.011713031	0	0.005124451	0	0	0.016837482
+46-16S	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0.222016334	0.541422594	0.039210304	0.095620642	0.001166701	0.002845188	0.147667742	0.360111576	0.41006108	0.001375786	0.122807018	0.004029088	0.359649123	0.005797956	0.51754386	0.01120283	0	NA	0	NA	0	0	0	0.043798786	0	0.005203816	0.049002602
+5-16S	Lysmyra	Forsinket	O20	Ukjent	Ukjent	Ukjent	Ukjent	0	0.086550388	0.201588878	0.02255814	0.052541302	0	0	0.320232558	0.74586982	0.429341085	0.107273925	0.984455959	0.001693799	0.015544041	0	0	0.108967724	0	NA	0	NA	0	0.004016064	0.204551539	0.018206158	0	0	0.226773762
+6-16S	Lysmyra	Bra	O20	Ukjent	Ukjent	Ukjent	Ukjent	1	0.089267204	0.345524691	0.033330675	0.129012346	0.005302607	0.020524691	0.130452117	0.504938272	0.258352603	0.037478911	1	0	0	0	0	0.037478911	0	0	0.00122165	1	0.00122165	0.001144951	0.02976872	0.049461873	0	0.002976872	0.083352416
+7-16S	Beita	Forsinket	O20	Ukjent	Ukjent	Ukjent	Liten	6	0.012911934	0.022917326	0.100570468	0.178502011	0	0	0.449931238	0.798580663	0.56341364	0.076351351	1	0	0	0	0	0.076351351	0	NA	0	NA	0	0.01676996	0.024790376	0.078745899	0	0	0.120306234
+8-16S	Beita	Bra	O20	Ukjent	Ukjent	Ukjent	Liten	7	0.125312644	0.359338407	0.021948854	0.06293911	0.003062631	0.008782201	0.198407432	0.568940281	0.34873156	0.095257106	0.996658312	0.000319387	0.003341688	0	0	0.095576493	0	NA	0	NA	0	0.017795637	0	0.084959816	0	0.007462687	0.11021814
+9-16S	Trollmyra	Bra	J20	Ukjent	Ukjent	Ukjent	Liten	22	0.001785714	0.001901141	0.881026786	0.937975285	0	0	0.056473214	0.060123574	0.939285714	0	NA	0	NA	0	NA	0	0.001023978	1	0	0	0.001023978	0	0	0	0	0	0
+NegK1-16S	NegK	NegK	NegK	NegK		NegK	NegK	NegK																											
+NegK2-16S	NegK	NegK	NegK	NegK		NegK	NegK	NegK																											
+Posk1-16S	PosK	PosK	PosK	PosK		PosK	PosK	PosK																											
+Posk2-16S	PosK	PosK	PosK	PosK		PosK	PosK	PosK																											
+Posk3-16S	PosK	PosK	PosK	PosK		PosK	PosK	PosK																											
+Posk4-16S	PosK	PosK	PosK	PosK		PosK	PosK	PosK																											
+V1-16S	NegK	NegK	NegK	NegK		NegK	NegK	NegK																											
+V2-16S	NegK	NegK	NegK	NegK		NegK	NegK	NegK																											
+V3-16S	NegK	NegK	NegK	NegK		NegK	NegK	NegK																											
+V4-16S	NegK	NegK	NegK	NegK		NegK	NegK	NegK																											
diff --git a/descriptors_ITS2.txt b/descriptors_ITS2.txt
new file mode 100644
index 0000000..fce94e4
--- /dev/null
+++ b/descriptors_ITS2.txt
@@ -0,0 +1,54 @@
+SampleIDs	Skifte	Vekst	Dato	pH	pH tall	Moldklasse	NemAndel	PratMorf	PhoAbu	PhoRatio	RhiAbu	RhiRatio	ITS2Sum	AphAbu	AphRatio	GloAbu	GloRatio	PhyAbu	PhyRatio	PytAbu	PytRatio	OomySum	PseuAbu	PseuRatio	SphiAbu	SphiRatio	StrepAbu	StrepRatio	16SSum	DitAbu	ParaAbu	PratAbu	RotAbu	TylAbu	NemSum
+1-its2	Lysmyra	Bra	J20	Ukjent	Ukjent	Ukjent	Ukjent	50	0	0	0.000349089	1	0.000349089	0.08353145	0.154298474	0.143369464	0.264830666	0.002901237	0.005359137	0.311560624	0.575511723	0.541362776	0	0	0.004555809	0.678571429	0.002158015	0.321428571	0.006713823	0.00896861	0	0.051818635	0	0	0.060787245
+10-its2	Trollmyra	Forsinket	J20	Ukjent	Ukjent	Ukjent	Liten	6	0.000319565	1	0	0	0.000319565	0.000405577	0.000408101	0.970443599	0.976483191	0.000253485	0.000255063	0.022712294	0.022853645	0.993814956	0.00191501	1	0	0	0	0	0.00191501	0	0	0	0	0	0
+11-its2	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.009083045	0.471910112	0.01016436	0.528089888	0.019247405	0.007337894	0.012840188	0.416535111	0.728872519	0.000789335	0.001381215	0.146816348	0.256906077	0.571478688	0.00302694	0.098039216	0.020280496	0.656862745	0.007567349	0.245098039	0.030874786	0.01376673	0.106807512	0.066539197	0.008030593	0	0.195144032
+12-its2	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.019534884	0.677419355	0.009302326	0.322580645	0.028837209	0.016973309	0.030446235	0.349788373	0.627440347	0.002418589	0.004338395	0.188304397	0.337775023	0.557484668	NA	NA	NA	NA	NA	NA	NA	0	0.024253731	0.124317238	0.001092419	0	0.149663388
+13-its2	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.017447199	0.406417112	0.025482094	0.593582888	0.042929293	0.012942002	0.018845024	0.4706063	0.685256165	0	0	0.203211371	0.295898812	0.686759674	0.001326951	0.05204461	0.016634283	0.652416357	0.007535188	0.295539033	0.025496422	0.01793722	0.125162973	0.02864169	0	0	0.171741882
+14-its2	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.019107231	0.682352941	0.008894746	0.317647059	0.028001977	0.006530991	0.010298319	0.487801795	0.769184772	0.003412969	0.005381702	0.1364345	0.215135207	0.634180255	0.001690953	0.069815195	0.016163525	0.667351129	0.006365942	0.262833676	0.024220421	0.01740644	0	0.011314186	0	0	0.028720627
+15-its2	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0.005603287	1	0	0	0.005603287	0.006613261	0.013629338	0.346207602	0.71350288	0.000170445	0.000351272	0.132231123	0.27251651	0.485222431	0.004289721	0.188405797	0.00445471	0.195652174	0.014024088	0.615942029	0.02276852	0.005132592	0	0.007413744	0	0	0.012546336
+16-its2	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0	0	0.009823183	1	0.009823183	0	0	0.25496593	0.613473768	0.001651869	0.003974563	0.15899236	0.382551669	0.415610159	0	0	0.013019892	0.72	0.005063291	0.28	0.018083183	0.033616769	0	0.033616769	0	0	0.067233538
+17-its2	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0.027608607	0.877419355	0.003857085	0.122580645	0.031465692	0.028027442	0.054730502	0.311120566	0.607539724	0.00194118	0.003790632	0.171009945	0.333939142	0.512099133	0.002587136	0.107142857	0.014588573	0.604166667	0.006970895	0.288690476	0.024146604	0.070199275	0	0.017889493	0	0	0.088088768
+18-its2	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0.015297272	0.335640138	0.030279136	0.664359862	0.045576408	0	0	0.394276215	0.845342092	0.001531993	0.003284646	0.07060204	0.151373262	0.466410248	0.00247977	0.080508475	0.022709475	0.737288136	0.005612112	0.18220339	0.030801357	0.045103093	0	0.208505155	0	0	0.253608247
+19-its2	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0	0	0.005896735	1	0.005896735	0.016490486	0.031424321	0.426963734	0.813623404	0	0	0.081314035	0.154952275	0.524768255	0	0	0	0	0.002528154	1	0.002528154	0	0	0.022277228	0	0	0.022277228
+2-its2	Lysmyra	Forsinket	J20	Ukjent	Ukjent	Ukjent	Ukjent	22	0	0	0.000883392	1	0.000883392	0.02631246	0.075036075	0.144296859	0.411495911	0.001433692	0.004088504	0.178621126	0.509379509	0.350664137	0.003113417	0.313432836	0.006819867	0.686567164	0	0	0.009933284	0	0.02991453	0.005439005	0	0.006604507	0.041958042
+20-its2	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.001734305	0.25	0.005202914	0.75	0.006937218	0.025420096	0.040767221	0.439814815	0.705348549	0.000514403	0.000824969	0.15779321	0.25305926	0.623542524	0.000641766	0.135135135	0.004107303	0.864864865	0	0	0.004749069	0	0	0.015678255	0	0.021813224	0.037491479
+21-its2	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0	0	0.003703704	1	0.003703704	0.030772723	0.052933958	0.438405915	0.754127622	0	0	0.11216317	0.19293842	0.581341807	0	NA	0	NA	0	NA	0	0	0	0.008860759	0	0	0.008860759
+22-its2	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.001264542	0.4	0.001896813	0.6	0.003161356	0.031985516	0.07326882	0.296867285	0.680030162	0	0	0.107697372	0.246701018	0.436550173	0	0	0.008905711	1	0	0	0.008905711	0.03465982	0	0.028883184	0	0	0.063543004
+23-its2	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0	NA	0	NA	0	0.094016217	0.298274178	0.029303089	0.09296646	0	0	0.191881351	0.608759362	0.315200657	0	0	0.001772687	0.298850575	0.004158996	0.701149425	0.005931683	0.135207101	0	0.367159764	0	0	0.502366864
+24-its2	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.004990926	1	0	0	0.004990926	0.153793215	0.362603799	0.124726743	0.294072731	0	0	0.14561574	0.34332347	0.424135698	0.000900379	0.157303371	0	0	0.004823461	0.842696629	0.005723841	0.00309119	0	0.523163273	0	0	0.526254464
+25-its2	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0	0	0.000470884	1	0.000470884	0.128550039	0.493327155	0.048541345	0.186283596	0	0	0.083486283	0.320389249	0.260577666	0	0	0.0023338	0.343137255	0.00446756	0.656862745	0.00680136	0.020306966	0	0.127508855	0	0	0.147815821
+26-its2	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.001460707	1	0	0	0.001460707	0.385976382	0.701991677	0.046091611	0.083828775	0	0	0.117762432	0.214179548	0.549830425	0	NA	0	NA	0	NA	0	0.019444444	0	0.296301606	0	0	0.31574605
+27-its2	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0	NA	0	NA	0	0.22163806	0.606359726	0.061543219	0.168370585	0	0	0.082341116	0.225269689	0.365522396	0	0	0	0	0.004395604	1	0.004395604	0.019685039	0	0.116985377	0	0	0.136670416
+28-its2	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.00109569	1	0	0	0.00109569	0.15636577	0.371503268	0.199317757	0.473551198	0	0	0.065216594	0.154945534	0.420900121	0.001273538	0.179487179	0.005821887	0.820512821	0	0	0.007095424	0.064370412	0	0.206662902	0	0	0.271033315
+29-its2	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.001188354	1	0	0	0.001188354	0.165265174	0.400969599	0.139830156	0.339258661	0	0	0.107068521	0.25977174	0.41216385	0	0	0.006475597	0.486486486	0.006835352	0.513513514	0.013310949	0.018762677	0	0.273833671	0	0	0.292596349
+3-its2	Beita	Bra	J20	Ukjent	Ukjent	Ukjent	Ukjent	20	0	0	0.001781037	1	0.001781037	0.031965359	0.174102449	0.077058856	0.419708577	0.004330079	0.023584197	0.070246566	0.382604777	0.18360086	0.002268799	0.136363636	0.010587727	0.636363636	0.003781331	0.227272727	0.016637857	NA	NA	NA	NA	NA	NA
+30-its2	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.001576624	1	0	0	0.001576624	0.180688919	0.394181214	0.157597927	0.343807149	0.000990703	0.002161264	0.11911294	0.259850374	0.458390489	0	0	0.004026731	0.398305085	0.006082934	0.601694915	0.010109664	0.012096774	0	0.185819892	0	0	0.197916667
+31-its2	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0	NA	0	NA	0	0.087822115	0.157544836	0.324226493	0.581632653	0.001034215	0.001855288	0.144359217	0.258967223	0.557442041	0.002501014	0.308333333	0.001689874	0.208333333	0.003920508	0.483333333	0.008111397	0.264983442	0	0.127670397	0	0	0.392653839
+32-its2	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.005667182	0.897959184	0.000643998	0.102040816	0.00631118	0.054384916	0.186315303	0.145688342	0.499108381	0.000481965	0.001651146	0.091341983	0.31292517	0.291897207	0.001655107	0.190909091	0.007014502	0.809090909	0	0	0.008669609	0.01214128	0	0.09638046	0	0	0.10852174
+33-its2	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0	0	0.003305974	1	0.003305974	0.04802367	0.10034081	0.277748274	0.580328129	0	0	0.152833624	0.319331061	0.478605569	0.000635654	0.053140097	0.00768564	0.642512077	0.003640566	0.304347826	0.011961861	0.001372213	0	0.35780446	0	0	0.359176672
+34-its2	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0	NA	0	NA	0	0.07283219	0.189329441	0.179756706	0.4672829	0.004273238	0.011108408	0.127822832	0.332279251	0.384684966	0	NA	0	NA	0	NA	0	0.075972264	0	0.18450407	0	0	0.260476334
+35-its2	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0	NA	0	NA	0	0.021054804	0.027367856	0.584270823	0.759458009	0	0	0.164000413	0.213174135	0.76932604	0	0	0.003970004	0.3	0.009263344	0.7	0.013233348	0.047764228	0.285714286	0.058943089	0	0	0.392421603
+36-its2	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0	NA	0	NA	0	0.101013124	0.361646771	0.029025777	0.103917968	0.007609114	0.027242118	0.14166631	0.507193144	0.279314325	0.001526032	0.165048544	0	0	0.007719928	0.834951456	0.009245961	0	0.709677419	0.023550725	0	0.003623188	0.736851332
+37-its2	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0	0	0.000630358	1	0.000630358	0.031756417	0.106175332	0.028032209	0.093723709	0.004479114	0.014975602	0.234826371	0.785125358	0.299094112	0.001243008	0.102189781	0.008434698	0.693430657	0.002486016	0.204379562	0.012163722	0.010293557	0	0.016774685	0	0.005718643	0.032786885
+38-its2	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0	0	0.002937337	1	0.002937337	0.064994735	0.242355741	0.070036055	0.261154075	0.004435085	0.016537775	0.128713187	0.479952409	0.268179063	0	0	0.005923293	0.661157025	0.003035688	0.338842975	0.008958981	0.005334428	0	0.066885515	0.001641362	0	0.073861305
+39-its2	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0	NA	0	NA	0	0.026336231	0.088733799	0.049195487	0.165752742	0.001849454	0.006231306	0.219419271	0.739282154	0.296800444	0.000648789	0.391304348	0.001009227	0.608695652	0	0	0.001658016	0	0	0.024871982	0	0	0.024871982
+4-its2	Beita	Forsinket	J20	Ukjent	Ukjent	Ukjent	Ukjent	4	0	0	0.000608828	1	0.000608828	0.018070059	0.133918771	0.01584833	0.117453348	0.003036362	0.022502744	0.097978227	0.726125137	0.134932978	0	0	0	0	0.010712566	1	0.010712566	0	0.040728832	0.032154341	0	0.002143623	0.075026795
+40-its2	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0	0	0.001653405	1	0.001653405	0.192534701	0.367065032	0.026132617	0.049821512	0.004762486	0.009079621	0.301094965	0.574033835	0.524524769	0	0	0.000749545	1	0	0	0.000749545	0.012142857	0.25	0.13	0	0	0.392142857
+42-its2	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0	NA	0	NA	0	0.040783104	0.105356352	0.085464593	0.220783533	0.003212955	0.008300133	0.257636122	0.665559982	0.387096774	0	0	0.00338787	0.322834646	0.007106263	0.677165354	0.010494133	0.00982686	0.277777778	0.011230697	0	0	0.298835335
+43-its2	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0	NA	0	NA	0	0.024420126	0.075518815	0.048895943	0.151209851	0.005485479	0.016963748	0.24456325	0.756307586	0.323364798	0	0	0.019988739	1	0	0	0.019988739	0.007736156	0.269230769	0	0	0	0.276966925
+44-its2	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0	NA	0	NA	0	0.051534428	0.136467107	0.127138313	0.336671978	0.001934153	0.005121785	0.197025703	0.52173913	0.377632597	0	0	0.003062613	0.675	0.001474592	0.325	0.004537205	0.038968167	0	0.086717892	0	0.019758507	0.145444566
+45-its2	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0	NA	0	NA	0	0.024978194	0.1010371	0.069410303	0.28076553	0.012608041	0.050999679	0.1402215	0.567197691	0.247218037	0.001418085	0.109677419	0.007173841	0.55483871	0.004337671	0.335483871	0.012929596	0.011713031	0	0.005124451	0	0	0.016837482
+46-its2	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0	NA	0	NA	0	0.222016334	0.541422594	0.039210304	0.095620642	0.001166701	0.002845188	0.147667742	0.360111576	0.41006108	0.001375786	0.122807018	0.004029088	0.359649123	0.005797956	0.51754386	0.01120283	0	0	0.043798786	0	0.005203816	0.049002602
+5-its2	Lysmyra	Forsinket	O20	Ukjent	Ukjent	Ukjent	Ukjent	0	0	NA	0	NA	0	0.086550388	0.201588878	0.02255814	0.052541302	0	0	0.320232558	0.74586982	0.429341085	0.107273925	0.984455959	0.001693799	0.015544041	0	0	0.108967724	0.004016064	0.204551539	0.018206158	0	0	0.226773762
+6-its2	Lysmyra	Bra	O20	Ukjent	Ukjent	Ukjent	Ukjent	1	0	0	0.00122165	1	0.00122165	0.089267204	0.345524691	0.033330675	0.129012346	0.005302607	0.020524691	0.130452117	0.504938272	0.258352603	0.037478911	1	0	0	0	0	0.037478911	0.001144951	0.02976872	0.049461873	0	0.002976872	0.083352416
+7-its2	Beita	Forsinket	O20	Ukjent	Ukjent	Ukjent	Liten	6	0	NA	0	NA	0	0.012911934	0.022917326	0.100570468	0.178502011	0	0	0.449931238	0.798580663	0.56341364	0.076351351	1	0	0	0	0	0.076351351	0.01676996	0.024790376	0.078745899	0	0	0.120306234
+8-its2	Beita	Bra	O20	Ukjent	Ukjent	Ukjent	Liten	7	0	NA	0	NA	0	0.125312644	0.359338407	0.021948854	0.06293911	0.003062631	0.008782201	0.198407432	0.568940281	0.34873156	0.095257106	0.996658312	0.000319387	0.003341688	0	0	0.095576493	0.017795637	0	0.084959816	0	0.007462687	0.11021814
+9-its2	Trollmyra	Bra	J20	Ukjent	Ukjent	Ukjent	Liten	22	0.001023978	1	0	0	0.001023978	0.001785714	0.001901141	0.881026786	0.937975285	0	0	0.056473214	0.060123574	0.939285714	0	NA	0	NA	0	NA	0	0	0	0	0	0	0
+NegK1-its2	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK
+NegK2-its2	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK
+Posk1-its2	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk2-its2	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk3-its2	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk4-its2	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+V1-its2	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK
+V3-its2	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK
diff --git a/descriptors_S+T.txt b/descriptors_S+T.txt
new file mode 100644
index 0000000..7f9d1ae
--- /dev/null
+++ b/descriptors_S+T.txt
@@ -0,0 +1,85 @@
+SampleIDs	Skifte	Vekst	Dato	pH	Moldklasse	PPNemAbuMorf	pH tall	DitAbuMol	DitRatio	ParaAbuMol	ParaRatio	PratAbuMol	PratRatio	RotAbuMol	RotRatio	TylAbuMorf	TylRatio	sums_abu_sample	PratMorf	NemMorf	PratAbuMorf
+1-Sven-Trich	Lysmyra	Bra	J20	Ukjent	Ukjent	Ukjent	Ukjent	0.00896861	0.147540984	0	0	0.051818635	0.852459016	0	0	0	0	0.060787245	50	NA	NA
+10-Sven	Trollmyra	Forsinket	J21	Ukjent	Ukjent	Ukjent	Ukjent	0	NA	0	NA	0	NA	0	NA	0	NA	0	1	NA	NA
+11-Sven	Trollmyra	Forsinket	J21	Hoy	Moldklasse 2	Stor	5.6	0.01376673	0.155844156	0	0	0.066539197	0.753246753	0.008030593	0.090909091	0	0	0.08833652	116	256	0.453125
+11-Trich	Trollmyra	Forsinket	J21	Hoy	Moldklasse 2	Stor	5.6	0	0	0.106807512	1	0	0	0	0	0	0	0.106807512	116	256	0.453125
+12-Sven	Trollmyra	Forsinket	J21	Hoy	Moldklasse 2	Stor	5.6	0	0	0	0	0.124317238	0.991289199	0.001092419	0.008710801	0	0	0.125409657	116	256	0.453125
+12-Trich	Trollmyra	Forsinket	J21	Hoy	Moldklasse 2	Stor	5.6	0	0	0.024253731	1	0	0	0	0	0	0	0.024253731	116	256	0.453125
+13-Sven	Trollmyra	Forsinket	J21	Hoy	Moldklasse 2	Stor	5.6	0.01793722	0.385093168	0	0	0.02864169	0.614906832	0	0	0	0	0.046578909	116	256	0.453125
+13-Trich	Trollmyra	Forsinket	J21	Hoy	Moldklasse 2	Stor	5.6	0	0	0.125162973	1	0	0	0	0	0	0	0.125162973	116	256	0.453125
+14-Sven	Trollmyra	Forsinket	J21	Hoy	Moldklasse 2	Stor	5.6	0.01740644	0.606060606	0	0	0.011314186	0.393939394	0	0	0	0	0.028720627	116	256	0.453125
+14-Trich	Trollmyra	Forsinket	J21	Hoy	Moldklasse 2	Stor	5.6	0	NA	0	NA	0	NA	0	NA	0	NA	0	116	256	0.453125
+15-Sven	Trollmyra	Overgang	J21	Hoy	Moldklasse 2	Mellom	5.8	0.005132592	0.409090909	0	0	0.007413744	0.590909091	0	0	0	0	0.012546336	87	511	0.170254403
+15-Trich	Trollmyra	Overgang	J21	Hoy	Moldklasse 2	Mellom	5.8	0	NA	0	NA	0	NA	0	NA	0	NA	0	87	511	0.170254403
+16-Sven	Trollmyra	Overgang	J21	Hoy	Moldklasse 2	Mellom	5.8	0.033616769	0.5	0	0	0.033616769	0.5	0	0	0	0	0.067233538	87	511	0.170254403
+16-Trich	Trollmyra	Overgang	J21	Hoy	Moldklasse 2	Mellom	5.8	0	NA	0	NA	0	NA	0	NA	0	NA	0	87	511	0.170254403
+17-Sven	Trollmyra	Overgang	J21	Hoy	Moldklasse 2	Mellom	5.8	0.070199275	0.796915167	0	0	0.017889493	0.203084833	0	0	0	0	0.088088768	87	511	0.170254403
+18-Sven	Trollmyra	Overgang	J21	Hoy	Moldklasse 2	Mellom	5.8	0.045103093	0.177845528	0	0	0.208505155	0.822154472	0	0	0	0	0.253608247	87	511	0.170254403
+19-Sven	Trollmyra	Bra	J21	Lav	Moldklasse 4	Mellom	5	0	0	0	0	0.022277228	1	0	0	0	0	0.022277228	114	731	0.155950752
+2-Sven-Trich	Lysmyra	Forsinket	J20	Ukjent	Ukjent	Ukjent	Ukjent	0	0	0.02991453	0.712962963	0.005439005	0.12962963	0	0	0.006604507	0.157407407	0.041958042	22	NA	NA
+20-Sven	Trollmyra	Bra	J21	Lav	Moldklasse 4	Mellom	5	0	0	0	0	0.015678255	0.418181818	0	0	0.021813224	0.581818182	0.037491479	114	731	0.155950752
+21-Sven	Trollmyra	Bra	J21	Lav	Moldklasse 4	Mellom	5	0	0	0	0	0.008860759	1	0	0	0	0	0.008860759	114	731	0.155950752
+22-Sven	Trollmyra	Bra	J21	Lav	Moldklasse 4	Mellom	5	0.03465982	0.545454545	0	0	0.028883184	0.454545455	0	0	0	0	0.063543004	114	731	0.155950752
+22-Trich	Trollmyra	Bra	J21	Lav	Moldklasse 4	Mellom	5	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	114	731	0.155950752
+23-Sven	Lysmyra	Forsinket	J21	Lav	Moldklasse 1	Mellom	5.1	0.135207101	0.425512104	0	0	0.182544379	0.574487896	0	0	0	0	0.317751479	119	658	0.180851064
+23-Trich	Lysmyra	Forsinket	J21	Lav	Moldklasse 1	Mellom	5.1	0	0	0	0	0.184615385	1	0	0	0	0	0.184615385	119	658	0.180851064
+24-Sven	Lysmyra	Forsinket	J21	Lav	Moldklasse 1	Mellom	5.1	0.00309119	0.00617284	0	0	0.497681607	0.99382716	0	0	0	0	0.500772798	119	658	0.180851064
+24-Trich	Lysmyra	Forsinket	J21	Lav	Moldklasse 1	Mellom	5.1	0	0	0	0	0.025481666	1	0	0	0	0	0.025481666	119	658	0.180851064
+25-Sven	Lysmyra	Forsinket	J21	Lav	Moldklasse 1	Mellom	5.1	0.020306966	0.137380192	0	0	0.127508855	0.862619808	0	0	0	0	0.147815821	119	658	0.180851064
+25-Trich	Lysmyra	Forsinket	J21	Lav	Moldklasse 1	Mellom	5.1	0	NA	0	NA	0	NA	0	NA	0	NA	0	119	658	0.180851064
+26-Sven	Lysmyra	Forsinket	J21	Lav	Moldklasse 1	Mellom	5.1	0.019444444	0.076607387	0	0	0.234375	0.923392613	0	0	0	0	0.253819444	119	658	0.180851064
+26-Trich	Lysmyra	Forsinket	J21	Lav	Moldklasse 1	Mellom	5.1	0	0	0	0	0.061926606	1	0	0	0	0	0.061926606	119	658	0.180851064
+27-Sven	Lysmyra	Overgang	J21	Lav	Moldklasse 2	Mellom	4.9	0.019685039	0.144032922	0	0	0.116985377	0.855967078	0	0	0	0	0.136670416	150	1010	0.148514851
+27-Trich	Lysmyra	Overgang	J21	Lav	Moldklasse 2	Mellom	4.9	0	NA	0	NA	0	NA	0	NA	0	NA	0	150	1010	0.148514851
+28-Sven	Lysmyra	Overgang	J21	Lav	Moldklasse 2	Mellom	4.9	0.064370412	0.2375	0	0	0.206662902	0.7625	0	0	0	0	0.271033315	150	1010	0.148514851
+28-Trich	Lysmyra	Overgang	J21	Lav	Moldklasse 2	Mellom	4.9	0	NA	0	NA	0	NA	0	NA	0	NA	0	150	1010	0.148514851
+29-Sven	Lysmyra	Overgang	J21	Lav	Moldklasse 2	Mellom	4.9	0.018762677	0.064124783	0	0	0.273833671	0.935875217	0	0	0	0	0.292596349	150	1010	0.148514851
+29-Trich	Lysmyra	Overgang	J21	Lav	Moldklasse 2	Mellom	4.9	0	NA	0	NA	0	NA	0	NA	0	NA	0	150	1010	0.148514851
+30-Sven	Lysmyra	Overgang	J21	Lav	Moldklasse 2	Mellom	4.9	0.012096774	0.061120543	0	0	0.185819892	0.938879457	0	0	0	0	0.197916667	150	1010	0.148514851
+30-Trich	Lysmyra	Overgang	J21	Lav	Moldklasse 2	Mellom	4.9	0	#I/T	0	#I/T	0	#I/T	0	#I/T	0	#I/T	0	150	1010	0.148514851
+31-Sven	Lysmyra	Bra	J21	Lav	Moldklasse 2	Mellom	5	0.009664293	0.07037037	0	0	0.127670397	0.92962963	0	0	0	0	0.13733469	119	616	0.193181818
+31-Trich	Lysmyra	Bra	J21	Lav	Moldklasse 2	Mellom	5	0.255319149	1	0	0	0	0	0	0	0	0	0.255319149	119	616	0.193181818
+32-Sven	Lysmyra	Bra	J21	Lav	Moldklasse 2	Mellom	5	0.01214128	0.115303983	0	0	0.093156733	0.884696017	0	0	0	0	0.105298013	119	616	0.193181818
+32-Trich	Lysmyra	Bra	J21	Lav	Moldklasse 2	Mellom	5	0	0	0	0	0.003223727	1	0	0	0	0	0.003223727	119	616	0.193181818
+33-Sven	Lysmyra	Bra	J21	Lav	Moldklasse 2	Mellom	5	0.001372213	0.003820439	0	0	0.35780446	0.996179561	0	0	0	0	0.359176672	119	616	0.193181818
+33-Trich	Lysmyra	Bra	J21	Lav	Moldklasse 2	Mellom	5	0	NA	0	NA	0	NA	0	NA	0	NA	0	119	616	0.193181818
+34-Sven	Lysmyra	Bra	J21	Lav	Moldklasse 2	Mellom	5	0.075972264	0.291666667	0	0	0.18450407	0.708333333	0	0	0	0	0.260476334	119	616	0.193181818
+34-Trich	Lysmyra	Bra	J21	Lav	Moldklasse 2	Mellom	5	0	NA	0	NA	0	NA	0	NA	0	NA	0	119	616	0.193181818
+35-Sven	Beita	Forsinket	J21	Lav	Moldklasse 1	Liten	5	0.047764228	0.447619048	0	0	0.058943089	0.552380952	0	0	0	0	0.106707317	15	1160	0.012931034
+35-Trich	Beita	Forsinket	J21	Lav	Moldklasse 1	Liten	5	0	0	0.285714286	1	0	0	0	0	0	0	0.285714286	15	1160	0.012931034
+36-Sven	Beita	Forsinket	J21	Lav	Moldklasse 1	Liten	5	0	0	0	0	0.023550725	0.866666667	0	0	0.003623188	0.133333333	0.027173913	15	1160	0.012931034
+36-Trich	Beita	Forsinket	J21	Lav	Moldklasse 1	Liten	5	0	0	0.709677419	1	0	0	0	0	0	0	0.709677419	15	1160	0.012931034
+37-Sven	Beita	Forsinket	J21	Lav	Moldklasse 1	Liten	5	0.010293557	0.313953488	0	0	0.016774685	0.511627907	0	0	0.005718643	0.174418605	0.032786885	15	1160	0.012931034
+37-Trich	Beita	Forsinket	J21	Lav	Moldklasse 1	Liten	5	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	15	1160	0.012931034
+38-Sven	Beita	Forsinket	J21	Lav	Moldklasse 1	Liten	5	0.005334428	0.072222222	0	0	0.066885515	0.905555556	0.001641362	0.022222222	0	0	0.073861305	15	1160	0.012931034
+38-Trich	Beita	Forsinket	J21	Lav	Moldklasse 1	Liten	5	0	NA	0	NA	0	NA	0	NA	0	NA	0	15	1160	0.012931034
+39-Sven	Beita	Overgang	J21	Lav	Moldklasse 2	Liten	5.1	0	0	0	0	0.024871982	1	0	0	0	0	0.024871982	8	1238	0.006462036
+39-Trich	Beita	Overgang	J21	Lav	Moldklasse 2	Liten	5.1	0	NA	0	NA	0	NA	0	NA	0	NA	0	8	1238	0.006462036
+4-Sven-Trich	Beita	Forsinket	J20	Ukjent	Ukjent	Ukjent	Ukjent	0	0	0.040728832	0.542857143	0.032154341	0.428571429	0	0	0.002143623	0.028571429	0.075026795	4	NA	NA
+40-Sven	Beita	Overgang	J21	Lav	Moldklasse 2	Liten	5.1	0.012142857	0.131782946	0	0	0.08	0.868217054	0	0	0	0	0.092142857	8	1238	0.006462036
+40-Trich	Beita	Overgang	J21	Lav	Moldklasse 2	Liten	5.1	0	0	0.25	0.833333333	0.05	0.166666667	0	0	0	0	0.3	8	1238	0.006462036
+41-Sven	Beita	Overgang	J21	Lav	Moldklasse 2	Liten	5.1	0.002133207	0.36	0	0	0.003792368	0.64	0	0	0	0	0.005925575	8	1238	0.006462036
+41-Trich	Beita	Overgang	J21	Lav	Moldklasse 2	Liten	5.1	0	NA	0	NA	0	NA	0	NA	0	NA	0	8	1238	0.006462036
+42-Sven	Beita	Overgang	J21	Lav	Moldklasse 2	Liten	5.1	0.00982686	0.466666667	0	0	0.011230697	0.533333333	0	0	0	0	0.021057557	8	1238	0.006462036
+42-Trich	Beita	Overgang	J21	Lav	Moldklasse 2	Liten	5.1	0	0	0.277777778	1	0	0	0	0	0	0	0.277777778	8	1238	0.006462036
+43-Sven	Beita	Bra	J21	Lav	Moldklasse 3	Liten	5	0.007736156	1	0	0	0	0	0	0	0	0	0.007736156	8	1238	0.006462036
+43-Trich	Beita	Bra	J21	Lav	Moldklasse 3	Liten	5	0	0	0.269230769	1	0	0	0	0	0	0	0.269230769	8	658	0.012158055
+44-Sven	Beita	Bra	J21	Lav	Moldklasse 3	Liten	5	0.038968167	0.267924528	0	0	0.086717892	0.596226415	0	0	0.019758507	0.135849057	0.145444566	8	658	0.012158055
+44-Trich	Beita	Bra	J21	Lav	Moldklasse 3	Liten	5	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	8	658	0.012158055
+45-Sven	Beita	Bra	J21	Lav	Moldklasse 3	Liten	5	0.011713031	0.695652174	0	0	0.005124451	0.304347826	0	0	0	0	0.016837482	8	658	0.012158055
+46-Sven	Beita	Bra	J21	Lav	Moldklasse 3	Liten	5	0	0	0	0	0.043798786	0.89380531	0	0	0.005203816	0.10619469	0.049002602	8	658	0.012158055
+46-Trich	Beita	Bra	J21	Lav	Moldklasse 3	Liten	5	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	8	658	0.012158055
+5-Sven-Trich	Lysmyra	Forsinket	O20	Ukjent	Ukjent	Liten	Ukjent	0.004016064	0.017709563	0.204551539	0.902007084	0.018206158	0.080283353	0	0	0	0	0.226773762	7	167	0.041916168
+6-Sven-Trich	Lysmyra	Bra	O20	Ukjent	Ukjent	Liten	Ukjent	0.001144951	0.013736264	0.02976872	0.357142857	0.049461873	0.593406593	0	0	0.002976872	0.035714286	0.083352416	6	550	0.010909091
+7-Sven-Trich	Beita	Forsinket	O20	Ukjent	Ukjent	Liten	Ukjent	0.01676996	0.139393939	0.024790376	0.206060606	0.078745899	0.654545455	0	0	0	0	0.120306234	6	1439	0.004169562
+8-Sven-Trich	Beita	Bra	O20	Ukjent	Ukjent	Liten	Ukjent	0.017795637	0.161458333	0	0	0.084959816	0.770833333	0	0	0.007462687	0.067708333	0.11021814	22	573	0.038394415
+9-Sven-Trich	Trollmyra	Bra	J20	Ukjent	Ukjent	Ukjent	Ukjent	0	NA	0	NA	0	NA	0	NA	0	NA	0	0	NA	NA
+NegK1-Sven-17-Trich	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK
+Posk1-Sven	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk1-Trich	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk2-Sven	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk2-Trich	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk3-Sven	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk4-Sven	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+V1-Trich	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK
+V2-Sven	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK	NegK
diff --git a/oomy/descriptors.txt b/oomy/descriptors.txt
new file mode 100644
index 0000000..d3f1a57
--- /dev/null
+++ b/oomy/descriptors.txt
@@ -0,0 +1,51 @@
+SampleIDs	Skifte	Vekst	Dato	pH	pH_tall	Moldklasse	PPNemMorf	PratMorf	AphAbu	AphRatio	GloAbu	GloRatio	PhyAbu	PhyRatio	PytAbu	PytRatio	sums_abu_sample
+1-oomy	Lysmyra	Bra	J20	Ukjent	Ukjent	Ukjent	Ukjent	50	0.08353145	0.154298474	0.143369464	0.264830666	0.002901237	0.005359137	0.311560624	0.575511723	0.541362776
+10-oomy	Trollmyra	Forsinket	J20	Ukjent	Ukjent	Ukjent	Liten	6	0.000405577	0.000408101	0.970443599	0.976483191	0.000253485	0.000255063	0.022712294	0.022853645	0.993814956
+11-oomy	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.007337894	0.012840188	0.416535111	0.728872519	0.000789335	0.001381215	0.146816348	0.256906077	0.571478688
+12-oomy	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.016973309	0.030446235	0.349788373	0.627440347	0.002418589	0.004338395	0.188304397	0.337775023	0.557484668
+13-oomy	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.012942002	0.018845024	0.4706063	0.685256165	0	0	0.203211371	0.295898812	0.686759674
+14-oomy	Trollmyra	Forsinket	J21	Hoy	5.6	Moldklasse 2	Stor	116	0.006530991	0.010298319	0.487801795	0.769184772	0.003412969	0.005381702	0.1364345	0.215135207	0.634180255
+15-oomy	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0.006613261	0.013629338	0.346207602	0.71350288	0.000170445	0.000351272	0.132231123	0.27251651	0.485222431
+16-oomy	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0	0	0.25496593	0.613473768	0.001651869	0.003974563	0.15899236	0.382551669	0.415610159
+17-oomy	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0.028027442	0.054730502	0.311120566	0.607539724	0.00194118	0.003790632	0.171009945	0.333939142	0.512099133
+18-oomy	Trollmyra	Overgang	J21	Hoy	5.8	Moldklasse 2	Mellom	87	0	0	0.394276215	0.845342092	0.001531993	0.003284646	0.07060204	0.151373262	0.466410248
+19-oomy	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.016490486	0.031424321	0.426963734	0.813623404	0	0	0.081314035	0.154952275	0.524768255
+2-oomy	Lysmyra	Forsinket	J20	Ukjent	Ukjent	Ukjent	Ukjent	22	0.02631246	0.075036075	0.144296859	0.411495911	0.001433692	0.004088504	0.178621126	0.509379509	0.350664137
+20-oomy	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.025420096	0.040767221	0.439814815	0.705348549	0.000514403	0.000824969	0.15779321	0.25305926	0.623542524
+21-oomy	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.030772723	0.052933958	0.438405915	0.754127622	0	0	0.11216317	0.19293842	0.581341807
+22-oomy	Trollmyra	Bra	J21	Lav	5	Moldklasse 4	Mellom	114	0.031985516	0.07326882	0.296867285	0.680030162	0	0	0.107697372	0.246701018	0.436550173
+23-oomy	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.094016217	0.298274178	0.029303089	0.09296646	0	0	0.191881351	0.608759362	0.315200657
+24-oomy	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.153793215	0.362603799	0.124726743	0.294072731	0	0	0.14561574	0.34332347	0.424135698
+25-oomy	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.128550039	0.493327155	0.048541345	0.186283596	0	0	0.083486283	0.320389249	0.260577666
+26-oomy	Lysmyra	Forsinket	J21	Lav	5.1	Moldklasse 1	Mellom	119	0.385976382	0.701991677	0.046091611	0.083828775	0	0	0.117762432	0.214179548	0.549830425
+27-oomy	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.22163806	0.606359726	0.061543219	0.168370585	0	0	0.082341116	0.225269689	0.365522396
+28-oomy	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.15636577	0.371503268	0.199317757	0.473551198	0	0	0.065216594	0.154945534	0.420900121
+29-oomy	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.165265174	0.400969599	0.139830156	0.339258661	0	0	0.107068521	0.25977174	0.41216385
+3-oomy	Beita	Bra	J20	Ukjent	Ukjent	Ukjent	Ukjent	20	0.031965359	0.174102449	0.077058856	0.419708577	0.004330079	0.023584197	0.070246566	0.382604777	0.18360086
+30-oomy	Lysmyra	Overgang	J21	Lav	4.9	Moldklasse 2	Mellom	150	0.180688919	0.394181214	0.157597927	0.343807149	0.000990703	0.002161264	0.11911294	0.259850374	0.458390489
+31-oomy	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.087822115	0.157544836	0.324226493	0.581632653	0.001034215	0.001855288	0.144359217	0.258967223	0.557442041
+32-oomy	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.054384916	0.186315303	0.145688342	0.499108381	0.000481965	0.001651146	0.091341983	0.31292517	0.291897207
+33-oomy	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.04802367	0.10034081	0.277748274	0.580328129	0	0	0.152833624	0.319331061	0.478605569
+34-oomy	Lysmyra	Bra	J21	Lav	5	Moldklasse 2	Mellom	119	0.07283219	0.189329441	0.179756706	0.4672829	0.004273238	0.011108408	0.127822832	0.332279251	0.384684966
+35-oomy	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0.021054804	0.027367856	0.584270823	0.759458009	0	0	0.164000413	0.213174135	0.76932604
+36-oomy	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0.101013124	0.361646771	0.029025777	0.103917968	0.007609114	0.027242118	0.14166631	0.507193144	0.279314325
+37-oomy	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0.031756417	0.106175332	0.028032209	0.093723709	0.004479114	0.014975602	0.234826371	0.785125358	0.299094112
+38-oomy	Beita	Forsinket	J21	Lav	5	Moldklasse 1	Liten	15	0.064994735	0.242355741	0.070036055	0.261154075	0.004435085	0.016537775	0.128713187	0.479952409	0.268179063
+39-oomy	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0.026336231	0.088733799	0.049195487	0.165752742	0.001849454	0.006231306	0.219419271	0.739282154	0.296800444
+4-oomy	Beita	Forsinket	J20	Ukjent	Ukjent	Ukjent	Ukjent	4	0.018070059	0.133918771	0.01584833	0.117453348	0.003036362	0.022502744	0.097978227	0.726125137	0.134932978
+40-oomy	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0.192534701	0.367065032	0.026132617	0.049821512	0.004762486	0.009079621	0.301094965	0.574033835	0.524524769
+41-oomy	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0.075093914	0.23380117	0.043350864	0.13497076	0	0	0.202742299	0.63122807	0.321187077
+42-oomy	Beita	Overgang	J21	Lav	5.1	Moldklasse 2	Liten	8	0.040783104	0.105356352	0.085464593	0.220783533	0.003212955	0.008300133	0.257636122	0.665559982	0.387096774
+43-oomy	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0.024420126	0.075518815	0.048895943	0.151209851	0.005485479	0.016963748	0.24456325	0.756307586	0.323364798
+44-oomy	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0.051534428	0.136467107	0.127138313	0.336671978	0.001934153	0.005121785	0.197025703	0.52173913	0.377632597
+45-oomy	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0.024978194	0.1010371	0.069410303	0.28076553	0.012608041	0.050999679	0.1402215	0.567197691	0.247218037
+46-oomy	Beita	Bra	J21	Lav	5	Moldklasse 3	Liten	8	0.222016334	0.541422594	0.039210304	0.095620642	0.001166701	0.002845188	0.147667742	0.360111576	0.41006108
+5-oomy	Lysmyra	Forsinket	O20	Ukjent	Ukjent	Ukjent	Ukjent	0	0.086550388	0.201588878	0.02255814	0.052541302	0	0	0.320232558	0.74586982	0.429341085
+6-oomy	Lysmyra	Bra	O20	Ukjent	Ukjent	Ukjent	Ukjent	1	0.089267204	0.345524691	0.033330675	0.129012346	0.005302607	0.020524691	0.130452117	0.504938272	0.258352603
+7-oomy	Beita	Forsinket	O20	Ukjent	Ukjent	Ukjent	Liten	6	0.012911934	0.022917326	0.100570468	0.178502011	0	0	0.449931238	0.798580663	0.56341364
+8-oomy	Beita	Bra	O20	Ukjent	Ukjent	Ukjent	Liten	7	0.125312644	0.359338407	0.021948854	0.06293911	0.003062631	0.008782201	0.198407432	0.568940281	0.34873156
+9-oomy	Trollmyra	Bra	J20	Ukjent	Ukjent	Ukjent	Liten	22	0.001785714	0.001901141	0.881026786	0.937975285	0	0	0.056473214	0.060123574	0.939285714
+Posk1-oomy	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk2-oomy	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk3-oomy	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
+Posk4-oomy	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK	PosK
-- 
GitLab