cleanup

957dc3aa · Simeon · 34eded4e · 34eded4e · 34eded4e
Commit 957dc3aa authored 5 months ago by Simeon
--- a/analysis/FITS1/.Rhistory
+++ b/analysis/FITS1/.Rhistory
-parameters$biom_export = "FALSE"
-# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
-path = "FITS1_DADA2_results_260821/"
-# CHANGE ME to TRUE to list all samples and generate an empty metadata file
-optional_sample_check = TRUE
-# CHANGE ME to TRUE to update cuphyr
-update_cuphyr = TRUE
-# Initiate by loading packages and setting knit options
-################# NO CHANGES NECESSARY BELOW #################
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_chunk$set(root.dir = paste0(path))
-knitr::opts_chunk$set(message = FALSE)
-knitr::opts_chunk$set(warning = FALSE)
-if (update_cuphyr) {
-devtools::install_github("simeross/cuphyr")
-}
-# Sequence and microbiome specific libraries
-library(dada2)
-library(Biostrings)
-library(DECIPHER)
-library(cuphyr)
-# The export of phyloseq objects to a BIOM format and the generation of fancier
-# ordination plots require the phyloseq-extended package. The first command
-# installs the package that is currently on the dev brach of the author's
-# repository, the second command sources some extra functions, including the
-# better ordination plot implementation.
-remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
-source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
-library(phyloseq)
-library(SIAMCAT)
-# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
-path = "FITS1_DADA2_results_260821/"
-# CHANGE ME to TRUE to list all samples and generate an empty metadata file
-optional_sample_check = TRUE
-# CHANGE ME to TRUE to update cuphyr
-update_cuphyr = TRUE
-# Initiate by loading packages and setting knit options
-################# NO CHANGES NECESSARY BELOW #################
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_chunk$set(root.dir = paste0(path))
-knitr::opts_chunk$set(message = FALSE)
-knitr::opts_chunk$set(warning = FALSE)
-if (update_cuphyr) {
-devtools::install_github("simeross/cuphyr")
-}
-# Sequence and microbiome specific libraries
-library(dada2)
-library(Biostrings)
-library(DECIPHER)
-library(cuphyr)
-# The export of phyloseq objects to a BIOM format and the generation of fancier
-# ordination plots require the phyloseq-extended package. The first command
-# installs the package that is currently on the dev brach of the author's
-# repository, the second command sources some extra functions, including the
-# better ordination plot implementation.
-remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
-source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
-library(phyloseq)
-#library(SIAMCAT)
-# Phylogeny libraries
-library(phangorn)
-library(ape)
-# Plotting and figure export
-library(gridExtra)
-library(viridis)
-library(ggpubr)
-# Tidyverse
-library(tidyverse)
-library(stringr)
-# Various packages for specific analysis
-library(readxl)
-library(openxlsx)
-library(ggpmisc)
-library(betareg)
-library(BBmisc)
-library(aod)
-library(betareg)
-#install.packages('MicrobiomeStat')
-library(MicrobiomeStat)
-# Checks whether output path exists and creates it if not. Throws warning if
-# directory exists.
-outp <- paste0(path,"/analysis_output")
-dir.create(file.path(outp))
-if (optional_sample_check) {
-seqtabcheck <- readRDS(paste0(path,"/seqtab_nochim.rds"))
-samps <- rownames(seqtabcheck)
-lensamps <- length(samps)
-blankcol <- vector(mode = "character", length = lensamps)
-blanktable <- data.frame(SampleIDs = samps, ExampleProperty1 = blankcol,
-ExampleProperty2 = blankcol,
-ExampleProperty3 = blankcol)
-write.table(blanktable, file = paste0(path, "/descriptors_blank.txt"),
-sep = "\t", row.names = F)
-cat("'seqtab_nochim.rds' contains samples in the following order:\n",
-samps, "\nThe number of samples in the file is:", lensamps, sep = "\n")
-rm(optional_sample_check, seqtabcheck, samps,
-lensamps, blankcol, blanktable, update_cuphyr)
-}else{rm(optional_sample_check, update_cuphyr)}
-# Dedicated environment containing all global analysis settings for better
-# overview and collected export of settings
-parameters <- new.env()
-# CHANGE ME to "TRUE" to remove control samples from the analysis or "FALSE" to
-# analyse all samples.
-parameters$prune_controls = "TRUE"
-# CHANGE ME to a list of unique identifiers that only occur in the names of
-# samples you do NOT want to analyse. Common examples are provided.
-parameters$controls = c("NegativK-4-Nem", "Vann", "Neg", "Positivkontroll-Nem", "Contr", "POSK")
-# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
-# name. This is useful to exclude non-target organisms or noise from organelles
-# such as Chloroplasts and Mitochondria. It is recommended to first look at all
-# data before using this setting.
-parameters$prune_noise_taxgroups = "FALSE"
-# CHANGE ME to define the taxonomic groups to be removed as noise.
-parameters$noise_taxgroups = c("Chloroplast", "Mitochondria")
-# CHANGE ME to a number of ASV counts [~reads] that analyzed samples should
-# minimally have. Samples with lower ASV counts than 'minread' will be pruned.
-# Set to 0 to not prune any samples.
-parameters$minASVcount = 3000
-# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead of
-# using the default dada2 output ('taxa.rds').
-parameters$customTax = "TRUE"
-# CHANGE ME to the location of the custom taxonomy file. This only matters if
-# parameters$customTax="TRUE", otherwise it will be ignored.
-parameters$taxfile = "Nems_DADA2_results_260821/custom_BLAST_taxonomy_nt.txt"
-# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a
-# long time depending on the number of sequences (up to days for thousands).
-# If a tree is provided as 'phylotree.rds' in 'path', then it will be used
-# regardless of the value of 'parameters$maketree'
-parameters$maketree = "FALSE"
-# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on the
-# leaf with the longest branch (outgroup). This makes analyses that rely on the
-# phylogenetic tree reproducible instead of picking a random leaf as root when
-# calculating UNIFRAC distances. Implementation based on
-# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
-# in https://github.com/joey711/phyloseq/issues/597
-parameters$roottree = "TRUE"
-## CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
-parameters$biom_export = "FALSE"
-# Prat
-set$prat_data$ndvi_temp <- (set$prat_data$ndvi+1)/2
-# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
-path = "Nems_DADA2_results_260821"
-# CHANGE ME to TRUE to list all samples and generate an empty metadata file
-optional_sample_check = TRUE
-# CHANGE ME to TRUE to update cuphyr
-update_cuphyr = TRUE
-# Initiate by loading packages and setting knit options
-################# NO CHANGES NECESSARY BELOW #################
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_chunk$set(root.dir = paste0(path))
-knitr::opts_chunk$set(message = FALSE)
-knitr::opts_chunk$set(warning = FALSE)
-if (update_cuphyr) {
-devtools::install_github("simeross/cuphyr")
-}
-# Sequence and microbiome specific libraries
-library(dada2)
-library(Biostrings)
-library(DECIPHER)
-library(cuphyr)
-# The export of phyloseq objects to a BIOM format and the generation of fancier
-# ordination plots require the phyloseq-extended package. The first command
-# installs the package that is currently on the dev brach of the author's
-# repository, the second command sources some extra functions, including the
-# better ordination plot implementation.
-remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
-source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
-library(phyloseq)
-#library(SIAMCAT)
-# Phylogeny libraries
-library(phangorn)
-library(ape)
-# Plotting and figure export
-library(gridExtra)
-library(viridis)
-library(ggpubr)
-library(cowplot)
-# Tidyverse
-library(tidyverse)
-library(stringr)
-# Various packages for specific analysis
-library(readxl)
-library(openxlsx)
-library(ggpmisc)
-library(betareg)
-library(BBmisc)
-library(aod)
-library(betareg)
-#install.packages('MicrobiomeStat')
-library(MicrobiomeStat)
-# Checks whether output path exists and creates it if not. Throws warning if
-# directory exists.
-outp <- paste0(path,"/analysis_output")
-dir.create(file.path(outp))
-if (optional_sample_check) {
-seqtabcheck <- readRDS(paste0(path,"/seqtab_nochim.rds"))
-samps <- rownames(seqtabcheck)
-lensamps <- length(samps)
-blankcol <- vector(mode = "character", length = lensamps)
-blanktable <- data.frame(SampleIDs = samps, ExampleProperty1 = blankcol,
-ExampleProperty2 = blankcol,
-ExampleProperty3 = blankcol)
-write.table(blanktable, file = paste0(path, "/descriptors_blank.txt"),
-sep = "\t", row.names = F)
-cat("'seqtab_nochim.rds' contains samples in the following order:\n",
-samps, "\nThe number of samples in the file is:", lensamps, sep = "\n")
-rm(optional_sample_check, seqtabcheck, samps,
-lensamps, blankcol, blanktable, update_cuphyr)
-}else{rm(optional_sample_check, update_cuphyr)}
-# Dedicated environment containing all global analysis settings for better
-# overview and collected export of settings
-parameters <- new.env()
-# CHANGE ME to "TRUE" to remove control samples from the analysis or "FALSE" to
-# analyse all samples.
-parameters$prune_controls = "TRUE"
-# CHANGE ME to a list of unique identifiers that only occur in the names of
-# samples you do NOT want to analyse. Common examples are provided.
-parameters$controls = c("NegativK-4-Nem", "Vann", "Neg", "Positivkontroll-Nem", "Contr", "POSK")
-# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
-# name. This is useful to exclude non-target organisms or noise from organelles
-# such as Chloroplasts and Mitochondria. It is recommended to first look at all
-# data before using this setting.
-parameters$prune_noise_taxgroups = "FALSE"
-# CHANGE ME to define the taxonomic groups to be removed as noise.
-parameters$noise_taxgroups = c("Chloroplast", "Mitochondria")
-# CHANGE ME to a number of ASV counts [~reads] that analyzed samples should
-# minimally have. Samples with lower ASV counts than 'minread' will be pruned.
-# Set to 0 to not prune any samples.
-parameters$minASVcount = 3000
-# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead of
-# using the default dada2 output ('taxa.rds').
-parameters$customTax = "TRUE"
-# CHANGE ME to the location of the custom taxonomy file. This only matters if
-# parameters$customTax="TRUE", otherwise it will be ignored.
-parameters$taxfile = "Nems_DADA2_results_260821/custom_BLAST_taxonomy_nt.txt"
-# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a
-# long time depending on the number of sequences (up to days for thousands).
-# If a tree is provided as 'phylotree.rds' in 'path', then it will be used
-# regardless of the value of 'parameters$maketree'
-parameters$maketree = "FALSE"
-# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on the
-# leaf with the longest branch (outgroup). This makes analyses that rely on the
-# phylogenetic tree reproducible instead of picking a random leaf as root when
-# calculating UNIFRAC distances. Implementation based on
-# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
-# in https://github.com/joey711/phyloseq/issues/597
-parameters$roottree = "TRUE"
-## CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
-parameters$biom_export = "FALSE"
-############### NO NEED FOR CHANGES BELOW ###############
-# Make dedicated environments to contain temporary values and manage other objects
-tmp <- new.env()
-plots <- new.env()
-set <- new.env()
-# Read in variables
-tmp$seqtabp <- readRDS(paste0(path,"/seqtab_nochim.rds"))
-if (parameters$customTax == "TRUE") {
-tmp$taxap <- read.delim(parameters$taxfile, header = TRUE, sep = "\t")
-rownames(tmp$taxap) <- colnames(tmp$seqtabp)
-tmp$taxap <- as.matrix(tmp$taxap)
-}else{
-tmp$taxap <- readRDS(paste0(path,"/taxa.rds"))}
-tmp$samp_table <- read.delim(paste0(path, "/descriptors.txt"),
-header = TRUE, sep = "\t")
-tmp$samp_list <- rownames(tmp$seqtabp)
-# Check if descriptors has the same samples as seqtabp
-if (length(tmp$samp_table[,1]) != length(tmp$samp_list)) {
-stop("There are ", length(tmp$samp_table[,1]),
-" samples in 'descriptors.txt', but ", length(tmp$samp_list),
-" samples in 'seqtab_nochim.rds'. Please make sure that the correct samples
-are contained in descriptors.txt.
-You may use 'optional_sample_check <- TRUE' in the first chunk to generate an
-empty template for 'descriptors.txt'" )
-} else if (!identical(tmp$samp_table[,1], tmp$samp_list)) {
-warning("Warning: The samples in 'descriptors.txt' do not have the same names
-or order as the samples in 'seqtab_nochim.rds'. This may be fine if
-abbreviated names were used or the sample names are not contained in
-the first column of 'descriptors.txt'. Double-checking never hurts!")
-}
-# generate phylogenetic tree of ASVs only if there is no file called
-# 'phylotree.rds' in the working directory and 'parameters$maketree' is "TRUE"
-if (!file.exists(paste0(path, "/phylotree.rds"))) {
-if (parameters$maketree == "TRUE") {
-tmp$ASVs <- getSequences(tmp$seqtabp)
-names(tmp$ASVs) <- tmp$ASVs
-tmp$ASV_align <- AlignSeqs(DNAStringSet(tmp$ASVs), anchor = NA)
-tmp$ASV_phang <- phyDat(as(tmp$ASV_align, "matrix"), type = "DNA")
-tmp$dm <- dist.ml(tmp$ASV_phang)
-tmp$treeNJ <- NJ(tmp$dm)
-tmp$fit <- pml(tmp$treeNJ, data = tmp$ASV_phang)
-tmp$fitGTR <- update(tmp$fit, k = 4, inv = 0.2)
-tmp$fitGTR <- optim.pml(tmp$fitGTR, model = "GTR", optInv = TRUE,
-optGamma = TRUE, rearrangement = "stochastic",
-control = pml.control(trace = 0))
-saveRDS(tmp$fitGTR, file = paste0(path, "/phylotree.rds"))}}
-##parse into phyloseq object
-row.names(tmp$samp_table) <- tmp$samp_list
-if (file.exists(paste0(path, "/phylotree.rds"))) {
-tmp$treep <- readRDS(paste0(path, "/phylotree.rds"))
-p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
-sample_data(tmp$samp_table),
-tax_table(tmp$taxap),
-phy_tree(tmp$treep$tree))
-}else{
-p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
-sample_data(tmp$samp_table), tax_table(tmp$taxap))}
-##Adding nucleotide info and giving sequences ASV## identifiers
-tmp$ASV_sequences <- Biostrings::DNAStringSet(taxa_names(p))
-taxa_names(p) <- paste0("ASV", seq(ntaxa(p)))
-names(tmp$ASV_sequences) <- taxa_names(p)
-p <- merge_phyloseq(p, tmp$ASV_sequences)
-##optional pruning
-if (parameters$prune_controls == "TRUE") {
-if (!is.null(parameters$controls)) {
-tmp$samp_clean <- tmp$samp_list[!tmp$samp_list %in% grep(paste0(
-parameters$controls, collapse = "|"), tmp$samp_list, value = T)]
-tmp$contr_pruned <- setdiff(tmp$samp_list, tmp$samp_clean)
-ps <- prune_samples(tmp$samp_clean, p)
-#Physeq object for Just controls
-ps.contr <- prune_samples(tmp$contr_pruned, p)
-ps.contr <- prune_taxa(taxa_sums(ps.contr) > 0, ps.contr)
-ps.transcontr <- transform_sample_counts(
-ps.contr, function(ASV) ASV/sum(ASV))
-message(cat(
-"\n",
-"Number of control samples that were pruned and will not be analysed:\n",
-length(tmp$samp_list) - length(tmp$samp_clean),
-"\n",
-"The following controls were pruned:\n",
-tmp$contr_pruned,
-"The controls are contained in a separate phyloseq object: ps.contr",
-"\n",
-sep = "\n"))
-}else{warning(cat(
-"\n\nparameters$prune_controls is TRUE but 'parameters$controls' is empty.
-No samples were pruned.\n\n"))}
-}else{ps <- p}
-# Prune ASVs defined as noise
-if (parameters$prune_noise_taxgroups == "TRUE") {
-tmp$ps_taxlvls <- colnames(tax_table(ps))
-tmp$noise_ASVs <- character(0)
-for (lvl in tmp$ps_taxlvls) {
-tmp$noise_ASVs <- c(tmp$noise_ASVs,
-cuphyr::list_subset_ASVs(
-physeq = ps, subv = parameters$noise_taxgroups,
-taxlvlsub = lvl))
-}
-tmp$noise_ASVs <- unique(tmp$noise_ASVs)
-tmp$no_noise_ASVs <- colnames(otu_table(ps))
-tmp$no_noise_ASVs <- setdiff(tmp$no_noise_ASVs, tmp$noise_ASVs)
-if (length(tmp$noise_ASVs) > 0) {
-ps <- prune_taxa(tmp$no_noise_ASVs, ps)
-tmp$no_noise_ps <- ps
-cat(length(tmp$noise_ASVs),
-"ASVs were pruned because they belonged to the following
-taxonomic groups:\n")
-cat(parameters$noise_taxgroups, "\n", sep = "\n")}
-else{
-cat("No ASVs were recognized as belonging to the following taxonomic groups
-defined as noise:\n")
-cat(parameters$noise_taxgroups, "\n", sep = "\n")
-}
-}
-# Prune samples with fewer than reads than minASVcount
-if (parameters$minASVcount > 0) {
-tmp$samp_pruned <- names(which(sample_sums(ps) < parameters$minASVcount))
-ps <- prune_samples(sample_sums(ps) >= parameters$minASVcount, ps)
-if (length(tmp$samp_pruned) > 0) {
-cat("The following samples were pruned because ASV counts were lower than",
-parameters$minASVcount,  ":\n")
-cat(tmp$samp_pruned, "\n", sep = "\n")
-}
-}
-# Remove 0 count ASVs (e.g. control ASVs that remain) from the base object
-ps <- prune_taxa(taxa_sums(ps) > 0, ps)
-sample_data(ps)[["ndvi"]] <- as.numeric(sample_data(ps)[["ndvi"]])
-# Transformed per sample (per-sample relative abundance)
-ps.trans <- transform_sample_counts(ps, function(ASV) ASV/sum(ASV))
-# Read descriptor values as numeric
-sample_data(ps.trans)[["ndvi"]] <- as.numeric(sample_data(ps.trans)[["ndvi"]])
-sample_data(ps.trans)[["ndvi_july"]] <- as.numeric(sample_data(ps.trans)[["ndvi_july"]])
-# Get a tbl of the base object for easier access in some phyloseq-independent
-# analyses. Takes some seconds, potentially up to minutes.
-ps_tbl <- as_tibble(psmelt(ps))
-ps_trans_tbl <- as_tibble(psmelt(ps.trans))
-# Condensing to Abundance per Genus and Sample
-genus_abundance_tbl_per_sample <- ps_trans_tbl %>%
-group_by(Genus, Sample) %>%
-mutate(Genus_Sample_Abundance = sum(Abundance)) %>%
-select(Genus, Sample, ndvi, ndvi_july, Genus_Sample_Abundance, Alias) %>%
-unique()
-if (parameters$roottree == "TRUE" && parameters$maketree == "TRUE") {
-phyloseq::phy_tree(ps) <- cuphyr::root_tree_in_outgroup(physeq = ps)}
-if (parameters$biom_export == "TRUE") {
-suppressWarnings(phyloseq.extended::write_phyloseq(
-p, biom_file = paste0(path, "all_samples.biom"),
-biom_format = "standard"))
-suppressWarnings(phyloseq.extended::write_phyloseq(
-ps, biom_file = file.path(path, "samples_without_controls.biom"),
-biom_format = "standard"))
-suppressWarnings(phyloseq.extended::write_phyloseq(
-ps.trans, biom_file = file.path(
-path, "samples_without_controls_rel_abundance.biom"),
-biom_format = "standard"))
-suppressWarnings(phyloseq.extended::write_phyloseq(
-ps.contr, biom_file = file.path(path, "just_controls.biom"),
-biom_format = "standard"))
-}
-ps
-##### Optional settings (sensible defaults) #####
-# Can be changed to adjust the output format for all plots. Default "pdf",
-# possible "eps"/"ps", "tex" (pictex), "jpeg", "tiff", "png", "bmp" and "svg"
-parameters$output_format = "pdf"
-# Can be changed to preferred ggplot2 theme. Recommended: "theme_bw()".
-theme_set(theme_bw())
-############### NO NEED FOR CHANGES BELOW ###############
-my_scale_col <- scale_color_viridis(discrete = TRUE)
-my_scale_fill <- scale_fill_viridis(discrete = TRUE)
-# Custom, more narrow color ranges based on viridis
-# Base order to have adjacent colors be distinct from each other
-tmp$sort_colors <- c(rbind(c(1:5), c(6:10), c(11:15), c(16:20)))
-# Customized vectors
-tmp$n_col <- 20
-tmp$viridis_greens <- viridis(tmp$n_col,  option = "D", begin = 0.85,
-end = 0.7)[tmp$sort_colors]
-tmp$viridis_reds <- viridis(tmp$n_col,  option = "B", begin = 0.7,
-end = 0.5)[tmp$sort_colors]
-tmp$viridis_blues <- viridis(tmp$n_col,  option = "D", begin = 0.2,
-end = 0.4)[tmp$sort_colors]
-tmp$viridis_yellows <- viridis(tmp$n_col,  option = "D", begin = 1,
-end = 0.9)[tmp$sort_colors]
-tmp$viridis_dark <- viridis(tmp$n_col,  option = "A", begin = 0,
-end = 0.1)[tmp$sort_colors]
-tmp$viridis_light <- viridis(tmp$n_col,  option = "A", begin = 1,
-end = 0.9)[tmp$sort_colors]
-# Collected list that is available in the global environment
-sub_viridis <- list(tmp$viridis_greens, tmp$viridis_blues, tmp$viridis_yellows,
-tmp$viridis_light, tmp$viridis_reds, tmp$viridis_dark)
-names(sub_viridis) <- c("greens", "blues", "yellows", "lights", "reds", "darks")
-tmp$out <- paste0(".", parameters$output_format)
-#################### Function ############################
-# Generic save function for plots that checks whether file exists and if so,
-# creates a new one with d/m/y+time info to avoid overwriting. Overwriting can
-# be triggered with overwrite = TRUE. Width, height and resolution are taken
-# from parameters in the 'set' environment or set to 20x20 cm with 300dpi.
-save_plot <- function(
-pl, filetype = ".pdf", plot_name = "my_plot", overwrite=FALSE){
-wp <- if (!is.null(set$wp)) set$wp else 20
-hp <- if (!is.null(set$hp)) set$hp else 20
-res <- if (!is.null(set$res)) set$res else 300
-name <- paste0("/", plot_name,filetype)
-if (file.exists(paste0(outp, name)) & !overwrite) {
-name <- paste0("/", plot_name, "_",
-format(Sys.time(), "%d-%m-%y_%H%M%S"),filetype)}
-ggsave(file.path(outp, name), pl,
-width = wp, height = hp, unit = "cm", dpi = res)
-}
-################################################
-# CHANGE ME to the sample group for color coding. Accepted values are the column
-# headers in the descriptor file.
-set$color_by = "Symptoms"
-##### Optional settings (sensible defaults) #####
-# Can be changed to change the width (in cm) of the saved plot.
-set$wp = 17
-# Can be changed to change the height (in cm) of the saved plot.
-set$hp = 20
-# Can be changed to change the resolution (in dpi) of the saved plot.
-set$res = 300
-############### NO NEED FOR CHANGES BELOW ###############
-# Rank samples
-set$ranked <- cuphyr::make_ranked_sums(p, myset = tmp$subset_id)
-set$ranked_ps <- cuphyr::make_ranked_sums(ps, myset = tmp$subset_id)
-set$ymax <- max(set$ranked$Abundance)
-set$ymax <- set$ymax + round(set$ymax/10)
-set$xmax <- nrow(set$ranked) + 1
-set$title2 <- "Samples (without controls)"
-# Stabilize colors
-set$color_vars <- set$ranked[,set$color_by]  %>%
-unlist() %>% as.character() %>% unique()
-set$color_vars <- sort(set$color_vars)
-set$color_varsPalette <- viridis(length(set$color_vars))
-names(set$color_varsPalette) <- set$color_vars
-set$my_scale_fill <- scale_fill_manual(values = set$color_varsPalette)
-# plot
-plots$overview_all <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) +
-aes_string(fill = set$color_by) +
-geom_col() + set$my_scale_fill + ggtitle("All samples") + ylim(0, set$ymax) +
-xlim(0,set$xmax) + ylab("ASV counts ('reads')")
-if (length(tmp$noise_ASVs) > 0) {
-set$ranked_nonoise <- cuphyr::make_ranked_sums(
-tmp$no_noise_ps, myset = tmp$subset_id)
-plots$overview_noise <- ggplot(
-data = set$ranked_nonoise, aes(x = Rank, y = Abundance)) +
-aes_string(fill = set$color_by) +
-geom_col() + set$my_scale_fill +
-ggtitle("Samples (without controls), noise ASVs removed") +
-ylim(0, set$ymax) +
-xlim(0,set$xmax) + ylab("ASV counts ('reads')")
-}
-if (parameters$minASVcount > 0) {
-plots$overview_all <- plots$overview_all +
-geom_hline(yintercept = parameters$minASVcount, linetype = "dashed") +
-ggtitle("All samples (ASV count cutoff indicated)")
-set$title2 <- "Samples (without controls and low count samps)"
-}
-plots$overview_ps <- ggplot(data = set$ranked_ps, aes(x = Rank, y = Abundance)) +
-aes_string(fill = set$color_by) +
-geom_col() + set$my_scale_fill + ggtitle(set$title2) + ylim(0, set$ymax) +
-xlim(0,set$xmax) + ylab("ASV counts ('reads')")
-plots$combo_overview <- ggarrange(
-plots$overview_all, plots$overview_ps, nrow = 2, align = "v",
-common.legend = TRUE, legend = "right")
--- a/analysis/Nems/.Rhistory
+++ b/analysis/Nems/.Rhistory