cleanup

957dc3aa · Simeon · 34eded4e · 34eded4e · 34eded4e
Commit 957dc3aa authored 5 months ago by Simeon
--- a/analysis/FITS1/.Rhistory
+++ b/analysis/FITS1/.Rhistory
-parameters$biom_export = "FALSE"
-# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
-path = "FITS1_DADA2_results_260821/"
-# CHANGE ME to TRUE to list all samples and generate an empty metadata file
-optional_sample_check = TRUE
-# CHANGE ME to TRUE to update cuphyr
-update_cuphyr = TRUE
-# Initiate by loading packages and setting knit options
-################# NO CHANGES NECESSARY BELOW #################
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_chunk$set(root.dir = paste0(path))
-knitr::opts_chunk$set(message = FALSE)
-knitr::opts_chunk$set(warning = FALSE)
-if (update_cuphyr) {
-devtools::install_github("simeross/cuphyr")
-}
-# Sequence and microbiome specific libraries
-library(dada2)
-library(Biostrings)
-library(DECIPHER)
-library(cuphyr)
-# The export of phyloseq objects to a BIOM format and the generation of fancier
-# ordination plots require the phyloseq-extended package. The first command
-# installs the package that is currently on the dev brach of the author's
-# repository, the second command sources some extra functions, including the
-# better ordination plot implementation.
-remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
-source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
-library(phyloseq)
-library(SIAMCAT)
-# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
-path = "FITS1_DADA2_results_260821/"
-# CHANGE ME to TRUE to list all samples and generate an empty metadata file
-optional_sample_check = TRUE
-# CHANGE ME to TRUE to update cuphyr
-update_cuphyr = TRUE
-# Initiate by loading packages and setting knit options
-################# NO CHANGES NECESSARY BELOW #################
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_chunk$set(root.dir = paste0(path))
-knitr::opts_chunk$set(message = FALSE)
-knitr::opts_chunk$set(warning = FALSE)
-if (update_cuphyr) {
-devtools::install_github("simeross/cuphyr")
-}
-# Sequence and microbiome specific libraries
-library(dada2)
-library(Biostrings)
-library(DECIPHER)
-library(cuphyr)
-# The export of phyloseq objects to a BIOM format and the generation of fancier
-# ordination plots require the phyloseq-extended package. The first command
-# installs the package that is currently on the dev brach of the author's
-# repository, the second command sources some extra functions, including the
-# better ordination plot implementation.
-remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
-source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
-library(phyloseq)
-#library(SIAMCAT)
-# Phylogeny libraries
-library(phangorn)
-library(ape)
-# Plotting and figure export
-library(gridExtra)
-library(viridis)
-library(ggpubr)
-# Tidyverse
-library(tidyverse)
-library(stringr)
-# Various packages for specific analysis
-library(readxl)
-library(openxlsx)
-library(ggpmisc)
-library(betareg)
-library(BBmisc)
-library(aod)
-library(betareg)
-#install.packages('MicrobiomeStat')
-library(MicrobiomeStat)
-# Checks whether output path exists and creates it if not. Throws warning if
-# directory exists.
-outp <- paste0(path,"/analysis_output")
-dir.create(file.path(outp))
-if (optional_sample_check) {
-seqtabcheck <- readRDS(paste0(path,"/seqtab_nochim.rds"))
-samps <- rownames(seqtabcheck)
-lensamps <- length(samps)
-blankcol <- vector(mode = "character", length = lensamps)
-blanktable <- data.frame(SampleIDs = samps, ExampleProperty1 = blankcol,
-ExampleProperty2 = blankcol,
-ExampleProperty3 = blankcol)
-write.table(blanktable, file = paste0(path, "/descriptors_blank.txt"),
-sep = "\t", row.names = F)
-cat("'seqtab_nochim.rds' contains samples in the following order:\n",
-samps, "\nThe number of samples in the file is:", lensamps, sep = "\n")
-rm(optional_sample_check, seqtabcheck, samps,
-lensamps, blankcol, blanktable, update_cuphyr)
-}else{rm(optional_sample_check, update_cuphyr)}
-# Dedicated environment containing all global analysis settings for better
-# overview and collected export of settings
-parameters <- new.env()
-# CHANGE ME to "TRUE" to remove control samples from the analysis or "FALSE" to
-# analyse all samples.
-parameters$prune_controls = "TRUE"
-# CHANGE ME to a list of unique identifiers that only occur in the names of
-# samples you do NOT want to analyse. Common examples are provided.
-parameters$controls = c("NegativK-4-Nem", "Vann", "Neg", "Positivkontroll-Nem", "Contr", "POSK")
-# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
-# name. This is useful to exclude non-target organisms or noise from organelles
-# such as Chloroplasts and Mitochondria. It is recommended to first look at all
-# data before using this setting.
-parameters$prune_noise_taxgroups = "FALSE"
-# CHANGE ME to define the taxonomic groups to be removed as noise.
-parameters$noise_taxgroups = c("Chloroplast", "Mitochondria")
-# CHANGE ME to a number of ASV counts [~reads] that analyzed samples should
-# minimally have. Samples with lower ASV counts than 'minread' will be pruned.
-# Set to 0 to not prune any samples.
-parameters$minASVcount = 3000
-# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead of
-# using the default dada2 output ('taxa.rds').
-parameters$customTax = "TRUE"
-# CHANGE ME to the location of the custom taxonomy file. This only matters if
-# parameters$customTax="TRUE", otherwise it will be ignored.
-parameters$taxfile = "Nems_DADA2_results_260821/custom_BLAST_taxonomy_nt.txt"
-# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a
-# long time depending on the number of sequences (up to days for thousands).
-# If a tree is provided as 'phylotree.rds' in 'path', then it will be used
-# regardless of the value of 'parameters$maketree'
-parameters$maketree = "FALSE"
-# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on the
-# leaf with the longest branch (outgroup). This makes analyses that rely on the
-# phylogenetic tree reproducible instead of picking a random leaf as root when
-# calculating UNIFRAC distances. Implementation based on
-# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
-# in https://github.com/joey711/phyloseq/issues/597
-parameters$roottree = "TRUE"
-## CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
-parameters$biom_export = "FALSE"
-# Prat
-set$prat_data$ndvi_temp <- (set$prat_data$ndvi+1)/2
-# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
-path = "Nems_DADA2_results_260821"
-# CHANGE ME to TRUE to list all samples and generate an empty metadata file
-optional_sample_check = TRUE
-# CHANGE ME to TRUE to update cuphyr
-update_cuphyr = TRUE
-# Initiate by loading packages and setting knit options
-################# NO CHANGES NECESSARY BELOW #################
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_chunk$set(root.dir = paste0(path))
-knitr::opts_chunk$set(message = FALSE)
-knitr::opts_chunk$set(warning = FALSE)
-if (update_cuphyr) {
-devtools::install_github("simeross/cuphyr")
-}
-# Sequence and microbiome specific libraries
-library(dada2)
-library(Biostrings)
-library(DECIPHER)
-library(cuphyr)
-# The export of phyloseq objects to a BIOM format and the generation of fancier
-# ordination plots require the phyloseq-extended package. The first command
-# installs the package that is currently on the dev brach of the author's
-# repository, the second command sources some extra functions, including the
-# better ordination plot implementation.
-remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
-source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R" )
-library(phyloseq)
-#library(SIAMCAT)
-# Phylogeny libraries
-library(phangorn)
-library(ape)
-# Plotting and figure export
-library(gridExtra)
-library(viridis)
-library(ggpubr)
-library(cowplot)
-# Tidyverse
-library(tidyverse)
-library(stringr)
-# Various packages for specific analysis
-library(readxl)
-library(openxlsx)
-library(ggpmisc)
-library(betareg)
-library(BBmisc)
-library(aod)
-library(betareg)
-#install.packages('MicrobiomeStat')
-library(MicrobiomeStat)
-# Checks whether output path exists and creates it if not. Throws warning if
-# directory exists.
-outp <- paste0(path,"/analysis_output")
-dir.create(file.path(outp))
-if (optional_sample_check) {
-seqtabcheck <- readRDS(paste0(path,"/seqtab_nochim.rds"))
-samps <- rownames(seqtabcheck)
-lensamps <- length(samps)
-blankcol <- vector(mode = "character", length = lensamps)
-blanktable <- data.frame(SampleIDs = samps, ExampleProperty1 = blankcol,
-ExampleProperty2 = blankcol,
-ExampleProperty3 = blankcol)
-write.table(blanktable, file = paste0(path, "/descriptors_blank.txt"),
-sep = "\t", row.names = F)
-cat("'seqtab_nochim.rds' contains samples in the following order:\n",
-samps, "\nThe number of samples in the file is:", lensamps, sep = "\n")
-rm(optional_sample_check, seqtabcheck, samps,
-lensamps, blankcol, blanktable, update_cuphyr)
-}else{rm(optional_sample_check, update_cuphyr)}
-# Dedicated environment containing all global analysis settings for better
-# overview and collected export of settings
-parameters <- new.env()
-# CHANGE ME to "TRUE" to remove control samples from the analysis or "FALSE" to
-# analyse all samples.
-parameters$prune_controls = "TRUE"
-# CHANGE ME to a list of unique identifiers that only occur in the names of
-# samples you do NOT want to analyse. Common examples are provided.
-parameters$controls = c("NegativK-4-Nem", "Vann", "Neg", "Positivkontroll-Nem", "Contr", "POSK")
-# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
-# name. This is useful to exclude non-target organisms or noise from organelles
-# such as Chloroplasts and Mitochondria. It is recommended to first look at all
-# data before using this setting.
-parameters$prune_noise_taxgroups = "FALSE"
-# CHANGE ME to define the taxonomic groups to be removed as noise.
-parameters$noise_taxgroups = c("Chloroplast", "Mitochondria")
-# CHANGE ME to a number of ASV counts [~reads] that analyzed samples should
-# minimally have. Samples with lower ASV counts than 'minread' will be pruned.
-# Set to 0 to not prune any samples.
-parameters$minASVcount = 3000
-# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead of
-# using the default dada2 output ('taxa.rds').
-parameters$customTax = "TRUE"
-# CHANGE ME to the location of the custom taxonomy file. This only matters if
-# parameters$customTax="TRUE", otherwise it will be ignored.
-parameters$taxfile = "Nems_DADA2_results_260821/custom_BLAST_taxonomy_nt.txt"
-# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a
-# long time depending on the number of sequences (up to days for thousands).
-# If a tree is provided as 'phylotree.rds' in 'path', then it will be used
-# regardless of the value of 'parameters$maketree'
-parameters$maketree = "FALSE"
-# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on the
-# leaf with the longest branch (outgroup). This makes analyses that rely on the
-# phylogenetic tree reproducible instead of picking a random leaf as root when
-# calculating UNIFRAC distances. Implementation based on
-# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
-# in https://github.com/joey711/phyloseq/issues/597
-parameters$roottree = "TRUE"
-## CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
-parameters$biom_export = "FALSE"
-############### NO NEED FOR CHANGES BELOW ###############
-# Make dedicated environments to contain temporary values and manage other objects
-tmp <- new.env()
-plots <- new.env()
-set <- new.env()
-# Read in variables
-tmp$seqtabp <- readRDS(paste0(path,"/seqtab_nochim.rds"))
-if (parameters$customTax == "TRUE") {
-tmp$taxap <- read.delim(parameters$taxfile, header = TRUE, sep = "\t")
-rownames(tmp$taxap) <- colnames(tmp$seqtabp)
-tmp$taxap <- as.matrix(tmp$taxap)
-}else{
-tmp$taxap <- readRDS(paste0(path,"/taxa.rds"))}
-tmp$samp_table <- read.delim(paste0(path, "/descriptors.txt"),
-header = TRUE, sep = "\t")
-tmp$samp_list <- rownames(tmp$seqtabp)
-# Check if descriptors has the same samples as seqtabp
-if (length(tmp$samp_table[,1]) != length(tmp$samp_list)) {
-stop("There are ", length(tmp$samp_table[,1]),
-" samples in 'descriptors.txt', but ", length(tmp$samp_list),
-" samples in 'seqtab_nochim.rds'. Please make sure that the correct samples
-are contained in descriptors.txt.
-You may use 'optional_sample_check <- TRUE' in the first chunk to generate an
-empty template for 'descriptors.txt'" )
-} else if (!identical(tmp$samp_table[,1], tmp$samp_list)) {
-warning("Warning: The samples in 'descriptors.txt' do not have the same names
-or order as the samples in 'seqtab_nochim.rds'. This may be fine if
-abbreviated names were used or the sample names are not contained in
-the first column of 'descriptors.txt'. Double-checking never hurts!")
-}
-# generate phylogenetic tree of ASVs only if there is no file called
-# 'phylotree.rds' in the working directory and 'parameters$maketree' is "TRUE"
-if (!file.exists(paste0(path, "/phylotree.rds"))) {
-if (parameters$maketree == "TRUE") {
-tmp$ASVs <- getSequences(tmp$seqtabp)
-names(tmp$ASVs) <- tmp$ASVs
-tmp$ASV_align <- AlignSeqs(DNAStringSet(tmp$ASVs), anchor = NA)
-tmp$ASV_phang <- phyDat(as(tmp$ASV_align, "matrix"), type = "DNA")
-tmp$dm <- dist.ml(tmp$ASV_phang)
-tmp$treeNJ <- NJ(tmp$dm)
-tmp$fit <- pml(tmp$treeNJ, data = tmp$ASV_phang)
-tmp$fitGTR <- update(tmp$fit, k = 4, inv = 0.2)
-tmp$fitGTR <- optim.pml(tmp$fitGTR, model = "GTR", optInv = TRUE,
-optGamma = TRUE, rearrangement = "stochastic",
-control = pml.control(trace = 0))
-saveRDS(tmp$fitGTR, file = paste0(path, "/phylotree.rds"))}}
-##parse into phyloseq object
-row.names(tmp$samp_table) <- tmp$samp_list
-if (file.exists(paste0(path, "/phylotree.rds"))) {
-tmp$treep <- readRDS(paste0(path, "/phylotree.rds"))
-p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
-sample_data(tmp$samp_table),
-tax_table(tmp$taxap),
-phy_tree(tmp$treep$tree))
-}else{
-p <- phyloseq(otu_table(tmp$seqtabp, taxa_are_rows = FALSE),
-sample_data(tmp$samp_table), tax_table(tmp$taxap))}
-##Adding nucleotide info and giving sequences ASV## identifiers
-tmp$ASV_sequences <- Biostrings::DNAStringSet(taxa_names(p))
-taxa_names(p) <- paste0("ASV", seq(ntaxa(p)))
-names(tmp$ASV_sequences) <- taxa_names(p)
-p <- merge_phyloseq(p, tmp$ASV_sequences)
-##optional pruning
-if (parameters$prune_controls == "TRUE") {
-if (!is.null(parameters$controls)) {
-tmp$samp_clean <- tmp$samp_list[!tmp$samp_list %in% grep(paste0(
-parameters$controls, collapse = "|"), tmp$samp_list, value = T)]
-tmp$contr_pruned <- setdiff(tmp$samp_list, tmp$samp_clean)
-ps <- prune_samples(tmp$samp_clean, p)
-#Physeq object for Just controls
-ps.contr <- prune_samples(tmp$contr_pruned, p)
-ps.contr <- prune_taxa(taxa_sums(ps.contr) > 0, ps.contr)
-ps.transcontr <- transform_sample_counts(
-ps.contr, function(ASV) ASV/sum(ASV))
-message(cat(
-"\n",
-"Number of control samples that were pruned and will not be analysed:\n",
-length(tmp$samp_list) - length(tmp$samp_clean),
-"\n",
-"The following controls were pruned:\n",
-tmp$contr_pruned,
-"The controls are contained in a separate phyloseq object: ps.contr",
-"\n",
-sep = "\n"))
-}else{warning(cat(
-"\n\nparameters$prune_controls is TRUE but 'parameters$controls' is empty.
-No samples were pruned.\n\n"))}
-}else{ps <- p}
-# Prune ASVs defined as noise
-if (parameters$prune_noise_taxgroups == "TRUE") {
-tmp$ps_taxlvls <- colnames(tax_table(ps))
-tmp$noise_ASVs <- character(0)
-for (lvl in tmp$ps_taxlvls) {
-tmp$noise_ASVs <- c(tmp$noise_ASVs,
-cuphyr::list_subset_ASVs(
-physeq = ps, subv = parameters$noise_taxgroups,
-taxlvlsub = lvl))
-}
-tmp$noise_ASVs <- unique(tmp$noise_ASVs)
-tmp$no_noise_ASVs <- colnames(otu_table(ps))
-tmp$no_noise_ASVs <- setdiff(tmp$no_noise_ASVs, tmp$noise_ASVs)
-if (length(tmp$noise_ASVs) > 0) {
-ps <- prune_taxa(tmp$no_noise_ASVs, ps)
-tmp$no_noise_ps <- ps
-cat(length(tmp$noise_ASVs),
-"ASVs were pruned because they belonged to the following
-taxonomic groups:\n")
-cat(parameters$noise_taxgroups, "\n", sep = "\n")}
-else{
-cat("No ASVs were recognized as belonging to the following taxonomic groups
-defined as noise:\n")
-cat(parameters$noise_taxgroups, "\n", sep = "\n")
-}
-}
-# Prune samples with fewer than reads than minASVcount
-if (parameters$minASVcount > 0) {
-tmp$samp_pruned <- names(which(sample_sums(ps) < parameters$minASVcount))
-ps <- prune_samples(sample_sums(ps) >= parameters$minASVcount, ps)
-if (length(tmp$samp_pruned) > 0) {
-cat("The following samples were pruned because ASV counts were lower than",
-parameters$minASVcount,  ":\n")
-cat(tmp$samp_pruned, "\n", sep = "\n")
-}
-}
-# Remove 0 count ASVs (e.g. control ASVs that remain) from the base object
-ps <- prune_taxa(taxa_sums(ps) > 0, ps)
-sample_data(ps)[["ndvi"]] <- as.numeric(sample_data(ps)[["ndvi"]])
-# Transformed per sample (per-sample relative abundance)
-ps.trans <- transform_sample_counts(ps, function(ASV) ASV/sum(ASV))
-# Read descriptor values as numeric
-sample_data(ps.trans)[["ndvi"]] <- as.numeric(sample_data(ps.trans)[["ndvi"]])
-sample_data(ps.trans)[["ndvi_july"]] <- as.numeric(sample_data(ps.trans)[["ndvi_july"]])
-# Get a tbl of the base object for easier access in some phyloseq-independent
-# analyses. Takes some seconds, potentially up to minutes.
-ps_tbl <- as_tibble(psmelt(ps))
-ps_trans_tbl <- as_tibble(psmelt(ps.trans))
-# Condensing to Abundance per Genus and Sample
-genus_abundance_tbl_per_sample <- ps_trans_tbl %>%
-group_by(Genus, Sample) %>%
-mutate(Genus_Sample_Abundance = sum(Abundance)) %>%
-select(Genus, Sample, ndvi, ndvi_july, Genus_Sample_Abundance, Alias) %>%
-unique()
-if (parameters$roottree == "TRUE" && parameters$maketree == "TRUE") {
-phyloseq::phy_tree(ps) <- cuphyr::root_tree_in_outgroup(physeq = ps)}
-if (parameters$biom_export == "TRUE") {
-suppressWarnings(phyloseq.extended::write_phyloseq(
-p, biom_file = paste0(path, "all_samples.biom"),
-biom_format = "standard"))
-suppressWarnings(phyloseq.extended::write_phyloseq(
-ps, biom_file = file.path(path, "samples_without_controls.biom"),
-biom_format = "standard"))
-suppressWarnings(phyloseq.extended::write_phyloseq(
-ps.trans, biom_file = file.path(
-path, "samples_without_controls_rel_abundance.biom"),
-biom_format = "standard"))
-suppressWarnings(phyloseq.extended::write_phyloseq(
-ps.contr, biom_file = file.path(path, "just_controls.biom"),
-biom_format = "standard"))
-}
-ps
-##### Optional settings (sensible defaults) #####
-# Can be changed to adjust the output format for all plots. Default "pdf",
-# possible "eps"/"ps", "tex" (pictex), "jpeg", "tiff", "png", "bmp" and "svg"
-parameters$output_format = "pdf"
-# Can be changed to preferred ggplot2 theme. Recommended: "theme_bw()".
-theme_set(theme_bw())
-############### NO NEED FOR CHANGES BELOW ###############
-my_scale_col <- scale_color_viridis(discrete = TRUE)
-my_scale_fill <- scale_fill_viridis(discrete = TRUE)
-# Custom, more narrow color ranges based on viridis
-# Base order to have adjacent colors be distinct from each other
-tmp$sort_colors <- c(rbind(c(1:5), c(6:10), c(11:15), c(16:20)))
-# Customized vectors
-tmp$n_col <- 20
-tmp$viridis_greens <- viridis(tmp$n_col,  option = "D", begin = 0.85,
-end = 0.7)[tmp$sort_colors]
-tmp$viridis_reds <- viridis(tmp$n_col,  option = "B", begin = 0.7,
-end = 0.5)[tmp$sort_colors]
-tmp$viridis_blues <- viridis(tmp$n_col,  option = "D", begin = 0.2,
-end = 0.4)[tmp$sort_colors]
-tmp$viridis_yellows <- viridis(tmp$n_col,  option = "D", begin = 1,
-end = 0.9)[tmp$sort_colors]
-tmp$viridis_dark <- viridis(tmp$n_col,  option = "A", begin = 0,
-end = 0.1)[tmp$sort_colors]
-tmp$viridis_light <- viridis(tmp$n_col,  option = "A", begin = 1,
-end = 0.9)[tmp$sort_colors]
-# Collected list that is available in the global environment
-sub_viridis <- list(tmp$viridis_greens, tmp$viridis_blues, tmp$viridis_yellows,
-tmp$viridis_light, tmp$viridis_reds, tmp$viridis_dark)
-names(sub_viridis) <- c("greens", "blues", "yellows", "lights", "reds", "darks")
-tmp$out <- paste0(".", parameters$output_format)
-#################### Function ############################
-# Generic save function for plots that checks whether file exists and if so,
-# creates a new one with d/m/y+time info to avoid overwriting. Overwriting can
-# be triggered with overwrite = TRUE. Width, height and resolution are taken
-# from parameters in the 'set' environment or set to 20x20 cm with 300dpi.
-save_plot <- function(
-pl, filetype = ".pdf", plot_name = "my_plot", overwrite=FALSE){
-wp <- if (!is.null(set$wp)) set$wp else 20
-hp <- if (!is.null(set$hp)) set$hp else 20
-res <- if (!is.null(set$res)) set$res else 300
-name <- paste0("/", plot_name,filetype)
-if (file.exists(paste0(outp, name)) & !overwrite) {
-name <- paste0("/", plot_name, "_",
-format(Sys.time(), "%d-%m-%y_%H%M%S"),filetype)}
-ggsave(file.path(outp, name), pl,
-width = wp, height = hp, unit = "cm", dpi = res)
-}
-################################################
-# CHANGE ME to the sample group for color coding. Accepted values are the column
-# headers in the descriptor file.
-set$color_by = "Symptoms"
-##### Optional settings (sensible defaults) #####
-# Can be changed to change the width (in cm) of the saved plot.
-set$wp = 17
-# Can be changed to change the height (in cm) of the saved plot.
-set$hp = 20
-# Can be changed to change the resolution (in dpi) of the saved plot.
-set$res = 300
-############### NO NEED FOR CHANGES BELOW ###############
-# Rank samples
-set$ranked <- cuphyr::make_ranked_sums(p, myset = tmp$subset_id)
-set$ranked_ps <- cuphyr::make_ranked_sums(ps, myset = tmp$subset_id)
-set$ymax <- max(set$ranked$Abundance)
-set$ymax <- set$ymax + round(set$ymax/10)
-set$xmax <- nrow(set$ranked) + 1
-set$title2 <- "Samples (without controls)"
-# Stabilize colors
-set$color_vars <- set$ranked[,set$color_by]  %>%
-unlist() %>% as.character() %>% unique()
-set$color_vars <- sort(set$color_vars)
-set$color_varsPalette <- viridis(length(set$color_vars))
-names(set$color_varsPalette) <- set$color_vars
-set$my_scale_fill <- scale_fill_manual(values = set$color_varsPalette)
-# plot
-plots$overview_all <- ggplot(data = set$ranked, aes(x = Rank, y = Abundance)) +
-aes_string(fill = set$color_by) +
-geom_col() + set$my_scale_fill + ggtitle("All samples") + ylim(0, set$ymax) +
-xlim(0,set$xmax) + ylab("ASV counts ('reads')")
-if (length(tmp$noise_ASVs) > 0) {
-set$ranked_nonoise <- cuphyr::make_ranked_sums(
-tmp$no_noise_ps, myset = tmp$subset_id)
-plots$overview_noise <- ggplot(
-data = set$ranked_nonoise, aes(x = Rank, y = Abundance)) +
-aes_string(fill = set$color_by) +
-geom_col() + set$my_scale_fill +
-ggtitle("Samples (without controls), noise ASVs removed") +
-ylim(0, set$ymax) +
-xlim(0,set$xmax) + ylab("ASV counts ('reads')")
-}
-if (parameters$minASVcount > 0) {
-plots$overview_all <- plots$overview_all +
-geom_hline(yintercept = parameters$minASVcount, linetype = "dashed") +
-ggtitle("All samples (ASV count cutoff indicated)")
-set$title2 <- "Samples (without controls and low count samps)"
-}
-plots$overview_ps <- ggplot(data = set$ranked_ps, aes(x = Rank, y = Abundance)) +
-aes_string(fill = set$color_by) +
-geom_col() + set$my_scale_fill + ggtitle(set$title2) + ylim(0, set$ymax) +
-xlim(0,set$xmax) + ylab("ASV counts ('reads')")
-plots$combo_overview <- ggarrange(
-plots$overview_all, plots$overview_ps, nrow = 2, align = "v",
-common.legend = TRUE, legend = "right")
--- a/analysis/Nems/.Rhistory
+++ b/analysis/Nems/.Rhistory
-top = set$top_n,
-ignore_na = set$ignore_na)
-set$topnASVs <- names(sort(taxa_sums(ps), decreasing = TRUE))[1:set$topASVs]
-set$ps.topnASVs <- prune_taxa(set$topnASVs, ps.trans)
-if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
-set$toptax <- union(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl],
-phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl])
-set$toptax <- sort(set$toptax)
-set$taxlvlPalette <- viridis(length(set$toptax))
-names(set$taxlvlPalette) <- set$toptax
-if (exists("highlight", envir = set)) {
-# It is possible to change the highlight color here by substituting
-# 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f"'
-set$taxlvlPalette[set$highlight] <- sub_viridis$reds[4]
-}
-set$taxlvlPalette <- set$taxlvlPalette[sort(names(set$taxlvlPalette))]
-set$my_scale_fill <- scale_fill_manual(values = set$taxlvlPalette,
-na.value = "grey")
-}else{
-set$my_scale_fill <- my_scale_fill
-}
-# Plot
-if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
-set$my_scale_fill <- scale_fill_manual(
-values = set$taxlvlPalette[
-sort(unique(phyloseq::tax_table(set$ps.topnTax)[,set$taxlvl]))],
-na.value = "grey")
-}
-plots$topn_tax <- plot_bar(set$ps.topnTax,
-x = set$x_axis_value,
-fill = set$taxlvl,
-title = paste0("Top ", set$top_n, " ", set$taxlvl)) +
-facet_grid(paste0("~", set$panel_by), scales = "free", space = "free") +
-set$my_scale_fill +
-ylab(set$y_axis_label) +
-xlab("Sample") +
-theme(strip.background = element_blank(), strip.text = element_text(size = 16),
-axis.title=element_text(size=16), legend.text = element_text(size=14))
-if (set$fuse_ASVs) {
-plots$topn_tax <- plots$topn_tax + geom_bar(
-aes_string(color = set$taxlvl, fill = set$taxlvl),
-stat = "identity", position = "stack") +
-scale_color_manual(values = set$taxlvlPalette, na.value = NA)
-}
-if (set$unify_colors | exists("highlight", envir = set) | set$fuse_ASVs) {
-set$my_scale_fill <- scale_fill_manual(
-values = set$taxlvlPalette[
-sort(unique(phyloseq::tax_table(set$ps.topnASVs)[,set$taxlvl]))],
-na.value = "grey")
-}
-plots$topn_ASVs <- plot_bar(set$ps.topnASVs,
-x = set$x_axis_value,
-fill = set$taxlvl,
-title = paste0("Top", set$topASVs, "_ASVs")) +
-facet_grid(paste0("~", set$panel_by), scales = "free_x", space = "free") +
-set$my_scale_fill +
-ylab(set$y_axis_label) +
-xlab("Sample") +
-theme(strip.background = element_blank())
-# save
-save_plot(plots$topn_tax, plot_name = paste0("Top", set$top_n, "_", set$taxlvl),
-filetype = tmp$out)
-save_plot(plots$topn_ASVs, plot_name = paste0("Top", set$topASVs, "_ASVs"),
-filetype = tmp$out)
-# Clean up plot parameters
-rm(list = ls(set), envir = set)
-# Print to standard out
-plots$topn_tax
-plots$topn_ASVs
-# CHANGE ME to the desired sample categories on the x-axis.
-# Accepted values are the column headers in the descriptor file.
-set$x_axis_value = "ndvi"
-# CHANGE ME to the taxonomic level of interest (color coding). Accepted values
-# are the column headers in your descriptor file.
-set$taxlvl = "Genus"
-# CHANGE ME to change the number of Top n taxa to be plotted at
-# taxlvl.
-set$top_n = 10
-# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
-set$ignore_na = FALSE
-# CHANGE ME to an entry at the chosen taxonomic level you want to highlight.
-# Comment out to not highlight anything.
-# set$highlight =
-##### Optional settings (sensible defaults) #####
-# Can be changed to change the width (in cm) of the saved plot.
-set$wp = 20
-# Can be changed to change the height (in cm) of the saved plot.
-set$hp = 13
-# Can be changed to change the resolution (in dpi) of the saved plot.
-set$res = 300
-# Can be changed to change the y-axis label
-set$y_axis_label = "Relative abundance"
-# Can be changed to change the x-axis label
-set$x_axis_label = "Sample"
-############### NO NEED FOR CHANGES BELOW ###############
-# Estimate Alpha-diversity (Shannon)
-set$alpha_div_ps_trans <- estimate_richness(ps.trans, measures = "Shannon") %>%
-as_tibble(rownames = "Sample")
-# Make physeq objects of top n taxa and top n ASVs
-set$ps.topnTax <- cuphyr::abundant_tax_physeq(ps.trans, lvl = set$taxlvl,
-top = set$top_n,
-ignore_na = set$ignore_na)
-# Plot
-set$my_scale_fill <- my_scale_fill
-set$topntax_tbl <- psmelt(set$ps.topnTax) %>%
-as_tibble() %>%
-left_join(set$alpha_div_ps_trans, by = "Sample") %>%
-select(Genus, Alias, ndvi, ndvi_july, Abundance, Shannon) %>%
-filter(Abundance > 0) %>%
-group_by(Genus, Alias, ndvi, ndvi_july, Shannon) %>%
-summarise(Abundance = sum(Abundance)) %>%
-arrange(ndvi) %>%
-mutate(ndvi_rank = c(1:length(ndvi)))
-plots$topn_tax_custom <- ggplot(set$topntax_tbl, aes(x = fct_reorder(Alias, ndvi),
-y = Abundance,
-fill = Genus)) +
-#title = paste0("Top ", set$top_n, " ", set$taxlvl))) +
-geom_col(color = "black") +
-set$my_scale_fill +
-ylab(set$y_axis_label) +
-xlab(set$x_axis_label) +
-theme(strip.background = element_blank(),
-#strip.text = element_text(size = 12),
-#axis.title=element_text(size=12),
-#legend.text = element_text(size=12),
-legend.position = "bottom")
-plots$ndvi_dot_plot <- ggplot(set$topntax_tbl, aes(fct_reorder(Alias, ndvi),
-y = ndvi)) +
-geom_point() +
-theme(strip.background = element_blank(),
-# strip.text = element_text(size = 12),
-# axis.title=element_text(size=12),
-# legend.text = element_text(size=12),
-axis.title.x=element_blank()) +
-ylab("NDVI")
-plots$shannon_dot_plot <- ggplot(set$topntax_tbl,
-aes(fct_reorder(Alias, ndvi),
-y = Shannon)) +
-geom_point() +
-theme(strip.background = element_blank(),
-# strip.text = element_text(size = 12),
-# axis.title=element_text(size=12),
-# legend.text = element_text(size=12),
-axis.title.x=element_blank()) +
-ylab("Shannon")
-plots$combo_topn_custom <- ggarrange(plots$ndvi_dot_plot,
-plots$shannon_dot_plot,
-plots$topn_tax_custom,
-nrow = 3,
-heights = c(1, 1, 3),
-align = "v")
-plots$combo_topn_custom
-save_plot(plots$combo_topn_custom, plot_name = paste0("Customized_NDVI_Shannon_plot"),
-filetype = tmp$out)
-set$my_formula <- y ~ x
-set$plot_title <- "Nematodes"
-topntax_data <- set$topntax_tbl %>%
-mutate(Taxa = 'nematodes') %>%
-ungroup() %>%
-select(Alias, ndvi, Shannon, Taxa) %>%
-distinct()
-write.csv(topntax_data, file = "../topntax_all_taxa/topntax_data_nematodes.csv")
-# Excel file with field data
-morphological_id <- file.path("2020-patch-larvik-a-counts.csv")
-# Reduce abundance per sample and genus data to genera of interest
-genera_of_interest_mol_morph <- c("Globodera", "Meloidogyne", "Pratylenchus")
-genus_abundance_tbl_per_sample_mm <- genus_abundance_tbl_per_sample %>%
-filter(Genus %in% genera_of_interest_mol_morph)
-# read in and parse morphological count data
-morph_data <- read_delim(morphological_id) %>%
-mutate(genus = str_replace(genus, "cyst", "Globodera"),
-genus = str_replace(genus, "mel", "Meloidogyne"),
-genus = str_replace(genus, "prat", "Pratylenchus")) %>%
-dplyr::rename(Sample = rute, Genus = genus)
-# Combine morphological and metabarcoding data
-set$data_mol_morph <- genus_abundance_tbl_per_sample_mm %>%
-left_join(morph_data)
-### Plotting
-# New facet label names for Genus variable
-genus_names <- list(
-'Globodera'=expression(paste(italic("Globodera "))), #"spp.")),
-'Meloidogyne'=expression(paste(italic("Meloidogyne "))), #"spp.")),
-'Pratylenchus'=expression(paste(italic("Pratylenchus ")))#, "spp."))
-)
-genus_labeller <- function(variable,value){
-return(genus_names[value])
-}
-signif.labs <- c("pval > 0.05", "pval <= 0.05")
-names(signif.labs) <- c("p value > 0.05", "p value <= 0.05")
-# Subset data for modeling
-set$glob_data <- subset(set$data_mol_morph, Genus == "Globodera")
-set$mel_data <- subset(set$data_mol_morph, Genus == "Meloidogyne")
-set$prat_data <- subset(set$data_mol_morph, Genus == "Pratylenchus")
-# Quasi-binomial model for Globodera
-model_glob = glm(
-Genus_Sample_Abundance ~ total_count,
-data = set$glob_data,
-family = quasibinomial
-)
-# Quasi-binomial model for Meloidogyne
-model_mel = glm(
-Genus_Sample_Abundance ~ total_count,
-data = set$mel_data,
-family = quasibinomial
-)
-# Quasi-binomial model for Pratylenchus
-model_prat = glm(
-Genus_Sample_Abundance ~ total_count,
-data = set$prat_data,
-family = quasibinomial
-)
-pval_glm <- function(glm){
-coef(summary(glm))[,4][2]
-}
-set$data_mol_morph <- set$data_mol_morph %>%
-mutate(pval = ifelse(Genus == "Globodera", pval_glm(model_glob), NA)) %>%
-mutate(pval = ifelse(Genus == "Pratylenchus", pval_glm(model_prat), pval)) %>%
-mutate(pval = ifelse(Genus == "Meloidogyne", pval_glm(model_mel), pval)) %>%
-mutate(signif = ifelse(pval <= 0.05, "pval <= 0.05", "pval > 0.05"))
-# Other plotting variables
-# Can be changed to change the width (in cm) of the saved plot.
-set$wp = 20
-# Can be changed to change the height (in cm) of the saved plot.
-set$hp = 13
-# Can be changed to change the resolution (in dpi) of the saved plot.
-set$res = 300
-set$my_formula <- y ~ x
-set$plot_title <- paste("Molecular analysis vs",
-"morpological analysis", sep = "\n")
-# Plotting quasibinomial model
-plots$mol_morph_quasi <- ggplot(set$data_mol_morph,
-aes(x=total_count,
-y = Genus_Sample_Abundance)) +
-geom_point(alpha = 0.7) +
-#ggtitle(set$plot_title) +
-theme(plot.title = element_text(hjust = 0.5, size = 20),
-panel.grid.major = element_blank(),
-panel.grid.minor = element_blank(),
-panel.background = element_blank(),
-#axis.line = element_line(colour = "dark grey"),
-strip.background = element_blank(),
-strip.text = element_text(face = "italic", size = 16),
-axis.text = element_text(),
-text = element_text()) +
-xlab("Nematodes / 250 ml soil") +
-ylab("Relative abundance") +
-geom_smooth(method = "glm", formula = set$my_formula, se = F,
-method.args = list(family = quasibinomial),
-aes(color = signif, linetype = signif)) +
-scale_color_manual(values=c('black', 'lightgrey'),
-name= 'Significance',
-labels = names(signif.labs)) +
-scale_linetype_discrete(name= 'Significance',
-labels = names(signif.labs)) +
-facet_wrap(~Genus, scales = "free",
-#labeller = labeller(Genus = genus_labs)) +
-labeller = genus_labeller)
-#Print and save plot
-plots$mol_morph_quasi
-save_plot(plots$mol_morph_quasi, plot_name = paste0("Molecular_and_Morphological_Plot"),
-filetype = ".pdf")
-# Generate table overview of morphological counts for manuscript
-morph_table_manuscript <- pivot_wider(set$data_mol_morph %>%
-select(-Genus_Sample_Abundance,
-ndvi),
-names_from = Genus,
-values_from = total_count) %>%
-arrange(Alias)
-write.xlsx(morph_table_manuscript, file = "t1_morph_table_manuscript.xlsx")
-# Create properly formatted tibble with columns Sample, ndvi_01 (ndvi translated to (0, 1) interval) and one column for each genus
-# containing the sample abundances for that genus.
-ldf <- data.frame(genus_abundance_tbl_per_sample %>% pivot_wider(id_cols = c('Sample', 'ndvi'), names_from = 'Genus', values_from = 'Genus_Sample_Abundance'))
-ldf_genus_data <- data.frame(ldf) %>% select(!c('Sample', 'ndvi'))
-colnames(ldf_genus_data) <- gsub(' ', '.', colnames(ldf_genus_data))
-ldf <- cbind.data.frame(
-Sample = ldf$Sample,
-ndvi_01 = (ldf$ndvi + 1) / 2.0
-)
-ldf <- tibble(cbind(ldf, ldf_genus_data))
-n_samples_by_genus <- data.frame(ldf_genus_data > 0) %>% mutate_if(is.logical, as.numeric) %>% colSums() %>% sort(decreasing = TRUE)
-keep_n <- 100 # Maximum number of genera to include in the analysis
-top_n_occurence_genuses <- names(n_samples_by_genus[1:keep_n])
-top_n_occurence_genuses <- top_n_occurence_genuses[!is.na(top_n_occurence_genuses)]
-l_genus_ldf <- ldf %>% select(all_of(top_n_occurence_genuses))
-l_genus_ldf_transposed <- data.frame(t(l_genus_ldf))
-l_meta_ldf <- ldf %>% select('ndvi_01')
-l_model <- linda(
-l_genus_ldf_transposed,
-l_meta_ldf,
-formula = '~ ndvi_01',
-feature.dat.type = 'proportion',
-is.winsor = FALSE,
-alpha = 0.05
-)
-# Print model info
-l_model
-# Show effect size and significance plots
-linda.plot(
-l_model,
-variables.plot = c('ndvi_01'),
-alpha = 0.05,
-lfc.cut = 1,
-legend = TRUE
-)
-# Write supplementary table for manuscript
-l_model_df <- as.data.frame(l_model$output)
-write.xlsx(l_model_df, file = "supplementary_table_ndvi_regression_nematodes.xlsx", rowNames = TRUE, colnames = TRUE)
-# Prat
-set$prat_data$ndvi_temp <- (set$prat_data$ndvi+1)/2
-model_prat_count_ndvi = glm(
-ndvi_temp ~ total_count,
-data = set$prat_data,
-family = quasibinomial
-)
-summary(model_prat_count_ndvi)
-# Glob
-set$glob_data$ndvi_temp <- (set$glob_data$ndvi+1)/2
-model_glob_count_ndvi = glm(
-ndvi_temp ~ total_count,
-data = set$glob_data,
-family = quasibinomial
-)
-summary(model_glob_count_ndvi)
-# Mel
-set$mel_data$ndvi_temp <- (set$mel_data$ndvi+1)/2
-model_mel_count_ndvi = glm(
-ndvi_temp ~ total_count,
-data = set$mel_data,
-family = quasibinomial
-)
-summary(model_mel_count_ndvi)
-data_mol_morph_long_temp <- pivot_longer(set$data_mol_morph,
-cols = c(Genus_Sample_Abundance,
-total_count),
-names_to = "type",
-values_to = "value")
-data_mol_morph_long_temp$ndvi <- (data_mol_morph_long_temp$ndvi+1)/2
-pval_glm <- function(glm){
-coef(summary(glm))[,4][2]
-}
-set$data_mol_morph_long <- data_mol_morph_long_temp %>%
-mutate(pval = ifelse(Genus == "Globodera", pval_glm(model_glob), NA)) %>%
-mutate(pval = ifelse(Genus == "Pratylenchus", pval_glm(model_prat), pval)) %>%
-mutate(pval = ifelse(Genus == "Meloidogyne", pval_glm(model_mel), pval)) %>%
-mutate(signif = ifelse(pval <= 0.05, "pval <= 0.05", "pval > 0.05"))
-# Count data vs NDVI
-plots$morph_count_ndvi <- ggplot(set$data_mol_morph_long,
-aes(x = value,
-y = ndvi)) +
-geom_point(alpha = 0.7) +
-#ggtitle(set$plot_title) +
-theme(plot.title = element_text(hjust = 0.5, size = 20),
-panel.grid.major = element_blank(),
-panel.grid.minor = element_blank(),
-panel.background = element_blank(),
-axis.line = element_line(colour = "dark grey"),
-strip.background = element_blank(),
-strip.text.x = element_text(face = "italic")#,
-#axis.text = element_text(size = 12),
-#text = element_text(size = 14)
-) +
-#xlab("Nematodes / 250 ml soil") +
-#ylab("NDVI") +
-geom_smooth(method = "glm", formula = set$my_formula, se = F, color = "dark grey",
-method.args = list(family = quasibinomial)) +
-facet_wrap(~Genus, scales = "free_x")
-plots$morph_count_ndvi
-# Print and save plot
-plots$morph_count_ndvi
-save_plot(plots$morph_count_ndvi, plot_name = paste0("morph_count_ndvi"),
-filetype = tmp$out)
-set$data_mol_morph_long <- data_mol_morph_long_temp %>%
-mutate(pval = ifelse(Genus == "Globodera", pval_glm(model_glob_count_ndvi), NA)) %>%
-mutate(pval = ifelse(Genus == "Pratylenchus", pval_glm(model_prat_count_ndvi), pval)) %>%
-mutate(pval = ifelse(Genus == "Meloidogyne", pval_glm(model_mel_count_ndvi), pval)) %>%
-mutate(pval = ifelse(Genus == "Globodera" & type == "Genus_Sample_Abundance", 0.5574120890, pval)) %>%
-mutate(pval = ifelse(Genus == "Pratylenchus" & type == "Genus_Sample_Abundance", 0.5177548997, pval)) %>%
-mutate(pval = ifelse(Genus == "Meloidogyne" & type == "Genus_Sample_Abundance", 0.0001920515, pval)) %>%
-mutate(signif = ifelse(pval <= 0.05, "pval <= 0.05", "pval > 0.05"))
-# Morph data
-set$data_morph <- set$data_mol_morph_long[set$data_mol_morph_long$type == 'total_count',]
-set$data_morph <- set$data_morph %>%
-rename("value" = "Number_of_nematodes")
-set$data_mol_morph_long
-set$data_mol_morph_long <- data_mol_morph_long_temp %>%
-mutate(pval = ifelse(Genus == "Globodera", pval_glm(model_glob_count_ndvi), NA)) %>%
-mutate(pval = ifelse(Genus == "Pratylenchus", pval_glm(model_prat_count_ndvi), pval)) %>%
-mutate(pval = ifelse(Genus == "Meloidogyne", pval_glm(model_mel_count_ndvi), pval)) %>%
-mutate(pval = ifelse(Genus == "Globodera" & type == "Genus_Sample_Abundance", 0.5574120890, pval)) %>%
-mutate(pval = ifelse(Genus == "Pratylenchus" & type == "Genus_Sample_Abundance", 0.5177548997, pval)) %>%
-mutate(pval = ifelse(Genus == "Meloidogyne" & type == "Genus_Sample_Abundance", 0.0001920515, pval)) %>%
-mutate(signif = ifelse(pval <= 0.05, "pval <= 0.05", "pval > 0.05"))
-# Morph data
-set$data_morph <- set$data_mol_morph_long[set$data_mol_morph_long$type == 'total_count',]
-set$data_morph <- set$data_morph %>%
-dplyr::rename("Number_of_nematodes" = value)
-# Relative abundance data
-set$data_relabu <- set$data_mol_morph_long[set$data_mol_morph_long$type == 'Genus_Sample_Abundance',]
-set$data_relabu <- set$data_relabu %>%
-rename("value" = "Relative_abundance")
-set$data_mol_morph_long <- data_mol_morph_long_temp %>%
-mutate(pval = ifelse(Genus == "Globodera", pval_glm(model_glob_count_ndvi), NA)) %>%
-mutate(pval = ifelse(Genus == "Pratylenchus", pval_glm(model_prat_count_ndvi), pval)) %>%
-mutate(pval = ifelse(Genus == "Meloidogyne", pval_glm(model_mel_count_ndvi), pval)) %>%
-mutate(pval = ifelse(Genus == "Globodera" & type == "Genus_Sample_Abundance", 0.5574120890, pval)) %>%
-mutate(pval = ifelse(Genus == "Pratylenchus" & type == "Genus_Sample_Abundance", 0.5177548997, pval)) %>%
-mutate(pval = ifelse(Genus == "Meloidogyne" & type == "Genus_Sample_Abundance", 0.0001920515, pval)) %>%
-mutate(signif = ifelse(pval <= 0.05, "pval <= 0.05", "pval > 0.05"))
-# Morph data
-set$data_morph <- set$data_mol_morph_long[set$data_mol_morph_long$type == 'total_count',]
-set$data_morph <- set$data_morph %>%
-dplyr::rename("Number_of_nematodes" = value)
-# Relative abundance data
-set$data_relabu <- set$data_mol_morph_long[set$data_mol_morph_long$type == 'Genus_Sample_Abundance',]
-set$data_relabu <- set$data_relabu %>%
-dplyr::rename("Relative_abundance" = value)
-signif.labs <- c("pval > 0.05", "pval <= 0.05")
-names(signif.labs) <- c("p value > 0.05", "p value <= 0.05")
-breaks_morph <- function(x) { if (max(x) < 200) seq (0, 100, 100) else seq(0, 600, 50)}
-breaks_mb <- function(x) { if (max(x) < 0.1) seq(0, 0.002, 3) else seq(0, 3, 1) }
-breaks_fun_morph <- function(x) {
-if (max(x) > 500) {
-seq(0, 800, 200)
-} else if (max(x) > 150) {
-seq(0, 200, length.out = 5)
-} else {
-seq(0, 100, length.out = 5)
-}}
-# MORPHOLOGIAL AND NDVI PLOT
-# Relative abundance and NDVI plotting
-# Axis ticks MB
-breaks_fun_mb <- function(x) {
-if (max(x) > 0.8) {
-seq(0, 0.9, length.out = 4)
-} else if (max(x) > 0.2) {
-seq(0, 0.6, 0.2)
-} else {
-seq(0, 0.003, length.out = 4)
-}}
-p1 <- ggplot(set$data_relabu,
-aes(x = Relative_abundance,
-y = ndvi)) +
-geom_point(alpha = 0.7, color = "black") +
-ggtitle("a)") +
-theme(plot.title = element_text(hjust = 0, size = 12),
-panel.grid.major = element_blank(),
-panel.grid.minor = element_blank(),
-panel.background = element_blank(),
-strip.background = element_blank(),
-axis.title = element_text(size = 9),
-strip.text.x = element_text(face = "italic"),
-plot.caption = element_text(size = 7, hjust = 0, face = "italic",
-margin = margin(t = 0.2, unit = "cm")),
-plot.caption.position = "plot",
-plot.subtitle = element_text(size = 10, hjust = 0, vjust = 0.5,
-margin = margin(b = 0.8, unit = "cm"))) +
-xlab("Relative abundance") +
-ylab("NDVI") +
-geom_smooth(method = "glm", formula = set$my_formula, se = FALSE,
-method.args = list(family = quasibinomial),
-aes(color = signif, linetype = signif)) +
-scale_color_manual(values=c('black', 'lightgrey'),
-name= 'Significance',
-labels = names(signif.labs)) +
-scale_linetype_discrete(name= 'Significance',
-labels = names(signif.labs)) +
-facet_wrap(~Genus, scales = "free_x") +
-scale_x_continuous(breaks = breaks_fun_mb, limits = c(0, NA))
-p1
-# Plotting NDVI vs metabarcoding
-p2 <- ggplot(set$data_morph,
-aes(x = Number_of_nematodes,
-y = ndvi)) +
-geom_point(alpha = 0.7, color = "black") +
-ggtitle("b)") +
-theme(plot.title = element_text(hjust = 0, size = 12),
-panel.grid.major = element_blank(),
-panel.grid.minor = element_blank(),
-panel.background = element_blank(),
-strip.background = element_blank(),
-axis.title = element_text(size = 9),
-panel.spacing=unit(2, "lines"),
-strip.text.x = element_text(face = "italic"),
-plot.caption = element_text(size = 7, hjust = 0, face = "italic",
-margin = margin(t = 0.2, unit = "cm")),
-plot.caption.position = "plot",
-plot.subtitle = element_text(size = 10, hjust = 0, vjust = 0.5,
-margin = margin(b = 0.8, unit = "cm"))) +
-xlab("Nematodes / 250 ml soil") +
-ylab("NDVI") +
-geom_smooth(method = "glm", formula = set$my_formula, se = FALSE,
-method.args = list(family = quasibinomial),
-aes(color = signif, linetype = signif)) +
-scale_color_manual(values=c('black', 'lightgrey'),
-name= 'Significance',
-labels = names(signif.labs)) +
-scale_linetype_discrete(name= 'Significance',
-labels = names(signif.labs)) +
-facet_wrap(~Genus, scales = "free_x") +
-scale_x_continuous(breaks = breaks_fun_morph, limits = c(0, NA))
-p2
-# Combining the two plots
-plots$combo_morph_metab_ndvi <- plot_grid(p1, p2,
-ncol = 1,
-align = "hv")
-plots$combo_morph_metab_ndvi
-# Print and save plot
-save_plot(plots$combo_morph_metab_ndvi, plot_name = paste0("morph_relabu_ndvi"),
-filetype = tmp$out)
-found <- c(40, 24, 16, 37, 10, 27, 74, 100, 100, 78, 32, 28)
-not_found <- c(24,12, 7, 7, 24, 52)
-wilcox.test(found, not_found)