diff --git a/.Rhistory b/.Rhistory
index 1d32261190984fa56c90ac53b3def39fb108b00b..5c93200459bc3febf43089b757e5de12270e339b 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,3 +1,29 @@
+colors_oomyc_genera <- colors_oomyc_genera[order(names(colors_oomyc_genera))]
+colors_oomyc_genera["Phytophthora"] <- viridis_reds[1]
+colors_oomyc_genera["Pythium"] <- viridis_reds[4]
+#define ggplot theme (shared between genus and species level plots)
+shared_theme <- theme(legend.key.size = unit(3, "mm"),
+legend.position = "none",
+axis.title.x = element_blank(),
+plot.margin = margin(20,10,10,10),
+legend.text.align = 0)
+shared_theme_abu <- theme(legend.key.size = unit(3, "mm"),
+legend.position = "none",
+axis.title.x = element_blank(),
+plot.margin = margin(20,10,10,10),
+legend.text.align = 0)
+#Format legend labels
+if(customLabels){
+labelling <- c(expression(paste("uncultured ", italic("Apodachlya"))),
+expression(paste("uncultured ", italic("Lagenidium"))),
+expression(paste("uncultured ", italic("Oomycetes"))),
+expression(paste("uncultured ", italic("Phytophthora"))),
+expression(paste("uncultured ", italic("Pythium"))),
+expression(italic("Achlya"), italic("Albugo"), italic("Aphanomyces"),
+italic("Apodachlya"), italic("Atkinsiella"), italic("Bremia"),
+italic("Brevilegnia"), italic("Dictyuchus"), italic("Eurychasma"),
+italic("Geolegnia"), italic("Globisporangium"), italic("Halocrusticida"),
+italic("Haptoglossa"), italic("Hyaloperonospora"), italic("Lagena"),
 italic("Lagenidium"), italic("Leptolegnia"), italic("Myzocytiopsis"),
 italic("Paralagenidium"), italic("Peronospora"), italic("Phragmosporangium"),
 italic("Phytophthora"), italic("Phytopythium"), italic("Pilasporangium"),
@@ -7,506 +33,480 @@ italic("Saprolegnia"), italic("Thraustotheca"), italic("Wilsoniana")), NA)
 }else{
 labelling <- unique(ASVs_funguild$Genus)
 }
-guild_g <- ggplot(ASVs_funguild, aes(x = guild_genus, fill = Genus)) + geom_bar() + shared_theme +
-scale_fill_manual(values = colors_oomyc_genera, na.value = "grey", labels = labelling) +
+guild_g <- ggplot(ASVs_funguild, aes(x = guild_genus, fill = Genus)) +
+geom_bar() + shared_theme +
+scale_fill_manual(values = colors_oomyc_genera,
+na.value = "grey", labels = labelling) +
 ylab("Number of ASVs") + ylim(0,1500)
-guild_s <- ggplot(ASVs_funguild, aes(x = guild_species, fill = Genus)) + geom_bar() + shared_theme +
-scale_fill_manual(values = colors_oomyc_genera, na.value = "grey", labels = labelling) +
+guild_s <- ggplot(ASVs_funguild, aes(x = guild_species, fill = Genus)) +
+geom_bar() + shared_theme +
+scale_fill_manual(values = colors_oomyc_genera,
+na.value = "grey", labels = labelling) +
 ylab("Number of ASVs") + ylim(0,1500)
-guild_g_abu <- ggplot(ASVs_funguild, aes(x = guild_genus, y = Abundance_sum/1000000, fill = Genus)) +
+guild_g_abu <- ggplot(ASVs_funguild, aes(x = guild_genus,
+y = Abundance_sum/1000000,
+fill = Genus)) +
 geom_col() + shared_theme_abu +
-scale_fill_manual(values = colors_oomyc_genera, na.value = "grey", labels = labelling) +
+scale_fill_manual(values = colors_oomyc_genera,
+na.value = "grey", labels = labelling) +
 ylab("Abundance in M reads") + ylim(0,2.5)
-guild_s_abu <- ggplot(ASVs_funguild, aes(x = guild_species, y = Abundance_sum/1000000, fill = Genus)) +
+guild_s_abu <- ggplot(ASVs_funguild, aes(x = guild_species,
+y = Abundance_sum/1000000,
+fill = Genus)) +
 geom_col() + shared_theme_abu +
-scale_fill_manual(values = colors_oomyc_genera, na.value = "grey", labels = labelling) +
+scale_fill_manual(values = colors_oomyc_genera,
+na.value = "grey", labels = labelling) +
 ylab("Abundance in M reads") + ylim(0,2.5)
-guilds <- ggarrange(guild_g, guild_g_abu, guild_s, guild_s_abu, nrow = 2, ncol = 2,
+guilds <- ggarrange(guild_g, guild_g_abu, guild_s,
+guild_s_abu, nrow = 2, ncol = 2,
 common.legend = TRUE, legend = "right" ,align = "hv",
-labels = c("Genus level", "", "Species level", ""), font.label = list(size = 14))
+labels = c("Genus level", "", "Species level", ""),
+font.label = list(size = 14))
 guilds
-save_plot(guilds ,plot_name = "FunGuild_combo")
+save_plot(guilds ,plot_name = "FunGuild_combo_Fig2")
+cat("Chunk successfully run")
+## CHANGE ME to change the width (in cm) of the output.
+wp <- 17.5
+# CHANGE ME to change the height (in cm) of the output.
+hp <- 15
+# CHANGE ME to change the resolution (in dpi) of the output.
+res <- 150
+# calculate averages for display in graphs
+avgNames <- c("PPUntr", "PPBait","ChrUntr", "ChrBait",
+"O-PUntr", "O-PBait", "OthUntr", "OthBait")
+avgs <- c(mean(ranksUntrPP$Abundance),
+mean(ranksBaitPP$Abundance),
+mean(ranksUntrAlgae$Abundance),
+mean(ranksBaitAlgae$Abundance),
+mean(ranksUntrOwoP$Abundance),
+mean(ranksBaitOwoP$Abundance),
+mean(rankedOthersUntr$OtherASVcounts),
+mean(rankedOthersBait$OtherASVcounts))
+names(avgs) <- avgNames
+# multiply to transform relative abundances into avg percentages and round to
+# one decimal
+avgs <- avgs*100
+avgs <- format(round(avgs, 1), nsmall = 1)
+# Define fixed color schemes for consistency between plots
+plotvars <- c("Chrysophyceae", "All oomycetes",
+"Other oomycetes", "Other classes", "PP")
+# Just to catch others/exceptions, make a general palette that will be
+# overwritten below
+plotPalette <- viridis(length(plotvars))
+names(plotPalette) <- plotvars
+plotPalette["Other classes"] <- "#C0C0C0"
+plotPalette["Chrysophyceae"] <- viridis_yellows[2]
+plotPalette["PP"] <- viridis_reds[4]
+plotPalette["Other oomycetes"] <- viridis_reds[1]
+plotPalette["All oomycetes"] <- viridis_reds[1]
+# plot total ASV counts, sorted by rank and color by class.
+AllBait <- ggplot(data = OthersRankPP,
+aes(x = Rank, y = Abundance, fill = Set))
+AllUntr <- ggplot(data = UntrOthersRankPP,
+aes(x = Rank, y = Abundance, fill = Set))
+customTheme <- theme(legend.position = "none",
+axis.title = element_blank(),
+title = element_text(size = 6))
+customThemeA <- theme(legend.position = "none",
+legend.title = element_blank(),
+legend.key.size = unit(5,"mm"),
+axis.title.x = element_blank())
+xUpperLim <- nrow(OthersRankPP) + 1
+ytextl = 0.05
+xtextl = 15
+ytexts = 0.75
+xtexts = 45
+middleline <- geom_blank() #geom_hline(yintercept = 0.5,
+# linetype="dashed", color="black")
+pAllBait <- AllBait + geom_col() +
+geom_col(data = AlgaeInvRankOom) +
+geom_col(data = OomRankPP) +
+geom_col(data = ranksBaitPP) +
+scale_fill_manual(values = plotPalette) +
+ggtitle("After enrichment") + xlab("Sample rank") +
+labs(fill = "") + ylim(0,1) + xlim(0,xUpperLim) +
+customThemeA + middleline +
+annotate(geom = 'text', label = paste("Mean PP", avgs["PPBait"], "%"),
+x = xtextl, y = ytextl) +
+ylab("rel. Abundance")
+pAllUntr <- AllUntr + geom_col() +
+geom_col(data = UntrAlgaeInvRankOom) +
+geom_col(data = UntrOomRankPP) +
+geom_col(data = ranksUntrPP) +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Before enrichment") + xlab("Sample rank") +
+labs(fill = "") + ylim(0,1) + xlim(0,xUpperLim) +
+customThemeA + middleline +
+annotate(geom = 'text', label = paste("Mean PP", avgs["PPUntr"], "%"),
+x = xtextl, y = ytextl) +
+ylab("rel. Abundance")
+AlgBait <- ggplot(data = ranksBaitAlgae,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Chrysophyceae - after enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["ChrBait"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+OomBait <- ggplot(data = ranksBaitOwoP,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Other oomycetes - after enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["O-PBait"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+OthBait <- ggplot(data = rankedOthersBait,
+aes(x = Rank, y = OtherASVcounts, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Other classes - after enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["OthBait"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+AlgUntr <- ggplot(data = ranksUntrAlgae,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Chrysophyceae - before enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["ChrUntr"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+OomUntr <- ggplot(data = ranksUntrOwoP,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Other oomycetes - before enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["O-PUntr"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+OthUntr <- ggplot(data = rankedOthersUntr,
+aes(x = Rank, y = OtherASVcounts, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Other classes - before enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["OthUntr"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+#A.combo <- ggarrange(pAllUntr, pAllBait, nrow=2,
+# labels = c("A"), vjust = 1, common.legend = TRUE, legend = "bottom")
+untr_smallplots <- ggarrange(AlgUntr, OomUntr, OthUntr, nrow = 3)
+untr_smallplots_anot <- annotate_figure(untr_smallplots,
+left = text_grob("Rel. abundance",
+rot = 90, size = 11))
+bait_smallplots <- ggarrange(AlgBait, OomBait, OthBait, nrow = 3)
+bait_smallplots_anot <- annotate_figure(bait_smallplots,
+left = text_grob("Rel. abundance",
+rot = 90, size = 11))
+combo <- ggarrange(pAllUntr, untr_smallplots_anot, pAllBait,
+bait_smallplots_anot, nrow = 2, ncol = 2, widths = c(2,1),
+labels = c("A", "", "B", ""), common.legend = TRUE,
+legend = "bottom") + theme(plot.margin = margin(10,10,10,10))
+#Save plot
+save_plot(combo, plot_name = "Class_ranking_Fig3")
+combo
+cat("Chunk successfully run")
+library(rstatix)
+pairwise_test <- function(ASV_list){
+ps.temp <- prune_taxa(ASV_list, ps.trans)
+rankTemp <- cuphyr::make_ranked_sums(ps.temp)
+rankTemp <- as.data.frame(rankTemp)
+teststats <- pairwise_t_test(rankTemp, formula = Abundance ~ Bait)
+return(teststats)}
+ChrysUntrVsBait <- pairwise_test(subASVsAlg)
+subASVsO_P <- setdiff(subASVsOom, subASVsPP)
+OomUntrVsBait <- pairwise_test(subASVsO_P)
+PPUntrVsBait <- pairwise_test(subASVsPP)
+subASVsOth <- setdiff(taxa_names(ps), subASVsOom)
+subASVsOth <- setdiff(subASVsOth, subASVsAlg)
+OthUntrVsBait <- pairwise_test(subASVsOth)
+all_stats <- list(PPUntrVsBait, ChrysUntrVsBait, OomUntrVsBait, OthUntrVsBait)
+names(all_stats) <- c("PP", "Chrysophyceae", "Other oomycetes", "Other classes")
+all_stats <- bind_rows(all_stats,.id = "Genera from")
+all_stats
 cat("Chunk successfully run")
-topnt2$labels
-topnt2$plot_env$fill
-topnt2$plot_env$mdf
-topnt2$plot_env$fill
-topnt2$plot_env$mdf$Genus %>% unique()
-topnt2$plot_env$mdf$Genus %>% unique() %>% sort()
-names(colors_lvl2)
-names(colors_lvl2) %>% sort()
-#CHANGE ME to the amount of top taxlvl to plot (e.g. 'numt = 20' plots Top20 Taxa at taxlvl)
-numt <- 5
-#CHANGE ME to the amount of top taxa you want to plot (e.g. 'numt2 = 25' plots Top25 Taxa at taxlvl2)
-numt2 <- 10
-#CHANGE ME to the primary parameter of interest (x-axis). Accepted values are the column headers in the descriptor file.
-primaryPar <- "Soil"
-##CHANGE ME to the secondary parameter of interest (panels). Accepted values are the column headers in the descriptor file.
-secondaryPar <- "Bait"
-##CHANGE ME to the taxonomic level of interest (color coding). Accepted values are available taxonomic levels. 'taxlvl' should be higher than 'taxlvl2', e.g. Class and Genus
-taxlvl <- "Class"
-taxlvl2 <- "Genus"
-#Adds formatted legend labels for the specific output in the manuscript. WARNING! Will overwrite actual values and needs to be turned off/adjusted manually when data is updated
-customLabels <- TRUE
-##CHANGE ME to change the width (in cm) of the output.
 wp <- 20
-#CHANGE ME to change the height (in cm) of the output.
 hp <- 15
-#CHANGE ME to change the resolution (in dpi) of the output.
-res <- 300
-#Get total number of reads and number of Pythium/Phytophthora reads (without contro samples!) to find out percentage of Pyhium and Phytophthora
-total_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::mutate(sumAbu = sum(Abundance)) %>%
-select(sumAbu) %>%
-unique() %>%
-unlist() %>%
-unname()
-pythium_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::group_by(Genus) %>%
-summarise(sumAbu = sum(Abundance)) %>%
-dplyr::filter(Genus == "Pythium") %>%
-select(sumAbu) %>%
-unlist() %>%
-unname()
-phytoph_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::group_by(Genus) %>%
-summarise(sumAbu = sum(Abundance)) %>%
-dplyr::filter(Genus == "Phytophthora") %>%
-select(sumAbu) %>%
-unlist() %>%
-unname()
-prctg_pyth_phyt_str <- paste0("\n\nTotal number of reads in the samples: ",
-total_reads_no_contr ,
-", number of Pythium reads: ",
-pythium_reads_no_contr,
-", number of Phytophthora reads: ",
-phytoph_reads_no_contr,
-"\nPercentage of Pythium total: ",
-pythium_reads_no_contr/total_reads_no_contr*100,
-"%\nPercentage of Phytophthora total: ",
-phytoph_reads_no_contr/total_reads_no_contr*100, "%")
-#Most abundant at specified levels, phyloseq objects
-ps.topnt <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl, top = numt,
-ignore_na = TRUE, silent = FALSE)
-ps.topnt2 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans,lvl = taxlvl2, top = numt2,
-ignore_na = TRUE, silent = FALSE)
-#Abundance lists (sorted by abundance)
-taxlist_topnt1 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl, top = numt,
-output_format = "tops", ignore_na = TRUE)
-taxlist_topnt2 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl2, top = numt2,
-output_format = "tops", ignore_na = TRUE)
-#tidy evaluation translation magic (needed so the dplyr functions that extract 'tax_lookup' below can parse the strings from the variable)
-lvl1 <- sym(taxlvl)
-lvl2 <- sym(taxlvl2)
-lvl1 <- enquo(lvl1)
-lvl2 <- enquo(lvl2)
-#make lookup table to guide coloring
-tax_lookup <- ps_tbl %>%
-group_by(!!lvl1, !!lvl2) %>%
-summarise() %>%
-filter(!!lvl2 %in% taxlist_topnt2)
-#Define fixed color schemes for classes for consistency between plots. This is a color scheme specified for this particular published figure. Can be used for orientation in general cases.
-#Background colors to be overwritten for special cases
-colors_lvl1 <- viridis(length(taxlist_topnt1), option = "D", begin = 1 , end = 0.5)
-names(colors_lvl1) <- taxlist_topnt1
-colors_lvl2 <- viridis(length(taxlist_topnt2), option = "D", begin = 1 , end = 0.5)
-names(colors_lvl2) <- taxlist_topnt2
-colors_combo <- c(colors_lvl1, colors_lvl2)
-#Special cases
-colors_combo["Chrysophyceae"] <- viridis_yellows[2]
-colors_combo["Pedospumella"] <- viridis_yellows[1]
-colors_combo["'Spumella-like'"] <- viridis_light[1]
-colors_combo["Oomycetes"] <- viridis_reds[4]
-colors_combo["Phytophthora"] <- viridis_reds[1]
-colors_combo["Pythium"] <- viridis_reds[4]
-colors_combo["Aphanomyces"] <- viridis_blues[1]
-colors_combo["Globisporangium"] <- "#4C0000FF"
-colors_combo["Pythiogeton"] <- viridis_blues[2]
-colors_combo["Saprolegnia"] <- viridis_blues[4]
-#Format legend labels
-if(customLabels){
-labelling_lvl2 <- c(expression(paste(italic("Spumella"), "-like")),
-"uncultured fungus", expression(italic("Aphanomyces"),
-italic("Globisporangium"), italic("Haptoglossa"), italic("Pedospumella"),
-italic("Phytophthora"), italic("Pythiogeton"), italic("Pythiopsis"),
-italic("Pythium")))
-}else{
-labelling_lvl2 <- names(colors_lvl2) %>% sort()
-}
-#Re-label the facets
-enrich_labs <- c("Before enrichment", "After enrichment")
-names(enrich_labs) <- c("Before enrichment", "Enriched")
-theme_bar <- theme(legend.position = "bottom",
-legend.key.size = unit(0.4, "cm"),
-legend.spacing.x = unit(0.3, 'cm'),
-text = element_text(size = 10),
-strip.text.x = element_text(size = 10),
-strip.background = element_blank(),
-axis.text.x = element_text(angle = 0, hjust = 0.5))
-topnt <- plot_bar(ps.topnt, x = primaryPar, fill = taxlvl,
-title = paste("Top", numt, "Classes")) +
-facet_wrap(paste0("~", secondaryPar),
-labeller = labeller(Bait = enrich_labs)) +
-scale_x_discrete(breaks = c("S05" ,"S15", "S25", "S35", "S45" ,"S55")) +
-scale_fill_manual(values = colors_combo) +
-ylab("Relative abundance") +
-theme_bar +
-xlab("Soil samples S01-S64")
-topnt_summary <- cuphyr::summarise_physeq(physeq = ps,
-ASV_sublist = taxa_names(ps.topnt),
-sublist_id =
-paste0("top ", numt, " ", taxlvl),
-samp_names = FALSE)
-topnt2 <- plot_bar(ps.topnt2, x = primaryPar, fill = taxlvl2,
-title = paste("Top", numt2, "Genera")) +
-facet_wrap(paste0("~", secondaryPar),
-labeller = labeller(Bait = enrich_labs)) +
-scale_x_discrete(breaks = c("S05" ,"S15", "S25", "S35", "S45" ,"S55")) +
-scale_fill_manual(values = colors_combo) +
-ylab("Relative abundance") +
-theme_bar + xlab("Soil samples S01-S64")
-topnt2_summary <- cuphyr::summarise_physeq(physeq = ps,
-ASV_sublist = taxa_names(ps.topnt2),
-sublist_id =
-paste0("top ", numt2, " ", taxlvl2),
-samp_names = FALSE)
-#Combine the plots
-combobar1 <- ggarrange(topnt, topnt2, nrow = 2, labels = c("A", "B"), align = "hv")
-#Save
-save_plot(combobar1, plot_name = "Topn_barplot_Fig2")
-#Print to standard out
-cat(topnt_summary, "\n\n", topnt2_summary)
-cat(prctg_pyth_phyt_str)
-combobar1
+seqs <- readDNAStringSet(file = file.path(path,"iso_ASVs_refs_v3.fasta"),
+format = "fasta")
+align <- AlignSeqs(DNAStringSet(seqs), iterations = 100, refinements = 5)
+seqs_phang <- phangorn::phyDat(as(align, "matrix"), type = "DNA")
+seqs_dm <- phangorn::dist.ml(seqs_phang)
+seqs_treeNJ <- NJ(seqs_dm)
+seqs_fit = pml(seqs_treeNJ, data = seqs_phang)
+fitGTRseqs <- update(seqs_fit, k = 4, inv = 0.2)
+fitGTRseqs <- optim.pml(fitGTRseqs, model = "GTR", optInv = TRUE,
+optGamma = TRUE, rearrangement = "stochastic",
+control = pml.control(trace = 0))
+GTR_tree <- treeio::as.treedata(fitGTRseqs$tree, type = "ml")
+iso_ASV_tree <- ggtree(GTR_tree) + geom_tree() +
+geom_treescale(x = 0.5) +
+geom_tiplab() + xlim(-0.1,3)
+save_plot(iso_ASV_tree, plot_name = "IsolateSeqs_ASVs_ref_tree_Supfig4")
+iso_ASV_tree
 cat("Chunk successfully run")
-#CHANGE ME to the amount of top taxlvl to plot (e.g. 'numt = 20' plots Top20 Taxa at taxlvl)
-numt <- 5
-#CHANGE ME to the amount of top taxa you want to plot (e.g. 'numt2 = 25' plots Top25 Taxa at taxlvl2)
-numt2 <- 10
-#CHANGE ME to the primary parameter of interest (x-axis). Accepted values are the column headers in the descriptor file.
-primaryPar <- "Soil"
-##CHANGE ME to the secondary parameter of interest (panels). Accepted values are the column headers in the descriptor file.
-secondaryPar <- "Bait"
-##CHANGE ME to the taxonomic level of interest (color coding). Accepted values are available taxonomic levels. 'taxlvl' should be higher than 'taxlvl2', e.g. Class and Genus
-taxlvl <- "Class"
-taxlvl2 <- "Genus"
-#Adds formatted legend labels for the specific output in the manuscript. WARNING! Will overwrite actual values and needs to be turned off/adjusted manually when data is updated
-customLabels <- TRUE
 ##CHANGE ME to change the width (in cm) of the output.
-wp <- 20
+wp <- 12
 #CHANGE ME to change the height (in cm) of the output.
-hp <- 15
-#CHANGE ME to change the resolution (in dpi) of the output.
-res <- 300
-#Get total number of reads and number of Pythium/Phytophthora reads (without contro samples!) to find out percentage of Pyhium and Phytophthora
-total_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::mutate(sumAbu = sum(Abundance)) %>%
-select(sumAbu) %>%
-unique() %>%
-unlist() %>%
-unname()
-pythium_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::group_by(Genus) %>%
-summarise(sumAbu = sum(Abundance)) %>%
-dplyr::filter(Genus == "Pythium") %>%
-select(sumAbu) %>%
-unlist() %>%
-unname()
-phytoph_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::group_by(Genus) %>%
-summarise(sumAbu = sum(Abundance)) %>%
-dplyr::filter(Genus == "Phytophthora") %>%
-select(sumAbu) %>%
-unlist() %>%
-unname()
-prctg_pyth_phyt_str <- paste0("\n\nTotal number of reads in the samples: ",
-total_reads_no_contr ,
-", number of Pythium reads: ",
-pythium_reads_no_contr,
-", number of Phytophthora reads: ",
-phytoph_reads_no_contr,
-"\nPercentage of Pythium total: ",
-pythium_reads_no_contr/total_reads_no_contr*100,
-"%\nPercentage of Phytophthora total: ",
-phytoph_reads_no_contr/total_reads_no_contr*100, "%")
-#Most abundant at specified levels, phyloseq objects
-ps.topnt <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl, top = numt,
-ignore_na = TRUE, silent = FALSE)
-ps.topnt2 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans,lvl = taxlvl2, top = numt2,
-ignore_na = TRUE, silent = FALSE)
-#Abundance lists (sorted by abundance)
-taxlist_topnt1 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl, top = numt,
-output_format = "tops", ignore_na = TRUE)
-taxlist_topnt2 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl2, top = numt2,
-output_format = "tops", ignore_na = TRUE)
-#tidy evaluation translation magic (needed so the dplyr functions that extract 'tax_lookup' below can parse the strings from the variable)
-lvl1 <- sym(taxlvl)
-lvl2 <- sym(taxlvl2)
-lvl1 <- enquo(lvl1)
-lvl2 <- enquo(lvl2)
-#make lookup table to guide coloring
-tax_lookup <- ps_tbl %>%
-group_by(!!lvl1, !!lvl2) %>%
-summarise() %>%
-filter(!!lvl2 %in% taxlist_topnt2)
-#Define fixed color schemes for classes for consistency between plots. This is a color scheme specified for this particular published figure. Can be used for orientation in general cases.
-#Background colors to be overwritten for special cases
-colors_lvl1 <- viridis(length(taxlist_topnt1), option = "D", begin = 1 , end = 0.5)
-names(colors_lvl1) <- taxlist_topnt1
-colors_lvl2 <- viridis(length(taxlist_topnt2), option = "D", begin = 1 , end = 0.5)
-names(colors_lvl2) <- taxlist_topnt2
-colors_combo <- c(colors_lvl1, colors_lvl2)
-#Special cases
-colors_combo["Chrysophyceae"] <- viridis_yellows[2]
-colors_combo["Pedospumella"] <- viridis_yellows[1]
-colors_combo["'Spumella-like'"] <- viridis_light[1]
-colors_combo["Oomycetes"] <- viridis_reds[4]
-colors_combo["Phytophthora"] <- viridis_reds[1]
-colors_combo["Pythium"] <- viridis_reds[4]
-colors_combo["Aphanomyces"] <- viridis_blues[1]
-colors_combo["Globisporangium"] <- "#4C0000FF"
-colors_combo["Pythiogeton"] <- viridis_blues[2]
-colors_combo["Saprolegnia"] <- viridis_blues[4]
-#Format legend labels
-if(customLabels){
-labelling_lvl2 <- c(expression(paste(italic("Spumella"), "-like")),
-"uncultured fungus", expression(italic("Aphanomyces"),
-italic("Globisporangium"), italic("Haptoglossa"), italic("Pedospumella"),
-italic("Phytophthora"), italic("Pythiogeton"), italic("Pythiopsis"),
-italic("Pythium")))
-}else{
-labelling_lvl2 <- names(colors_lvl2) %>% sort()
-}
+hp <- 10
+# ASVs (before LULU!) that exactly match isolates seqs (identified by vsearch),
+# read in from fasta
+exASVs_seqs <- readDNAStringSet(paste0(path,"/exact_ASVs_v3.fasta"))
+iso_seqs <- readDNAStringSet(paste0(path,"/trim_isos_v3.fasta"))
+iso_ASV_match <- read_delim(paste0(path,"/match_trim-isolates_ASVs_v3.txt"),
+delim = "\t",
+col_names = c("Iso_Soil" ,"OTU",
+"Match", "Mism", "Gap"))
+#Parse headers to get ASV IDs listed in vector
+exASVs <- names(exASVs_seqs) %>% str_remove("\\|\\|.*")
+isos <- names(iso_seqs)
 #Re-label the facets
 enrich_labs <- c("Before enrichment", "After enrichment")
 names(enrich_labs) <- c("Before enrichment", "Enriched")
-theme_bar <- theme(legend.position = "bottom",
-legend.key.size = unit(0.4, "cm"),
-legend.spacing.x = unit(0.3, 'cm'),
-text = element_text(size = 10),
-strip.text.x = element_text(size = 10),
-strip.background = element_blank(),
-axis.text.x = element_text(angle = 0, hjust = 0.5))
-topnt <- plot_bar(ps.topnt, x = primaryPar, fill = taxlvl,
-title = paste("Top", numt, "Classes")) +
-facet_wrap(paste0("~", secondaryPar),
-labeller = labeller(Bait = enrich_labs)) +
-scale_x_discrete(breaks = c("S05" ,"S15", "S25", "S35", "S45" ,"S55")) +
-scale_fill_manual(values = colors_combo) +
-ylab("Relative abundance") +
-theme_bar +
-xlab("Soil samples S01-S64")
-topnt_summary <- cuphyr::summarise_physeq(physeq = ps,
-ASV_sublist = taxa_names(ps.topnt),
-sublist_id =
-paste0("top ", numt, " ", taxlvl),
-samp_names = FALSE)
-topnt2 <- plot_bar(ps.topnt2, x = primaryPar, fill = taxlvl2,
-title = paste("Top", numt2, "Genera")) +
-facet_wrap(paste0("~", secondaryPar),
-labeller = labeller(Bait = enrich_labs)) +
-scale_x_discrete(breaks = c("S05" ,"S15", "S25", "S35", "S45" ,"S55")) +
-scale_fill_manual(values = colors_combo, labels = labelling_lvl2) +
-ylab("Relative abundance") +
-theme_bar + xlab("Soil samples S01-S64")
-topnt2_summary <- cuphyr::summarise_physeq(physeq = ps,
-ASV_sublist = taxa_names(ps.topnt2),
-sublist_id =
-paste0("top ", numt2, " ", taxlvl2),
-samp_names = FALSE)
-#Combine the plots
-combobar1 <- ggarrange(topnt, topnt2, nrow = 2, labels = c("A", "B"), align = "hv")
-#Save
-save_plot(combobar1, plot_name = "Topn_barplot_Fig2")
-#Print to standard out
-cat(topnt_summary, "\n\n", topnt2_summary)
-cat(prctg_pyth_phyt_str)
-combobar1
+samp_sums <- ps_tbl %>%
+dplyr::group_by(SampleIDs) %>%
+summarise(sum_Abundance = sum(Abundance))
+ps_tr_tbl <- left_join(ps_tbl, samp_sums, by = "SampleIDs") %>%
+dplyr::mutate(Rel_Abundance = Abundance/sum_Abundance) %>%
+dplyr::mutate(Prcnt_Abundance = Rel_Abundance*100)
+exASV_counts <- ps_tr_tbl %>%
+dplyr::select(OTU:Bait, Sanger, Rel_Abundance, Prcnt_Abundance) %>%
+dplyr::filter(OTU %in% exASVs) %>%
+dplyr::filter(Abundance >= 1)
+iso_ASVs <- ggplot(exASV_counts, aes(x = OTU,
+y = reorder(Soil, dplyr::desc(Soil)),
+fill = Prcnt_Abundance)) +
+geom_tile() +
+scale_fill_viridis() +
+facet_wrap(~Bait, labeller = labeller(Bait = enrich_labs)) +
+theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
+panel.grid = element_blank(),
+axis.title = element_blank(),
+strip.background = element_rect(fill = "white", color = NA)) +
+labs(fill = "Abund. (%)") +
+scale_x_discrete(limits = exASVs)
+save_plot(iso_ASVs, plot_name = "IsolateSeqs_ASVs_heatmap_Fig4")
+iso_ASVs
 cat("Chunk successfully run")
-#CHANGE ME to the amount of top taxlvl to plot (e.g. 'numt = 20' plots Top20 Taxa at taxlvl)
-numt <- 5
-#CHANGE ME to the amount of top taxa you want to plot (e.g. 'numt2 = 25' plots Top25 Taxa at taxlvl2)
-numt2 <- 10
-#CHANGE ME to the primary parameter of interest (x-axis). Accepted values are the column headers in the descriptor file.
-primaryPar <- "Soil"
-##CHANGE ME to the secondary parameter of interest (panels). Accepted values are the column headers in the descriptor file.
-secondaryPar <- "Bait"
-##CHANGE ME to the taxonomic level of interest (color coding). Accepted values are available taxonomic levels. 'taxlvl' should be higher than 'taxlvl2', e.g. Class and Genus
-taxlvl <- "Class"
-taxlvl2 <- "Genus"
-#Adds formatted legend labels for the specific output in the manuscript. WARNING! Will overwrite actual values and needs to be turned off/adjusted manually when data is updated
-customLabels <- TRUE
 ##CHANGE ME to change the width (in cm) of the output.
-wp <- 20
+wp <- 17.5
 #CHANGE ME to change the height (in cm) of the output.
-hp <- 15
+hp <- 10
 #CHANGE ME to change the resolution (in dpi) of the output.
-res <- 300
-#Get total number of reads and number of Pythium/Phytophthora reads (without contro samples!) to find out percentage of Pyhium and Phytophthora
-total_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::mutate(sumAbu = sum(Abundance)) %>%
-select(sumAbu) %>%
-unique() %>%
-unlist() %>%
-unname()
-pythium_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::group_by(Genus) %>%
-summarise(sumAbu = sum(Abundance)) %>%
-dplyr::filter(Genus == "Pythium") %>%
-select(sumAbu) %>%
-unlist() %>%
-unname()
-phytoph_reads_no_contr <- ps_tbl %>%
-dplyr::filter(Pass == "Yes") %>%
-dplyr::group_by(Genus) %>%
-summarise(sumAbu = sum(Abundance)) %>%
-dplyr::filter(Genus == "Phytophthora") %>%
-select(sumAbu) %>%
-unlist() %>%
-unname()
-prctg_pyth_phyt_str <- paste0("\n\nTotal number of reads in the samples: ",
-total_reads_no_contr ,
-", number of Pythium reads: ",
-pythium_reads_no_contr,
-", number of Phytophthora reads: ",
-phytoph_reads_no_contr,
-"\nPercentage of Pythium total: ",
-pythium_reads_no_contr/total_reads_no_contr*100,
-"%\nPercentage of Phytophthora total: ",
-phytoph_reads_no_contr/total_reads_no_contr*100, "%")
-#Most abundant at specified levels, phyloseq objects
-ps.topnt <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl, top = numt,
-ignore_na = TRUE, silent = FALSE)
-ps.topnt2 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans,lvl = taxlvl2, top = numt2,
-ignore_na = TRUE, silent = FALSE)
-#Abundance lists (sorted by abundance)
-taxlist_topnt1 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl, top = numt,
-output_format = "tops", ignore_na = TRUE)
-taxlist_topnt2 <- cuphyr::abundant_tax_physeq(
-physeq = ps.trans, lvl = taxlvl2, top = numt2,
-output_format = "tops", ignore_na = TRUE)
-#tidy evaluation translation magic (needed so the dplyr functions that extract 'tax_lookup' below can parse the strings from the variable)
-lvl1 <- sym(taxlvl)
-lvl2 <- sym(taxlvl2)
-lvl1 <- enquo(lvl1)
-lvl2 <- enquo(lvl2)
-#make lookup table to guide coloring
-tax_lookup <- ps_tbl %>%
-group_by(!!lvl1, !!lvl2) %>%
-summarise() %>%
-filter(!!lvl2 %in% taxlist_topnt2)
-#Define fixed color schemes for classes for consistency between plots. This is a color scheme specified for this particular published figure. Can be used for orientation in general cases.
-#Background colors to be overwritten for special cases
-colors_lvl1 <- viridis(length(taxlist_topnt1), option = "D", begin = 1 , end = 0.5)
-names(colors_lvl1) <- taxlist_topnt1
-colors_lvl2 <- viridis(length(taxlist_topnt2), option = "D", begin = 1 , end = 0.5)
-names(colors_lvl2) <- taxlist_topnt2
-colors_combo <- c(colors_lvl1, colors_lvl2)
-#Special cases
-colors_combo["Chrysophyceae"] <- viridis_yellows[2]
-colors_combo["Pedospumella"] <- viridis_yellows[1]
-colors_combo["'Spumella-like'"] <- viridis_light[1]
-colors_combo["Oomycetes"] <- viridis_reds[4]
-colors_combo["Phytophthora"] <- viridis_reds[1]
-colors_combo["Pythium"] <- viridis_reds[4]
-colors_combo["Aphanomyces"] <- viridis_blues[1]
-colors_combo["Globisporangium"] <- "#4C0000FF"
-colors_combo["Pythiogeton"] <- viridis_blues[2]
-colors_combo["Saprolegnia"] <- viridis_blues[4]
-#Format legend labels
-if(customLabels){
-labelling_lvl2 <- c(expression(paste(italic("Spumella"), "-like")),
-"uncultured fungus", expression(italic("Aphanomyces"),
-italic("Globisporangium"), italic("Haptoglossa"), italic("Pedospumella"),
-italic("Phytophthora"), italic("Pythiogeton"), italic("Pythiopsis"),
-italic("Pythium")))
-}else{
-labelling_lvl2 <- names(colors_lvl2) %>% sort()
-}
-#Re-label the facets
-enrich_labs <- c("Before enrichment", "After enrichment")
-names(enrich_labs) <- c("Before enrichment", "Enriched")
-theme_bar <- theme(legend.position = "bottom",
-legend.key.size = unit(0.4, "cm"),
-legend.spacing.x = unit(0.3, 'cm'),
-text = element_text(size = 10),
-strip.text.x = element_text(size = 10),
-strip.background = element_blank(),
-axis.text.x = element_text(angle = 0, hjust = 0.5),
-legend.text.align = 0)
-topnt <- plot_bar(ps.topnt, x = primaryPar, fill = taxlvl,
-title = paste("Top", numt, "Classes")) +
-facet_wrap(paste0("~", secondaryPar),
-labeller = labeller(Bait = enrich_labs)) +
-scale_x_discrete(breaks = c("S05" ,"S15", "S25", "S35", "S45" ,"S55")) +
-scale_fill_manual(values = colors_combo) +
-ylab("Relative abundance") +
-theme_bar +
-xlab("Soil samples S01-S64")
-topnt_summary <- cuphyr::summarise_physeq(physeq = ps,
-ASV_sublist = taxa_names(ps.topnt),
-sublist_id =
-paste0("top ", numt, " ", taxlvl),
-samp_names = FALSE)
-topnt2 <- plot_bar(ps.topnt2, x = primaryPar, fill = taxlvl2,
-title = paste("Top", numt2, "Genera")) +
-facet_wrap(paste0("~", secondaryPar),
-labeller = labeller(Bait = enrich_labs)) +
-scale_x_discrete(breaks = c("S05" ,"S15", "S25", "S35", "S45" ,"S55")) +
-scale_fill_manual(values = colors_combo, labels = labelling_lvl2) +
-ylab("Relative abundance") +
-theme_bar + xlab("Soil samples S01-S64")
-topnt2_summary <- cuphyr::summarise_physeq(physeq = ps,
-ASV_sublist = taxa_names(ps.topnt2),
-sublist_id =
-paste0("top ", numt2, " ", taxlvl2),
-samp_names = FALSE)
-#Combine the plots
-combobar1 <- ggarrange(topnt, topnt2, nrow = 2, labels = c("A", "B"), align = "hv")
-#Save
-save_plot(combobar1, plot_name = "Topn_barplot_Fig2")
-#Print to standard out
-cat(topnt_summary, "\n\n", topnt2_summary)
-cat(prctg_pyth_phyt_str)
-combobar1
+res <- 150
+#calculate averages for display in graphs
+avgNames <- c("PPUntr", "PPBait","ChrUntr", "ChrBait")
+avgs <- c(mean(ranksUntrPP_tot$Abundance),mean(ranksBaitPP_tot$Abundance),
+mean(ranksUntrAlgae_tot$Abundance),mean(ranksBaitAlgae_tot$Abundance))
+names(avgs) <- avgNames
+avgs <- format(round(avgs, 1), nsmall = 1)
+#Define fixed color schemes for consistency between plots
+plotvars <- c("Chrysophyceae_tot", "All oomycetes",
+"Other oomycetes", "Other classes", "PP_tot")
+getPalette <- colorRampPalette(viridis(length(plotvars)))
+plotPalette <- getPalette(length(plotvars))
+names(plotPalette) <- plotvars
+plotPalette["Chrysophyceae_tot"] <- viridis_yellows[2]
+plotPalette["PP_tot"] <- viridis_reds[4]
+customTheme <- theme(legend.position = "none",
+axis.title = element_blank(),
+title = element_text(size = 7))
+customThemeA <- theme(legend.position = "none",
+legend.title = element_blank(),
+legend.key.size = unit(5,"mm"),
+axis.title = element_blank(),
+title = element_text(size = 10))
+xUpperLim <- nrow(ranksBaitAlgae_tot) + 1
+ytextl = 0.05
+xtextl = 15
+ytexts = 90000
+xtexts = 45
+pAllBait <- ggplot(data = ranksBaitPP_tot,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("PP - after enrichment") +
+xlim(0,xUpperLim) +
+customTheme +
+annotate(geom = 'text', label = paste("Mean", avgs["PPBait"]),
+x = xtexts, y = ytexts) +
+scale_y_continuous(limits = c(0,105000))
+pAllUntr <- ggplot(data = ranksUntrPP_tot,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("PP - before enrichment") +
+xlim(0,xUpperLim) +
+customTheme +
+annotate(geom = 'text', label = paste("Mean", avgs["PPUntr"]),
+x = xtexts, y = ytexts) +
+scale_y_continuous(limits = c(0,105000))
+AlgBait <- ggplot(data = ranksBaitAlgae_tot,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Chrysophyceae - after enrichment") +
+xlim(0,xUpperLim) +
+customTheme +
+annotate(geom = 'text', label = paste("Mean", avgs["ChrBait"]),
+x = xtexts, y = ytexts) +
+scale_y_continuous(limits = c(0,105000))
+AlgUntr <- ggplot(data = ranksUntrAlgae_tot,
+aes(x = Rank, y = Abundance, fill = Set)) +
+geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Chrysophyceae - before enrichment") +
+xlim(0,xUpperLim) +
+customTheme +
+annotate(geom = 'text', label = paste("Mean", avgs["ChrUntr"]),
+x = xtexts, y = ytexts) +
+scale_y_continuous(limits = c(0,105000))
+A.combo <- ggarrange(pAllUntr, pAllBait, nrow = 2, labels = c("A"), vjust = 1)
+B.combo <- ggarrange(AlgUntr, AlgBait, nrow = 2, labels = c("B"), vjust = 1)
+A.combo.anot <- annotate_figure(A.combo, left = text_grob("Abundance", rot = 90))
+combo.AB <- ggarrange(A.combo.anot, B.combo, ncol = 2) +
+theme(plot.margin = margin(10,10,10,10)) +
+scale_fill_manual(values = plotPalette)
+#Save plot
+save_plot(combo.AB, plot_name = "Class_absolute_ranking_Supfig3")
+combo.AB
+cat("Chunk successfully run")
+sessionInfo()
+## CHANGE ME to change the width (in cm) of the output.
+wp <- 17.5
+# CHANGE ME to change the height (in cm) of the output.
+hp <- 15
+# CHANGE ME to change the resolution (in dpi) of the output.
+res <- 150
+# calculate averages for display in graphs
+avgNames <- c("PPUntr", "PPBait","ChrUntr", "ChrBait",
+"O-PUntr", "O-PBait", "OthUntr", "OthBait")
+avgs <- c(mean(ranksUntrPP$Abundance),
+mean(ranksBaitPP$Abundance),
+mean(ranksUntrAlgae$Abundance),
+mean(ranksBaitAlgae$Abundance),
+mean(ranksUntrOwoP$Abundance),
+mean(ranksBaitOwoP$Abundance),
+mean(rankedOthersUntr$OtherASVcounts),
+mean(rankedOthersBait$OtherASVcounts))
+names(avgs) <- avgNames
+# multiply to transform relative abundances into avg percentages and round to
+# one decimal
+avgs <- avgs*100
+avgs <- format(round(avgs, 1), nsmall = 1)
+# Define fixed color schemes for consistency between plots
+plotvars <- c("Chrysophyceae", "All oomycetes",
+"Other oomycetes", "Other classes", "PP")
+# Just to catch others/exceptions, make a general palette that will be
+# overwritten below
+plotPalette <- viridis(length(plotvars))
+names(plotPalette) <- plotvars
+plotPalette["Other classes"] <- "#C0C0C0"
+plotPalette["Chrysophyceae"] <- viridis_yellows[2]
+plotPalette["PP"] <- viridis_reds[4]
+plotPalette["Other oomycetes"] <- viridis_reds[1]
+plotPalette["All oomycetes"] <- viridis_reds[1]
+# plot total ASV counts, sorted by rank and color by class.
+AllBait <- ggplot(data = OthersRankPP,
+aes(x = Rank, y = Abundance, fill = Set))
+AllUntr <- ggplot(data = UntrOthersRankPP,
+aes(x = Rank, y = Abundance, fill = Set))
+customTheme <- theme(legend.position = "none",
+axis.title = element_blank(),
+title = element_text(size = 6))
+customThemeA <- theme(legend.position = "none",
+legend.title = element_blank(),
+legend.key.size = unit(5,"mm"),
+axis.title.x = element_blank())
+xUpperLim <- nrow(OthersRankPP) + 1
+ytextl = 0.05
+xtextl = 15
+ytexts = 0.75
+xtexts = 45
+middleline <- geom_blank() #geom_hline(yintercept = 0.5,
+# linetype="dashed", color="black")
+pAllBait <- AllBait + geom_col() +
+geom_col(data = AlgaeInvRankOom) +
+geom_col(data = OomRankPP) +
+geom_col(data = ranksBaitPP) +
+scale_fill_manual(values = plotPalette) +
+ggtitle("After enrichment") + xlab("Sample rank") +
+labs(fill = "") + ylim(0,1) + xlim(0,xUpperLim) +
+customThemeA + middleline +
+annotate(geom = 'text', label = paste("Mean PP", avgs["PPBait"], "%"),
+x = xtextl, y = ytextl) +
+ylab("Rel. abundance")
+pAllUntr <- AllUntr + geom_col() +
+geom_col(data = UntrAlgaeInvRankOom) +
+geom_col(data = UntrOomRankPP) +
+geom_col(data = ranksUntrPP) +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Before enrichment") + xlab("Sample rank") +
+labs(fill = "") + ylim(0,1) + xlim(0,xUpperLim) +
+customThemeA + middleline +
+annotate(geom = 'text', label = paste("Mean PP", avgs["PPUntr"], "%"),
+x = xtextl, y = ytextl) +
+ylab("Rel. abundance")
+AlgBait <- ggplot(data = ranksBaitAlgae,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Chrysophyceae - after enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["ChrBait"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+OomBait <- ggplot(data = ranksBaitOwoP,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Other oomycetes - after enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["O-PBait"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+OthBait <- ggplot(data = rankedOthersBait,
+aes(x = Rank, y = OtherASVcounts, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Other classes - after enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["OthBait"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+AlgUntr <- ggplot(data = ranksUntrAlgae,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Chrysophyceae - before enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["ChrUntr"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+OomUntr <- ggplot(data = ranksUntrOwoP,
+aes(x = Rank, y = Abundance, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Other oomycetes - before enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["O-PUntr"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+OthUntr <- ggplot(data = rankedOthersUntr,
+aes(x = Rank, y = OtherASVcounts, fill = Set)) + geom_col() +
+scale_fill_manual(values = plotPalette) +
+ggtitle("Other classes - before enrichment") +
+xlim(0,xUpperLim) + customTheme + middleline +
+annotate(geom = 'text', label = paste("Mean", avgs["OthUntr"], "%"),
+x = xtexts, y = ytexts) +
+scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))
+#A.combo <- ggarrange(pAllUntr, pAllBait, nrow=2,
+# labels = c("A"), vjust = 1, common.legend = TRUE, legend = "bottom")
+untr_smallplots <- ggarrange(AlgUntr, OomUntr, OthUntr, nrow = 3)
+untr_smallplots_anot <- annotate_figure(untr_smallplots,
+left = text_grob("Rel. abundance",
+rot = 90, size = 11))
+bait_smallplots <- ggarrange(AlgBait, OomBait, OthBait, nrow = 3)
+bait_smallplots_anot <- annotate_figure(bait_smallplots,
+left = text_grob("Rel. abundance",
+rot = 90, size = 11))
+combo <- ggarrange(pAllUntr, untr_smallplots_anot, pAllBait,
+bait_smallplots_anot, nrow = 2, ncol = 2, widths = c(2,1),
+labels = c("A", "", "B", ""), common.legend = TRUE,
+legend = "bottom") + theme(plot.margin = margin(10,10,10,10))
+#Save plot
+save_plot(combo, plot_name = "Class_ranking_Fig3")
+combo
 cat("Chunk successfully run")
-# CHANGE ME according to the location containing 'seqtab_nochim.rds' and other relevant files.
-path = "Data_files/"
-# CHANGE ME to 'TRUE' to list all samples and generate an empty metadata file
-optional_sample_check <- "FALSE"
-# CHANGE ME to TRUE to update the cuphyr package. This is a package the author published to separate out general purpose functions from this script. If the package is not installed, it will be installed, regardless of the value chosen here.
-update_cuphyr <- TRUE
-################# NO CHANGES NECESSARY BELOW #################
-#global knitr settings to determine the output of the markdown document upon 'knitting'
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_chunk$set(root.dir = paste0(path))
-#Messages and warnings are turned off globally for a cleaner output. Change to TRUE for complete output
-knitr::opts_chunk$set(message = FALSE)
-knitr::opts_chunk$set(warning = FALSE)
-knitr::opts_chunk$set(tidy = TRUE)
-#CuPhyR is the author's own package that contains several functions used to process phyloseq objects. It is hosted on GitHub and installed/updated using this command if update_cuphyr is TRUE
-if (update_cuphyr | !"cuphyr" %in% rownames(installed.packages())) {
-devtools::install_github("simeross/cuphyr")
-}
-#Loading required packages
-library(phyloseq)
-library(dada2)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..44b737b7bc20857062c7c803dbeb189e5c3a7fdf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+
+.Rhistory
diff --git a/SupMat2_DADA2_submission_FEMSEc.Rmd b/SupMat2_DADA2_submission_FEMSEc.Rmd
index 7c2981a442043a08b34addc66a3e9abfe686a6fc..30ec313c09a8c17e23f32a5972f53cf25584d1ab 100644
--- a/SupMat2_DADA2_submission_FEMSEc.Rmd
+++ b/SupMat2_DADA2_submission_FEMSEc.Rmd
@@ -10,37 +10,44 @@ date_modified: "17.12.2019"
 version: 4.1
 ---
 
-This is an R markdown document for the analysis of MiSeq amplicon sequencing data. In order to customise the pipeline, search 'CHANGE ME' to find different settings you should adjust according to your data. All parameters requiring changes are located within the first 4 chunks. After the fourth chunk has been run, you can simply click 'Run'>'Run all chunks below' in order to process the data
-
+This is an R Markdown document for the analysis of MiSeq amplicon sequencing data. To customise the pipeline, search for 'CHANGE ME' to find the settings you should adjust according to your data.
 Comment SWR: A new chunk was added to allow for quick reanalysis of taxonomy and pooled analysis of multiple samples after error inference. Small changes in chunk 4 (exporting seqtab.RDP in addition) were made to facilitate this. Therefore it's not recommended to 'Run all chunks below' from chunk 4 in this version.
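
For orientation, re-assigning taxonomy to an exported sequence table typically amounts to a single call to dada2's `assignTaxonomy()`. The sketch below is illustrative only; the file name and the reuse of `path` and `tax_database` from the setup chunk are assumptions, and the added chunk in the pipeline is the authoritative version.

```r
# Minimal sketch of re-running taxonomy assignment on an exported sequence table.
# File name and reused variables are assumptions; see the actual chunk for details.
library(dada2)

seqtab <- readRDS(file.path(path, "seqtab_nochim.rds"))   # assumed export name
taxa   <- assignTaxonomy(seqtab, tax_database, multithread = TRUE)
saveRDS(taxa, file.path(path, "taxa_reassigned.rds"))
```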
 
-### v4 update
+To reproduce the analysis for the manuscript "DNA metabarcoding reveals broad presence of plant pathogenic oomycetes in soil from internationally traded plants", please use the R Markdown version of this document, available together with all other code in the [GitLab repository belonging to the publication](https://gitlab.nibio.no/simeon/oomycete-metabarcoding-supplementary).
+
+#### v4 update
 Comment SWR: Two more chunks were added to integrate [LULU](https://www.nature.com/articles/s41467-017-01312-x) post-processing into this pipeline. The first of the two new chunks is a shell script chunk that generates a match list (most similar ASVs for each ASV) using vsearch. The match list can also be generated by other means (e.g. BLAST). The second of the new chunks is the core LULU chunk that takes the seqtab_nochim file from the dada2 analysis and the match list and performs the LULU post-processing with standard parameters. After this, a new seqtab_nochim, ASV-fasta and RDP taxonomy (optional) are generated in a lulu-subdirectory.
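
For orientation, once the ASV table and the vsearch match list are loaded, the curation itself is a single call to `lulu()`. The sketch below is illustrative only; file names and locations are assumptions, and the actual LULU chunk in this pipeline is the authoritative version.

```r
# Minimal sketch of the LULU curation step.
# File names/paths are assumptions; the pipeline's own chunk may differ.
library(lulu)

# ASV table: ASVs as rows, samples as columns, row names = ASV IDs
asv_tab <- as.data.frame(readRDS("lulu/asv_table.rds"))

# Match list produced by vsearch (query, target, % identity), tab separated
match_list <- read.table("lulu/match_list.txt", header = FALSE,
                         col.names = c("query", "target", "id"),
                         stringsAsFactors = FALSE)

# Run LULU with its default parameters (84% minimum match, co-occurrence filter)
curated <- lulu(asv_tab, match_list)

# Curated ASV table and the map showing which ASVs were merged into which parents
curated_tab <- curated$curated_table
asv_map     <- curated$otu_map
```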
 
 #### v4.1
 - Fixed an error when attempting to plot empty files
 - Fixed an error when attempting to sample more files than available for plotting
 
-########################################################
-#
+
+# General comments
+This script was run once per sequencing run; the runs were then combined using `Poolruns` in the last code chunk. This is recommended because the error profiles that DADA2 estimates may vary from run to run.
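
For orientation, pooling per-run results typically amounts to merging the per-run sequence tables, as in the minimal sketch below. The run directories and the repeated chimera check are assumptions; the `Poolruns` step in the last code chunk is the authoritative version.

```r
# Minimal sketch of pooling runs after per-run denoising.
# Run directories are assumptions; see the 'Poolruns' chunk for the real code.
library(dada2)

# One chimera-checked sequence table per sequencing run
st_run1 <- readRDS("Run1/seqtab_nochim.rds")
st_run2 <- readRDS("Run2/seqtab_nochim.rds")

# Merge the runs into one table and re-check chimeras on the pooled table
st_all <- mergeSequenceTables(st_run1, st_run2)
st_all <- removeBimeraDenovo(st_all, method = "consensus", multithread = TRUE)
```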
+
 # Setup R Environment
-#
-#########################################################
 
-Comment SWR: If the OS is Linux, this chunk now passes the path and marker variables to the shell script chunk, making it unnecessary to specify this separately below and avoiding issues with misspelled variables. It is possible that this also works with Mac OS, but this was not included since no Mac machine was available for testing.
+Comment SWR: If the OS is Linux, this chunk now passes the path and marker variables to the shell script chunk, making it unnecessary to specify this separately below and avoiding issues with misspelled variables. It is possible that this also works with Mac OS, but this could not be verified since no Mac machine was available for testing. It is generally recommended to run this script under a Linux OS. The raw reads for the manuscript "DNA metabarcoding reveals broad presence of plant pathogenic oomycetes in soil from internationally traded plants" used in this version of the pipeline can be downloaded from [The European Nucleotide Archive (ENA)](https://www.ebi.ac.uk/ena/browser/view/PRJEB40676).
+
 ```{r setup, eval=FALSE}
 ############set paths and marker name
-path="/home/simeon/Documents/Bioimmigrants/Run2_all//" # CHANGE ME to the directory (ABSOLUTE FILEPATH e.g. on Linux /home/usr/...) containing the fastq files after unzipping.
-marker=c("OITS1")  #CHANGE ME to the marker you have sequenced
-tax_database="/home/simeon/Documents/Git/bioimmigrants_scripts/CoFiMiTrO8_OomycITS1_dada2_DB.fasta" #CHANGE ME  to the path for the taxonomy database you will be using for identification
-
-#set knitr chunk options
+# CHANGE ME to the directory (ABSOLUTE FILEPATH e.g. on Linux /home/usr/...) 
+# containing the fastq files after unzipping.
+path="/home/simeon/Documents/Bioimmigrants/Run2_all/" 
+# CHANGE ME to the sequenced marker
+marker=c("OITS1")  
+# CHANGE ME to the path for the taxonomy database, if RDP classifier is used 
+# (not used in the published version of the manuscript)
+tax_database="/CoFiMiTrO8_OomycITS1_dada2_DB.fasta" 
+
+# set knitr chunk options
 knitr::opts_chunk$set(echo = TRUE)
 knitr::opts_chunk$set(eval = FALSE)
 knitr::opts_chunk$set(tidy = TRUE)
 knitr::opts_chunk$set(root.dir=paste0(path))
 
-#load libraries for R session
+# load libraries for R session
 library(dada2)
 library(phyloseq)
 library(ggplot2)
@@ -48,90 +55,90 @@ library(Biostrings)
 library(grid)
 library(gridExtra)
 
-#Get info on the operating system. If Linux, make 'path' and 'marker' available globally, if not, warn that the script needs additional attention.
+# Get info on the operating system. If Linux, make 'path' and 'marker' 
+# available globally, if not, warn that the script needs additional attention.
 os <- Sys.info()['sysname']
 if(os=="Linux"){
   Sys.setenv(projectpath=path)
   Sys.setenv(marker=marker)
 }else{
-  cat(os, "does not support running shell code through R Markdown well. Please uncomment 'projectpath' and 'marker' in chunk 2 and 'dada2_folder' in chunk7 manually and adjust them to the appropriate values")
+  cat(os, "does not support running shell code through R Markdown well.",
+  "Please uncomment 'projectpath' and 'marker' in chunk 2 and 'dada2_folder'",
+  "in chunk7 manually and adjust them to the appropriate values")
 }
 ```
 
-########################################################
-#
+
 # Setup bash Environment
-#
-#########################################################
 
 #basic setup for all markers plus marker specific trimming of the reads
 Comment SWR: Uncomment and update projectpath and marker if NOT on Linux
-```{bash cutadapt, eval=FALSE, tidy=TRUE}
-#Start by creating a file structure for the output and analysis
-#This assumes that all of the files from a run are located in a single folder
-#basic structure being created is project/marker/raw_data and project/marker/trim
-
-#define a variable with the path to the folder containing the R1/R2 reads
-#UNCOMMENT AND CHANGE ME (only if you're not running Linux as your OS) to the directory containing the fastq files
+```{bash cutadapt, eval=FALSE, tidy=FALSE}
+# Start by creating a file structure for the output and analysis
+# This assumes that all of the files from a run are located in a single folder
+# basic structure being created is project/marker/raw_data and project/marker/trim
+
+# define a variable with the path to the folder containing the R1/R2 reads
+# UNCOMMENT AND CHANGE ME (only if you're not running Linux as your OS) to the 
+# directory containing the fastq files
   # projectpath="/mnt/c/R/dada2results/Empties_test"
 
-#define a variable with the marker name 
-#UNCOMMENT AND CHANGE ME (only if you're not running Linux as your OS) to the name of the marker that has been sequenced - *NB make sure this EXACTLY matches the 'marker' you defined in the R chunk above
+# define a variable with the marker name 
+# UNCOMMENT AND CHANGE ME (only if you're not running Linux as your OS) 
+# to the name of the marker that has been sequenced - 
+# *NB make sure this EXACTLY matches the 'marker' you defined in the R chunk above
   # marker="OITS"
 
-#make new folders for the marker and the raw data/processed data
-# mkdir "${projectpath}"/"${marker}"/
-#  mkdir "${projectpath}"/"${marker}"/raw_data
-#  mkdir "${projectpath}"/"${marker}"/trim
+# make new folders for the marker and the raw data/processed data
+ mkdir "${projectpath}"/"${marker}"/
+ mkdir "${projectpath}"/"${marker}"/raw_data
+ mkdir "${projectpath}"/"${marker}"/trim
 
-#move the files into the newly created raw_data folder 
-#this script assumes that you have run multiple markers in the same run, and the marker name is contained in the file names
-#CHANGE ME if you have NOT run multiple markers in the run and the marker name is NOT contained in the file names. Add a hashtag (#) to the beginning of the following line so that it does not run.
-  #mv "${projectpath}"/*"${marker}"* "${projectpath}"/"${marker}"/raw_data/
-
-#CHANGE ME if you have NOT run multiple markers in the run and the marker name is NOT contained in the file names, you can remove the hashtag (#) from the following line and run this command
-#  mv "${projectpath}"/*fastq* "${projectpath}"/"${marker}"/raw_data/
+ mv "${projectpath}"/*fastq* "${projectpath}"/"${marker}"/raw_data/
 
 #cutadapt will need uncompressed files - use gunzip to do this
 gunzip "${projectpath}"/"${marker}"/raw_data/*.fastq.gz
 
-#define variables for the reverse complements of the F and R primers
-#CHANGE ME according to the primers you have used. Remove the hashtag (#) from the beginning of the two lines that correspond to your marker
-##SWR: Corrected a mistake in the Fungal ITS1 sequences
+# define variables for the reverse complements of the F and R primers
+# CHANGE ME according to the primers you have used (only the Oomycete ITS1 
+# primers used in this analysis are kept below)
 
-#for the Oomycete ITS1 marker:  
+# for the Oomycete ITS1 marker:  
   rc_Fprimer="GTGGTAATGATCCTTCCG"
   rc_Rprimer="CAGCAGTGGATGTCTAGGCT"
-#for the Oomycete ITS2 marker:
-  #rc_Fprimer="AAGTGCAATRTGCGTTCAA"
-  #rc_Rprimer="GCATATCAATAAGCGGAGGA"
-#for the Fungal ITS1 marker:
-  #rc_Fprimer="GGAAACCTTGTTACGACTT"
-  #rc_Rprimer="GATCTCTTGGYTCTBGCA"
-#for the Fungal ITS2 marker:
-  #rc_Fprimer="CAAAGATTCGATGAYTCAC"
-  #rc_Rprimer="GCATATCAATAAGCGGAGGA"
-#for the Bacterial 16S marker:
-  #rc_Fprimer="TTACCGCGGCKGCTGRCAC"
-  #rc_Rprimer="ATTAGAWACCCBNGTAGTCC"
-
-#define a variable for the minimum length parameter to be used in cutadapt
-#CHANGE ME according to the length of your primers. It is advisable to use a value between 10 and the shorter of the two primer lengths
+
+# define a variable for the minimum length parameter to be used in cutadapt
+# CHANGE ME according to the length of your primers. It is advisable to use a 
+# value between 10 and the shorter of the two primer lengths
   MIN_LENGTH=15
 
-##SWR: define a variable for the number of mismatches to be used in cutadapt
-#CHANGE ME to the rate of mismatches (0 =< value < 1) you want cutadapt to accept when finding the primer. When choosing a value different from 0, it is recommended to compare how many sequences get trimmed when using both, 0 and the alternative value.
+## SWR: define a variable for the mismatch rate to be used in cutadapt
+# CHANGE ME to the rate of mismatches (0 <= value < 1) you want cutadapt to 
+# accept when finding the primer. When choosing a value different from 0, it is 
+# recommended to compare how many sequences get trimmed with both 0 and 
+# the alternative value.
   MISMATCH=0
 
-#define a variable with the path to the cutadapt program
-#this command assumes that cutadapt has been added to your PATH variable during installation
-#CHANGE ME if cutadapt is not in your PATH variable. Add a hashtag (#) in front of the following line, and remove the # from the line after it.  Change the specified path accordingly
-#CUTADAPT="$(which cutadapt) --minimum-length ${MIN_LENGTH} --no-indels -e ${MISMATCH} -f fastq "
-CUTADAPT="/usr/local/bin/cutadapt --minimum-length ${MIN_LENGTH} --no-indels -e ${MISMATCH} -f fastq "
+# define a variable with the path to the cutadapt program
+# the active line below uses a hard-coded path to the cutadapt installation
+# CHANGE ME to your own cutadapt path, or, if cutadapt is in your PATH 
+# variable, comment out the active line and use the commented 
+# '$(which cutadapt)' line above it instead
+
+# CUTADAPT="$(which cutadapt) --minimum-length ${MIN_LENGTH} --no-indels \
+# -e ${MISMATCH} -f fastq "
+
+CUTADAPT="/usr/local/bin/cutadapt --minimum-length ${MIN_LENGTH} --no-indels \
+-e ${MISMATCH} -f fastq "
 
 #use cutadapt to remove the rev primer from the fwd reads and vice versa
-for i in $(ls "${projectpath}"/"${marker}"/raw_data/*R1_001.fastq); do python3 ${CUTADAPT} -a "${rc_Rprimer}"  -o $i.trim.fastq $i > "${projectpath}"/"${marker}"/R1_cutadapt_output.txt; done
-for i in $(ls "${projectpath}"/"${marker}"/raw_data/*R2_001.fastq); do python3 ${CUTADAPT} -a "${rc_Fprimer}"  -o $i.trim.fastq $i > "${projectpath}"/"${marker}"/R2_cutadapt_output.txt; done
+for i in $(ls "${projectpath}"/"${marker}"/raw_data/*R1_001.fastq); do python3 \
+${CUTADAPT} -a "${rc_Rprimer}"  -o $i.trim.fastq $i > \
+"${projectpath}"/"${marker}"/R1_cutadapt_output.txt; done
+
+for i in $(ls "${projectpath}"/"${marker}"/raw_data/*R2_001.fastq); do python3 \
+${CUTADAPT} -a "${rc_Fprimer}"  -o $i.trim.fastq $i > \
+"${projectpath}"/"${marker}"/R2_cutadapt_output.txt; done
 
 #compress the files again to save space
 gzip "${projectpath}"/"${marker}"/raw_data/*.fastq
@@ -143,10 +150,13 @@ mv "${projectpath}"/"${marker}"/raw_data/*trim.fastq.gz "${projectpath}"/"${mark
 #look at a random sample of R1/R2 to assess run quality
 ```{r, QC, eval=FALSE}
 #Create Lists of Forward and Reverse Filenames and a List of Sample Names
-fnFs=sort(list.files(paste0(path,"/",marker,"/trim/"), pattern="_R1_001.fastq.trim.fastq.gz", full.names = TRUE))
-fnRs=sort(list.files(paste0(path,"/",marker,"/trim/"), pattern="_R2_001.fastq.trim.fastq.gz", full.names = TRUE))
+fnFs=sort(list.files(paste0(path,"/",marker,"/trim/"), 
+                     pattern="_R1_001.fastq.trim.fastq.gz", full.names = TRUE))
+fnRs=sort(list.files(paste0(path,"/",marker,"/trim/"), 
+                     pattern="_R2_001.fastq.trim.fastq.gz", full.names = TRUE))
 
-# SWR: Make a list of fnF and fnR for quality plotting that excludes very small files (default is < 100 bytes) which may cause problems in quality plotting and exit the chunk with error
+# SWR: Make a list of fnF and fnR for quality plotting that excludes very small 
+# files (default is < 100 bytes) which may cause problems in quality plotting 
+# and exit the chunk with error
 min_file_size <- function(files, minsize=100){
   info <- file.info(files)
   index_bigs <- which(info$size > minsize)
@@ -159,48 +169,116 @@ fnRL <- min_file_size(fnRs)
 sample_number <- if (length(fnFL) < 10) length(fnFL) else 10
 sample_number <- if (length(fnRL) < sample_number) length(fnRL) else sample_number
 
-lapply(c(sample(fnFL, sample_number), sample(fnRL, sample_number)), plotQualityProfile)
+lapply(c(sample(fnFL, sample_number), sample(fnRL, sample_number)), 
+       plotQualityProfile)
 ```
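+
+A minimal sketch (not part of the pipeline), assuming the min_file_size() helper 
+defined in the chunk above: two hypothetical temporary files are created, and 
+only the one above the 100-byte default cutoff should be returned.
+```{r, min_file_size_demo, eval=FALSE}
+big_file   <- tempfile(fileext = ".txt")
+small_file <- tempfile(fileext = ".txt")
+writeLines(strrep("A", 500), big_file)   # ~500 bytes, above the cutoff
+writeLines("A", small_file)              # 2 bytes, below the cutoff
+min_file_size(c(big_file, small_file))   # should return only big_file
+```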
 
 ##set parameters for dada2
 ```{r, set_params, eval=FALSE}
-
-analysis="both" #CHANGE ME according to the quality of the sequencing run. For good quality runs, analysing both R1/R2 is desirable. For very poor quality runs, it may be worthwhile to analyse only R1. Select from the following: "R01"   "both"
-assign_taxonomy="TRUE" #CHANGE ME use "TRUE" to assign taxonomy, "FALSE" to proceed without taxonomic assignments
-plotQC="SUB" #CHANGE ME use "TRUE" to plot quality profiles for each sample, and "FALSE" to speed up analysis by skipping this step (plotting takes some time) and "SUB" to plot a subset of 10 samples
-
-my_maxEEf=2 #CHANGE ME according to the quality of the sequencing run. This determines the maximum expected errors for R1 and R2 during the filtering step. A reasonably conservative threshold is (2,2). If the data is of lower quality (as it is in the bioimmigrants run), it may be worthwhile to run with higher EEs ex/ (3,5)
-my_maxEEr=2 #CHANGE ME according to the quality of the sequencing run. This determines the maximum expected errors for R1 and R2 during the filtering step. A reasonably conservative threshold is (2,2). If the data is of lower quality (as it is in the bioimmigrants run), it may be worthwhile to run with higher EEs ex/ (3,5)
-my_maxN=0 #CHANGE ME according to the quality of the sequencing run. This determines the maximum number of ambiguous bases allowed in the reads.
-my_truncQ=2 #CHANGE ME according to the quality of the sequencing run. This is a PHRED score quality treshold - all sequences will be truncated at the first base with a quality score below this value
-my_truncLen=c(0,0)#CHANGE ME according to the quality of the sequencing run and according to the length of the target region. This is the length to cut the (forward,reverse) sequences at. Use 0,0 for no truncation.
-my_minLen=135 #CHANGE ME according to the marker used - this is the minimum length of the reads after trimming
-my_minoverlap=30 #CHANGE ME specify the minimum number of bases to overlap during merging, must be over 10!
-my_minBootstrap=80 #CHANGE ME to specify the minimum confidence interval for RDP assignment of taxonomy. 
-
-#SWR: CHANGE ME to TRUE and give a value (number of bases) to collapse ASVs with an overlap of 'Collaps_minOverlap' (recommended to be the length of the shortest ASV that is identical to a longer one). This is only recommended to be used in cases where a number of ASVs occur that differ in length but are otherwise identical. If this frequently occurs, it may be an issue of primer trimming.
+# CHANGE ME according to the quality of the sequencing run. For good quality 
+# runs, analysing both R1/R2 is desirable. For very poor quality runs, it may be 
+# worthwhile to analyse only R1. Select from the following: "R01"   "both"
+analysis="both"
+
+# CHANGE ME use "TRUE" to assign taxonomy, "FALSE" to proceed without taxonomic 
+# assignments
+assign_taxonomy="FALSE" 
+
+# CHANGE ME use "TRUE" to plot quality profiles for each sample, and "FALSE" to 
+# speed up analysis by skipping this step (plotting takes some time) and "SUB" 
+# to plot a subset of 10 samples
+plotQC="SUB" 
+
+# CHANGE ME according to the quality of the sequencing run. This determines the 
+# maximum expected errors for R1 and R2 during the filtering step. A reasonably 
+# conservative threshold is (2,2). If the data is of lower quality, it may be 
+# worthwhile to run with higher EEs, e.g. (3,5)
+my_maxEEf=2
+
+# CHANGE ME according to the quality of the sequencing run. This determines the 
+# maximum expected errors for R1 and R2 during the filtering step. A reasonably 
+# conservative threshold is (2,2). If the data is of lower quality, it may be 
+# worthwhile to run with higher EEs, e.g. (3,5)
+my_maxEEr=2 
+
+# CHANGE ME according to the quality of the sequencing run. This determines the 
+# maximum number of ambiguous bases allowed in the reads.
+my_maxN=0 
+
+# CHANGE ME according to the quality of the sequencing run. This is a PHRED score 
+# quality threshold - all sequences will be truncated at the first base with a 
+# quality score below this value
+my_truncQ=2 
+
+# CHANGE ME according to the quality of the sequencing run and according to the 
+# length of the target region. This is the length to cut the (forward,reverse) 
+# sequences at. Use 0,0 for no truncation.
+my_truncLen=c(0,0)
+
+# CHANGE ME according to the marker used - this is the minimum length of the 
+# reads after trimming
+my_minLen=135 
+
+# CHANGE ME specify the minimum number of bases to overlap during merging, 
+# must be over 10!
+my_minoverlap=30
+
+# CHANGE ME to specify the minimum bootstrap confidence for RDP assignment of 
+# taxonomy.
+my_minBootstrap=80  
+
+# SWR: CHANGE ME to TRUE and give a value (number of bases) to collapse ASVs with 
+# an overlap of 'Collapse_minOverlap' (recommended to be the length of the 
+# shortest ASV that is identical to a longer one). This is only recommended to 
+# be used in cases where a number of ASVs occur that differ in length but are 
+# otherwise identical. If this frequently occurs, it may be an issue of primer 
+# trimming.
 Collapse_overlapping=FALSE
 Collapse_minOverlap=200
 
-#SWR: Guide denoising with the help of sequences that are expected to be present. Set pseudo_pooling to 'TRUE' and provide a fasta-file of sequences to guide with. More info on this https://benjjneb.github.io/dada2/ReleaseNotes_1_8.html
-pseudo_pooling = "TRUE"
-guide_seqs= "/home/simeon/Documents/Bioimmigrants/PosKontr_seqs_MBB/Oomycetes/trim_pos.fasta"
+# SWR: Guide denoising with the help of sequences that are expected to be present. 
+# Set pseudo_pooling to 'TRUE' and provide a fasta-file of sequences to guide 
+# with. More info on this https://benjjneb.github.io/dada2/ReleaseNotes_1_8.html
+pseudo_pooling = "FALSE"
+guide_seqs= "/home/simeon/Documents/..."
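+
+# Optional (a hedged sketch, assuming Biostrings is already loaded by the setup 
+# chunk): check that the guide fasta can be read before starting the run; 
+# readDNAStringSet() is the same call used by the dada() steps further below.
+if(pseudo_pooling == "TRUE") print(length(readDNAStringSet(guide_seqs)))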
+
+parameters=c(paste0("Path: ",path),"","Samples:",
+             sapply(strsplit(basename(sort(list.files(
+               paste0(path,"/",marker,"/trim/"), 
+               pattern="_R1_001.fastq.trim.fastq.gz", full.names = TRUE))), 
+               "_"), `[`, 1),"",paste0("Analysis type: ",analysis),
+             "",paste0("Database type: ",tax_database),"","dada2 Parameters:",
+             paste("maximum expected errors R1: ",my_maxEEf),
+             paste("maximum expected errors R2:",my_maxEEr),
+             paste("maximum ambiguous basses: ",my_maxN),
+             paste("minimum length: ",my_minLen),
+             paste("truncate at first instance of Qscore: ",my_truncQ),
+             paste("minimum overlap during merging: ",my_minoverlap),
+             paste("taxonomy bootstrap threshold: ",my_minBootstrap), 
+             paste("Collapse overlapping ASVs: ", Collapse_overlapping), 
+             paste("Minimum overlap if collapsing overlapping ASVs: ",
+                   Collapse_minOverlap) , 
+             paste("Pseudo pooling (DADA with prior sequence info)", 
+                   pseudo_pooling))
 
-parameters=c(paste0("Path: ",path),"","Samples :",sapply(strsplit(basename(sort(list.files(paste0(path,"/",marker,"/trim/"), pattern="_R1_001.fastq.trim.fastq.gz", full.names = TRUE))), "_"), `[`, 1),"",paste0("Analysis type: ",analysis),"",paste0("Database type: ",tax_database),"","dada2 Parameters:",paste("maximum expected errors R1: ",my_maxEEf),paste("maximum expected errors R2:",my_maxEEr),paste("maximum ambiguous basses: ",my_maxN),paste("minimum length: ",my_minLen),paste("truncate at first instance of Qscore: ",my_truncQ),paste("minimum overlap during merging: ",my_minoverlap),paste("taxonomy bootstrap threshold: ",my_minBootstrap), paste("Collapse overlapping ASVs: ", Collapse_overlapping), paste("Minimum overlap if collapsing overlapping ASVs: ",Collapse_minOverlap) , paste("Pseudo pooling (DADA with prior sequence info)", pseudo_pooling))
 write.table(parameters,paste0(path,"/",marker,"/parameters.txt"),row.names=F)
 
 ```
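+
+As an optional, hedged sketch (not part of the original pipeline): before 
+committing to the maxEE values above, the first sample can be trial-filtered 
+with two settings and the fraction of surviving reads compared. It reuses 
+fnFs/fnRs from the QC chunk, writes throw-away files to tempdir(), and uses 
+the (2,2) and (3,5) settings mentioned in the comments above.
+```{r, maxEE_trial, eval=FALSE}
+library(dada2)
+trial_filter <- function(maxEE){
+  out <- filterAndTrim(fnFs[1], file.path(tempdir(), "trial_F.fastq.gz"),
+                       fnRs[1], file.path(tempdir(), "trial_R.fastq.gz"),
+                       maxN = my_maxN, maxEE = maxEE, truncQ = my_truncQ,
+                       minLen = my_minLen, rm.phix = TRUE,
+                       compress = TRUE, multithread = TRUE, matchIDs = TRUE)
+  out[1, "reads.out"] / out[1, "reads.in"]   # fraction of reads retained
+}
+c(conservative = trial_filter(c(2, 2)), relaxed = trial_filter(c(3, 5)))
+```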
 
-##########################################
-# DO NOT ALTER SCRIPT BEYOND THIS POINT!!!
-#############################################
+
+# Computational code chunk, no changes needed.
+
 Comment SWR: This chunk now passes the output directory to a global variable on Linux, so that the shell script chunk that generates the match list can access it.
 ```{r, dada2, eval=FALSE}
 #Create Lists of Forward and Reverse Filenames and a List of Sample Names
-fnFs=sort(list.files(paste0(path,"/",marker,"/trim/"), pattern="_R1_001.fastq.trim.fastq.gz", full.names = TRUE))
-fnRs=sort(list.files(paste0(path,"/",marker,"/trim/"), pattern="_R2_001.fastq.trim.fastq.gz", full.names = TRUE))
-
-# SWR: Make a list of fnF and fnR for quality plotting that excludes very small files (default is < 100 bytes) which may cause problems in quality plotting and exit the chunk with error
+fnFs=sort(list.files(paste0(path,"/",marker,"/trim/"), 
+                     pattern="_R1_001.fastq.trim.fastq.gz", full.names = TRUE))
+fnRs=sort(list.files(paste0(path,"/",marker,"/trim/"), 
+                     pattern="_R2_001.fastq.trim.fastq.gz", full.names = TRUE))
+
+# SWR: Make a list of fnF and fnR for quality plotting that excludes very small 
+# files (default is < 100 bytes) which may cause problems in quality plotting 
+# and exit the chunk with error
 min_file_size <- function(files, minsize=100){
   info <- file.info(files)
   index_bigs <- which(info$size > minsize)
@@ -218,44 +296,53 @@ sample.names=sapply(strsplit(basename(fnFs), "_"), `[`, 1)
 if(analysis=="both"){
 
   #Create paths for the filtered files in a subdirectory called filtered/
-  filtFs=file.path(path,marker, "/R01_R02/filtered/", paste0(sample.names, "_F_filt.fastq"))
-  filtRs=file.path(path,marker, "/R01_R02/filtered/", paste0(sample.names, "_R_filt.fastq"))
+  filtFs=file.path(path,marker, "/R01_R02/filtered/", 
+                   paste0(sample.names, "_F_filt.fastq"))
+  filtRs=file.path(path,marker, "/R01_R02/filtered/", 
+                   paste0(sample.names, "_R_filt.fastq"))
   
   outp=paste0(path, "/", marker, "/R01_R02/")
 
   #Filter sequences
   ##SWR: fixed paired end pairing by adding matchIDs=TRUE
   out=filterAndTrim(fnFs, filtFs, fnRs, filtRs,
-              maxN=my_maxN, maxEE=c(my_maxEEf,my_maxEEr), truncQ=my_truncQ, minLen=my_minLen, truncLen=my_truncLen, rm.phix=TRUE,
-              compress=TRUE, multithread=TRUE, matchIDs=TRUE) # On Windows set multithread=FALSE
+              maxN=my_maxN, maxEE=c(my_maxEEf,my_maxEEr), truncQ=my_truncQ, 
+              minLen=my_minLen, truncLen=my_truncLen, rm.phix=TRUE,
+              compress=TRUE, multithread=TRUE, matchIDs=TRUE)
   head(out)
   write.table(out,paste0(path,"/",marker,"/R01_R02/out.txt"))
 
   if(plotQC=="TRUE"){
-    #Plot Forward/Reverse Quality scores for each sample (not strictly necessary, time intensive)
+    # Plot Forward/Reverse Quality scores for each sample 
+    # (not strictly necessary, time intensive)
     pl=lapply(c(fnFL),plotQualityProfile)
     print(pl)
     ml=marrangeGrob(pl,nrow=2,ncol=1)
-    ggsave(paste0(path,"/",marker,"/R01_R02/qualityplots_fwd.pdf"),ml,width=20,height=26,unit="cm",dpi=300)
+    ggsave(paste0(path,"/",marker,"/R01_R02/qualityplots_fwd.pdf"),
+           ml,width=20,height=26,unit="cm",dpi=300)
     plr=lapply(c(fnRL),plotQualityProfile)
     print(plr)
     mlr=marrangeGrob(plr,nrow=2,ncol=1)
-    ggsave(paste0(path,"/",marker,"/R01_R02/qualityplots_rev.pdf"),mlr,width=20,height=26,unit="cm",dpi=300)
+    ggsave(paste0(path,"/",marker,"/R01_R02/qualityplots_rev.pdf"),
+           mlr,width=20,height=26,unit="cm",dpi=300)
   }
 
   if(plotQC=="SUB"){
-    #Plot Forward/Reverse Quality scores for each sample (not strictly necessary, time intensive)
+    # Plot Forward/Reverse Quality scores for each sample 
+    # (not strictly necessary, time intensive)
     sample_number <- if (length(fnFL) < 10) length(fnFL) else 10
     sample_number <- if (length(fnRL) < sample_number) length(fnRL) else sample_number
     
     pl=lapply(sample(c(fnFL),sample_number),plotQualityProfile)
     print(pl)
     ml=marrangeGrob(pl,nrow=2,ncol=1)
-    ggsave(paste0(path,"/",marker,"/R01_R02/qualityplots_fwd.pdf"),ml,width=20,height=26,unit="cm",dpi=300)
+    ggsave(paste0(path,"/",marker,"/R01_R02/qualityplots_fwd.pdf"),
+           ml,width=20,height=26,unit="cm",dpi=300)
     plr=lapply(sample(c(fnRL),sample_number),plotQualityProfile)
     print(plr)
     mlr=marrangeGrob(plr,nrow=2,ncol=1)
-    ggsave(paste0(path,"/",marker,"/R01_R02/qualityplots_rev.pdf"),mlr,width=20,height=26,unit="cm",dpi=300)
+    ggsave(paste0(path,"/",marker,"/R01_R02/qualityplots_rev.pdf"),
+           mlr,width=20,height=26,unit="cm",dpi=300)
 
   }
   #dereplicate
@@ -275,9 +362,11 @@ if(analysis=="both"){
   ## Plot the Estimated Error Rates for the Transition Types
   #check that model and data match reasonably well
   eFplot=plotErrors(errF, nominalQ=TRUE)
-  ggsave(paste0(path,"/",marker,"/R01_R02/R1_error_profile.pdf"),eFplot,width=20,height=26,unit="cm",dpi=300)
+  ggsave(paste0(path,"/",marker,"/R01_R02/R1_error_profile.pdf"),
+         eFplot,width=20,height=26,unit="cm",dpi=300)
   eRplot=plotErrors(errR, nominalQ=TRUE)
-  ggsave(paste0(path,"/",marker,"/R01_R02/R2_error_profile.pdf"),eRplot,width=20,height=26,unit="cm",dpi=300)
+  ggsave(paste0(path,"/",marker,"/R01_R02/R2_error_profile.pdf"),
+         eRplot,width=20,height=26,unit="cm",dpi=300)
 
   #sample inference
   if(pseudo_pooling=="TRUE"){
@@ -290,59 +379,79 @@ if(analysis=="both"){
   }
 
   #merge forward and reverse reads
-  mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs,minOverlap=my_minoverlap,verbose=TRUE)
+  mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs,
+                        minOverlap=my_minoverlap,verbose=TRUE)
+  
   # Inspect the merger data.frame from the first sample
   head(mergers[[1]])
   merge.tab=data.frame(matrix(ncol=10,nrow=0))
-  colnames(merge.tab)=c("sample","sequence","abundance","forward","reverse","nmatch","nmismatch","nindel","prefer","accept")
+  colnames(merge.tab)=c("sample","sequence","abundance","forward",
+                        "reverse","nmatch","nmismatch","nindel",
+                        "prefer","accept")
+  
   for(i in (1:length(mergers))){
     if(nrow(mergers[[i]])>0){
       sub=cbind(names(mergers)[i],mergers[[i]])
       merge.tab=rbind(merge.tab,sub)
     } else {}
   }
-  colnames(merge.tab)=c("sample","sequence","abundance","forward","reverse","nmatch","nmismatch","nindel","prefer","accept")
-  write.table(merge.tab,paste0(path,"/",marker,"/R01_R02/mergers.txt"),row.names=F)
+  colnames(merge.tab)=c("sample","sequence","abundance","forward",
+                        "reverse","nmatch","nmismatch","nindel",
+                        "prefer","accept")
+  write.table(merge.tab,paste0(path,"/",marker,"/R01_R02/mergers.txt"),
+              row.names=F)
 
 
-  #make sequence table and distribution table for sequence lengths
+  # make sequence table and distribution table for sequence lengths
   seqtab=makeSequenceTable(mergers)
-  seqtab=seqtab[,nchar(colnames(seqtab)) >49]#use only sequences longer than 50bp as the rdp classifier can't accept less than that
+  # keep only sequences of at least 50 bp, as the RDP classifier cannot handle 
+  # shorter sequences
+  seqtab=seqtab[,nchar(colnames(seqtab)) >49]
 
-  ##SWR: If Collapse_overlapping=TRUE, collapses identical sequences together based on an overlap specified in 'Collapse_minOverlap'. 
+  ## SWR: If Collapse_overlapping=TRUE, collapses identical sequences together 
+  # based on an overlap specified in 'Collapse_minOverlap'. 
   if(Collapse_overlapping==TRUE){
-     seqtab <- collapseNoMismatch(seqtab, minOverlap = Collapse_minOverlap, orderBy = "abundance", identicalOnly = FALSE, 
+     seqtab <- collapseNoMismatch(seqtab, minOverlap = Collapse_minOverlap, 
+                                  orderBy = "abundance", identicalOnly = FALSE, 
                                   vec = TRUE, verbose = FALSE)
   }
 
   dim(seqtab)
   table(nchar(getSequences(seqtab)))
 
-  ##SWR: save RDS format seqtab to allow for merging of multiple runs and later analysis without rerunning the whole pipeline
+  ## SWR: save RDS format seqtab to allow for merging of multiple runs and later 
+  # analysis without rerunning the whole pipeline
   saveRDS(seqtab, paste0(path,"/",marker,"/R01_R02/seqtab.rds"))
 
 
   #remove chimeric sequences
-  seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
+  seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", 
+                                      multithread=TRUE, verbose=TRUE)
   dim(seqtab.nochim)
-  sum(seqtab.nochim)/sum(seqtab)#calculate percent of sequences that are non-chimeric
+  # calculate percent of sequences that are non-chimeric
+  sum(seqtab.nochim)/sum(seqtab)
   write.table(seqtab.nochim,paste0(path,"/",marker,"/R01_R02/seqtab.nochim.txt"))
 
-  ##SWR: additional .RDS output table for easy downstream use in e.g. phyloseq
+  ## SWR: additional .RDS output table for easy downstream use in e.g. phyloseq
   saveRDS(seqtab.nochim, paste0(path,"/",marker, "/R01_R02/seqtab_nochim.rds"))
 
   #output table for each sample 
-  getN = function(x) sum(getUniques(x)) #define a function that counts sequences in a file/object
+  #define a function that counts sequences in a file/object
+  getN = function(x) sum(getUniques(x)) 
   if(min(out[,2])>0){
-    track=cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim),rowSums(seqtab.nochim)/out[,1]*100)
-    colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim","percent_retained")
+    track=cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), 
+                sapply(mergers, getN), rowSums(seqtab.nochim),
+                rowSums(seqtab.nochim)/out[,1]*100)
+    colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", 
+                         "merged", "nonchim","percent_retained")
     rownames(track) =sample.names
   } else {
   track=cbind(out[out[,2]>0,], sapply(dadaFs, getN), sapply(dadaRs, getN), 
-              sapply(mergers, getN), ... = rowSums(seqtab.nochim),rowSums(seqtab.nochim)/out[,1][out[,2]>0]*100)
-  # If processing a single sample, remove the sapply calls: e.g.   replace sapply(dadaFs, getN) with getN(dadaFs)
+              sapply(mergers, getN), rowSums(seqtab.nochim),
+              rowSums(seqtab.nochim)/out[,1][out[,2]>0]*100)
   track=rbind(track,cbind(out[,1][out[,2]==0],out[,2][out[,2]==0],NA,NA,NA,NA,NA))
-  colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim","percent_retained")
+  colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", 
+                       "merged", "nonchim","percent_retained")
   rownames(track) = c(sample.names[out[,2]>0],sample.names[out[,2]==0])
   }
 
@@ -351,39 +460,45 @@ if(analysis=="both"){
 } else {
 
 
-  ######################################################################################
+  #########################################################################
   #  Forward Reads Only
-  ###################################################################################
+  #########################################################################
 
   # Create paths for the filtered files in a subdirectory called filtered/
    
-  filtFsR1=file.path(path,marker, "R01_only/filtered", paste0(sample.names, "_F_filt.fastq"))
+  filtFsR1=file.path(path,marker, "R01_only/filtered", paste0(sample.names, 
+                                                              "_F_filt.fastq"))
   
   outp=paste0(path, "/", marker, "/R01_only/")
 
   #Filter sequences
   outR1=filterAndTrim(fnFs, filtFsR1,
-              maxN=my_maxN, maxEE=my_maxEEf, truncQ=my_truncQ,minLen = my_minLen, rm.phix=TRUE,
-              compress=TRUE, multithread=TRUE) # On Windows set multithread=FALSE
+              maxN=my_maxN, maxEE=my_maxEEf, truncQ=my_truncQ,minLen = my_minLen, 
+              rm.phix=TRUE,
+              compress=TRUE, multithread=TRUE) 
   head(outR1)
   write.table(outR1,paste0(path,"/",marker,"/R01_only/out.txt"))
 
   if(plotQC=="TRUE"){
-    #Plot Forward/Reverse Quality scores for each sample (not strictly necessary, time intensive)
+    # Plot Forward/Reverse Quality scores for each sample 
+    # (not strictly necessary, time intensive)
     pl=lapply(c(fnFL),plotQualityProfile)
     print(pl)
     ml=marrangeGrob(pl,nrow=2,ncol=1)
-    ggsave(paste0(path,"/",marker,"/R01_only/qualityplots_fwd.pdf"),ml,width=20,height=26,unit="cm",dpi=300)
+    ggsave(paste0(path,"/",marker,"/R01_only/qualityplots_fwd.pdf"),ml,
+           width=20,height=26,unit="cm",dpi=300)
 
   }
 
   if(plotQC=="SUB"){
-    #Plot Forward/Reverse Quality scores for each sample (not strictly necessary, time intensive)
+    # Plot Forward/Reverse Quality scores for each sample 
+    # (not strictly necessary, time intensive)
     sample_number <- if (length(fnFL) < 10) length(fnFL) else 10
     pl=lapply(sample(c(fnFL),sample_number),plotQualityProfile)
     print(pl)
     ml=marrangeGrob(pl,nrow=2,ncol=1)
-    ggsave(paste0(path,"/",marker,"/R01_only/qualityplots_fwd.pdf"),ml,width=20,height=26,unit="cm",dpi=300)
+    ggsave(paste0(path,"/",marker,"/R01_only/qualityplots_fwd.pdf"),ml,
+           width=20,height=26,unit="cm",dpi=300)
 
   }  
  
@@ -392,7 +507,8 @@ if(analysis=="both"){
   errFR1 <- learnErrors(filtFsR1[outR1[,2]>0], multithread=TRUE)
 
   eFplot=plotErrors(errFR1, nominalQ=TRUE)
-  ggsave(paste0(path,"/",marker,"/R01_only/R1_error_profile.pdf"),eFplot,width=20,height=26,unit="cm",dpi=300)
+  ggsave(paste0(path,"/",marker,"/R01_only/R1_error_profile.pdf"),eFplot,
+         width=20,height=26,unit="cm",dpi=300)
 
   #dereplicate
   derepFsR1 <- derepFastq(filtFsR1[outR1[,2]>0], verbose=TRUE)
@@ -402,7 +518,8 @@ if(analysis=="both"){
   #sample inference
   if(pseudo_pooling=="TRUE"){
     prior_seqs <- readDNAStringSet(guide_seqs)
-    dadaFsR1 <- dada(derepFsR1, err=errFR1, priors = prior_seqs, multithread=TRUE)
+    dadaFsR1 <- dada(derepFsR1, err=errFR1, priors = prior_seqs, 
+                     multithread=TRUE)
   }else{
     dadaFsR1 <- dada(derepFsR1, err=errFR1, multithread=TRUE)
   }
@@ -410,38 +527,54 @@ if(analysis=="both"){
 
   #make sequence table and distribution table for sequence lengths
   seqtabR1=makeSequenceTable(dadaFsR1)
-  ##SWR: If Collapse_overlapping=TRUE, collapses identical sequences together based on an overlap specified in 'Collapse_minOverlap'. 
+  ## SWR: If Collapse_overlapping=TRUE, collapses identical sequences together 
+  # based on an overlap specified in 'Collapse_minOverlap'. 
   if(Collapse_overlapping==TRUE){
-    seqtabR1 <- collapseNoMismatch(seqtabR1, minOverlap = Collapse_minOverlap, orderBy = "abundance", identicalOnly = FALSE, 
+    seqtabR1 <- collapseNoMismatch(seqtabR1, minOverlap = Collapse_minOverlap, 
+                                   orderBy = "abundance", identicalOnly = FALSE, 
                                    vec = TRUE, verbose = FALSE)
   }
   dim(seqtabR1)
   table(nchar(getSequences(seqtabR1)))
-  seqtabR1=seqtabR1[,nchar(colnames(seqtabR1))>49]#RDP can't assign taxonomy to sequences shorter than 50bp, so filter to >50bp
+  # RDP can't assign taxonomy to sequences shorter than 50 bp, so keep only 
+  # ASVs of at least 50 bp
+  seqtabR1=seqtabR1[,nchar(colnames(seqtabR1))>49]
 
-  ##SWR: save RDS format seqtab to allow for merging of multiple runs and later analysis without rerunning the whole pipeline
+  ## SWR: save RDS format seqtab to allow for merging of multiple runs and later 
+  # analysis without rerunning the whole pipeline
   saveRDS(seqtabR1, paste0(path,"/",marker,"/R01_only/seqtab.rds"))
 
   #remove chimeric sequences
-  seqtabR1.nochim <- removeBimeraDenovo(seqtabR1, method="consensus", multithread=TRUE, verbose=TRUE)
+  seqtabR1.nochim <- removeBimeraDenovo(seqtabR1, method="consensus", 
+                                        multithread=TRUE, verbose=TRUE)
   dim(seqtabR1.nochim)
-  sum(seqtabR1.nochim)/sum(seqtabR1)#calculate percent of sequences that are non-chimeric
-  write.table(seqtabR1.nochim,paste0(path,"/",marker,"/R01_only/seqtabR1.nochim"))
+  # calculate percent of sequences that are non-chimeric
+  sum(seqtabR1.nochim)/sum(seqtabR1)
+  write.table(seqtabR1.nochim,paste0(path,"/",marker,
+                                     "/R01_only/seqtabR1.nochim"))
 
   ##SWR: additional .RDS output table for easy downstream use in e.g. phyloseq
-  saveRDS(seqtabR1.nochim, paste0(path,"/",marker, "/R01_only/seqtab_nochim.rds"))
+  saveRDS(seqtabR1.nochim, paste0(path,"/",marker, 
+                                  "/R01_only/seqtab_nochim.rds"))
 
   #output table for each sample 
   getN=function(x) sum(getUniques(x))
   if(min(outR1[,2])>0){
-    trackR1=cbind(outR1, sapply(dadaFsR1,getN),rowSums(seqtabR1.nochim),rowSums(seqtabR1.nochim)/outR1[,1]*100)
-    colnames(trackR1) <- c("input", "filtered", "denoisedF", "nonchim","percent_retained")
+    trackR1=cbind(outR1, sapply(dadaFsR1,getN),rowSums(seqtabR1.nochim),
+                  rowSums(seqtabR1.nochim)/outR1[,1]*100)
+    colnames(trackR1) <- c("input", "filtered", "denoisedF", 
+                           "nonchim","percent_retained")
     rownames(trackR1) = c(sample.names)
   } else {
-    trackR1=cbind(outR1[outR1[,2]>0,], sapply(dadaFsR1, getN),rowSums(seqtabR1.nochim),rowSums(seqtabR1.nochim)/outR1[,1][outR1[,2]>0]*100)
-    trackR1=rbind(trackR1,cbind(outR1[,1][outR1[,2]==0],outR1[,2][outR1[,2]==0],NA,NA,NA))
-    colnames(trackR1) <- c("input", "filtered", "denoisedF", "nonchim","percent_retained")
-    rownames(trackR1) = c(sample.names[!outR1[,2]==0],sample.names[outR1[,2]==0])
+    trackR1=cbind(outR1[outR1[,2]>0,], sapply(dadaFsR1, getN),
+                  rowSums(seqtabR1.nochim),
+                  rowSums(seqtabR1.nochim)/outR1[,1][outR1[,2]>0]*100)
+    
+    trackR1=rbind(trackR1,cbind(outR1[,1][outR1[,2]==0],
+                                outR1[,2][outR1[,2]==0],NA,NA,NA))
+    colnames(trackR1) <- c("input", "filtered", "denoisedF", 
+                           "nonchim","percent_retained")
+    rownames(trackR1) = c(sample.names[!outR1[,2]==0],
+                          sample.names[outR1[,2]==0])
   }
 
   head(trackR1)
@@ -450,67 +583,86 @@ if(analysis=="both"){
 
 if(analysis=="both"){
 if(assign_taxonomy=="TRUE"){
-    taxa <- assignTaxonomy(seqtab.nochim,tax_database,multithread=TRUE,minBoot=my_minBootstrap,outputBootstraps = TRUE)
+    taxa <- assignTaxonomy(seqtab.nochim,tax_database,multithread=TRUE,
+                           minBoot=my_minBootstrap,outputBootstraps = TRUE)
     sum(rownames(taxa[[1]])==colnames(seqtab.nochim))==nrow(taxa[[1]])
     tmp=cbind(rownames(taxa[[1]]),taxa[[1]],taxa[[2]])
     rownames(tmp)=paste0("ASV",1:length(colnames(seqtab.nochim)))
     tmp=data.frame(tmp)
     write.table(tmp,paste0(path,"/",marker,"/R01_R02/taxonomy_ASVs_NC_R1R2.txt"))
 
-    ##SWR: save RDS file omitting bootstrap values for downstream use in e.g phyloseq
+    ## SWR: save RDS file omitting bootstrap values for downstream use in 
+    # e.g. phyloseq
     saveRDS(taxa$tax, paste0(path,"/",marker,"/R01_R02/taxa.rds"))
 
     tmp2=cbind(t(seqtab.nochim),tmp)
     rownames(tmp2)=paste0("ASV",1:length(colnames(seqtab.nochim)))
     write.table(tmp2,paste0(path,"/",marker,"/R01_R02/seqtab.nochim_withtax.txt"))
 
-    #write raw ASVs to a fasta file
+    # write raw ASVs to a fasta file
     seqsnochim=DNAStringSet(colnames(seqtab.nochim))
     seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim)))
-    writeXStringSet(seqsnochim,paste0(path,"/",marker,"/R01_R02/ASVs_raw.fasta"),format="fasta")
+    writeXStringSet(seqsnochim,paste0(path,"/",marker,"/R01_R02/ASVs_raw.fasta"),
+                    format="fasta")
     
-    #write ASVs with taxonmy in the header to a fasta file
+    # write ASVs with taxonomy in the header to a fasta file
     seqsnochim=DNAStringSet(colnames(seqtab.nochim))
-    seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim)),"|","|",
-                                   paste0(tmp$Kingdom,";",tmp$Phylum,";",tmp$Class,";",tmp$Order,";",tmp$Family,";",tmp$Genus,";",tmp$Species))
-    writeXStringSet(seqsnochim,paste0(path,"/",marker,"/R01_R02/ASVs_withtax.fasta"),format="fasta")
+    seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim)),
+                                   "|","|",
+                                   paste0(tmp$Kingdom,";",tmp$Phylum,";",
+                                          tmp$Class,";",tmp$Order,";",
+                                          tmp$Family,";",tmp$Genus,";",
+                                          tmp$Species))
+    writeXStringSet(seqsnochim,paste0(
+      path,"/",marker,"/R01_R02/ASVs_withtax.fasta"),format="fasta")
   } else {
-    #write raw ASVs to a fasta file
+    # write raw ASVs to a fasta file
     seqsnochim=DNAStringSet(colnames(seqtab.nochim))
     seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim)))
-    writeXStringSet(seqsnochim,paste0(path,"/",marker,"/R01_R02/ASVs_raw.fasta"),format="fasta")
+    writeXStringSet(seqsnochim,paste0(
+      path,"/",marker,"/R01_R02/ASVs_raw.fasta"),format="fasta")
   }
 }else{
   if(assign_taxonomy=="TRUE"){
-    taxaR1= assignTaxonomy(seqtabR1.nochim,tax_database,multithread=TRUE,minBoot=my_minBootstrap,outputBootstraps = TRUE)
+    taxaR1= assignTaxonomy(seqtabR1.nochim,tax_database,
+                           multithread=TRUE,minBoot=my_minBootstrap,
+                           outputBootstraps = TRUE)
     sum(rownames(taxaR1[[1]])==colnames(seqtabR1.nochim))==nrow(taxaR1[[1]])
     tmp=cbind(rownames(taxaR1[[1]]),taxaR1[[1]],taxaR1[[2]])
     rownames(tmp)=paste0("ASV",1:length(colnames(seqtabR1.nochim)))
     tmp=data.frame(tmp)
     write.table(tmp,paste0(path,"/",marker,"/R01_only/taxonomy_ASVs_NC_R1.txt"))
 
-    ##SWR: save RDS file omitting bootstrap values for downstream use in e.g phyloseq
+    ## SWR: save RDS file omitting bootstrap values for downstream use in 
+    # e.g. phyloseq
     saveRDS(taxaR1$tax, paste0(path,"/",marker,"/R01_only/taxa.rds"))
 
     tmp2=cbind(t(seqtabR1.nochim),tmp)
     rownames(tmp2)=paste0("ASV",1:length(colnames(seqtabR1.nochim)))
-    write.table(tmp2,paste0(path,"/",marker,"/R01_only/seqtabR1.nochim_withtax.txt"))
+    write.table(tmp2,paste0(
+      path,"/",marker,"/R01_only/seqtabR1.nochim_withtax.txt"))
     
     #write raw ASVs to a fasta file
     seqsnochimR1=DNAStringSet(colnames(seqtabR1.nochim))
     seqsnochimR1@ranges@NAMES=paste0("ASV",1:length(colnames(seqtabR1.nochim)))
-    writeXStringSet(seqsnochimR1,paste0(path,"/",marker,"/R01_only/ASVs_raw.fasta"),format="fasta")
+    writeXStringSet(seqsnochimR1,paste0(
+      path,"/",marker,"/R01_only/ASVs_raw.fasta"),format="fasta")
     
     #write ASVs with taxonomy in the header to a fasta file
     seqsnochim=DNAStringSet(colnames(seqtabR1.nochim))
     seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtabR1.nochim)),"|",
-                                   paste0(tmp$Kingdom,";",tmp$Phylum,";",tmp$Class,";",tmp$Order,";",tmp$Family,";",tmp$Genus,";",tmp$Species))
-    writeXStringSet(seqsnochim,paste0(path,"/",marker,"/R01_only/ASVs_withtax_R1.fasta"),format="fasta")
+                                   paste0(tmp$Kingdom,";",tmp$Phylum,";",
+                                          tmp$Class,";",tmp$Order,";",
+                                          tmp$Family,";",tmp$Genus,";",
+                                          tmp$Species))
+    writeXStringSet(seqsnochim,paste0(
+      path,"/",marker,"/R01_only/ASVs_withtax_R1.fasta"),format="fasta")
   }else{
     #write raw ASVs to a fasta file
     seqsnochimR1=DNAStringSet(colnames(seqtabR1.nochim))
     seqsnochimR1@ranges@NAMES=paste0("ASV",1:length(colnames(seqtabR1.nochim)))
-    writeXStringSet(seqsnochimR1,paste0(path,"/",marker,"/R01_only/ASVs_raw.fasta"),format="fasta")
+    writeXStringSet(seqsnochimR1,paste0(
+      path,"/",marker,"/R01_only/ASVs_raw.fasta"),format="fasta")
 
   }
 }
@@ -526,33 +678,44 @@ dada2_dir <- outp
 
 ## Make match list
 Comment SWR: This is a short bash chunk to create the match list necessary for LULU using vsearch (must be installed). If you are on Windows, or would rather use BLAST than vsearch, check the [LULU GitHub](https://github.com/tobiasgf/lulu) for additional info.
-```{bash, eval=FALSE, tidy=TRUE}
-#UNCOMMENT AND CHANGE ME (only if you're not running Linux as your OS) to the directory containing the dada2
+```{bash, eval=FALSE, tidy=FALSE}
+# UNCOMMENT AND CHANGE ME (only if you're not running Linux as your OS) to the 
+# directory containing the dada2 output; on Linux it is passed automatically 
+# from the previous chunk
 # dada2Folder="/home/simeon/Documents/Bioimmigrants/OITS1/with_priors_R1_R2"
 
 ASV="${dada2Folder}"/ASVs_raw.fasta
 matchList="${dada2Folder}"/match_list.txt
 
-vsearch --usearch_global "${ASV}" --db "${ASV}" --self --id .84 --iddef 1 --userout "${matchList}" -userfields query+target+id --maxaccepts 0 --query_cov .9 --maxhits 10
+vsearch --usearch_global "${ASV}" --db "${ASV}" --self --id .84 --iddef 1 \
+--userout "${matchList}" -userfields query+target+id --maxaccepts 0 \
+--query_cov .9 --maxhits 10
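+
+# Optional quick check (a hedged sketch): LULU below needs a non-empty match 
+# list, so count its lines before moving on.
+wc -l "${matchList}"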
 ```
 
 ## LULU post-processing
 Comment SWR: This chunk will run the LULU post-processing on the seqtab_nochim table and produce a new seqtab_nochim table, as well as a new raw ASV table. It requires the 'seqtab_nochim' and 'match_list' tables as input and will automatically get them when running the whole pipeline on a Linux OS. It also has the possibility to assign taxonomy to this new ASV table using the RDP classifier. All files will be saved to a subdirectory of the main output called 'lulu_output'. In the current form, the original ASV designations from the dada2 pipeline are not retained in the LULU output.
-```{r lulu, eval=FALSE, tidy=TRUE}
-#If you want to run this as a stand-alone chunk or are on Windows, uncomment and change the 'dada2_dir' variable. The directory must contain 'match_list.txt' and 'seqtab.nochim.rds'. If you run the whole pipeline on Linux, this will be set.
+
+In the published version of the manuscript, LULU was not performed as it removed valid ASVs in the positive control, indicating overly conservative ASV fusion.
+```{r lulu, eval=FALSE, tidy=FALSE}
+# If you want to run this as a stand-alone chunk or are on Windows, uncomment 
+# and change the 'dada2_dir' variable. The directory must contain 
+# 'match_list.txt' and 'seqtab.nochim.rds'. If you run the whole pipeline on 
+# Linux, this will be set.
 # dada2_dir = "/home/simeon/Documents/Bioimmigrants/OITS1/with_priors_R1_R2"
 
-#CHANGE ME  to the path for the taxonomy database you will be using for identification (if not already specified above)
+# CHANGE ME to the path for the taxonomy database you will be using for 
+# identification (if not already specified above)
 tax_database=tax_database
 
-#CHANGE ME to specify the minimum confidence interval for RDP assignment of taxonomy.
+# CHANGE ME to specify the minimum bootstrap confidence for RDP assignment of 
+# taxonomy.
 my_minBootstrap=80 
 
-#CHANGE ME to "TRUE" or "FALSE" if you want to use settings that differ from the setup chunk.
+# CHANGE ME to "TRUE" or "FALSE" if you want to use settings that differ from 
+# the setup chunk.
 assign_taxonomy=assign_taxonomy
 
-###NO CHANGES NEEDED BELOW
-#load libraries (including dada2 so it can be run standalone)
+### NO CHANGES NEEDED BELOW
+# load libraries (including dada2 so it can be run standalone)
 library(lulu)
 library(dada2)
 library(Biostrings)
@@ -563,7 +726,8 @@ dir.create(file.path(lulu_output))
 knitr::opts_chunk$set(root.dir=lulu_output)
 
 matchlist <- paste0(dada2_dir, "/match_list.txt")
-matchlist <- read.table(matchlist, header=FALSE,as.is=TRUE, stringsAsFactors=FALSE)
+matchlist <- read.table(matchlist, header=FALSE,
+                        as.is=TRUE, stringsAsFactors=FALSE)
 
 ASV_table <- readRDS(paste0(dada2_dir, "/seqtab_nochim.rds"))
 ASV_table <- as.data.frame(t(ASV_table))
@@ -578,7 +742,8 @@ curated_ASVs <- invisible(lulu(ASV_table, matchlist))
 #Parse and save curated seqtab_nochim
 seqtab.nochim.lulu <- curated_ASVs$curated_table
 seqtab.nochim.lulu$order <- row.names(seqtab.nochim.lulu)
-seqtab.nochim.lulu <- seqtab.nochim.lulu[match(ASV_list, seqtab.nochim.lulu$order),]
+seqtab.nochim.lulu <- seqtab.nochim.lulu[match(ASV_list, 
+                                               seqtab.nochim.lulu$order),]
 seqtab.nochim.lulu <- seqtab.nochim.lulu[complete.cases(seqtab.nochim.lulu),]
 seqtab.nochim.lulu$order <- NULL
 delASVs <- curated_ASVs$discarded_otus
@@ -589,9 +754,11 @@ seqtab.nochim.lulu <- t(seqtab.nochim.lulu)
 
 saveRDS(seqtab.nochim.lulu, file = paste0(lulu_output, "/seqtab_nochim.rds"))
 
-#Optional RDS taxonomy and create seqtab_nochim_withtax and ASV_withtax, otherwise just output a new raw ASV table
+# Optional RDS taxonomy and create seqtab_nochim_withtax and ASV_withtax, 
+# otherwise just output a new raw ASV table
 if(assign_taxonomy=="TRUE"){
-  taxa <- assignTaxonomy(seqtab.nochim.lulu,tax_database,multithread=TRUE,minBoot=my_minBootstrap,outputBootstraps = TRUE)
+  taxa <- assignTaxonomy(seqtab.nochim.lulu,tax_database,multithread=TRUE,
+                         minBoot=my_minBootstrap,outputBootstraps = TRUE)
   sum(rownames(taxa[[1]])==colnames(seqtab.nochim.lulu))==nrow(taxa[[1]])
   tmp=cbind(rownames(taxa[[1]]),taxa[[1]],taxa[[2]])
   rownames(tmp)=paste0("ASV",1:length(colnames(seqtab.nochim.lulu)))
@@ -606,54 +773,64 @@ if(assign_taxonomy=="TRUE"){
   #write raw ASVs to a fasta file
   seqsnochim=DNAStringSet(colnames(seqtab.nochim.lulu))
   seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim.lulu)))
-  writeXStringSet(seqsnochim,paste0(lulu_output,"/ASVs_raw.fasta"),format="fasta")
+  writeXStringSet(seqsnochim,paste0(lulu_output,"/ASVs_raw.fasta"),
+                  format="fasta")
   
   #write ASVs containing taxonomy in the header to a fasta file
   seqsnochim=DNAStringSet(colnames(seqtab.nochim.lulu))
-  seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim.lulu)),"|","|",
-                                 paste0(tmp$Kingdom,";",tmp$Phylum,";",tmp$Class,";",tmp$Order,";",tmp$Family,";",tmp$Genus,";",tmp$Species))
-  writeXStringSet(seqsnochim,paste0(lulu_output,"/ASVs_withtax.fasta"),format="fasta")
+  seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim.lulu)),
+                                 "|","|",
+                                 paste0(tmp$Kingdom,";",tmp$Phylum,";",
+                                        tmp$Class,";",tmp$Order,";",tmp$Family,
+                                        ";",tmp$Genus,";",tmp$Species))
+  writeXStringSet(seqsnochim,paste0(
+    lulu_output,"/ASVs_withtax.fasta"),format="fasta")
 } else {
   #write raw ASVs to a fasta file
   seqsnochim=DNAStringSet(colnames(seqtab.nochim.lulu))
   seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim.lulu)))
-  writeXStringSet(seqsnochim,paste0(lulu_output,"/ASVs_raw.fasta"),format="fasta")
+  writeXStringSet(seqsnochim,paste0(
+    lulu_output,"/ASVs_raw.fasta"),format="fasta")
 }
 
 ```
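+
+An optional, hedged sketch: summarise the effect of the LULU curation above, 
+using only objects created in the previous chunk.
+```{r, lulu_summary, eval=FALSE}
+cat("ASVs before LULU curation:", nrow(ASV_table), "\n")
+cat("ASVs after LULU curation: ", nrow(curated_ASVs$curated_table), "\n")
+cat("ASVs discarded (merged into parents):",
+    length(curated_ASVs$discarded_otus), "\n")
+```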
 
 ## SWR: Extra chunk to reanalyse
 Reanalyse, e.g. with a different taxonomy database, or pool multiple runs. This chunk can be run completely independently (all required libraries are loaded again), provided that the seqtab.rds files were generated before.
-```{r Reanalysis/Pooling, eval=FALSE, tidy=TRUE}
-#set knitr chunk options
-knitr::opts_chunk$set(echo = TRUE)
-knitr::opts_chunk$set(root.dir=paste0(path))
-
+```{r Reanalysis/Pooling, eval=FALSE, tidy=FALSE}
 #load libraries for R session
 library(dada2)
 library(Biostrings)
 
-###reset paths and marker name and reload libraries to make this chunk standalone
-
-path1="/home/simeon/Documents/Bioimmigrants/Run1_rerun_all/OITS1/R01_R02/" # CHANGE ME to the directory containing the file seqtab.rds for run1
-path2="/home/simeon/Documents/Bioimmigrants/Run2_all/OITS1/R01_R02/" # CHANGE ME to the directory containing the file seqtab.rds for run2 (if applicable)
-path3="C:/Users/mada/Documents/dada2_test/test/" # CHANGE ME to the directory containing the file seqtab.rds for run3 (if applicable)
-path4="C:/Users/mada/Documents/dada2_test/test/" # CHANGE ME to the directory containing the file seqtab.rds for run4 (if applicable)
-
-#CHANGE ME to the path you want to save the output to. If reanalyzing one run, output will automatically reset to be equal to 'path1'.
+# reset paths and marker name and reload libraries to make this chunk standalone
+# CHANGE ME to the directory containing the file seqtab.rds for run1
+path1="/home/simeon/Documents/Bioimmigrants/Run1_rerun_all/OITS1/R01_R02/"
+# CHANGE ME to the directory containing the file seqtab.rds for run2 (if applicable)
+path2="/home/simeon/Documents/Bioimmigrants/Run2_all/OITS1/R01_R02/"
+# CHANGE ME to the directory containing the file seqtab.rds for run3 (if applicable)
+path3="C:/Users/mada/Documents/dada2_test/test/"
+# CHANGE ME to the directory containing the file seqtab.rds for run4 (if applicable)
+path4="C:/Users/mada/Documents/dada2_test/test/"
+
+# CHANGE ME to the path you want to save the output to. If reanalyzing one run, 
+# output will automatically reset to be equal to 'path1'.
 output="/home/simeon/Documents/Bioimmigrants/OITS1/R1_R2_pooled_final/qual2/"
 
-#CHANGE ME  to the path for the taxonomy database you will be using for identification (if not already specified above)
-tax_database="/home/simeon/Documents/Git/bioimmigrants_scripts/CoFiMiTrO8_OomycITS1_dada2_DB.fasta"
+# CHANGE ME  to the path for the taxonomy database you will be using for 
+# identification (if not already specified above)
+tax_database="CoFiMiTrO8_OomycITS1_dada2_DB.fasta"
 
-#CHANGE ME to specify the minimum confidence interval for RDP assignment of taxonomy.
+# CHANGE ME to specify the minimum bootstrap confidence for RDP assignment of 
+# taxonomy.
 my_minBootstrap=80 
 
-#CHANGE ME "TRUE" to pool multiple runs (add more paths if more than 4 runs are to be analysed), "FALSE" to reanalyse a single run
+# CHANGE ME "TRUE" to pool multiple runs (add more paths if more than 4 runs are 
+# to be analysed), "FALSE" to reanalyse a single run
 Poolruns="TRUE"
 
-#CHANGE ME to "TRUE" or "FALSE" if you want to use settings that differ from the setup chunk.
-assign_taxonomy="TRUE"
+# CHANGE ME to "TRUE" or "FALSE" if you want to use settings that differ from 
+# the setup chunk.
+assign_taxonomy="FALSE"
 
 if(Poolruns=="TRUE"){
   #could implement for loop here, if ever used widely
@@ -670,17 +847,20 @@ if(Poolruns=="TRUE"){
   output <- path1
 }
 
-#remove chimeric sequences
+# remove chimeric sequences
 if(Poolruns=="TRUE"){
-  seqtab.nochim <- removeBimeraDenovo(st.all, method="consensus", multithread=TRUE, verbose=TRUE)
+  seqtab.nochim <- removeBimeraDenovo(st.all, method="consensus", 
+                                      multithread=TRUE, verbose=TRUE)
   dim(seqtab.nochim)
-  sum(seqtab.nochim)/sum(st.all)#calculate percent of sequences that are non-chimeric
+  # calculate percent of sequences that are non-chimeric
+  sum(seqtab.nochim)/sum(st.all)
   write.table(seqtab.nochim,paste0(output,"/seqtab.nochim.txt"))
   saveRDS(seqtab.nochim, paste0(output, "/seqtab_nochim.rds"))
 }
 
 if(assign_taxonomy=="TRUE"){
-  taxa <- assignTaxonomy(seqtab.nochim,tax_database,multithread=TRUE,minBoot=my_minBootstrap,outputBootstraps = TRUE)
+  taxa <- assignTaxonomy(seqtab.nochim,tax_database,multithread=TRUE,
+                         minBoot=my_minBootstrap,outputBootstraps = TRUE)
   sum(rownames(taxa[[1]])==colnames(seqtab.nochim))==nrow(taxa[[1]])
   tmp=cbind(rownames(taxa[[1]]),taxa[[1]],taxa[[2]])
   rownames(tmp)=paste0("ASV",1:length(colnames(seqtab.nochim)))
@@ -700,7 +880,9 @@ if(assign_taxonomy=="TRUE"){
   #write ASVs containing taxonomy in the header to a fasta file
   seqsnochim=DNAStringSet(colnames(seqtab.nochim))
   seqsnochim@ranges@NAMES=paste0("ASV",1:length(colnames(seqtab.nochim)),"|","|",
-                                 paste0(tmp$Kingdom,";",tmp$Phylum,";",tmp$Class,";",tmp$Order,";",tmp$Family,";",tmp$Genus,";",tmp$Species))
+                                 paste0(tmp$Kingdom,";",tmp$Phylum,";",
+                                        tmp$Class,";",tmp$Order,";",tmp$Family,
+                                        ";",tmp$Genus,";",tmp$Species))
   writeXStringSet(seqsnochim,paste0(output,"/ASVs_withtax.fasta"),format="fasta")
 } else {
   #write raw ASVs to a fasta file
@@ -716,12 +898,3 @@ dada2_dir <- output
     Sys.setenv(dada2Folder=dada2_dir)
   }
 ```
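+
+A hedged sketch of the "for loop" alternative mentioned inside the chunk above: 
+pool an arbitrary number of runs by listing their output directories (path1 and 
+path2 are reused here; extend the vector as needed) and merging the saved 
+seqtab.rds files with dada2::mergeSequenceTables before chimera removal.
+```{r, pool_many_runs, eval=FALSE}
+library(dada2)
+run_dirs <- c(path1, path2)                        # add further run directories
+seqtabs  <- lapply(file.path(run_dirs, "seqtab.rds"), readRDS)
+st.all   <- do.call(mergeSequenceTables, seqtabs)  # same object name as above
+```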
-
-##Delete intermediate files
-```{r, eval=FALSE}
-unlink(paste0(path,"/",marker,"/trim/*"),recursive=T)
-unlink(paste0(path,"/",marker,"/trim"),recursive=T)
-#sapply(paste0(path,"/",marker,"/trim/"file.remove(paste0(path,))"
-#file.remove(paste0(path,"/",marker,"/trim/*"))
-```
-
diff --git a/SupMat2_DADA2_submission_FEMSEc.pdf b/SupMat2_DADA2_submission_FEMSEc.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..ee177b435ede984d78cd442b9a75e2df4a0373e9
Binary files /dev/null and b/SupMat2_DADA2_submission_FEMSEc.pdf differ