Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
M
MP Tangvik et al 2025 metabarcoding
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Simeon Rossmann
MP Tangvik et al 2025 metabarcoding
Commits
957dc3aa
Commit
957dc3aa
authored
5 months ago
by
Simeon
Browse files
Options
Downloads
Patches
Plain Diff
cleanup
parent
34eded4e
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
analysis/FITS1/.Rhistory
+0
-512
0 additions, 512 deletions
analysis/FITS1/.Rhistory
analysis/Nems/.Rhistory
+0
-512
0 additions, 512 deletions
analysis/Nems/.Rhistory
with
0 additions
and
1024 deletions
analysis/FITS1/.Rhistory
deleted
100644 → 0
+
0
−
512
View file @
34eded4e
parameters
$
biom_export
=
"FALSE"
# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
path
=
"FITS1_DADA2_results_260821/"
# CHANGE ME to TRUE to list all samples and generate an empty metadata file
optional_sample_check
=
TRUE
# CHANGE ME to TRUE to update cuphyr
update_cuphyr
=
TRUE
# Initiate by loading packages and setting knit options
################# NO CHANGES NECESSARY BELOW #################
knitr
::
opts_chunk
$
set
(
echo
=
TRUE
)
knitr
::
opts_chunk
$
set
(
root.dir
=
paste0
(
path
))
knitr
::
opts_chunk
$
set
(
message
=
FALSE
)
knitr
::
opts_chunk
$
set
(
warning
=
FALSE
)
if
(
update_cuphyr
)
{
devtools
::
install_github
(
"simeross/cuphyr"
)
}
# Sequence and microbiome specific libraries
library
(
dada2
)
library
(
Biostrings
)
library
(
DECIPHER
)
library
(
cuphyr
)
# The export of phyloseq objects to a BIOM format and the generation of fancier
# ordination plots require the phyloseq-extended package. The first command
# installs the package that is currently on the dev brach of the author's
# repository, the second command sources some extra functions, including the
# better ordination plot implementation.
remotes
::
install_github
(
"mahendra-mariadassou/phyloseq-extended"
,
ref
=
"dev"
)
source
(
"https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R"
)
library
(
phyloseq
)
library
(
SIAMCAT
)
# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
path
=
"FITS1_DADA2_results_260821/"
# CHANGE ME to TRUE to list all samples and generate an empty metadata file
optional_sample_check
=
TRUE
# CHANGE ME to TRUE to update cuphyr
update_cuphyr
=
TRUE
# Initiate by loading packages and setting knit options
################# NO CHANGES NECESSARY BELOW #################
knitr
::
opts_chunk
$
set
(
echo
=
TRUE
)
knitr
::
opts_chunk
$
set
(
root.dir
=
paste0
(
path
))
knitr
::
opts_chunk
$
set
(
message
=
FALSE
)
knitr
::
opts_chunk
$
set
(
warning
=
FALSE
)
if
(
update_cuphyr
)
{
devtools
::
install_github
(
"simeross/cuphyr"
)
}
# Sequence and microbiome specific libraries
library
(
dada2
)
library
(
Biostrings
)
library
(
DECIPHER
)
library
(
cuphyr
)
# The export of phyloseq objects to a BIOM format and the generation of fancier
# ordination plots require the phyloseq-extended package. The first command
# installs the package that is currently on the dev brach of the author's
# repository, the second command sources some extra functions, including the
# better ordination plot implementation.
remotes
::
install_github
(
"mahendra-mariadassou/phyloseq-extended"
,
ref
=
"dev"
)
source
(
"https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R"
)
library
(
phyloseq
)
#library(SIAMCAT)
# Phylogeny libraries
library
(
phangorn
)
library
(
ape
)
# Plotting and figure export
library
(
gridExtra
)
library
(
viridis
)
library
(
ggpubr
)
# Tidyverse
library
(
tidyverse
)
library
(
stringr
)
# Various packages for specific analysis
library
(
readxl
)
library
(
openxlsx
)
library
(
ggpmisc
)
library
(
betareg
)
library
(
BBmisc
)
library
(
aod
)
library
(
betareg
)
#install.packages('MicrobiomeStat')
library
(
MicrobiomeStat
)
# Checks whether output path exists and creates it if not. Throws warning if
# directory exists.
outp
<-
paste0
(
path
,
"/analysis_output"
)
dir.create
(
file.path
(
outp
))
if
(
optional_sample_check
)
{
seqtabcheck
<-
readRDS
(
paste0
(
path
,
"/seqtab_nochim.rds"
))
samps
<-
rownames
(
seqtabcheck
)
lensamps
<-
length
(
samps
)
blankcol
<-
vector
(
mode
=
"character"
,
length
=
lensamps
)
blanktable
<-
data.frame
(
SampleIDs
=
samps
,
ExampleProperty1
=
blankcol
,
ExampleProperty2
=
blankcol
,
ExampleProperty3
=
blankcol
)
write.table
(
blanktable
,
file
=
paste0
(
path
,
"/descriptors_blank.txt"
),
sep
=
"\t"
,
row.names
=
F
)
cat
(
"'seqtab_nochim.rds' contains samples in the following order:\n"
,
samps
,
"\nThe number of samples in the file is:"
,
lensamps
,
sep
=
"\n"
)
rm
(
optional_sample_check
,
seqtabcheck
,
samps
,
lensamps
,
blankcol
,
blanktable
,
update_cuphyr
)
}
else
{
rm
(
optional_sample_check
,
update_cuphyr
)}
# Dedicated environment containing all global analysis settings for better
# overview and collected export of settings
parameters
<-
new.env
()
# CHANGE ME to "TRUE" to remove control samples from the analysis or "FALSE" to
# analyse all samples.
parameters
$
prune_controls
=
"TRUE"
# CHANGE ME to a list of unique identifiers that only occur in the names of
# samples you do NOT want to analyse. Common examples are provided.
parameters
$
controls
=
c
(
"NegativK-4-Nem"
,
"Vann"
,
"Neg"
,
"Positivkontroll-Nem"
,
"Contr"
,
"POSK"
)
# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
# name. This is useful to exclude non-target organisms or noise from organelles
# such as Chloroplasts and Mitochondria. It is recommended to first look at all
# data before using this setting.
parameters
$
prune_noise_taxgroups
=
"FALSE"
# CHANGE ME to define the taxonomic groups to be removed as noise.
parameters
$
noise_taxgroups
=
c
(
"Chloroplast"
,
"Mitochondria"
)
# CHANGE ME to a number of ASV counts [~reads] that analyzed samples should
# minimally have. Samples with lower ASV counts than 'minread' will be pruned.
# Set to 0 to not prune any samples.
parameters
$
minASVcount
=
3000
# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead of
# using the default dada2 output ('taxa.rds').
parameters
$
customTax
=
"TRUE"
# CHANGE ME to the location of the custom taxonomy file. This only matters if
# parameters$customTax="TRUE", otherwise it will be ignored.
parameters
$
taxfile
=
"Nems_DADA2_results_260821/custom_BLAST_taxonomy_nt.txt"
# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a
# long time depending on the number of sequences (up to days for thousands).
# If a tree is provided as 'phylotree.rds' in 'path', then it will be used
# regardless of the value of 'parameters$maketree'
parameters
$
maketree
=
"FALSE"
# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on the
# leaf with the longest branch (outgroup). This makes analyses that rely on the
# phylogenetic tree reproducible instead of picking a random leaf as root when
# calculating UNIFRAC distances. Implementation based on
# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
# in https://github.com/joey711/phyloseq/issues/597
parameters
$
roottree
=
"TRUE"
## CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
parameters
$
biom_export
=
"FALSE"
# Prat
set
$
prat_data
$
ndvi_temp
<-
(
set
$
prat_data
$
ndvi
+1
)
/
2
# CHANGE ME to the directory that contains 'seqtab_nochim.rds'
path
=
"Nems_DADA2_results_260821"
# CHANGE ME to TRUE to list all samples and generate an empty metadata file
optional_sample_check
=
TRUE
# CHANGE ME to TRUE to update cuphyr
update_cuphyr
=
TRUE
# Initiate by loading packages and setting knit options
################# NO CHANGES NECESSARY BELOW #################
knitr
::
opts_chunk
$
set
(
echo
=
TRUE
)
knitr
::
opts_chunk
$
set
(
root.dir
=
paste0
(
path
))
knitr
::
opts_chunk
$
set
(
message
=
FALSE
)
knitr
::
opts_chunk
$
set
(
warning
=
FALSE
)
if
(
update_cuphyr
)
{
devtools
::
install_github
(
"simeross/cuphyr"
)
}
# Sequence and microbiome specific libraries
library
(
dada2
)
library
(
Biostrings
)
library
(
DECIPHER
)
library
(
cuphyr
)
# The export of phyloseq objects to a BIOM format and the generation of fancier
# ordination plots require the phyloseq-extended package. The first command
# installs the package that is currently on the dev brach of the author's
# repository, the second command sources some extra functions, including the
# better ordination plot implementation.
remotes
::
install_github
(
"mahendra-mariadassou/phyloseq-extended"
,
ref
=
"dev"
)
source
(
"https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R"
)
library
(
phyloseq
)
#library(SIAMCAT)
# Phylogeny libraries
library
(
phangorn
)
library
(
ape
)
# Plotting and figure export
library
(
gridExtra
)
library
(
viridis
)
library
(
ggpubr
)
library
(
cowplot
)
# Tidyverse
library
(
tidyverse
)
library
(
stringr
)
# Various packages for specific analysis
library
(
readxl
)
library
(
openxlsx
)
library
(
ggpmisc
)
library
(
betareg
)
library
(
BBmisc
)
library
(
aod
)
library
(
betareg
)
#install.packages('MicrobiomeStat')
library
(
MicrobiomeStat
)
# Checks whether output path exists and creates it if not. Throws warning if
# directory exists.
outp
<-
paste0
(
path
,
"/analysis_output"
)
dir.create
(
file.path
(
outp
))
if
(
optional_sample_check
)
{
seqtabcheck
<-
readRDS
(
paste0
(
path
,
"/seqtab_nochim.rds"
))
samps
<-
rownames
(
seqtabcheck
)
lensamps
<-
length
(
samps
)
blankcol
<-
vector
(
mode
=
"character"
,
length
=
lensamps
)
blanktable
<-
data.frame
(
SampleIDs
=
samps
,
ExampleProperty1
=
blankcol
,
ExampleProperty2
=
blankcol
,
ExampleProperty3
=
blankcol
)
write.table
(
blanktable
,
file
=
paste0
(
path
,
"/descriptors_blank.txt"
),
sep
=
"\t"
,
row.names
=
F
)
cat
(
"'seqtab_nochim.rds' contains samples in the following order:\n"
,
samps
,
"\nThe number of samples in the file is:"
,
lensamps
,
sep
=
"\n"
)
rm
(
optional_sample_check
,
seqtabcheck
,
samps
,
lensamps
,
blankcol
,
blanktable
,
update_cuphyr
)
}
else
{
rm
(
optional_sample_check
,
update_cuphyr
)}
# Dedicated environment containing all global analysis settings for better
# overview and collected export of settings
parameters
<-
new.env
()
# CHANGE ME to "TRUE" to remove control samples from the analysis or "FALSE" to
# analyse all samples.
parameters
$
prune_controls
=
"TRUE"
# CHANGE ME to a list of unique identifiers that only occur in the names of
# samples you do NOT want to analyse. Common examples are provided.
parameters
$
controls
=
c
(
"NegativK-4-Nem"
,
"Vann"
,
"Neg"
,
"Positivkontroll-Nem"
,
"Contr"
,
"POSK"
)
# CHANGE ME to "TRUE" to remove certain taxonomic groups from the analysis by
# name. This is useful to exclude non-target organisms or noise from organelles
# such as Chloroplasts and Mitochondria. It is recommended to first look at all
# data before using this setting.
parameters
$
prune_noise_taxgroups
=
"FALSE"
# CHANGE ME to define the taxonomic groups to be removed as noise.
parameters
$
noise_taxgroups
=
c
(
"Chloroplast"
,
"Mitochondria"
)
# CHANGE ME to a number of ASV counts [~reads] that analyzed samples should
# minimally have. Samples with lower ASV counts than 'minread' will be pruned.
# Set to 0 to not prune any samples.
parameters
$
minASVcount
=
3000
# CHANGE ME to "TRUE", if you want to provide a custom taxonomy table instead of
# using the default dada2 output ('taxa.rds').
parameters
$
customTax
=
"TRUE"
# CHANGE ME to the location of the custom taxonomy file. This only matters if
# parameters$customTax="TRUE", otherwise it will be ignored.
parameters
$
taxfile
=
"Nems_DADA2_results_260821/custom_BLAST_taxonomy_nt.txt"
# CHANGE ME to "TRUE" to generate a phylogenetic tree. This process takes a
# long time depending on the number of sequences (up to days for thousands).
# If a tree is provided as 'phylotree.rds' in 'path', then it will be used
# regardless of the value of 'parameters$maketree'
parameters
$
maketree
=
"FALSE"
# CHANGE ME to "TRUE" to root the used phylogenetic tree (if one exists) on the
# leaf with the longest branch (outgroup). This makes analyses that rely on the
# phylogenetic tree reproducible instead of picking a random leaf as root when
# calculating UNIFRAC distances. Implementation based on
# http://john-quensen.com/r/unifrac-and-tree-roots/ and answers
# in https://github.com/joey711/phyloseq/issues/597
parameters
$
roottree
=
"TRUE"
## CHANGE ME to "TRUE" to export all generated phyloseq objects as .biom objects
parameters
$
biom_export
=
"FALSE"
############### NO NEED FOR CHANGES BELOW ###############
# Make dedicated environments to contain temporary values and manage other objects
tmp
<-
new.env
()
plots
<-
new.env
()
set
<-
new.env
()
# Read in variables
tmp
$
seqtabp
<-
readRDS
(
paste0
(
path
,
"/seqtab_nochim.rds"
))
if
(
parameters
$
customTax
==
"TRUE"
)
{
tmp
$
taxap
<-
read.delim
(
parameters
$
taxfile
,
header
=
TRUE
,
sep
=
"\t"
)
rownames
(
tmp
$
taxap
)
<-
colnames
(
tmp
$
seqtabp
)
tmp
$
taxap
<-
as.matrix
(
tmp
$
taxap
)
}
else
{
tmp
$
taxap
<-
readRDS
(
paste0
(
path
,
"/taxa.rds"
))}
tmp
$
samp_table
<-
read.delim
(
paste0
(
path
,
"/descriptors.txt"
),
header
=
TRUE
,
sep
=
"\t"
)
tmp
$
samp_list
<-
rownames
(
tmp
$
seqtabp
)
# Check if descriptors has the same samples as seqtabp
if
(
length
(
tmp
$
samp_table
[,
1
])
!=
length
(
tmp
$
samp_list
))
{
stop
(
"There are "
,
length
(
tmp
$
samp_table
[,
1
]),
" samples in 'descriptors.txt', but "
,
length
(
tmp
$
samp_list
),
" samples in 'seqtab_nochim.rds'. Please make sure that the correct samples
are contained in descriptors.txt.
You may use 'optional_sample_check <- TRUE' in the first chunk to generate an
empty template for 'descriptors.txt'"
)
}
else
if
(
!
identical
(
tmp
$
samp_table
[,
1
],
tmp
$
samp_list
))
{
warning
(
"Warning: The samples in 'descriptors.txt' do not have the same names
or order as the samples in 'seqtab_nochim.rds'. This may be fine if
abbreviated names were used or the sample names are not contained in
the first column of 'descriptors.txt'. Double-checking never hurts!"
)
}
# generate phylogenetic tree of ASVs only if there is no file called
# 'phylotree.rds' in the working directory and 'parameters$maketree' is "TRUE"
if
(
!
file.exists
(
paste0
(
path
,
"/phylotree.rds"
)))
{
if
(
parameters
$
maketree
==
"TRUE"
)
{
tmp
$
ASVs
<-
getSequences
(
tmp
$
seqtabp
)
names
(
tmp
$
ASVs
)
<-
tmp
$
ASVs
tmp
$
ASV_align
<-
AlignSeqs
(
DNAStringSet
(
tmp
$
ASVs
),
anchor
=
NA
)
tmp
$
ASV_phang
<-
phyDat
(
as
(
tmp
$
ASV_align
,
"matrix"
),
type
=
"DNA"
)
tmp
$
dm
<-
dist.ml
(
tmp
$
ASV_phang
)
tmp
$
treeNJ
<-
NJ
(
tmp
$
dm
)
tmp
$
fit
<-
pml
(
tmp
$
treeNJ
,
data
=
tmp
$
ASV_phang
)
tmp
$
fitGTR
<-
update
(
tmp
$
fit
,
k
=
4
,
inv
=
0.2
)
tmp
$
fitGTR
<-
optim.pml
(
tmp
$
fitGTR
,
model
=
"GTR"
,
optInv
=
TRUE
,
optGamma
=
TRUE
,
rearrangement
=
"stochastic"
,
control
=
pml.control
(
trace
=
0
))
saveRDS
(
tmp
$
fitGTR
,
file
=
paste0
(
path
,
"/phylotree.rds"
))}}
##parse into phyloseq object
row.names
(
tmp
$
samp_table
)
<-
tmp
$
samp_list
if
(
file.exists
(
paste0
(
path
,
"/phylotree.rds"
)))
{
tmp
$
treep
<-
readRDS
(
paste0
(
path
,
"/phylotree.rds"
))
p
<-
phyloseq
(
otu_table
(
tmp
$
seqtabp
,
taxa_are_rows
=
FALSE
),
sample_data
(
tmp
$
samp_table
),
tax_table
(
tmp
$
taxap
),
phy_tree
(
tmp
$
treep
$
tree
))
}
else
{
p
<-
phyloseq
(
otu_table
(
tmp
$
seqtabp
,
taxa_are_rows
=
FALSE
),
sample_data
(
tmp
$
samp_table
),
tax_table
(
tmp
$
taxap
))}
##Adding nucleotide info and giving sequences ASV## identifiers
tmp
$
ASV_sequences
<-
Biostrings
::
DNAStringSet
(
taxa_names
(
p
))
taxa_names
(
p
)
<-
paste0
(
"ASV"
,
seq
(
ntaxa
(
p
)))
names
(
tmp
$
ASV_sequences
)
<-
taxa_names
(
p
)
p
<-
merge_phyloseq
(
p
,
tmp
$
ASV_sequences
)
##optional pruning
if
(
parameters
$
prune_controls
==
"TRUE"
)
{
if
(
!
is.null
(
parameters
$
controls
))
{
tmp
$
samp_clean
<-
tmp
$
samp_list
[
!
tmp
$
samp_list
%in%
grep
(
paste0
(
parameters
$
controls
,
collapse
=
"|"
),
tmp
$
samp_list
,
value
=
T
)]
tmp
$
contr_pruned
<-
setdiff
(
tmp
$
samp_list
,
tmp
$
samp_clean
)
ps
<-
prune_samples
(
tmp
$
samp_clean
,
p
)
#Physeq object for Just controls
ps.contr
<-
prune_samples
(
tmp
$
contr_pruned
,
p
)
ps.contr
<-
prune_taxa
(
taxa_sums
(
ps.contr
)
>
0
,
ps.contr
)
ps.transcontr
<-
transform_sample_counts
(
ps.contr
,
function
(
ASV
)
ASV
/
sum
(
ASV
))
message
(
cat
(
"\n"
,
"Number of control samples that were pruned and will not be analysed:\n"
,
length
(
tmp
$
samp_list
)
-
length
(
tmp
$
samp_clean
),
"\n"
,
"The following controls were pruned:\n"
,
tmp
$
contr_pruned
,
"The controls are contained in a separate phyloseq object: ps.contr"
,
"\n"
,
sep
=
"\n"
))
}
else
{
warning
(
cat
(
"\n\nparameters$prune_controls is TRUE but 'parameters$controls' is empty.
No samples were pruned.\n\n"
))}
}
else
{
ps
<-
p
}
# Prune ASVs defined as noise
if
(
parameters
$
prune_noise_taxgroups
==
"TRUE"
)
{
tmp
$
ps_taxlvls
<-
colnames
(
tax_table
(
ps
))
tmp
$
noise_ASVs
<-
character
(
0
)
for
(
lvl
in
tmp
$
ps_taxlvls
)
{
tmp
$
noise_ASVs
<-
c
(
tmp
$
noise_ASVs
,
cuphyr
::
list_subset_ASVs
(
physeq
=
ps
,
subv
=
parameters
$
noise_taxgroups
,
taxlvlsub
=
lvl
))
}
tmp
$
noise_ASVs
<-
unique
(
tmp
$
noise_ASVs
)
tmp
$
no_noise_ASVs
<-
colnames
(
otu_table
(
ps
))
tmp
$
no_noise_ASVs
<-
setdiff
(
tmp
$
no_noise_ASVs
,
tmp
$
noise_ASVs
)
if
(
length
(
tmp
$
noise_ASVs
)
>
0
)
{
ps
<-
prune_taxa
(
tmp
$
no_noise_ASVs
,
ps
)
tmp
$
no_noise_ps
<-
ps
cat
(
length
(
tmp
$
noise_ASVs
),
"ASVs were pruned because they belonged to the following
taxonomic groups:\n"
)
cat
(
parameters
$
noise_taxgroups
,
"\n"
,
sep
=
"\n"
)}
else
{
cat
(
"No ASVs were recognized as belonging to the following taxonomic groups
defined as noise:\n"
)
cat
(
parameters
$
noise_taxgroups
,
"\n"
,
sep
=
"\n"
)
}
}
# Prune samples with fewer than reads than minASVcount
if
(
parameters
$
minASVcount
>
0
)
{
tmp
$
samp_pruned
<-
names
(
which
(
sample_sums
(
ps
)
<
parameters
$
minASVcount
))
ps
<-
prune_samples
(
sample_sums
(
ps
)
>=
parameters
$
minASVcount
,
ps
)
if
(
length
(
tmp
$
samp_pruned
)
>
0
)
{
cat
(
"The following samples were pruned because ASV counts were lower than"
,
parameters
$
minASVcount
,
":\n"
)
cat
(
tmp
$
samp_pruned
,
"\n"
,
sep
=
"\n"
)
}
}
# Remove 0 count ASVs (e.g. control ASVs that remain) from the base object
ps
<-
prune_taxa
(
taxa_sums
(
ps
)
>
0
,
ps
)
sample_data
(
ps
)[[
"ndvi"
]]
<-
as.numeric
(
sample_data
(
ps
)[[
"ndvi"
]])
# Transformed per sample (per-sample relative abundance)
ps.trans
<-
transform_sample_counts
(
ps
,
function
(
ASV
)
ASV
/
sum
(
ASV
))
# Read descriptor values as numeric
sample_data
(
ps.trans
)[[
"ndvi"
]]
<-
as.numeric
(
sample_data
(
ps.trans
)[[
"ndvi"
]])
sample_data
(
ps.trans
)[[
"ndvi_july"
]]
<-
as.numeric
(
sample_data
(
ps.trans
)[[
"ndvi_july"
]])
# Get a tbl of the base object for easier access in some phyloseq-independent
# analyses. Takes some seconds, potentially up to minutes.
ps_tbl
<-
as_tibble
(
psmelt
(
ps
))
ps_trans_tbl
<-
as_tibble
(
psmelt
(
ps.trans
))
# Condensing to Abundance per Genus and Sample
genus_abundance_tbl_per_sample
<-
ps_trans_tbl
%>%
group_by
(
Genus
,
Sample
)
%>%
mutate
(
Genus_Sample_Abundance
=
sum
(
Abundance
))
%>%
select
(
Genus
,
Sample
,
ndvi
,
ndvi_july
,
Genus_Sample_Abundance
,
Alias
)
%>%
unique
()
if
(
parameters
$
roottree
==
"TRUE"
&&
parameters
$
maketree
==
"TRUE"
)
{
phyloseq
::
phy_tree
(
ps
)
<-
cuphyr
::
root_tree_in_outgroup
(
physeq
=
ps
)}
if
(
parameters
$
biom_export
==
"TRUE"
)
{
suppressWarnings
(
phyloseq.extended
::
write_phyloseq
(
p
,
biom_file
=
paste0
(
path
,
"all_samples.biom"
),
biom_format
=
"standard"
))
suppressWarnings
(
phyloseq.extended
::
write_phyloseq
(
ps
,
biom_file
=
file.path
(
path
,
"samples_without_controls.biom"
),
biom_format
=
"standard"
))
suppressWarnings
(
phyloseq.extended
::
write_phyloseq
(
ps.trans
,
biom_file
=
file.path
(
path
,
"samples_without_controls_rel_abundance.biom"
),
biom_format
=
"standard"
))
suppressWarnings
(
phyloseq.extended
::
write_phyloseq
(
ps.contr
,
biom_file
=
file.path
(
path
,
"just_controls.biom"
),
biom_format
=
"standard"
))
}
ps
##### Optional settings (sensible defaults) #####
# Can be changed to adjust the output format for all plots. Default "pdf",
# possible "eps"/"ps", "tex" (pictex), "jpeg", "tiff", "png", "bmp" and "svg"
parameters
$
output_format
=
"pdf"
# Can be changed to preferred ggplot2 theme. Recommended: "theme_bw()".
theme_set
(
theme_bw
())
############### NO NEED FOR CHANGES BELOW ###############
my_scale_col
<-
scale_color_viridis
(
discrete
=
TRUE
)
my_scale_fill
<-
scale_fill_viridis
(
discrete
=
TRUE
)
# Custom, more narrow color ranges based on viridis
# Base order to have adjacent colors be distinct from each other
tmp
$
sort_colors
<-
c
(
rbind
(
c
(
1
:
5
),
c
(
6
:
10
),
c
(
11
:
15
),
c
(
16
:
20
)))
# Customized vectors
tmp
$
n_col
<-
20
tmp
$
viridis_greens
<-
viridis
(
tmp
$
n_col
,
option
=
"D"
,
begin
=
0.85
,
end
=
0.7
)[
tmp
$
sort_colors
]
tmp
$
viridis_reds
<-
viridis
(
tmp
$
n_col
,
option
=
"B"
,
begin
=
0.7
,
end
=
0.5
)[
tmp
$
sort_colors
]
tmp
$
viridis_blues
<-
viridis
(
tmp
$
n_col
,
option
=
"D"
,
begin
=
0.2
,
end
=
0.4
)[
tmp
$
sort_colors
]
tmp
$
viridis_yellows
<-
viridis
(
tmp
$
n_col
,
option
=
"D"
,
begin
=
1
,
end
=
0.9
)[
tmp
$
sort_colors
]
tmp
$
viridis_dark
<-
viridis
(
tmp
$
n_col
,
option
=
"A"
,
begin
=
0
,
end
=
0.1
)[
tmp
$
sort_colors
]
tmp
$
viridis_light
<-
viridis
(
tmp
$
n_col
,
option
=
"A"
,
begin
=
1
,
end
=
0.9
)[
tmp
$
sort_colors
]
# Collected list that is available in the global environment
sub_viridis
<-
list
(
tmp
$
viridis_greens
,
tmp
$
viridis_blues
,
tmp
$
viridis_yellows
,
tmp
$
viridis_light
,
tmp
$
viridis_reds
,
tmp
$
viridis_dark
)
names
(
sub_viridis
)
<-
c
(
"greens"
,
"blues"
,
"yellows"
,
"lights"
,
"reds"
,
"darks"
)
tmp
$
out
<-
paste0
(
"."
,
parameters
$
output_format
)
#################### Function ############################
# Generic save function for plots that checks whether file exists and if so,
# creates a new one with d/m/y+time info to avoid overwriting. Overwriting can
# be triggered with overwrite = TRUE. Width, height and resolution are taken
# from parameters in the 'set' environment or set to 20x20 cm with 300dpi.
save_plot
<-
function
(
pl
,
filetype
=
".pdf"
,
plot_name
=
"my_plot"
,
overwrite
=
FALSE
){
wp
<-
if
(
!
is.null
(
set
$
wp
))
set
$
wp
else
20
hp
<-
if
(
!
is.null
(
set
$
hp
))
set
$
hp
else
20
res
<-
if
(
!
is.null
(
set
$
res
))
set
$
res
else
300
name
<-
paste0
(
"/"
,
plot_name
,
filetype
)
if
(
file.exists
(
paste0
(
outp
,
name
))
&
!
overwrite
)
{
name
<-
paste0
(
"/"
,
plot_name
,
"_"
,
format
(
Sys.time
(),
"%d-%m-%y_%H%M%S"
),
filetype
)}
ggsave
(
file.path
(
outp
,
name
),
pl
,
width
=
wp
,
height
=
hp
,
unit
=
"cm"
,
dpi
=
res
)
}
################################################
# CHANGE ME to the sample group for color coding. Accepted values are the column
# headers in the descriptor file.
set
$
color_by
=
"Symptoms"
##### Optional settings (sensible defaults) #####
# Can be changed to change the width (in cm) of the saved plot.
set
$
wp
=
17
# Can be changed to change the height (in cm) of the saved plot.
set
$
hp
=
20
# Can be changed to change the resolution (in dpi) of the saved plot.
set
$
res
=
300
############### NO NEED FOR CHANGES BELOW ###############
# Rank samples
set
$
ranked
<-
cuphyr
::
make_ranked_sums
(
p
,
myset
=
tmp
$
subset_id
)
set
$
ranked_ps
<-
cuphyr
::
make_ranked_sums
(
ps
,
myset
=
tmp
$
subset_id
)
set
$
ymax
<-
max
(
set
$
ranked
$
Abundance
)
set
$
ymax
<-
set
$
ymax
+
round
(
set
$
ymax
/
10
)
set
$
xmax
<-
nrow
(
set
$
ranked
)
+
1
set
$
title2
<-
"Samples (without controls)"
# Stabilize colors
set
$
color_vars
<-
set
$
ranked
[,
set
$
color_by
]
%>%
unlist
()
%>%
as.character
()
%>%
unique
()
set
$
color_vars
<-
sort
(
set
$
color_vars
)
set
$
color_varsPalette
<-
viridis
(
length
(
set
$
color_vars
))
names
(
set
$
color_varsPalette
)
<-
set
$
color_vars
set
$
my_scale_fill
<-
scale_fill_manual
(
values
=
set
$
color_varsPalette
)
# plot
plots
$
overview_all
<-
ggplot
(
data
=
set
$
ranked
,
aes
(
x
=
Rank
,
y
=
Abundance
))
+
aes_string
(
fill
=
set
$
color_by
)
+
geom_col
()
+
set
$
my_scale_fill
+
ggtitle
(
"All samples"
)
+
ylim
(
0
,
set
$
ymax
)
+
xlim
(
0
,
set
$
xmax
)
+
ylab
(
"ASV counts ('reads')"
)
if
(
length
(
tmp
$
noise_ASVs
)
>
0
)
{
set
$
ranked_nonoise
<-
cuphyr
::
make_ranked_sums
(
tmp
$
no_noise_ps
,
myset
=
tmp
$
subset_id
)
plots
$
overview_noise
<-
ggplot
(
data
=
set
$
ranked_nonoise
,
aes
(
x
=
Rank
,
y
=
Abundance
))
+
aes_string
(
fill
=
set
$
color_by
)
+
geom_col
()
+
set
$
my_scale_fill
+
ggtitle
(
"Samples (without controls), noise ASVs removed"
)
+
ylim
(
0
,
set
$
ymax
)
+
xlim
(
0
,
set
$
xmax
)
+
ylab
(
"ASV counts ('reads')"
)
}
if
(
parameters
$
minASVcount
>
0
)
{
plots
$
overview_all
<-
plots
$
overview_all
+
geom_hline
(
yintercept
=
parameters
$
minASVcount
,
linetype
=
"dashed"
)
+
ggtitle
(
"All samples (ASV count cutoff indicated)"
)
set
$
title2
<-
"Samples (without controls and low count samps)"
}
plots
$
overview_ps
<-
ggplot
(
data
=
set
$
ranked_ps
,
aes
(
x
=
Rank
,
y
=
Abundance
))
+
aes_string
(
fill
=
set
$
color_by
)
+
geom_col
()
+
set
$
my_scale_fill
+
ggtitle
(
set
$
title2
)
+
ylim
(
0
,
set
$
ymax
)
+
xlim
(
0
,
set
$
xmax
)
+
ylab
(
"ASV counts ('reads')"
)
plots
$
combo_overview
<-
ggarrange
(
plots
$
overview_all
,
plots
$
overview_ps
,
nrow
=
2
,
align
=
"v"
,
common.legend
=
TRUE
,
legend
=
"right"
)
This diff is collapsed.
Click to expand it.
analysis/Nems/.Rhistory
deleted
100644 → 0
+
0
−
512
View file @
34eded4e
top
=
set
$
top_n
,
ignore_na
=
set
$
ignore_na
)
set
$
topnASVs
<-
names
(
sort
(
taxa_sums
(
ps
),
decreasing
=
TRUE
))[
1
:
set
$
topASVs
]
set
$
ps.topnASVs
<-
prune_taxa
(
set
$
topnASVs
,
ps.trans
)
if
(
set
$
unify_colors
|
exists
(
"highlight"
,
envir
=
set
)
|
set
$
fuse_ASVs
)
{
set
$
toptax
<-
union
(
phyloseq
::
tax_table
(
set
$
ps.topnTax
)[,
set
$
taxlvl
],
phyloseq
::
tax_table
(
set
$
ps.topnASVs
)[,
set
$
taxlvl
])
set
$
toptax
<-
sort
(
set
$
toptax
)
set
$
taxlvlPalette
<-
viridis
(
length
(
set
$
toptax
))
names
(
set
$
taxlvlPalette
)
<-
set
$
toptax
if
(
exists
(
"highlight"
,
envir
=
set
))
{
# It is possible to change the highlight color here by substituting
# 'sub_viridis$reds[4]' with a hexcode-string, e.g. '#ff7f7f"'
set
$
taxlvlPalette
[
set
$
highlight
]
<-
sub_viridis
$
reds
[
4
]
}
set
$
taxlvlPalette
<-
set
$
taxlvlPalette
[
sort
(
names
(
set
$
taxlvlPalette
))]
set
$
my_scale_fill
<-
scale_fill_manual
(
values
=
set
$
taxlvlPalette
,
na.value
=
"grey"
)
}
else
{
set
$
my_scale_fill
<-
my_scale_fill
}
# Plot
if
(
set
$
unify_colors
|
exists
(
"highlight"
,
envir
=
set
)
|
set
$
fuse_ASVs
)
{
set
$
my_scale_fill
<-
scale_fill_manual
(
values
=
set
$
taxlvlPalette
[
sort
(
unique
(
phyloseq
::
tax_table
(
set
$
ps.topnTax
)[,
set
$
taxlvl
]))],
na.value
=
"grey"
)
}
plots
$
topn_tax
<-
plot_bar
(
set
$
ps.topnTax
,
x
=
set
$
x_axis_value
,
fill
=
set
$
taxlvl
,
title
=
paste0
(
"Top "
,
set
$
top_n
,
" "
,
set
$
taxlvl
))
+
facet_grid
(
paste0
(
"~"
,
set
$
panel_by
),
scales
=
"free"
,
space
=
"free"
)
+
set
$
my_scale_fill
+
ylab
(
set
$
y_axis_label
)
+
xlab
(
"Sample"
)
+
theme
(
strip.background
=
element_blank
(),
strip.text
=
element_text
(
size
=
16
),
axis.title
=
element_text
(
size
=
16
),
legend.text
=
element_text
(
size
=
14
))
if
(
set
$
fuse_ASVs
)
{
plots
$
topn_tax
<-
plots
$
topn_tax
+
geom_bar
(
aes_string
(
color
=
set
$
taxlvl
,
fill
=
set
$
taxlvl
),
stat
=
"identity"
,
position
=
"stack"
)
+
scale_color_manual
(
values
=
set
$
taxlvlPalette
,
na.value
=
NA
)
}
if
(
set
$
unify_colors
|
exists
(
"highlight"
,
envir
=
set
)
|
set
$
fuse_ASVs
)
{
set
$
my_scale_fill
<-
scale_fill_manual
(
values
=
set
$
taxlvlPalette
[
sort
(
unique
(
phyloseq
::
tax_table
(
set
$
ps.topnASVs
)[,
set
$
taxlvl
]))],
na.value
=
"grey"
)
}
plots
$
topn_ASVs
<-
plot_bar
(
set
$
ps.topnASVs
,
x
=
set
$
x_axis_value
,
fill
=
set
$
taxlvl
,
title
=
paste0
(
"Top"
,
set
$
topASVs
,
"_ASVs"
))
+
facet_grid
(
paste0
(
"~"
,
set
$
panel_by
),
scales
=
"free_x"
,
space
=
"free"
)
+
set
$
my_scale_fill
+
ylab
(
set
$
y_axis_label
)
+
xlab
(
"Sample"
)
+
theme
(
strip.background
=
element_blank
())
# save
save_plot
(
plots
$
topn_tax
,
plot_name
=
paste0
(
"Top"
,
set
$
top_n
,
"_"
,
set
$
taxlvl
),
filetype
=
tmp
$
out
)
save_plot
(
plots
$
topn_ASVs
,
plot_name
=
paste0
(
"Top"
,
set
$
topASVs
,
"_ASVs"
),
filetype
=
tmp
$
out
)
# Clean up plot parameters
rm
(
list
=
ls
(
set
),
envir
=
set
)
# Print to standard out
plots
$
topn_tax
plots
$
topn_ASVs
# CHANGE ME to the desired sample categories on the x-axis.
# Accepted values are the column headers in the descriptor file.
set
$
x_axis_value
=
"ndvi"
# CHANGE ME to the taxonomic level of interest (color coding). Accepted values
# are the column headers in your descriptor file.
set
$
taxlvl
=
"Genus"
# CHANGE ME to change the number of Top n taxa to be plotted at
# taxlvl.
set
$
top_n
=
10
# Can be changed to include (FALSE) or exclude (TRUE) NA values in the barplot
set
$
ignore_na
=
FALSE
# CHANGE ME to an entry at the chosen taxonomic level you want to highlight.
# Comment out to not highlight anything.
# set$highlight =
##### Optional settings (sensible defaults) #####
# Can be changed to change the width (in cm) of the saved plot.
set
$
wp
=
20
# Can be changed to change the height (in cm) of the saved plot.
set
$
hp
=
13
# Can be changed to change the resolution (in dpi) of the saved plot.
set
$
res
=
300
# Can be changed to change the y-axis label
set
$
y_axis_label
=
"Relative abundance"
# Can be changed to change the x-axis label
set
$
x_axis_label
=
"Sample"
############### NO NEED FOR CHANGES BELOW ###############
# Estimate Alpha-diversity (Shannon)
set
$
alpha_div_ps_trans
<-
estimate_richness
(
ps.trans
,
measures
=
"Shannon"
)
%>%
as_tibble
(
rownames
=
"Sample"
)
# Make physeq objects of top n taxa and top n ASVs
set
$
ps.topnTax
<-
cuphyr
::
abundant_tax_physeq
(
ps.trans
,
lvl
=
set
$
taxlvl
,
top
=
set
$
top_n
,
ignore_na
=
set
$
ignore_na
)
# Plot
set
$
my_scale_fill
<-
my_scale_fill
set
$
topntax_tbl
<-
psmelt
(
set
$
ps.topnTax
)
%>%
as_tibble
()
%>%
left_join
(
set
$
alpha_div_ps_trans
,
by
=
"Sample"
)
%>%
select
(
Genus
,
Alias
,
ndvi
,
ndvi_july
,
Abundance
,
Shannon
)
%>%
filter
(
Abundance
>
0
)
%>%
group_by
(
Genus
,
Alias
,
ndvi
,
ndvi_july
,
Shannon
)
%>%
summarise
(
Abundance
=
sum
(
Abundance
))
%>%
arrange
(
ndvi
)
%>%
mutate
(
ndvi_rank
=
c
(
1
:
length
(
ndvi
)))
plots
$
topn_tax_custom
<-
ggplot
(
set
$
topntax_tbl
,
aes
(
x
=
fct_reorder
(
Alias
,
ndvi
),
y
=
Abundance
,
fill
=
Genus
))
+
#title = paste0("Top ", set$top_n, " ", set$taxlvl))) +
geom_col
(
color
=
"black"
)
+
set
$
my_scale_fill
+
ylab
(
set
$
y_axis_label
)
+
xlab
(
set
$
x_axis_label
)
+
theme
(
strip.background
=
element_blank
(),
#strip.text = element_text(size = 12),
#axis.title=element_text(size=12),
#legend.text = element_text(size=12),
legend.position
=
"bottom"
)
plots
$
ndvi_dot_plot
<-
ggplot
(
set
$
topntax_tbl
,
aes
(
fct_reorder
(
Alias
,
ndvi
),
y
=
ndvi
))
+
geom_point
()
+
theme
(
strip.background
=
element_blank
(),
# strip.text = element_text(size = 12),
# axis.title=element_text(size=12),
# legend.text = element_text(size=12),
axis.title.x
=
element_blank
())
+
ylab
(
"NDVI"
)
plots
$
shannon_dot_plot
<-
ggplot
(
set
$
topntax_tbl
,
aes
(
fct_reorder
(
Alias
,
ndvi
),
y
=
Shannon
))
+
geom_point
()
+
theme
(
strip.background
=
element_blank
(),
# strip.text = element_text(size = 12),
# axis.title=element_text(size=12),
# legend.text = element_text(size=12),
axis.title.x
=
element_blank
())
+
ylab
(
"Shannon"
)
plots
$
combo_topn_custom
<-
ggarrange
(
plots
$
ndvi_dot_plot
,
plots
$
shannon_dot_plot
,
plots
$
topn_tax_custom
,
nrow
=
3
,
heights
=
c
(
1
,
1
,
3
),
align
=
"v"
)
plots
$
combo_topn_custom
save_plot
(
plots
$
combo_topn_custom
,
plot_name
=
paste0
(
"Customized_NDVI_Shannon_plot"
),
filetype
=
tmp
$
out
)
set
$
my_formula
<-
y
~
x
set
$
plot_title
<-
"Nematodes"
topntax_data
<-
set
$
topntax_tbl
%>%
mutate
(
Taxa
=
'nematodes'
)
%>%
ungroup
()
%>%
select
(
Alias
,
ndvi
,
Shannon
,
Taxa
)
%>%
distinct
()
write.csv
(
topntax_data
,
file
=
"../topntax_all_taxa/topntax_data_nematodes.csv"
)
# Excel file with field data
morphological_id
<-
file.path
(
"2020-patch-larvik-a-counts.csv"
)
# Reduce abundance per sample and genus data to genera of interest
genera_of_interest_mol_morph
<-
c
(
"Globodera"
,
"Meloidogyne"
,
"Pratylenchus"
)
genus_abundance_tbl_per_sample_mm
<-
genus_abundance_tbl_per_sample
%>%
filter
(
Genus
%in%
genera_of_interest_mol_morph
)
# read in and parse morphological count data
morph_data
<-
read_delim
(
morphological_id
)
%>%
mutate
(
genus
=
str_replace
(
genus
,
"cyst"
,
"Globodera"
),
genus
=
str_replace
(
genus
,
"mel"
,
"Meloidogyne"
),
genus
=
str_replace
(
genus
,
"prat"
,
"Pratylenchus"
))
%>%
dplyr
::
rename
(
Sample
=
rute
,
Genus
=
genus
)
# Combine morphological and metabarcoding data
set
$
data_mol_morph
<-
genus_abundance_tbl_per_sample_mm
%>%
left_join
(
morph_data
)
### Plotting
# New facet label names for Genus variable
genus_names
<-
list
(
'Globodera'
=
expression
(
paste
(
italic
(
"Globodera "
))),
#"spp.")),
'Meloidogyne'
=
expression
(
paste
(
italic
(
"Meloidogyne "
))),
#"spp.")),
'Pratylenchus'
=
expression
(
paste
(
italic
(
"Pratylenchus "
)))
#, "spp."))
)
genus_labeller
<-
function
(
variable
,
value
){
return
(
genus_names
[
value
])
}
signif.labs
<-
c
(
"pval > 0.05"
,
"pval <= 0.05"
)
names
(
signif.labs
)
<-
c
(
"p value > 0.05"
,
"p value <= 0.05"
)
# Subset data for modeling
set
$
glob_data
<-
subset
(
set
$
data_mol_morph
,
Genus
==
"Globodera"
)
set
$
mel_data
<-
subset
(
set
$
data_mol_morph
,
Genus
==
"Meloidogyne"
)
set
$
prat_data
<-
subset
(
set
$
data_mol_morph
,
Genus
==
"Pratylenchus"
)
# Quasi-binomial model for Globodera
model_glob
=
glm
(
Genus_Sample_Abundance
~
total_count
,
data
=
set
$
glob_data
,
family
=
quasibinomial
)
# Quasi-binomial model for Meloidogyne
model_mel
=
glm
(
Genus_Sample_Abundance
~
total_count
,
data
=
set
$
mel_data
,
family
=
quasibinomial
)
# Quasi-binomial model for Pratylenchus
model_prat
=
glm
(
Genus_Sample_Abundance
~
total_count
,
data
=
set
$
prat_data
,
family
=
quasibinomial
)
pval_glm
<-
function
(
glm
){
coef
(
summary
(
glm
))[,
4
][
2
]
}
set
$
data_mol_morph
<-
set
$
data_mol_morph
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Globodera"
,
pval_glm
(
model_glob
),
NA
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Pratylenchus"
,
pval_glm
(
model_prat
),
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Meloidogyne"
,
pval_glm
(
model_mel
),
pval
))
%>%
mutate
(
signif
=
ifelse
(
pval
<=
0.05
,
"pval <= 0.05"
,
"pval > 0.05"
))
# Other plotting variables
# Can be changed to change the width (in cm) of the saved plot.
set
$
wp
=
20
# Can be changed to change the height (in cm) of the saved plot.
set
$
hp
=
13
# Can be changed to change the resolution (in dpi) of the saved plot.
set
$
res
=
300
set
$
my_formula
<-
y
~
x
set
$
plot_title
<-
paste
(
"Molecular analysis vs"
,
"morpological analysis"
,
sep
=
"\n"
)
# Plotting quasibinomial model
plots
$
mol_morph_quasi
<-
ggplot
(
set
$
data_mol_morph
,
aes
(
x
=
total_count
,
y
=
Genus_Sample_Abundance
))
+
geom_point
(
alpha
=
0.7
)
+
#ggtitle(set$plot_title) +
theme
(
plot.title
=
element_text
(
hjust
=
0.5
,
size
=
20
),
panel.grid.major
=
element_blank
(),
panel.grid.minor
=
element_blank
(),
panel.background
=
element_blank
(),
#axis.line = element_line(colour = "dark grey"),
strip.background
=
element_blank
(),
strip.text
=
element_text
(
face
=
"italic"
,
size
=
16
),
axis.text
=
element_text
(),
text
=
element_text
())
+
xlab
(
"Nematodes / 250 ml soil"
)
+
ylab
(
"Relative abundance"
)
+
geom_smooth
(
method
=
"glm"
,
formula
=
set
$
my_formula
,
se
=
F
,
method.args
=
list
(
family
=
quasibinomial
),
aes
(
color
=
signif
,
linetype
=
signif
))
+
scale_color_manual
(
values
=
c
(
'black'
,
'lightgrey'
),
name
=
'Significance'
,
labels
=
names
(
signif.labs
))
+
scale_linetype_discrete
(
name
=
'Significance'
,
labels
=
names
(
signif.labs
))
+
facet_wrap
(
~
Genus
,
scales
=
"free"
,
#labeller = labeller(Genus = genus_labs)) +
labeller
=
genus_labeller
)
#Print and save plot
plots
$
mol_morph_quasi
save_plot
(
plots
$
mol_morph_quasi
,
plot_name
=
paste0
(
"Molecular_and_Morphological_Plot"
),
filetype
=
".pdf"
)
# Generate table overview of morphological counts for manuscript
morph_table_manuscript
<-
pivot_wider
(
set
$
data_mol_morph
%>%
select
(
-
Genus_Sample_Abundance
,
-
ndvi
),
names_from
=
Genus
,
values_from
=
total_count
)
%>%
arrange
(
Alias
)
write.xlsx
(
morph_table_manuscript
,
file
=
"t1_morph_table_manuscript.xlsx"
)
# Create properly formatted tibble with columns Sample, ndvi_01 (ndvi translated to (0, 1) interval) and one column for each genus
# containing the sample abundances for that genus.
ldf
<-
data.frame
(
genus_abundance_tbl_per_sample
%>%
pivot_wider
(
id_cols
=
c
(
'Sample'
,
'ndvi'
),
names_from
=
'Genus'
,
values_from
=
'Genus_Sample_Abundance'
))
ldf_genus_data
<-
data.frame
(
ldf
)
%>%
select
(
!
c
(
'Sample'
,
'ndvi'
))
colnames
(
ldf_genus_data
)
<-
gsub
(
' '
,
'.'
,
colnames
(
ldf_genus_data
))
ldf
<-
cbind.data.frame
(
Sample
=
ldf
$
Sample
,
ndvi_01
=
(
ldf
$
ndvi
+
1
)
/
2.0
)
ldf
<-
tibble
(
cbind
(
ldf
,
ldf_genus_data
))
n_samples_by_genus
<-
data.frame
(
ldf_genus_data
>
0
)
%>%
mutate_if
(
is.logical
,
as.numeric
)
%>%
colSums
()
%>%
sort
(
decreasing
=
TRUE
)
keep_n
<-
100
# Maximum number of genera to include in the analysis
top_n_occurence_genuses
<-
names
(
n_samples_by_genus
[
1
:
keep_n
])
top_n_occurence_genuses
<-
top_n_occurence_genuses
[
!
is.na
(
top_n_occurence_genuses
)]
l_genus_ldf
<-
ldf
%>%
select
(
all_of
(
top_n_occurence_genuses
))
l_genus_ldf_transposed
<-
data.frame
(
t
(
l_genus_ldf
))
l_meta_ldf
<-
ldf
%>%
select
(
'ndvi_01'
)
l_model
<-
linda
(
l_genus_ldf_transposed
,
l_meta_ldf
,
formula
=
'~ ndvi_01'
,
feature.dat.type
=
'proportion'
,
is.winsor
=
FALSE
,
alpha
=
0.05
)
# Print model info
l_model
# Show effect size and significance plots
linda.plot
(
l_model
,
variables.plot
=
c
(
'ndvi_01'
),
alpha
=
0.05
,
lfc.cut
=
1
,
legend
=
TRUE
)
# Write supplementary table for manuscript
l_model_df
<-
as.data.frame
(
l_model
$
output
)
write.xlsx
(
l_model_df
,
file
=
"supplementary_table_ndvi_regression_nematodes.xlsx"
,
rowNames
=
TRUE
,
colnames
=
TRUE
)
# Prat
set
$
prat_data
$
ndvi_temp
<-
(
set
$
prat_data
$
ndvi
+1
)
/
2
model_prat_count_ndvi
=
glm
(
ndvi_temp
~
total_count
,
data
=
set
$
prat_data
,
family
=
quasibinomial
)
summary
(
model_prat_count_ndvi
)
# Glob
set
$
glob_data
$
ndvi_temp
<-
(
set
$
glob_data
$
ndvi
+1
)
/
2
model_glob_count_ndvi
=
glm
(
ndvi_temp
~
total_count
,
data
=
set
$
glob_data
,
family
=
quasibinomial
)
summary
(
model_glob_count_ndvi
)
# Mel
set
$
mel_data
$
ndvi_temp
<-
(
set
$
mel_data
$
ndvi
+1
)
/
2
model_mel_count_ndvi
=
glm
(
ndvi_temp
~
total_count
,
data
=
set
$
mel_data
,
family
=
quasibinomial
)
summary
(
model_mel_count_ndvi
)
data_mol_morph_long_temp
<-
pivot_longer
(
set
$
data_mol_morph
,
cols
=
c
(
Genus_Sample_Abundance
,
total_count
),
names_to
=
"type"
,
values_to
=
"value"
)
data_mol_morph_long_temp
$
ndvi
<-
(
data_mol_morph_long_temp
$
ndvi
+1
)
/
2
pval_glm
<-
function
(
glm
){
coef
(
summary
(
glm
))[,
4
][
2
]
}
set
$
data_mol_morph_long
<-
data_mol_morph_long_temp
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Globodera"
,
pval_glm
(
model_glob
),
NA
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Pratylenchus"
,
pval_glm
(
model_prat
),
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Meloidogyne"
,
pval_glm
(
model_mel
),
pval
))
%>%
mutate
(
signif
=
ifelse
(
pval
<=
0.05
,
"pval <= 0.05"
,
"pval > 0.05"
))
# Count data vs NDVI
plots
$
morph_count_ndvi
<-
ggplot
(
set
$
data_mol_morph_long
,
aes
(
x
=
value
,
y
=
ndvi
))
+
geom_point
(
alpha
=
0.7
)
+
#ggtitle(set$plot_title) +
theme
(
plot.title
=
element_text
(
hjust
=
0.5
,
size
=
20
),
panel.grid.major
=
element_blank
(),
panel.grid.minor
=
element_blank
(),
panel.background
=
element_blank
(),
axis.line
=
element_line
(
colour
=
"dark grey"
),
strip.background
=
element_blank
(),
strip.text.x
=
element_text
(
face
=
"italic"
)
#,
#axis.text = element_text(size = 12),
#text = element_text(size = 14)
)
+
#xlab("Nematodes / 250 ml soil") +
#ylab("NDVI") +
geom_smooth
(
method
=
"glm"
,
formula
=
set
$
my_formula
,
se
=
F
,
color
=
"dark grey"
,
method.args
=
list
(
family
=
quasibinomial
))
+
facet_wrap
(
~
Genus
,
scales
=
"free_x"
)
plots
$
morph_count_ndvi
# Print and save plot
plots
$
morph_count_ndvi
save_plot
(
plots
$
morph_count_ndvi
,
plot_name
=
paste0
(
"morph_count_ndvi"
),
filetype
=
tmp
$
out
)
set
$
data_mol_morph_long
<-
data_mol_morph_long_temp
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Globodera"
,
pval_glm
(
model_glob_count_ndvi
),
NA
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Pratylenchus"
,
pval_glm
(
model_prat_count_ndvi
),
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Meloidogyne"
,
pval_glm
(
model_mel_count_ndvi
),
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Globodera"
&
type
==
"Genus_Sample_Abundance"
,
0.5574120890
,
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Pratylenchus"
&
type
==
"Genus_Sample_Abundance"
,
0.5177548997
,
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Meloidogyne"
&
type
==
"Genus_Sample_Abundance"
,
0.0001920515
,
pval
))
%>%
mutate
(
signif
=
ifelse
(
pval
<=
0.05
,
"pval <= 0.05"
,
"pval > 0.05"
))
# Morph data
set
$
data_morph
<-
set
$
data_mol_morph_long
[
set
$
data_mol_morph_long
$
type
==
'total_count'
,]
set
$
data_morph
<-
set
$
data_morph
%>%
rename
(
"value"
=
"Number_of_nematodes"
)
set
$
data_mol_morph_long
set
$
data_mol_morph_long
<-
data_mol_morph_long_temp
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Globodera"
,
pval_glm
(
model_glob_count_ndvi
),
NA
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Pratylenchus"
,
pval_glm
(
model_prat_count_ndvi
),
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Meloidogyne"
,
pval_glm
(
model_mel_count_ndvi
),
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Globodera"
&
type
==
"Genus_Sample_Abundance"
,
0.5574120890
,
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Pratylenchus"
&
type
==
"Genus_Sample_Abundance"
,
0.5177548997
,
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Meloidogyne"
&
type
==
"Genus_Sample_Abundance"
,
0.0001920515
,
pval
))
%>%
mutate
(
signif
=
ifelse
(
pval
<=
0.05
,
"pval <= 0.05"
,
"pval > 0.05"
))
# Morph data
set
$
data_morph
<-
set
$
data_mol_morph_long
[
set
$
data_mol_morph_long
$
type
==
'total_count'
,]
set
$
data_morph
<-
set
$
data_morph
%>%
dplyr
::
rename
(
"Number_of_nematodes"
=
value
)
# Relative abundance data
set
$
data_relabu
<-
set
$
data_mol_morph_long
[
set
$
data_mol_morph_long
$
type
==
'Genus_Sample_Abundance'
,]
set
$
data_relabu
<-
set
$
data_relabu
%>%
rename
(
"value"
=
"Relative_abundance"
)
set
$
data_mol_morph_long
<-
data_mol_morph_long_temp
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Globodera"
,
pval_glm
(
model_glob_count_ndvi
),
NA
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Pratylenchus"
,
pval_glm
(
model_prat_count_ndvi
),
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Meloidogyne"
,
pval_glm
(
model_mel_count_ndvi
),
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Globodera"
&
type
==
"Genus_Sample_Abundance"
,
0.5574120890
,
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Pratylenchus"
&
type
==
"Genus_Sample_Abundance"
,
0.5177548997
,
pval
))
%>%
mutate
(
pval
=
ifelse
(
Genus
==
"Meloidogyne"
&
type
==
"Genus_Sample_Abundance"
,
0.0001920515
,
pval
))
%>%
mutate
(
signif
=
ifelse
(
pval
<=
0.05
,
"pval <= 0.05"
,
"pval > 0.05"
))
# Morph data
set
$
data_morph
<-
set
$
data_mol_morph_long
[
set
$
data_mol_morph_long
$
type
==
'total_count'
,]
set
$
data_morph
<-
set
$
data_morph
%>%
dplyr
::
rename
(
"Number_of_nematodes"
=
value
)
# Relative abundance data
set
$
data_relabu
<-
set
$
data_mol_morph_long
[
set
$
data_mol_morph_long
$
type
==
'Genus_Sample_Abundance'
,]
set
$
data_relabu
<-
set
$
data_relabu
%>%
dplyr
::
rename
(
"Relative_abundance"
=
value
)
signif.labs
<-
c
(
"pval > 0.05"
,
"pval <= 0.05"
)
names
(
signif.labs
)
<-
c
(
"p value > 0.05"
,
"p value <= 0.05"
)
breaks_morph
<-
function
(
x
)
{
if
(
max
(
x
)
<
200
)
seq
(
0
,
100
,
100
)
else
seq
(
0
,
600
,
50
)}
breaks_mb
<-
function
(
x
)
{
if
(
max
(
x
)
<
0.1
)
seq
(
0
,
0.002
,
3
)
else
seq
(
0
,
3
,
1
)
}
breaks_fun_morph
<-
function
(
x
)
{
if
(
max
(
x
)
>
500
)
{
seq
(
0
,
800
,
200
)
}
else
if
(
max
(
x
)
>
150
)
{
seq
(
0
,
200
,
length.out
=
5
)
}
else
{
seq
(
0
,
100
,
length.out
=
5
)
}}
# MORPHOLOGIAL AND NDVI PLOT
# Relative abundance and NDVI plotting
# Axis ticks MB
breaks_fun_mb
<-
function
(
x
)
{
if
(
max
(
x
)
>
0.8
)
{
seq
(
0
,
0.9
,
length.out
=
4
)
}
else
if
(
max
(
x
)
>
0.2
)
{
seq
(
0
,
0.6
,
0.2
)
}
else
{
seq
(
0
,
0.003
,
length.out
=
4
)
}}
p1
<-
ggplot
(
set
$
data_relabu
,
aes
(
x
=
Relative_abundance
,
y
=
ndvi
))
+
geom_point
(
alpha
=
0.7
,
color
=
"black"
)
+
ggtitle
(
"a)"
)
+
theme
(
plot.title
=
element_text
(
hjust
=
0
,
size
=
12
),
panel.grid.major
=
element_blank
(),
panel.grid.minor
=
element_blank
(),
panel.background
=
element_blank
(),
strip.background
=
element_blank
(),
axis.title
=
element_text
(
size
=
9
),
strip.text.x
=
element_text
(
face
=
"italic"
),
plot.caption
=
element_text
(
size
=
7
,
hjust
=
0
,
face
=
"italic"
,
margin
=
margin
(
t
=
0.2
,
unit
=
"cm"
)),
plot.caption.position
=
"plot"
,
plot.subtitle
=
element_text
(
size
=
10
,
hjust
=
0
,
vjust
=
0.5
,
margin
=
margin
(
b
=
0.8
,
unit
=
"cm"
)))
+
xlab
(
"Relative abundance"
)
+
ylab
(
"NDVI"
)
+
geom_smooth
(
method
=
"glm"
,
formula
=
set
$
my_formula
,
se
=
FALSE
,
method.args
=
list
(
family
=
quasibinomial
),
aes
(
color
=
signif
,
linetype
=
signif
))
+
scale_color_manual
(
values
=
c
(
'black'
,
'lightgrey'
),
name
=
'Significance'
,
labels
=
names
(
signif.labs
))
+
scale_linetype_discrete
(
name
=
'Significance'
,
labels
=
names
(
signif.labs
))
+
facet_wrap
(
~
Genus
,
scales
=
"free_x"
)
+
scale_x_continuous
(
breaks
=
breaks_fun_mb
,
limits
=
c
(
0
,
NA
))
p1
# Plotting NDVI vs metabarcoding
p2
<-
ggplot
(
set
$
data_morph
,
aes
(
x
=
Number_of_nematodes
,
y
=
ndvi
))
+
geom_point
(
alpha
=
0.7
,
color
=
"black"
)
+
ggtitle
(
"b)"
)
+
theme
(
plot.title
=
element_text
(
hjust
=
0
,
size
=
12
),
panel.grid.major
=
element_blank
(),
panel.grid.minor
=
element_blank
(),
panel.background
=
element_blank
(),
strip.background
=
element_blank
(),
axis.title
=
element_text
(
size
=
9
),
panel.spacing
=
unit
(
2
,
"lines"
),
strip.text.x
=
element_text
(
face
=
"italic"
),
plot.caption
=
element_text
(
size
=
7
,
hjust
=
0
,
face
=
"italic"
,
margin
=
margin
(
t
=
0.2
,
unit
=
"cm"
)),
plot.caption.position
=
"plot"
,
plot.subtitle
=
element_text
(
size
=
10
,
hjust
=
0
,
vjust
=
0.5
,
margin
=
margin
(
b
=
0.8
,
unit
=
"cm"
)))
+
xlab
(
"Nematodes / 250 ml soil"
)
+
ylab
(
"NDVI"
)
+
geom_smooth
(
method
=
"glm"
,
formula
=
set
$
my_formula
,
se
=
FALSE
,
method.args
=
list
(
family
=
quasibinomial
),
aes
(
color
=
signif
,
linetype
=
signif
))
+
scale_color_manual
(
values
=
c
(
'black'
,
'lightgrey'
),
name
=
'Significance'
,
labels
=
names
(
signif.labs
))
+
scale_linetype_discrete
(
name
=
'Significance'
,
labels
=
names
(
signif.labs
))
+
facet_wrap
(
~
Genus
,
scales
=
"free_x"
)
+
scale_x_continuous
(
breaks
=
breaks_fun_morph
,
limits
=
c
(
0
,
NA
))
p2
# Combining the two plots
plots
$
combo_morph_metab_ndvi
<-
plot_grid
(
p1
,
p2
,
ncol
=
1
,
align
=
"hv"
)
plots
$
combo_morph_metab_ndvi
# Print and save plot
save_plot
(
plots
$
combo_morph_metab_ndvi
,
plot_name
=
paste0
(
"morph_relabu_ndvi"
),
filetype
=
tmp
$
out
)
found
<-
c
(
40
,
24
,
16
,
37
,
10
,
27
,
74
,
100
,
100
,
78
,
32
,
28
)
not_found
<-
c
(
24
,
12
,
7
,
7
,
24
,
52
)
wilcox.test
(
found
,
not_found
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment