From bc18803d5d4498b5281b6173bd21f79a1ac6cd64 Mon Sep 17 00:00:00 2001
From: Csilla Farkas <csilla.farkas@nibio.no>
Date: Tue, 10 Sep 2024 20:56:40 +0000
Subject: [PATCH] Upload New File: Settings used for to_ChRis.rsd with 2400
 runs

---
 .../hardcal_workflow_simult.R                 | 848 ++++++++++++++++++
 1 file changed, 848 insertions(+)
 create mode 100644 SWAT_HardCal/NON_Simons_DropBox/hardcal_workflow_simult.R

diff --git a/SWAT_HardCal/NON_Simons_DropBox/hardcal_workflow_simult.R b/SWAT_HardCal/NON_Simons_DropBox/hardcal_workflow_simult.R
new file mode 100644
index 0000000..187bc76
--- /dev/null
+++ b/SWAT_HardCal/NON_Simons_DropBox/hardcal_workflow_simult.R
@@ -0,0 +1,848 @@
+# Hard calibration + validation workflow ---------------------------------------
+# 
+# Version     0.1.3
+# Date:       2024-01-16
+# Developers: Svajunas Plunge    svajunas_plunge@sggw.edu.pl
+#             Christoph Schürz   christoph.schuerz@ufz.de
+#             Michael Strauch    michael.strauch@ufz.de
+# 
+# The parameter identifiability plot is adapted from Guse et al., 2020
+# DOI: https://doi.org/10.1080/02626667.2020.1734204
+# 
+# ------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+## 1) Load (or install&load) libraries
+##------------------------------------------------------------------------------
+library(data.table)
+library(dplyr)
+library(ggplot2)
+library(hydroGOF)
+library(lhs)
+library(lubridate)
+library(purrr)
+library(readr)
+library(stringr)
+library(SWATrunR)
+library(tibble)
+library(tidyr)
+library(parallel)
+library(scales)
+library(xts)
+library(dygraphs)
+
+# Load additional functions ----------------------------------------------------
+source('c:/00/scripts/functions.R')
+
+##------------------------------------------------------------------------------
+## 2) Settings to set paths to directories, files, outflow channel ids, etc.
+##------------------------------------------------------------------------------
+
+# Path to the SWAT+ project
+model_path <- 'c:/00/TxtInOut'
+
+# Channel IDs where gauges are located at the channel outlets.
+cha_ids <- c(37)
+
+# Define the path to the observed discharge data
+qobs_path <- "c:/00/data/observation/q_cha37_Skut.csv"
+
+# Define the path to the observed WQ data
+wqobs_path <- "c:/00/data/observation/TOT_N_Kure.csv"
+
+# Define the simulation save file names to be used in SWATrunR
+save_file_name <- "KR_"
+
+# Number parameter combinations to run
+n_comb <- 2400
+
+# Fraction of the best parameter combinations to be used for the final 
+# WQ calibration
+n_best <- NULL # example n_best <- .02 # 2% of the best parameter combinations, 
+# if NULL, n_best = 20/n_comb
+
+# Number of combination drawn for the nutrient parameters (this number 
+# is multiplied by the number of selected parameter combinations for the flow.
+# For example n_comb = 1000, n_best .1, means 100 best runs selected, multiplied
+# with n_nutr = 100 results in 10,000 model runs for the nutrient calibration.
+# If null, 1000/(n_comb*n_best) formula applied resulting in 1000 runs)
+n_nutr <- 100
+
+# Number of cores to be used for the parallelization of the model runs
+n_cores <- NULL ## If NULL, all available physical cores - 2 will be used
+
+# Define time period to be used in model runs. Please note this period should
+# match your management data period, if it was prepared with SWATfarmR. You can
+# safely modify 'start_date_print' and 'end_date' to define the period for which
+# you want to print the model output. However, please take great care, if you 
+# modify 'start_date' as you might need to update your management files too. 
+
+# Start date for model runs (if NULL, the start date of the model setup files 
+# will be used!!!)
+#start_date <- NULL # start_date <- '2000-01-01'
+# End date for model runs (if NULL, the end date of observation data will be used).
+#end_date <- NULL # end_date <- '2010-12-31'
+# Start printing model output (if NULL, the start date of observation data will used).
+#start_date_print <- NULL # start_date_print <-'2003-01-01'
+
+start_date <- '2013-01-01' # start_date <- '2000-01-01'
+# End date for model runs
+end_date <- '2020-12-31' # end_date <- '2010-12-31'
+# Start printing model output
+start_date_print <- '2015-01-01' # start_date_print <-'2003-01-01'
+
+
+# Define calibration and validation periods. Please note that the validation 
+# should take around 1/3 and calibration 2/3 of the total period. Also the 
+# workflow will run all simulations for the whole period (defined above), 
+# these periods are only used for the performance calculation and plotting.
+
+# Q calibration period
+q_cal_period <- c('2015-01-01', '2017-12-31')
+# Calibration end date
+q_val_period <- c('2018-01-01', '2020-12-31')
+
+# WQ calibration period
+wq_cal_period <- c('2015-01-01', '2017-12-31')
+# Calibration end date
+wq_val_period <- c('2018-01-01', '2020-12-31')
+##------------------------------------------------------------------------------
+## 3) Preparing parameters sets for the flow calibration
+##------------------------------------------------------------------------------
+
+# Discharge calibration ---------------------------------------------------
+# Sampling parameter combinations -----------------------------------------
+## The parameters cn3_swf and latq_co are initialized based on the runoff
+## potential of an HRU, while the parameter perco is initialized based
+## on its leaching potential. 
+## The calibration of these 3 parameters should consider their different initial
+## parameter values. Therefore the parameter sampling will be done the following:
+## Base parameter set:
+## - For the parameters perco, cn3_swf, and latq_co normalized ranges (0,1) are
+##   sampled.
+## - all other parameters are sampled in their actual parameter ranges.
+## - A base LHS set is drawn for the defined parameter boundaries.
+## 
+## Translating base parameter values for perco, cn3_swf, and latq_co:
+## - From hydrology.hyd HRUs IDs are drawn with low/mod/high leaching and runoff
+##   potentials.
+## - Those IDs are put together in text strings which are required for the 
+##   parameter definition via the parameter names.
+## - Parameter boundaries for the different low/mod/high potentials are defined.
+## - The normalized parameter ranges of the parameter ranges are translated into
+##   the defined boundaries.
+
+# Base parameter boundary definition. This is just a very reduced set for testing
+# and does not include all parameters which should be used in OPTAIN model setups.
+
+# !!! Calibration and validation of all hydrology and water quality/load related 
+# parameters could be done in one step. For this you have to define the nutrient
+# parameters together with hydrology.Please look step 12 for example how to do
+# define sediment, nitrogen, and phosphorus parameters. Example here present only
+# hydrology parameters.
+
+par_bound <- tibble(
+  #Suggested list and ranges of SWAT+ paramaters for hydrology calibration in OPTAIN. Parameters are roughly divided into groups by processes which they affect most. Some groups should be considered optional
+  #snow (optional - use if average snow fall to precipitation ratio is higher than 5%)
+  'snomelt_tmp.hru | change = absval' = c(-1.5, 1.5),
+  'snofall_tmp.hru | change = absval' = c(-1.5, 1.5),
+  'snomelt_lag.hru | change = absval' = c(0, 1),
+  'snomelt_min.hru | change = absval' = c(0, 10),
+  'snomelt_max.hru | change = absval' = c(0, 10),
+  #ET (note: it is suggested that a narrow range for esco selected in soft calibration of water balance is used instead of the wide (0,1) range)
+  'esco.hru | change = absval' = c(0.1, 0.95),
+  'epco.hru | change = absval' = c(0.05, 1),
+  'awc.sol | change = relchg' = c(-0.25, 0.25),
+ #  'canmx.hru | change = relchg' = c(-0.5, 0.5),
+  #surface runoff
+  'cn2.hru | change = relchg' = c(-0.15, 0.15),
+  'cn3_swf.hru | change = absval' = c(0, 1),
+  'ovn.hru | change  = relchg ' = c(-0.25, 0.25),
+  'surlag.bsn | change = absval' = c(0.05, 0.8),
+  #lateral flow (optional - use if lateral flow constitutes at least 5% of total water yield)
+  #'lat_time.hru | change = relchg' = c(-0,15, 0.15),
+  'lat_len.hru | change = abschg' = c(-30, -25),
+  'latq_co.hru | change = absval' = c(0, 1),
+  'bd.sol | change = relchg' = c(-0.3, 0.1),
+  'k.sol | change = relchg' = c(1.5, 3),
+  # tile flow (optional - use if tile flow constitutes at least 5% of total water yield; note: tile_lag and tile_dtime should be active only if tile_drain is set to 0 in codes.bsn file))
+  'tile_dep.hru | change = relchg' = c(0.1, 0.3),
+  'tile_lag.hru | change = absval' = c(24, 60),
+  'tile_dtime.hru | change = absval' = c(48, 100),
+  #percolation/aquifer
+  'perco.hru | change = absval' = c(0, 1),
+  # 'flo_min.aqu | change = abschg' = c(-2, 2),
+  # 'revap_co.aqu | change = absval' = c(0.02, 0.2),
+  # 'revap_min.aqu | change = abschg' = c(-2, 2),
+  # 'alpha.aqu | change = absval' = c(0.001, 0.5),
+  # 'sp_yld.aqu | change = absval' = c(0.001, 0.05),
+  # 'bf_max.aqu | change = absval' = c(0.5, 2),
+  #channel routing
+  'chn.rte | change = absval' = c(0.02, 0.1),
+  # Nitrogen parameters from here
+   "n_updis.bsn | change = absval" = c(0, 100),
+   "nperco.bsn | change = absval" = c(0, 1),
+  "sdnco.bsn | change = absval" = c(0.75, 1.1),
+ # "hlife_n.aqu | change = absval" = c(0, 200),
+ # "no3_init.aqu | change = absval" = c(0, 30),
+  "cmn.bsn | change = absval" = c(0.001, 0.003),
+  "rsdco.bsn | change = absval" = c(0.02, 0.1))
+
+# Draw a first parameter set of parameter combinations 
+par_flow <- sample_lhs(par_bound, n_comb)
+
+# Read the hydrology.hyd file to identify the HRU IDs with the different 
+# runoff and leaching potentials.
+hyd_hyd <- read_tbl(paste0(model_path, '/hydrology.hyd'))
+
+# Generate the ID text strings to be included in the parameter names so that 
+# parameter values for HRUs can be addressed separately.
+init_perco <- c(low = 0.05, mod = 0.50, high = 0.90)
+
+id_lch_pot <- c(low  = group_values(which(hyd_hyd$perco == init_perco['low'])),
+                mod  = group_values(which(hyd_hyd$perco == init_perco['mod'])),
+                high = group_values(which(hyd_hyd$perco == init_perco['high'])))
+
+init_cn3 <- c(low = 0.95, mod = 0.30, high = 0.00)
+
+id_run_pot <- c(low  = group_values(which(hyd_hyd$cn3_swf == init_cn3['low'])),
+                mod  = group_values(which(hyd_hyd$cn3_swf == init_cn3['mod'])),
+                high = group_values(which(hyd_hyd$cn3_swf == init_cn3['high'])))
+
+# Define parameter boundaries in which the parameter values should vary for 
+# the different leaching and runoff potentials.
+perco_bound <- list(low = c(0.05, 0.30), mod = c(0.30, 0.60), high = c(0.5, 0.90))
+cn3_bound   <- list(low = c(0.50, 0.95), mod = c(0.15, 0.45), high = c(0.0, 0.30))
+latq_bound  <- list(low = c(0.01, 0.30), mod = c(0.10, 0.40), high = c(0.5, 0.90))
+
+# Translate the normalized parameter ranges into the defined boundary ranges.
+for(i in c('low', 'mod', 'high')) {
+  if(nchar(id_lch_pot[i]) > 0) {
+    par_flow[[paste0('perco_', 
+                     i, '::perco.hru | change = absval | unit = c(', 
+                     id_lch_pot[i], ')')]] <-  
+      par_flow[['perco.hru | change = absval']] * (perco_bound[[i]][2] - perco_bound[[i]][1]) + perco_bound[[i]][1]
+  }
+  if(nchar(id_run_pot[i]) > 0) {
+    par_flow[[paste0('cn3_', i, '::cn3_swf.hru | change = absval | unit = c(', 
+                     id_run_pot[i], ')')]] <- 
+      par_flow[['cn3_swf.hru | change = absval']] * (cn3_bound[[i]][2] - cn3_bound[[i]][1]) + cn3_bound[[i]][1]
+    par_flow[[paste0('latq_', i, '::latq_co.hru | change = absval | unit = c(', 
+                     id_run_pot[i], ')')]] <- par_flow[['latq_co.hru | change = absval']] * 
+      (latq_bound[[i]][2] - latq_bound[[i]][1]) + latq_bound[[i]][1]
+  }
+}
+
+# Remove the initial normalized parameters.
+par_flow <- par_flow %>% 
+  select(- starts_with(c('perco.hru', 'cn3_swf.hru', 'latq_co.hru')))
+
+##------------------------------------------------------------------------------
+## 4) Running simulations
+##------------------------------------------------------------------------------
+
+# Read the observed discharge data to obtain period needed for the runs.
+obs_full <- read_csv(qobs_path) 
+# Get the number of cores to be used for parallel processing.
+n_cores <- ifelse(length(n_cores) == 0, parallel::detectCores(logical = FALSE) - 
+                    2, n_cores)
+
+# In this first test only the daily hydrograph outputs are used for the 
+# locations where observed discharge is available.
+
+# Simulate daily discharge for the channel IDs.
+sim_flow_full_bck <- run_swatplus(project_path = model_path,
+                                  output = list(
+                                    flo_day = define_output(file = 'channel_sd_day',
+                                                            variable = 'flo_out',
+                                                            unit = cha_ids),
+                                     no3_day = define_output(file = 'channel_sd_day',
+                                                          variable = 'no3_out',
+                                                          unit = cha_ids)
+                                  ## please add parameters for other nutrients, sediments,
+                                  ## if you plan to use them.
+
+                                    ,orgn_day = define_output(file = 'channel_sd_day',
+                                                            variable = 'orgn_out',
+                                                            unit = cha_ids),
+                                    nh3_day = define_output(file = 'channel_sd_day',
+                                                              variable = 'nh3_out',
+                                                              unit = cha_ids),
+                                    no2_day = define_output(file = 'channel_sd_day',
+                                                            variable = 'no2_out',
+                                                            unit = cha_ids),
+                                    solp_day = define_output(file = 'channel_sd_day',
+                                                            variable = 'solp_out',
+                                                            unit = cha_ids),
+                                    sedp_day = define_output(file = 'channel_sd_day',
+                                                             variable = 'sedp_out',
+                                                             unit = cha_ids)
+                                    # sed_out = define_output(file = 'channel_sd_day',
+                                                             # variable = 'sed_out',
+                                                            #  unit = cha_ids)
+                                  ),
+                                  
+                                  parameter = par_flow,
+                                  start_date = start_date,
+                                  end_date = end_date,
+                                  start_date_print = as.Date(
+                                    ifelse(length(start_date_print)==0, min(obs_full$date), 
+                                           start_date_print)),
+                                  n_thread = n_cores,
+                                  save_file = paste0(save_file_name, '_base')
+) 
+
+# If you have to remove the runs that did not finish, use the following code.
+sim_flow_full <- remove_unsuccesful_runs(sim_flow_full_bck)
+
+# If you have to load the runs from the saved runs in the data bases, use the
+# following code.
+# sim_flow <- load_swat_run(paste0(model_path, '/', save_file_name))
+
+# If you have calculate and use concentrations together with flow values in the 
+# next steps, use the following code.
+# get_conc() function has two optional arguments: not_conc and sediment.
+# 'not_conc' is a regular expression which defines the variables which should not
+# be converted to concentrations. Default is '^flo_day|^gwd'. 
+# sediment is a regular expression which defines the variables which should be 
+# identified as sediment variables. Default is '^sed'.
+
+# If you need to get total nitrogen or total phosphorus, you need to have actived 
+# n parts saving in run_swatplus() function above. Following code will calculate
+# total nitrogen. You can adapt it to calculate total phosphorus as well
+n_parts <- c("no3_day", "orgn_day", "nh3_day", "no2_day")
+# 
+sim_flow_full$simulation$ntot_day <- 
+  bind_cols(sim_flow_full$simulation[[1]]["date"], 
+            Reduce("+", map(n_parts, ~sim_flow_full$simulation[[.x]][-1])))
+
+sim_flow_full <- get_conc(sim_flow_full)
+# saveRDS(sim_flow_full, "to_svajunas.rds")
+# undebug(get_conc)
+##------------------------------------------------------------------------------
+## 5) Readjusting observation and simulation date for calibration period 
+##------------------------------------------------------------------------------
+
+# Readjust the dates of the simulated and observed discharge data with the 
+# function fix_dates() to match each other.
+# The function could also trim the data to the period given. Example of usage 
+# tmp <- fix_dates(runr_obj = sim_flow, obs_obj = obs, 
+# trim_start = "2010-01-01", trim_end = "2015-01-01")
+
+tmp <- fix_dates(sim_flow_full, obs_full, trim_start = q_cal_period[1], 
+                 trim_end = q_cal_period[2])
+sim_flow <- tmp$sim
+obs <- tmp$obs
+
+# In case you plan to use multiple variables in performance calculation and 
+# need different dates for each variable, you can use the following code example.
+# tmp <- fix_dates(sim_flow, obs_full, trim_start = "2010-01-01", trim_end = "2015-01-01"))
+# obs <- tmp$obs
+# 
+# In case you want to use water quality data, read it here before applying the following code.
+# obs2 <- read_csv(wqobs_path) 
+# tmp <- fix_dates(sim_flow, obs_wq_full, trim_start = "2015-01-01", trim_end = "2020-01-01"))
+# obs2 <- tmp$obs
+
+##------------------------------------------------------------------------------
+## 6) Calculate performance metrics for calibration period
+##------------------------------------------------------------------------------
+# Calculate the performance metrics for the simulated discharge.
+
+# 'sim' is the object with simulated discharge data.
+# 'obs' is the object with observed discharge data.
+# "par_name" is the name of the variable for which the performance metrics 
+# should be calculated. Default is NULL. It will calculate performance metrics
+# for first variable is simulation list. If you have multiple variables in the
+# you have to specify the name of the variable for which you want to be used in 
+# calculation.
+# 'perf_metrics' allows selecting the performance metrics to be used in calculation on final 
+# ranking, which in step 7 will be used in selection of parameters sets. 
+# 'perf_metrics' can be a vector of performance metrics or if it is NULL all
+# metrics will be used.
+# 'period' defines the period for which performance metrics should be calculated.
+# it could be "week", "month", "year", etc. 
+# 'fn_summarize' defines the name of function to be used for summarizing the data
+# for the period defined in 'period'. It could be "mean", "median", "max", "min",
+# "sum", etc.
+# Example of usage: 
+# obj_tbl <- calculate_performance(sim = sim_flow, obs, "flo_day", c("kge", "nse"), 
+# "month", "sum")
+
+obj_tbl <- calculate_performance(sim_flow, obs)
+
+# Calculate the performance metrics could be also applied for the multiple variables
+# in the simulation list. In this case the performance metrics will be calculated
+# for each variable separately and then the mean of the performance rank will
+# be calculated. In this case also weights for each variable could be defined.
+
+# Example of usage:
+var_to_use <- c("flo_day",  "ntot_day_conc")
+var_weights <- c(0.5, 0.5) ## The sum of weights should be 1.
+
+
+periods <- list(q_cal_period, wq_cal_period)
+
+# Read the another observed data
+# In case you want to use water quality data, read it here before applying the following code.
+obs2 <- read_csv(wqobs_path) 
+
+# obs3 <- read_csv(gwobs_path) 
+
+# Calculate the performance metrics for each variable separately.
+obj_tbl_list <- pmap(list(var_to_use, list(obs, obs2), periods), function(.x, .y, .z){
+  tmp <- fix_dates(sim_flow_full, .y, trim_start = .z[1], 
+                   trim_end = .z[2])
+  calculate_performance(tmp$sim, tmp$obs, par_name = .x, period = "month",
+                        perf_metrics = c("nse", "kge", "pbias", "r2", "mae")) ## rsr calculation eliminated from performance metrics
+}) %>% set_names(var_to_use)
+
+# Calculate the mean of the performance rank for all parameter sets. In this case
+# the weights for each variable should be defined and result could be used with 
+# only option 1 in step 8
+obj_tbl <- map2(obj_tbl_list, var_weights, function(.x, .y){
+  select(.x, c(run_id, rank_tot)) %>% 
+    mutate(rank_tot = rank_tot * .y)}) %>% 
+  reduce(., left_join, by = 'run_id')%>%
+  mutate(sum_rank = rowSums(across(starts_with("rank_tot")))) %>% 
+  mutate(rank_tot = as.integer(rank(sum_rank))) %>% 
+  select(run_id, rank_tot)
+
+ View(obj_tbl_list[["flo_day"]])
+ View(obj_tbl_list[["ntot_day_conc"]])
+##------------------------------------------------------------------------------
+## 7) Plotting parameter identifiability and dotty plots
+##------------------------------------------------------------------------------
+
+# The plot below shows the ranges of parameters which perform better for 
+# a certain performance metric. 
+# The run_fraction defines the fraction of simulations which should be considered
+# in the analysis (with 1000 runs this value should maybe not be lower than 0.2, 
+# too large values also do not make much sense). For 5000 runs I think 0.1 would
+# be a good value (I think this is what Bjoern used in his publication).
+
+# In case you used multiple variables in performance calculation, you can use
+# obj_tbl_list$var_name to select the variable for which you want to plot
+# parameter identifiability. Similar adjustment could be done for dotty plots.
+
+plot_parameter_identifiability(parameters = sim_flow$parameter$values,
+                               objectives = 
+                                 select(obj_tbl_list$flo_day, -starts_with(c('rank', 'run'))), 
+                               ## This starts_with vector could be amended with
+                               ## other columns which should not be considered
+                               run_fraction = .2)
+
+
+
+# For comparison you can use function plot_dotty(). It allows allows to examine 
+# model performance results vs parameter values.
+
+# The function has the following arguments:
+# 'par' a data frame of model parameter values for each simulation.
+# 'var' model performance result vector or list of vectors to be plotted 
+# against parameter values.
+# 'y_label' is the label or vector of labels for the y-axis. Default is 'y'.
+# 'n_col' number of columns in the facet grid. Default is 3.
+# 'y_lim' defines limits for the y-axis. Default is NULL.
+# 'y_inter' Y-axis intercept value. Default is NULL.
+# 'trend' logical, indicating whether to add a trend line. Default is FALSE.
+# 'run_ids' a numeric vector of run IDs to be highlighted in the plot.
+# 'low_up' logical. logical, TRUE if whole possible parameter range should be used 
+#' for x axis. Default is FALSE.
+
+# Example of usage:
+plot_dotty(sim_flow$parameter$values, obj_tbl_list$flo_day$nse)
+plot_dotty(sim_flow$parameter$values, obj_tbl_list$ntot_day_conc$kge, n_col = 5, y_lim = c(0.0, 0.9))
+plot_dotty(sim_flow$parameter$values, obj_tbl$pbias, n_col = 5)
+plot_dotty(sim_flow$parameter$values, obj_tbl$nse, run_ids = c(1, 2, 5), low_up = TRUE)
+plot_dotty(sim_flow$parameter$values, obj_tbl$kge, trend = TRUE)
+
+plot_dotty(sim_flow$parameter$values, obj_tbl_list$flo_day$nse)
+
+# Dotty plots could be also done for multiple variables in the simulation list.
+# In this case the dotty plot will be done plotted on same plot with different 
+# colors for each variable. Trends could be also added to the plot.
+# Water flow and water quality variables could be plotted on same plot.
+plot_dotty(par = sim_flow$parameter$values, 
+           var = list(obj_tbl_list$flo_day_52$nse, 
+                      obj_tbl_list$no3_day_52_conc$nse,
+                      obj_tbl_list$gwd_day$nse), 
+           y_label =  c('Flow at 52r (m3/s)', "Nitrate at 52r (mgN/l)", "Groundwater depth (m)"),
+           trend = TRUE)
+
+##------------------------------------------------------------------------------
+## 8) Selecting the best performing parameter sets to continue into WQ calibration
+##------------------------------------------------------------------------------
+
+
+# Selection could be done in two ways:
+
+# 1) Based on the rank of performance metrics
+# Filter the parameter sets to only include the best performing ones.
+n_best <- ifelse(length(n_best) == 0, 20/n_comb, n_best)
+
+run_sel_ids <- obj_tbl %>% 
+  arrange(rank_tot)%>%
+  top_frac(-n_best, wt = rank_tot) %>% 
+  pull(run_id) %>% 
+  as.numeric
+
+# 2) Based on the thresholds of performance metrics. These thresholds should be 
+# defined by the user based on the plots in step 7.
+# Below is an example code of how to select the parameter sets based on the thresholds
+# Select runs based on certain criteria
+
+run_sel_ids <- as.numeric(str_remove(names(which(obj_tbl$nse > 0.5 & 
+                                                   obj_tbl$kge > 0.5 & 
+                                                   abs(obj_tbl$pbias) < 15)), 'run_'))
+
+# You can examine the selected parameter sets by running the following code
+print(paste0("Selected run ids are: ", paste(run_sel_ids, collapse = ", ")))
+obj_tbl_cal<- obj_tbl[run_sel_ids,]
+
+##------------------------------------------------------------------------------
+## 9) Plotting results of selected parameter sets vs observed discharge
+##------------------------------------------------------------------------------
+
+# The function plot_selected_sim() allows to plot the results comparing the 
+# selectedsimulated data vs observation data.
+
+# Function has the following arguments:
+# 'sim' object from SWATrunR
+#' obs' dataframe for the observed data with two columns: 'date' and 'value'.
+# 'par_name' name of the parameter set to be used (default is NULL, which selects 
+# the first record). But if you have multiple parameter sets, you can use this 
+# argument to select the one you want to use. Example par_name = "flo_day" or
+# par_name = no3_day_conc".
+# 'run_ids' character vector with the ids of the runs to be used in the plot.
+# 'run_sel' character vector with the id/ids of the runs to emphasized in the plot.
+# 'plot_bands' logical. If TRUE, the plot will include lower and upper bands of 
+# results.
+# 'period' character. If NULL, the plot will be based on the original time step
+# of the simulation. Other examples are "day", "week", "month", "year", etc. 
+# If "average monthly", the plot will be based on the multi-annual monthly values.
+# 'fn_summarize' function to recalculate to time interval (default is "mean"), 
+# are "mean", median", "sum", "min", "max", etc).
+# 'x_label' character. Label for the x-axis. Default is "Date".
+# 'y_label' character. Label for the y-axis. Default is 
+# "Discharge (m<sup>3</sup> s<sup>-1</sup>)".
+
+plot_selected_sim(sim_flow, obs, par_name = "flo_day_52", run_ids = run_sel_ids)
+
+# # Another example how all function arguments are used 
+# plot_selected_sim(sim = sim_flow, 
+#                   obs = obs, 
+#                   par_name = "flo_day",
+#                   run_ids = run_sel_ids,
+#                   run_sel = run_sel_ids[1], 
+#                   plot_bands = TRUE,
+#                   period = "average monthly",
+#                   fn_summarize = "mean",
+#                   x_label = "Month",
+#                   y_label = "Monthly flow average (m<sup>3</sup> s<sup>-1</sup>)")
+
+##------------------------------------------------------------------------------
+## 10) OPTIONAL step to perform an OAT analysis
+##------------------------------------------------------------------------------
+
+# If you want to perform an OAT analysis
+# Select the parameter combination around which you want to do the OAT analysis
+par_center_id <- 2
+
+# Extract the center parameter set and assign again the "full" names to be used
+# with SWATrunR
+par_center <- sim_flow$parameter$values[par_center_id, ] %>% 
+  set_names(sim_flow$parameter$definition$full_name)
+
+# You can define your own parameter boundaries for the selected parameters
+# Here a way how you can quickly extract the boundaries from the last simulation
+par_bnd <- sim_flow$parameter$values %>% 
+  map(., ~ c(min(.x), max(.x))) %>% 
+  map(., ~ round(.x, 3)) %>% 
+  bind_cols(.) %>% 
+  set_names(sim_flow$parameter$definition$full_name)
+
+# Sample the OAT parameter set and run the simulations
+par_oat <- sample_oat(par_center = par_center, par_boundary = par_bnd)
+
+par_oat <- filter(par_oat, parameter %in% c('center', 'bf_max.aqu | change = absval'))
+
+sim_oat <- run_swatplus(project_path = model_path,
+                        output = list(
+                          flo_day = define_output(file = 'channel_sd_day',
+                                                  variable = 'flo_out',
+                                                  unit = cha_ids)),
+                        parameter = par_oat[,3:ncol(par_oat)],
+                        n_thread = n_cores)
+# save_file = '231128_sim_oat')
+
+# To visualize the OAT runs e.g. for the parameter surlag:
+plot_oat(sim = sim_oat, obs = obs, variable = 'flo_day')
+
+##------------------------------------------------------------------------------
+## 11) Validating best selected parameter sets vs observed data
+##------------------------------------------------------------------------------
+
+## This step is very important. You have to proof that the best parameter combination
+## results in an acceptable model performance also in a different simulation period
+## or at a different (ideally hydrologically independend) gauge
+## Below you find an example for validation using a different simulation period
+
+# Readjusting the dates of the simulated and observed data for validation period
+tmp <- fix_dates(sim_flow_full, obs_full, trim_start = q_val_period[1], 
+                 trim_end = q_val_period[2])
+sim_flow_val <- tmp$sim
+obs_val <- tmp$obs
+
+# Calculate the performance metrics for validation period.
+obj_tbl_val <- calculate_performance(sim_flow_val, obs_val, par_name = "flo_day_52", 
+                                     run_ids = obj_tbl_cal$run_id)
+
+# Plot comparison of performance results between calibration and validation period.
+# plot_calval_comparison() function has the following arguments:
+# cal_tbl - data frame with performance metrics for calibration period
+# val_tbl - data frame with performance metrics for validation period
+# indexes - vector of performance metrics to be plotted. Default is NULL, all 
+# indexes should be plotted.
+plot_calval_comparison(obj_tbl_cal, obj_tbl_val, 
+                       indexes = c("nse", "kge", "pbias", "r2", "mae" ))
+
+# Plot results on the time series figure for validation period
+plot_selected_sim(sim_flow_val, obs_val, par_name = "flo_day_52", 
+                  run_ids = obj_tbl_cal$run_id)
+
+##------------------------------------------------------------------------------
+## 12) Define nutrient parameters and merge with hydrology parameters
+##------------------------------------------------------------------------------
+
+# !!! This part is only needed, if you want to calibrate hydrology and 
+# nutrient/sediment parameters separately
+
+# Extract the hydrology parameter set that you want to combine the nutrient
+# parameters with
+par_q <- sim_flow$parameter$values[run_sel_ids,] %>%
+  set_names(., sim_flow$parameter$definition$full_name)
+
+# Draw a sample of n_nutr combinations for the nutrient parameters that should
+# be calibrated and combine them with all selected hydrology parameter sets.
+
+# Below are examples of how to define the nutrient parameters their boundaries.
+# For nitrogen, sedimends and phosphorus parameters. You can use the same one of 
+# these, combine or define your own. 
+
+# Define the nitrogen parameters and their boundaries.
+par_nutr_bound <- tibble("n_updis.bsn | change = absval" = c(0, 100), # may affect crop yields, please check!
+                         "nperco.bsn | change = absval" = c(0, 1),
+                         "sdnco.bsn | change = absval" = c(0.75, 1.1),
+                         "hlife_n.aqu | change = absval" = c(0, 200),
+                         "no3_init.aqu | change = absval" = c(0, 30),
+                         "cmn.bsn | change = absval" = c(0.001, 0.003),
+                         "rsdco.bsn | change = absval" = c(0.02, 0.1))
+
+# Sediment parameters and their boundaries
+par_sed_bound <- tibble("cov.rte | change = absval" = c(0,10),
+                        "ch_clay.rte | change = absval" = c(0,100),
+                        "bedldcoef.rte | change = absval" = c(0,1),
+                        "slope_len.hru | change = absval" = c(10,150),
+                        "chs.rte | change = relchg" = c(-0.5,0.5),
+                        # "wd_rto.rte | change = relchg" = c(-0.5,0.5),
+                        "cherod.rte | change = absval" = c(0,0.6))
+
+# Sediment parameters and their boundaries
+par_phos_bound <- tibble("p_updis.bsn | change = absval" = c(0, 100), # super sensitive but also affecting crop yields. Print out yields as well!
+                         "pperco.bsn | change = absval" = c(10, 17.5),
+                         "phoskd.bsn | change = absval" = c(100, 200), 
+                         "psp.bsn | change = absval" = c(0.01, 0.7))
+
+# If you want to combine parameters, you can use the following code
+par_nutr_bound <- cbind(par_nutr_bound, par_sed_bound, par_phos_bound)
+
+
+
+# Nutrient parameter set repeated the number of times of selected hydr. params.
+n_nutr <- ifelse(length(n_nutr) == 0, 1000/(n_comb*n_best), n_nutr)
+
+par_nutr <- sample_lhs(par_nutr_bound, n_nutr) %>%
+  slice(rep(1:n(), each = nrow(par_q)))
+
+# The new parameter set is a combination of all hydrology parameter sets and
+# the sampled nutrient parameter sets.
+par_cal <- par_q %>%
+  slice(., rep(1:n(), n_nutr)) %>%
+  bind_cols(., par_nutr)
+
+##------------------------------------------------------------------------------
+## 13) Running simulations
+##------------------------------------------------------------------------------
+
+# Read the observed water quality data to determine the period needed for the runs.
+obs_wq_full <- read_csv(wqobs_path)
+
+## Run simulations for the nutrient parameters.
+# check if you need to redefine start_date, end_date and start_date_print
+sim_wq_full_bck <- run_swatplus(project_path = model_path,
+                                output = list(
+                                  flo_day = define_output(file = 'channel_sd_day',
+                                                          variable = 'flo_out',
+                                                          unit = cha_ids),
+                                  no3_day = define_output(file = 'channel_sd_day',
+                                                          variable = 'no3_out',
+                                                          unit = cha_ids)
+                                  ## please add parameters for other nutrients, sediments, 
+                                  ## if you plan to use them.
+                                  
+                                  # ,orgn_day = define_output(file = 'channel_sd_day',
+                                  #                         variable = 'orgn_out',
+                                  #                         unit = cha_ids),
+                                  # nh3_day = define_output(file = 'channel_sd_day',
+                                  #                           variable = 'nh3_out',
+                                  #                           unit = cha_ids),
+                                  # no2_day = define_output(file = 'channel_sd_day',
+                                  #                         variable = 'no2_out',
+                                  #                         unit = cha_ids),
+                                  # solp_day = define_output(file = 'channel_sd_day',
+                                  #                         variable = 'solp_out',
+                                  #                         unit = cha_ids),
+                                  # sedp_day = define_output(file = 'channel_sd_day',
+                                  #                          variable = 'sedp_out',
+                                  #                          unit = cha_ids),
+                                  # sed_out = define_output(file = 'channel_sd_day',
+                                  #                          variable = 'sed_out',
+                                  #                          unit = cha_ids)
+                                ),
+                                parameter = par_cal,
+                                start_date = start_date,
+                                end_date = end_date,
+                                start_date_print = as.Date(
+                                  ifelse(length(start_date_print)==0, 
+                                         min(obs_wq_full$date), start_date_print)),
+                                n_thread = n_cores,
+                                save_file = paste0(save_file_name, '_wq')
+)
+
+# If you have to remove the runs that did not finish, use the following code.
+sim_wq_full <- remove_unsuccesful_runs(sim_wq_full_bck)
+
+# If you need to get total nitrogen or total phosphorus, you need to have actived 
+# n parts saving in run_swatplus() function above. Following code will calculate
+# total nitrogen. You can adapt it to calculate total phosphorus as well
+  n_parts <- c("no3_day", "orgn_day", "nh3_day", "no2_day")
+# 
+ sim_flow_full$simulation$ntot_day <- 
+   bind_cols(sim_flow_full$simulation[[1]]["date"], 
+             Reduce("+", map(n_parts, ~sim_flow_full$simulation[[.x]][-1])))
+##------------------------------------------------------------------------------
+## 14) Calculate concentrations from simulated nutrients
+##------------------------------------------------------------------------------
+
+sim_wq_full <- get_conc(sim_wq_full)
+
+##------------------------------------------------------------------------------
+## 15) Re-adjusting observation and simulation date (so that they match)
+##------------------------------------------------------------------------------
+
+tmp <- fix_dates(sim_wq_full, obs_wq_full, trim_start = wq_cal_period[1], 
+                 trim_end = wq_cal_period[2])
+
+sim_wq <- tmp$sim
+obs_wq <- tmp$obs
+
+##------------------------------------------------------------------------------
+## 16) Calculate performance metrics
+##------------------------------------------------------------------------------
+
+obj_tbl_wq <- calculate_performance(sim_wq, obs_wq, "no3_day_52_conc")
+
+##------------------------------------------------------------------------------
+## 17) Plotting dotty plots
+##------------------------------------------------------------------------------
+
+# For comparison you can do dotty plots.
+plot_dotty(sim_wq$parameter$values, obj_tbl_wq$nse)
+plot_dotty(sim_wq$parameter$values, obj_tbl_wq$rsr_20_70)
+plot_dotty(sim_wq$parameter$values, obj_tbl_wq$pbias)
+plot_dotty(sim_wq$parameter$values, obj_tbl_wq$r2)
+
+##------------------------------------------------------------------------------
+## 18) Selecting the best performing parameter sets
+##------------------------------------------------------------------------------
+
+# Filter the parameter sets to only include the best performing ones.
+run_sel_ids <- obj_tbl_wq %>% 
+  arrange(rank_tot)%>%
+  top_frac(-n_best, wt = rank_tot) %>% 
+  pull(run_id) %>% 
+  as.numeric
+
+# Extract the parameter set that you want use in further modeling
+par <- sim_wq$parameter$values[run_sel_ids,] %>%
+  set_names(., sim_wq$parameter$definition$full_name)
+
+obj_tbl_cal <- obj_tbl_wq[run_sel_ids,]
+
+##------------------------------------------------------------------------------
+## 19) Plotting results of selected parameter sets vs WQ observed data
+##------------------------------------------------------------------------------
+
+plot_selected_sim(sim_wq, obs_wq, run_ids = run_sel_ids, par_name = "no3_day_52_conc")
+
+##------------------------------------------------------------------------------
+## 20) Validation of best WQ parameters vs observed data
+##------------------------------------------------------------------------------
+
+# Readjusting the dates of the simulated and observed data for validation period
+tmp <- fix_dates(sim_wq_full, obs_wq_full, trim_start = wq_val_period[1], 
+                 trim_end = wq_val_period[2])
+sim_wq_val <- tmp$sim
+obs_wq_val <- tmp$obs
+
+# Calculate the performance metrics for validation period
+obj_tbl_val <- calculate_performance(sim_wq_val, obs_wq_val, par_name = "no3_day_52_conc", 
+                                     run_ids = obj_tbl_cal$run_id)
+
+# Plot comparison of performance results between calibration and validation period.
+plot_calval_comparison(obj_tbl_cal, obj_tbl_val, 
+                       indexes = c("nse", "kge", "pbias", "r2", "mae" ))
+
+# Plot results on the time series figure for validation period
+plot_selected_sim(sim_wq_val, obs_wq_val, par_name = "no3_day_52_conc", 
+                  run_ids = obj_tbl_cal$run_id)
+
+##------------------------------------------------------------------------------
+## 21) OPTIONAL step, if you want to write out calibration.cal file
+##------------------------------------------------------------------------------
+
+# If you want to write out the calibration.cal file, you can use the following code.
+# The function write_cal_file() has the following arguments:
+# 'par_table' is the parameter set that you want to use.
+# 'write_path' is the path where you want to write the calibration.cal file.
+# 'i_run' is the number of the parameter set that you want to use for the
+# calibration.cal file.
+
+# Example of usage:
+write_cal_file(par_table = par_flow, 
+               write_path = getwd(),
+               i_run = 1)
+
+par_bound# Another option is just to save your parameter set as a .rds file and then use it
+# later with SWATrunR for scenario runs.
+
+# Example of usage:
+# To save the parameter set
+saveRDS(par, "path/to/write/par.rds")
+# To load the parameter set
+par <- readRDS("path/to/write/par.rds")
+
+##------------------------------------------------------------------------------
+## 22) SAVE THE RESULTS
+##------------------------------------------------------------------------------
+
+# Save the results from running the workflow. This is important in case you need to
+# produce or update figures for reports or publications at a later stage. 
+# This way, you will not need to rerun simulations.
+
+save.image("your_file_name.RData")
+# load("your_file_name.RData")
+
+
+
-- 
GitLab