diff --git a/SWAT_HardCal/NON_Simons_DropBox/hardcal_workflow_simult.R b/SWAT_HardCal/NON_Simons_DropBox/hardcal_workflow_simult.R new file mode 100644 index 0000000000000000000000000000000000000000..187bc763f862403b7cc0ecaa38cb3ea17a26e494 --- /dev/null +++ b/SWAT_HardCal/NON_Simons_DropBox/hardcal_workflow_simult.R @@ -0,0 +1,848 @@ +# Hard calibration + validation workflow --------------------------------------- +# +# Version 0.1.3 +# Date: 2024-01-16 +# Developers: Svajunas Plunge svajunas_plunge@sggw.edu.pl +# Christoph Schürz christoph.schuerz@ufz.de +# Michael Strauch michael.strauch@ufz.de +# +# The parameter identifiability plot is adapted from Guse et al., 2020 +# DOI: https://doi.org/10.1080/02626667.2020.1734204 +# +# ------------------------------------------------------------------------------ + +##------------------------------------------------------------------------------ +## 1) Load (or install&load) libraries +##------------------------------------------------------------------------------ +library(data.table) +library(dplyr) +library(ggplot2) +library(hydroGOF) +library(lhs) +library(lubridate) +library(purrr) +library(readr) +library(stringr) +library(SWATrunR) +library(tibble) +library(tidyr) +library(parallel) +library(scales) +library(xts) +library(dygraphs) + +# Load additional functions ---------------------------------------------------- +source('c:/00/scripts/functions.R') + +##------------------------------------------------------------------------------ +## 2) Settings to set paths to directories, files, outflow channel ids, etc. +##------------------------------------------------------------------------------ + +# Path to the SWAT+ project +model_path <- 'c:/00/TxtInOut' + +# Channel IDs where gauges are located at the channel outlets. +cha_ids <- c(37) + +# Define the path to the observed discharge data +qobs_path <- "c:/00/data/observation/q_cha37_Skut.csv" + +# Define the path to the observed WQ data +wqobs_path <- "c:/00/data/observation/TOT_N_Kure.csv" + +# Define the simulation save file names to be used in SWATrunR +save_file_name <- "KR_" + +# Number parameter combinations to run +n_comb <- 2400 + +# Fraction of the best parameter combinations to be used for the final +# WQ calibration +n_best <- NULL # example n_best <- .02 # 2% of the best parameter combinations, +# if NULL, n_best = 20/n_comb + +# Number of combination drawn for the nutrient parameters (this number +# is multiplied by the number of selected parameter combinations for the flow. +# For example n_comb = 1000, n_best .1, means 100 best runs selected, multiplied +# with n_nutr = 100 results in 10,000 model runs for the nutrient calibration. +# If null, 1000/(n_comb*n_best) formula applied resulting in 1000 runs) +n_nutr <- 100 + +# Number of cores to be used for the parallelization of the model runs +n_cores <- NULL ## If NULL, all available physical cores - 2 will be used + +# Define time period to be used in model runs. Please note this period should +# match your management data period, if it was prepared with SWATfarmR. You can +# safely modify 'start_date_print' and 'end_date' to define the period for which +# you want to print the model output. However, please take great care, if you +# modify 'start_date' as you might need to update your management files too. + +# Start date for model runs (if NULL, the start date of the model setup files +# will be used!!!) +#start_date <- NULL # start_date <- '2000-01-01' +# End date for model runs (if NULL, the end date of observation data will be used). +#end_date <- NULL # end_date <- '2010-12-31' +# Start printing model output (if NULL, the start date of observation data will used). +#start_date_print <- NULL # start_date_print <-'2003-01-01' + +start_date <- '2013-01-01' # start_date <- '2000-01-01' +# End date for model runs +end_date <- '2020-12-31' # end_date <- '2010-12-31' +# Start printing model output +start_date_print <- '2015-01-01' # start_date_print <-'2003-01-01' + + +# Define calibration and validation periods. Please note that the validation +# should take around 1/3 and calibration 2/3 of the total period. Also the +# workflow will run all simulations for the whole period (defined above), +# these periods are only used for the performance calculation and plotting. + +# Q calibration period +q_cal_period <- c('2015-01-01', '2017-12-31') +# Calibration end date +q_val_period <- c('2018-01-01', '2020-12-31') + +# WQ calibration period +wq_cal_period <- c('2015-01-01', '2017-12-31') +# Calibration end date +wq_val_period <- c('2018-01-01', '2020-12-31') +##------------------------------------------------------------------------------ +## 3) Preparing parameters sets for the flow calibration +##------------------------------------------------------------------------------ + +# Discharge calibration --------------------------------------------------- +# Sampling parameter combinations ----------------------------------------- +## The parameters cn3_swf and latq_co are initialized based on the runoff +## potential of an HRU, while the parameter perco is initialized based +## on its leaching potential. +## The calibration of these 3 parameters should consider their different initial +## parameter values. Therefore the parameter sampling will be done the following: +## Base parameter set: +## - For the parameters perco, cn3_swf, and latq_co normalized ranges (0,1) are +## sampled. +## - all other parameters are sampled in their actual parameter ranges. +## - A base LHS set is drawn for the defined parameter boundaries. +## +## Translating base parameter values for perco, cn3_swf, and latq_co: +## - From hydrology.hyd HRUs IDs are drawn with low/mod/high leaching and runoff +## potentials. +## - Those IDs are put together in text strings which are required for the +## parameter definition via the parameter names. +## - Parameter boundaries for the different low/mod/high potentials are defined. +## - The normalized parameter ranges of the parameter ranges are translated into +## the defined boundaries. + +# Base parameter boundary definition. This is just a very reduced set for testing +# and does not include all parameters which should be used in OPTAIN model setups. + +# !!! Calibration and validation of all hydrology and water quality/load related +# parameters could be done in one step. For this you have to define the nutrient +# parameters together with hydrology.Please look step 12 for example how to do +# define sediment, nitrogen, and phosphorus parameters. Example here present only +# hydrology parameters. + +par_bound <- tibble( + #Suggested list and ranges of SWAT+ paramaters for hydrology calibration in OPTAIN. Parameters are roughly divided into groups by processes which they affect most. Some groups should be considered optional + #snow (optional - use if average snow fall to precipitation ratio is higher than 5%) + 'snomelt_tmp.hru | change = absval' = c(-1.5, 1.5), + 'snofall_tmp.hru | change = absval' = c(-1.5, 1.5), + 'snomelt_lag.hru | change = absval' = c(0, 1), + 'snomelt_min.hru | change = absval' = c(0, 10), + 'snomelt_max.hru | change = absval' = c(0, 10), + #ET (note: it is suggested that a narrow range for esco selected in soft calibration of water balance is used instead of the wide (0,1) range) + 'esco.hru | change = absval' = c(0.1, 0.95), + 'epco.hru | change = absval' = c(0.05, 1), + 'awc.sol | change = relchg' = c(-0.25, 0.25), + # 'canmx.hru | change = relchg' = c(-0.5, 0.5), + #surface runoff + 'cn2.hru | change = relchg' = c(-0.15, 0.15), + 'cn3_swf.hru | change = absval' = c(0, 1), + 'ovn.hru | change = relchg ' = c(-0.25, 0.25), + 'surlag.bsn | change = absval' = c(0.05, 0.8), + #lateral flow (optional - use if lateral flow constitutes at least 5% of total water yield) + #'lat_time.hru | change = relchg' = c(-0,15, 0.15), + 'lat_len.hru | change = abschg' = c(-30, -25), + 'latq_co.hru | change = absval' = c(0, 1), + 'bd.sol | change = relchg' = c(-0.3, 0.1), + 'k.sol | change = relchg' = c(1.5, 3), + # tile flow (optional - use if tile flow constitutes at least 5% of total water yield; note: tile_lag and tile_dtime should be active only if tile_drain is set to 0 in codes.bsn file)) + 'tile_dep.hru | change = relchg' = c(0.1, 0.3), + 'tile_lag.hru | change = absval' = c(24, 60), + 'tile_dtime.hru | change = absval' = c(48, 100), + #percolation/aquifer + 'perco.hru | change = absval' = c(0, 1), + # 'flo_min.aqu | change = abschg' = c(-2, 2), + # 'revap_co.aqu | change = absval' = c(0.02, 0.2), + # 'revap_min.aqu | change = abschg' = c(-2, 2), + # 'alpha.aqu | change = absval' = c(0.001, 0.5), + # 'sp_yld.aqu | change = absval' = c(0.001, 0.05), + # 'bf_max.aqu | change = absval' = c(0.5, 2), + #channel routing + 'chn.rte | change = absval' = c(0.02, 0.1), + # Nitrogen parameters from here + "n_updis.bsn | change = absval" = c(0, 100), + "nperco.bsn | change = absval" = c(0, 1), + "sdnco.bsn | change = absval" = c(0.75, 1.1), + # "hlife_n.aqu | change = absval" = c(0, 200), + # "no3_init.aqu | change = absval" = c(0, 30), + "cmn.bsn | change = absval" = c(0.001, 0.003), + "rsdco.bsn | change = absval" = c(0.02, 0.1)) + +# Draw a first parameter set of parameter combinations +par_flow <- sample_lhs(par_bound, n_comb) + +# Read the hydrology.hyd file to identify the HRU IDs with the different +# runoff and leaching potentials. +hyd_hyd <- read_tbl(paste0(model_path, '/hydrology.hyd')) + +# Generate the ID text strings to be included in the parameter names so that +# parameter values for HRUs can be addressed separately. +init_perco <- c(low = 0.05, mod = 0.50, high = 0.90) + +id_lch_pot <- c(low = group_values(which(hyd_hyd$perco == init_perco['low'])), + mod = group_values(which(hyd_hyd$perco == init_perco['mod'])), + high = group_values(which(hyd_hyd$perco == init_perco['high']))) + +init_cn3 <- c(low = 0.95, mod = 0.30, high = 0.00) + +id_run_pot <- c(low = group_values(which(hyd_hyd$cn3_swf == init_cn3['low'])), + mod = group_values(which(hyd_hyd$cn3_swf == init_cn3['mod'])), + high = group_values(which(hyd_hyd$cn3_swf == init_cn3['high']))) + +# Define parameter boundaries in which the parameter values should vary for +# the different leaching and runoff potentials. +perco_bound <- list(low = c(0.05, 0.30), mod = c(0.30, 0.60), high = c(0.5, 0.90)) +cn3_bound <- list(low = c(0.50, 0.95), mod = c(0.15, 0.45), high = c(0.0, 0.30)) +latq_bound <- list(low = c(0.01, 0.30), mod = c(0.10, 0.40), high = c(0.5, 0.90)) + +# Translate the normalized parameter ranges into the defined boundary ranges. +for(i in c('low', 'mod', 'high')) { + if(nchar(id_lch_pot[i]) > 0) { + par_flow[[paste0('perco_', + i, '::perco.hru | change = absval | unit = c(', + id_lch_pot[i], ')')]] <- + par_flow[['perco.hru | change = absval']] * (perco_bound[[i]][2] - perco_bound[[i]][1]) + perco_bound[[i]][1] + } + if(nchar(id_run_pot[i]) > 0) { + par_flow[[paste0('cn3_', i, '::cn3_swf.hru | change = absval | unit = c(', + id_run_pot[i], ')')]] <- + par_flow[['cn3_swf.hru | change = absval']] * (cn3_bound[[i]][2] - cn3_bound[[i]][1]) + cn3_bound[[i]][1] + par_flow[[paste0('latq_', i, '::latq_co.hru | change = absval | unit = c(', + id_run_pot[i], ')')]] <- par_flow[['latq_co.hru | change = absval']] * + (latq_bound[[i]][2] - latq_bound[[i]][1]) + latq_bound[[i]][1] + } +} + +# Remove the initial normalized parameters. +par_flow <- par_flow %>% + select(- starts_with(c('perco.hru', 'cn3_swf.hru', 'latq_co.hru'))) + +##------------------------------------------------------------------------------ +## 4) Running simulations +##------------------------------------------------------------------------------ + +# Read the observed discharge data to obtain period needed for the runs. +obs_full <- read_csv(qobs_path) +# Get the number of cores to be used for parallel processing. +n_cores <- ifelse(length(n_cores) == 0, parallel::detectCores(logical = FALSE) - + 2, n_cores) + +# In this first test only the daily hydrograph outputs are used for the +# locations where observed discharge is available. + +# Simulate daily discharge for the channel IDs. +sim_flow_full_bck <- run_swatplus(project_path = model_path, + output = list( + flo_day = define_output(file = 'channel_sd_day', + variable = 'flo_out', + unit = cha_ids), + no3_day = define_output(file = 'channel_sd_day', + variable = 'no3_out', + unit = cha_ids) + ## please add parameters for other nutrients, sediments, + ## if you plan to use them. + + ,orgn_day = define_output(file = 'channel_sd_day', + variable = 'orgn_out', + unit = cha_ids), + nh3_day = define_output(file = 'channel_sd_day', + variable = 'nh3_out', + unit = cha_ids), + no2_day = define_output(file = 'channel_sd_day', + variable = 'no2_out', + unit = cha_ids), + solp_day = define_output(file = 'channel_sd_day', + variable = 'solp_out', + unit = cha_ids), + sedp_day = define_output(file = 'channel_sd_day', + variable = 'sedp_out', + unit = cha_ids) + # sed_out = define_output(file = 'channel_sd_day', + # variable = 'sed_out', + # unit = cha_ids) + ), + + parameter = par_flow, + start_date = start_date, + end_date = end_date, + start_date_print = as.Date( + ifelse(length(start_date_print)==0, min(obs_full$date), + start_date_print)), + n_thread = n_cores, + save_file = paste0(save_file_name, '_base') +) + +# If you have to remove the runs that did not finish, use the following code. +sim_flow_full <- remove_unsuccesful_runs(sim_flow_full_bck) + +# If you have to load the runs from the saved runs in the data bases, use the +# following code. +# sim_flow <- load_swat_run(paste0(model_path, '/', save_file_name)) + +# If you have calculate and use concentrations together with flow values in the +# next steps, use the following code. +# get_conc() function has two optional arguments: not_conc and sediment. +# 'not_conc' is a regular expression which defines the variables which should not +# be converted to concentrations. Default is '^flo_day|^gwd'. +# sediment is a regular expression which defines the variables which should be +# identified as sediment variables. Default is '^sed'. + +# If you need to get total nitrogen or total phosphorus, you need to have actived +# n parts saving in run_swatplus() function above. Following code will calculate +# total nitrogen. You can adapt it to calculate total phosphorus as well +n_parts <- c("no3_day", "orgn_day", "nh3_day", "no2_day") +# +sim_flow_full$simulation$ntot_day <- + bind_cols(sim_flow_full$simulation[[1]]["date"], + Reduce("+", map(n_parts, ~sim_flow_full$simulation[[.x]][-1]))) + +sim_flow_full <- get_conc(sim_flow_full) +# saveRDS(sim_flow_full, "to_svajunas.rds") +# undebug(get_conc) +##------------------------------------------------------------------------------ +## 5) Readjusting observation and simulation date for calibration period +##------------------------------------------------------------------------------ + +# Readjust the dates of the simulated and observed discharge data with the +# function fix_dates() to match each other. +# The function could also trim the data to the period given. Example of usage +# tmp <- fix_dates(runr_obj = sim_flow, obs_obj = obs, +# trim_start = "2010-01-01", trim_end = "2015-01-01") + +tmp <- fix_dates(sim_flow_full, obs_full, trim_start = q_cal_period[1], + trim_end = q_cal_period[2]) +sim_flow <- tmp$sim +obs <- tmp$obs + +# In case you plan to use multiple variables in performance calculation and +# need different dates for each variable, you can use the following code example. +# tmp <- fix_dates(sim_flow, obs_full, trim_start = "2010-01-01", trim_end = "2015-01-01")) +# obs <- tmp$obs +# +# In case you want to use water quality data, read it here before applying the following code. +# obs2 <- read_csv(wqobs_path) +# tmp <- fix_dates(sim_flow, obs_wq_full, trim_start = "2015-01-01", trim_end = "2020-01-01")) +# obs2 <- tmp$obs + +##------------------------------------------------------------------------------ +## 6) Calculate performance metrics for calibration period +##------------------------------------------------------------------------------ +# Calculate the performance metrics for the simulated discharge. + +# 'sim' is the object with simulated discharge data. +# 'obs' is the object with observed discharge data. +# "par_name" is the name of the variable for which the performance metrics +# should be calculated. Default is NULL. It will calculate performance metrics +# for first variable is simulation list. If you have multiple variables in the +# you have to specify the name of the variable for which you want to be used in +# calculation. +# 'perf_metrics' allows selecting the performance metrics to be used in calculation on final +# ranking, which in step 7 will be used in selection of parameters sets. +# 'perf_metrics' can be a vector of performance metrics or if it is NULL all +# metrics will be used. +# 'period' defines the period for which performance metrics should be calculated. +# it could be "week", "month", "year", etc. +# 'fn_summarize' defines the name of function to be used for summarizing the data +# for the period defined in 'period'. It could be "mean", "median", "max", "min", +# "sum", etc. +# Example of usage: +# obj_tbl <- calculate_performance(sim = sim_flow, obs, "flo_day", c("kge", "nse"), +# "month", "sum") + +obj_tbl <- calculate_performance(sim_flow, obs) + +# Calculate the performance metrics could be also applied for the multiple variables +# in the simulation list. In this case the performance metrics will be calculated +# for each variable separately and then the mean of the performance rank will +# be calculated. In this case also weights for each variable could be defined. + +# Example of usage: +var_to_use <- c("flo_day", "ntot_day_conc") +var_weights <- c(0.5, 0.5) ## The sum of weights should be 1. + + +periods <- list(q_cal_period, wq_cal_period) + +# Read the another observed data +# In case you want to use water quality data, read it here before applying the following code. +obs2 <- read_csv(wqobs_path) + +# obs3 <- read_csv(gwobs_path) + +# Calculate the performance metrics for each variable separately. +obj_tbl_list <- pmap(list(var_to_use, list(obs, obs2), periods), function(.x, .y, .z){ + tmp <- fix_dates(sim_flow_full, .y, trim_start = .z[1], + trim_end = .z[2]) + calculate_performance(tmp$sim, tmp$obs, par_name = .x, period = "month", + perf_metrics = c("nse", "kge", "pbias", "r2", "mae")) ## rsr calculation eliminated from performance metrics +}) %>% set_names(var_to_use) + +# Calculate the mean of the performance rank for all parameter sets. In this case +# the weights for each variable should be defined and result could be used with +# only option 1 in step 8 +obj_tbl <- map2(obj_tbl_list, var_weights, function(.x, .y){ + select(.x, c(run_id, rank_tot)) %>% + mutate(rank_tot = rank_tot * .y)}) %>% + reduce(., left_join, by = 'run_id')%>% + mutate(sum_rank = rowSums(across(starts_with("rank_tot")))) %>% + mutate(rank_tot = as.integer(rank(sum_rank))) %>% + select(run_id, rank_tot) + + View(obj_tbl_list[["flo_day"]]) + View(obj_tbl_list[["ntot_day_conc"]]) +##------------------------------------------------------------------------------ +## 7) Plotting parameter identifiability and dotty plots +##------------------------------------------------------------------------------ + +# The plot below shows the ranges of parameters which perform better for +# a certain performance metric. +# The run_fraction defines the fraction of simulations which should be considered +# in the analysis (with 1000 runs this value should maybe not be lower than 0.2, +# too large values also do not make much sense). For 5000 runs I think 0.1 would +# be a good value (I think this is what Bjoern used in his publication). + +# In case you used multiple variables in performance calculation, you can use +# obj_tbl_list$var_name to select the variable for which you want to plot +# parameter identifiability. Similar adjustment could be done for dotty plots. + +plot_parameter_identifiability(parameters = sim_flow$parameter$values, + objectives = + select(obj_tbl_list$flo_day, -starts_with(c('rank', 'run'))), + ## This starts_with vector could be amended with + ## other columns which should not be considered + run_fraction = .2) + + + +# For comparison you can use function plot_dotty(). It allows allows to examine +# model performance results vs parameter values. + +# The function has the following arguments: +# 'par' a data frame of model parameter values for each simulation. +# 'var' model performance result vector or list of vectors to be plotted +# against parameter values. +# 'y_label' is the label or vector of labels for the y-axis. Default is 'y'. +# 'n_col' number of columns in the facet grid. Default is 3. +# 'y_lim' defines limits for the y-axis. Default is NULL. +# 'y_inter' Y-axis intercept value. Default is NULL. +# 'trend' logical, indicating whether to add a trend line. Default is FALSE. +# 'run_ids' a numeric vector of run IDs to be highlighted in the plot. +# 'low_up' logical. logical, TRUE if whole possible parameter range should be used +#' for x axis. Default is FALSE. + +# Example of usage: +plot_dotty(sim_flow$parameter$values, obj_tbl_list$flo_day$nse) +plot_dotty(sim_flow$parameter$values, obj_tbl_list$ntot_day_conc$kge, n_col = 5, y_lim = c(0.0, 0.9)) +plot_dotty(sim_flow$parameter$values, obj_tbl$pbias, n_col = 5) +plot_dotty(sim_flow$parameter$values, obj_tbl$nse, run_ids = c(1, 2, 5), low_up = TRUE) +plot_dotty(sim_flow$parameter$values, obj_tbl$kge, trend = TRUE) + +plot_dotty(sim_flow$parameter$values, obj_tbl_list$flo_day$nse) + +# Dotty plots could be also done for multiple variables in the simulation list. +# In this case the dotty plot will be done plotted on same plot with different +# colors for each variable. Trends could be also added to the plot. +# Water flow and water quality variables could be plotted on same plot. +plot_dotty(par = sim_flow$parameter$values, + var = list(obj_tbl_list$flo_day_52$nse, + obj_tbl_list$no3_day_52_conc$nse, + obj_tbl_list$gwd_day$nse), + y_label = c('Flow at 52r (m3/s)', "Nitrate at 52r (mgN/l)", "Groundwater depth (m)"), + trend = TRUE) + +##------------------------------------------------------------------------------ +## 8) Selecting the best performing parameter sets to continue into WQ calibration +##------------------------------------------------------------------------------ + + +# Selection could be done in two ways: + +# 1) Based on the rank of performance metrics +# Filter the parameter sets to only include the best performing ones. +n_best <- ifelse(length(n_best) == 0, 20/n_comb, n_best) + +run_sel_ids <- obj_tbl %>% + arrange(rank_tot)%>% + top_frac(-n_best, wt = rank_tot) %>% + pull(run_id) %>% + as.numeric + +# 2) Based on the thresholds of performance metrics. These thresholds should be +# defined by the user based on the plots in step 7. +# Below is an example code of how to select the parameter sets based on the thresholds +# Select runs based on certain criteria + +run_sel_ids <- as.numeric(str_remove(names(which(obj_tbl$nse > 0.5 & + obj_tbl$kge > 0.5 & + abs(obj_tbl$pbias) < 15)), 'run_')) + +# You can examine the selected parameter sets by running the following code +print(paste0("Selected run ids are: ", paste(run_sel_ids, collapse = ", "))) +obj_tbl_cal<- obj_tbl[run_sel_ids,] + +##------------------------------------------------------------------------------ +## 9) Plotting results of selected parameter sets vs observed discharge +##------------------------------------------------------------------------------ + +# The function plot_selected_sim() allows to plot the results comparing the +# selectedsimulated data vs observation data. + +# Function has the following arguments: +# 'sim' object from SWATrunR +#' obs' dataframe for the observed data with two columns: 'date' and 'value'. +# 'par_name' name of the parameter set to be used (default is NULL, which selects +# the first record). But if you have multiple parameter sets, you can use this +# argument to select the one you want to use. Example par_name = "flo_day" or +# par_name = no3_day_conc". +# 'run_ids' character vector with the ids of the runs to be used in the plot. +# 'run_sel' character vector with the id/ids of the runs to emphasized in the plot. +# 'plot_bands' logical. If TRUE, the plot will include lower and upper bands of +# results. +# 'period' character. If NULL, the plot will be based on the original time step +# of the simulation. Other examples are "day", "week", "month", "year", etc. +# If "average monthly", the plot will be based on the multi-annual monthly values. +# 'fn_summarize' function to recalculate to time interval (default is "mean"), +# are "mean", median", "sum", "min", "max", etc). +# 'x_label' character. Label for the x-axis. Default is "Date". +# 'y_label' character. Label for the y-axis. Default is +# "Discharge (m<sup>3</sup> s<sup>-1</sup>)". + +plot_selected_sim(sim_flow, obs, par_name = "flo_day_52", run_ids = run_sel_ids) + +# # Another example how all function arguments are used +# plot_selected_sim(sim = sim_flow, +# obs = obs, +# par_name = "flo_day", +# run_ids = run_sel_ids, +# run_sel = run_sel_ids[1], +# plot_bands = TRUE, +# period = "average monthly", +# fn_summarize = "mean", +# x_label = "Month", +# y_label = "Monthly flow average (m<sup>3</sup> s<sup>-1</sup>)") + +##------------------------------------------------------------------------------ +## 10) OPTIONAL step to perform an OAT analysis +##------------------------------------------------------------------------------ + +# If you want to perform an OAT analysis +# Select the parameter combination around which you want to do the OAT analysis +par_center_id <- 2 + +# Extract the center parameter set and assign again the "full" names to be used +# with SWATrunR +par_center <- sim_flow$parameter$values[par_center_id, ] %>% + set_names(sim_flow$parameter$definition$full_name) + +# You can define your own parameter boundaries for the selected parameters +# Here a way how you can quickly extract the boundaries from the last simulation +par_bnd <- sim_flow$parameter$values %>% + map(., ~ c(min(.x), max(.x))) %>% + map(., ~ round(.x, 3)) %>% + bind_cols(.) %>% + set_names(sim_flow$parameter$definition$full_name) + +# Sample the OAT parameter set and run the simulations +par_oat <- sample_oat(par_center = par_center, par_boundary = par_bnd) + +par_oat <- filter(par_oat, parameter %in% c('center', 'bf_max.aqu | change = absval')) + +sim_oat <- run_swatplus(project_path = model_path, + output = list( + flo_day = define_output(file = 'channel_sd_day', + variable = 'flo_out', + unit = cha_ids)), + parameter = par_oat[,3:ncol(par_oat)], + n_thread = n_cores) +# save_file = '231128_sim_oat') + +# To visualize the OAT runs e.g. for the parameter surlag: +plot_oat(sim = sim_oat, obs = obs, variable = 'flo_day') + +##------------------------------------------------------------------------------ +## 11) Validating best selected parameter sets vs observed data +##------------------------------------------------------------------------------ + +## This step is very important. You have to proof that the best parameter combination +## results in an acceptable model performance also in a different simulation period +## or at a different (ideally hydrologically independend) gauge +## Below you find an example for validation using a different simulation period + +# Readjusting the dates of the simulated and observed data for validation period +tmp <- fix_dates(sim_flow_full, obs_full, trim_start = q_val_period[1], + trim_end = q_val_period[2]) +sim_flow_val <- tmp$sim +obs_val <- tmp$obs + +# Calculate the performance metrics for validation period. +obj_tbl_val <- calculate_performance(sim_flow_val, obs_val, par_name = "flo_day_52", + run_ids = obj_tbl_cal$run_id) + +# Plot comparison of performance results between calibration and validation period. +# plot_calval_comparison() function has the following arguments: +# cal_tbl - data frame with performance metrics for calibration period +# val_tbl - data frame with performance metrics for validation period +# indexes - vector of performance metrics to be plotted. Default is NULL, all +# indexes should be plotted. +plot_calval_comparison(obj_tbl_cal, obj_tbl_val, + indexes = c("nse", "kge", "pbias", "r2", "mae" )) + +# Plot results on the time series figure for validation period +plot_selected_sim(sim_flow_val, obs_val, par_name = "flo_day_52", + run_ids = obj_tbl_cal$run_id) + +##------------------------------------------------------------------------------ +## 12) Define nutrient parameters and merge with hydrology parameters +##------------------------------------------------------------------------------ + +# !!! This part is only needed, if you want to calibrate hydrology and +# nutrient/sediment parameters separately + +# Extract the hydrology parameter set that you want to combine the nutrient +# parameters with +par_q <- sim_flow$parameter$values[run_sel_ids,] %>% + set_names(., sim_flow$parameter$definition$full_name) + +# Draw a sample of n_nutr combinations for the nutrient parameters that should +# be calibrated and combine them with all selected hydrology parameter sets. + +# Below are examples of how to define the nutrient parameters their boundaries. +# For nitrogen, sedimends and phosphorus parameters. You can use the same one of +# these, combine or define your own. + +# Define the nitrogen parameters and their boundaries. +par_nutr_bound <- tibble("n_updis.bsn | change = absval" = c(0, 100), # may affect crop yields, please check! + "nperco.bsn | change = absval" = c(0, 1), + "sdnco.bsn | change = absval" = c(0.75, 1.1), + "hlife_n.aqu | change = absval" = c(0, 200), + "no3_init.aqu | change = absval" = c(0, 30), + "cmn.bsn | change = absval" = c(0.001, 0.003), + "rsdco.bsn | change = absval" = c(0.02, 0.1)) + +# Sediment parameters and their boundaries +par_sed_bound <- tibble("cov.rte | change = absval" = c(0,10), + "ch_clay.rte | change = absval" = c(0,100), + "bedldcoef.rte | change = absval" = c(0,1), + "slope_len.hru | change = absval" = c(10,150), + "chs.rte | change = relchg" = c(-0.5,0.5), + # "wd_rto.rte | change = relchg" = c(-0.5,0.5), + "cherod.rte | change = absval" = c(0,0.6)) + +# Sediment parameters and their boundaries +par_phos_bound <- tibble("p_updis.bsn | change = absval" = c(0, 100), # super sensitive but also affecting crop yields. Print out yields as well! + "pperco.bsn | change = absval" = c(10, 17.5), + "phoskd.bsn | change = absval" = c(100, 200), + "psp.bsn | change = absval" = c(0.01, 0.7)) + +# If you want to combine parameters, you can use the following code +par_nutr_bound <- cbind(par_nutr_bound, par_sed_bound, par_phos_bound) + + + +# Nutrient parameter set repeated the number of times of selected hydr. params. +n_nutr <- ifelse(length(n_nutr) == 0, 1000/(n_comb*n_best), n_nutr) + +par_nutr <- sample_lhs(par_nutr_bound, n_nutr) %>% + slice(rep(1:n(), each = nrow(par_q))) + +# The new parameter set is a combination of all hydrology parameter sets and +# the sampled nutrient parameter sets. +par_cal <- par_q %>% + slice(., rep(1:n(), n_nutr)) %>% + bind_cols(., par_nutr) + +##------------------------------------------------------------------------------ +## 13) Running simulations +##------------------------------------------------------------------------------ + +# Read the observed water quality data to determine the period needed for the runs. +obs_wq_full <- read_csv(wqobs_path) + +## Run simulations for the nutrient parameters. +# check if you need to redefine start_date, end_date and start_date_print +sim_wq_full_bck <- run_swatplus(project_path = model_path, + output = list( + flo_day = define_output(file = 'channel_sd_day', + variable = 'flo_out', + unit = cha_ids), + no3_day = define_output(file = 'channel_sd_day', + variable = 'no3_out', + unit = cha_ids) + ## please add parameters for other nutrients, sediments, + ## if you plan to use them. + + # ,orgn_day = define_output(file = 'channel_sd_day', + # variable = 'orgn_out', + # unit = cha_ids), + # nh3_day = define_output(file = 'channel_sd_day', + # variable = 'nh3_out', + # unit = cha_ids), + # no2_day = define_output(file = 'channel_sd_day', + # variable = 'no2_out', + # unit = cha_ids), + # solp_day = define_output(file = 'channel_sd_day', + # variable = 'solp_out', + # unit = cha_ids), + # sedp_day = define_output(file = 'channel_sd_day', + # variable = 'sedp_out', + # unit = cha_ids), + # sed_out = define_output(file = 'channel_sd_day', + # variable = 'sed_out', + # unit = cha_ids) + ), + parameter = par_cal, + start_date = start_date, + end_date = end_date, + start_date_print = as.Date( + ifelse(length(start_date_print)==0, + min(obs_wq_full$date), start_date_print)), + n_thread = n_cores, + save_file = paste0(save_file_name, '_wq') +) + +# If you have to remove the runs that did not finish, use the following code. +sim_wq_full <- remove_unsuccesful_runs(sim_wq_full_bck) + +# If you need to get total nitrogen or total phosphorus, you need to have actived +# n parts saving in run_swatplus() function above. Following code will calculate +# total nitrogen. You can adapt it to calculate total phosphorus as well + n_parts <- c("no3_day", "orgn_day", "nh3_day", "no2_day") +# + sim_flow_full$simulation$ntot_day <- + bind_cols(sim_flow_full$simulation[[1]]["date"], + Reduce("+", map(n_parts, ~sim_flow_full$simulation[[.x]][-1]))) +##------------------------------------------------------------------------------ +## 14) Calculate concentrations from simulated nutrients +##------------------------------------------------------------------------------ + +sim_wq_full <- get_conc(sim_wq_full) + +##------------------------------------------------------------------------------ +## 15) Re-adjusting observation and simulation date (so that they match) +##------------------------------------------------------------------------------ + +tmp <- fix_dates(sim_wq_full, obs_wq_full, trim_start = wq_cal_period[1], + trim_end = wq_cal_period[2]) + +sim_wq <- tmp$sim +obs_wq <- tmp$obs + +##------------------------------------------------------------------------------ +## 16) Calculate performance metrics +##------------------------------------------------------------------------------ + +obj_tbl_wq <- calculate_performance(sim_wq, obs_wq, "no3_day_52_conc") + +##------------------------------------------------------------------------------ +## 17) Plotting dotty plots +##------------------------------------------------------------------------------ + +# For comparison you can do dotty plots. +plot_dotty(sim_wq$parameter$values, obj_tbl_wq$nse) +plot_dotty(sim_wq$parameter$values, obj_tbl_wq$rsr_20_70) +plot_dotty(sim_wq$parameter$values, obj_tbl_wq$pbias) +plot_dotty(sim_wq$parameter$values, obj_tbl_wq$r2) + +##------------------------------------------------------------------------------ +## 18) Selecting the best performing parameter sets +##------------------------------------------------------------------------------ + +# Filter the parameter sets to only include the best performing ones. +run_sel_ids <- obj_tbl_wq %>% + arrange(rank_tot)%>% + top_frac(-n_best, wt = rank_tot) %>% + pull(run_id) %>% + as.numeric + +# Extract the parameter set that you want use in further modeling +par <- sim_wq$parameter$values[run_sel_ids,] %>% + set_names(., sim_wq$parameter$definition$full_name) + +obj_tbl_cal <- obj_tbl_wq[run_sel_ids,] + +##------------------------------------------------------------------------------ +## 19) Plotting results of selected parameter sets vs WQ observed data +##------------------------------------------------------------------------------ + +plot_selected_sim(sim_wq, obs_wq, run_ids = run_sel_ids, par_name = "no3_day_52_conc") + +##------------------------------------------------------------------------------ +## 20) Validation of best WQ parameters vs observed data +##------------------------------------------------------------------------------ + +# Readjusting the dates of the simulated and observed data for validation period +tmp <- fix_dates(sim_wq_full, obs_wq_full, trim_start = wq_val_period[1], + trim_end = wq_val_period[2]) +sim_wq_val <- tmp$sim +obs_wq_val <- tmp$obs + +# Calculate the performance metrics for validation period +obj_tbl_val <- calculate_performance(sim_wq_val, obs_wq_val, par_name = "no3_day_52_conc", + run_ids = obj_tbl_cal$run_id) + +# Plot comparison of performance results between calibration and validation period. +plot_calval_comparison(obj_tbl_cal, obj_tbl_val, + indexes = c("nse", "kge", "pbias", "r2", "mae" )) + +# Plot results on the time series figure for validation period +plot_selected_sim(sim_wq_val, obs_wq_val, par_name = "no3_day_52_conc", + run_ids = obj_tbl_cal$run_id) + +##------------------------------------------------------------------------------ +## 21) OPTIONAL step, if you want to write out calibration.cal file +##------------------------------------------------------------------------------ + +# If you want to write out the calibration.cal file, you can use the following code. +# The function write_cal_file() has the following arguments: +# 'par_table' is the parameter set that you want to use. +# 'write_path' is the path where you want to write the calibration.cal file. +# 'i_run' is the number of the parameter set that you want to use for the +# calibration.cal file. + +# Example of usage: +write_cal_file(par_table = par_flow, + write_path = getwd(), + i_run = 1) + +par_bound# Another option is just to save your parameter set as a .rds file and then use it +# later with SWATrunR for scenario runs. + +# Example of usage: +# To save the parameter set +saveRDS(par, "path/to/write/par.rds") +# To load the parameter set +par <- readRDS("path/to/write/par.rds") + +##------------------------------------------------------------------------------ +## 22) SAVE THE RESULTS +##------------------------------------------------------------------------------ + +# Save the results from running the workflow. This is important in case you need to +# produce or update figures for reports or publications at a later stage. +# This way, you will not need to rerun simulations. + +save.image("your_file_name.RData") +# load("your_file_name.RData") + + +