From f247ff26836e72bfde3732a8631f16e88feb56f2 Mon Sep 17 00:00:00 2001 From: biopsichas Date: Mon, 24 Jun 2024 21:00:29 +0300 Subject: [PATCH] 6a step added --- DESCRIPTION | 3 +- NAMESPACE | 14 ++ R/globals.R | 2 +- R/helper.R | 107 +++++++++++++ R/prepare.R | 157 ++++++++++++++++++++ README.md | 2 +- docs/404.html | 2 +- docs/LICENSE-text.html | 2 +- docs/articles/hc.html | 51 ++++++- docs/articles/index.html | 2 +- docs/articles/sc-crops.html | 2 +- docs/articles/sc-wy.html | 2 +- docs/authors.html | 6 +- docs/index.html | 4 +- docs/news/index.html | 2 +- docs/pkgdown.yml | 2 +- docs/reference/add_suffix_to_duplicate.html | 2 +- docs/reference/aggregate_aa.html | 2 +- docs/reference/calc_fdc.html | 108 ++++++++++++++ docs/reference/calc_fdc_rsr.html | 125 ++++++++++++++++ docs/reference/calculate_performance.html | 148 ++++++++++++++++++ docs/reference/calculate_wyr.html | 2 +- docs/reference/copy_file_version.html | 2 +- docs/reference/dotty_fig.html | 2 +- docs/reference/find_par_range.html | 2 +- docs/reference/fix_dates.html | 2 +- docs/reference/get_conc.html | 2 +- docs/reference/group_values.html | 2 +- docs/reference/id_text_strings.html | 2 +- docs/reference/index.html | 7 +- docs/reference/paste_runs.html | 2 +- docs/reference/plot_dotty.html | 2 +- docs/reference/plot_dotty_yields.html | 2 +- docs/reference/plot_esco_range.html | 2 +- docs/reference/plot_phu_yld_bms.html | 2 +- docs/reference/read_tbl.html | 2 +- docs/reference/rsr_df.html | 111 ++++++++++++++ docs/reference/sample_lhs.html | 8 +- docs/reference/truncate.html | 2 +- docs/reference/update_par.html | 2 +- docs/reference/write_tbl.html | 2 +- docs/search.json | 2 +- docs/sitemap.xml | 12 ++ man/calc_fdc.Rd | 24 +++ man/calc_fdc_rsr.Rd | 33 ++++ man/calculate_performance.Rd | 52 +++++++ man/rsr_df.Rd | 25 ++++ vignettes/hc.Rmd | 66 +++++++- 48 files changed, 1072 insertions(+), 47 deletions(-) create mode 100644 docs/reference/calc_fdc.html create mode 100644 docs/reference/calc_fdc_rsr.html create mode 100644 docs/reference/calculate_performance.html create mode 100644 docs/reference/rsr_df.html create mode 100644 man/calc_fdc.Rd create mode 100644 man/calc_fdc_rsr.Rd create mode 100644 man/calculate_performance.Rd create mode 100644 man/rsr_df.Rd diff --git a/DESCRIPTION b/DESCRIPTION index dbf33ab..bccb866 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: SWATtunR Type: Package Title: Soft & Hard Calibration, Validation Package for SWAT+ models -Version: 0.0.1.9006 +Version: 0.0.1.9008 Authors@R: c( person("Svajunas", "Plunge", , "svajunas.plunge@gmail.com", role = c("aut", "cre"), comment = c(ORCID = '0000-0001-8897-3349')), @@ -23,6 +23,7 @@ Imports: dplyr, gridExtra, ggplot2, + hydroGOF, lhs, lubridate, tibble, diff --git a/NAMESPACE b/NAMESPACE index fb7a097..5703c74 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,11 @@ # Generated by roxygen2: do not edit by hand +export(calculate_performance) export(calculate_wyr) export(copy_file_version) export(fix_dates) export(get_conc) +export(id_text_strings) export(plot_dotty) export(plot_dotty_yields) export(plot_esco_range) @@ -25,8 +27,10 @@ importFrom(dplyr,group_by) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,rename) +importFrom(dplyr,row_number) importFrom(dplyr,select) importFrom(dplyr,summarise) +importFrom(dplyr,summarize_all) importFrom(ggplot2,aes) importFrom(ggplot2,coord_cartesian) importFrom(ggplot2,element_blank) @@ -52,19 +56,29 @@ importFrom(ggplot2,theme_bw) importFrom(ggplot2,vars) importFrom(ggplot2,ylim) importFrom(gridExtra,grid.arrange) +importFrom(hydroGOF,KGE) +importFrom(hydroGOF,NSE) +importFrom(hydroGOF,mae) +importFrom(hydroGOF,pbias) +importFrom(hydroGOF,rsr) importFrom(lhs,randomLHS) +importFrom(lubridate,floor_date) +importFrom(lubridate,month) importFrom(lubridate,year) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map2_chr) importFrom(purrr,map2_df) +importFrom(purrr,map_dbl) importFrom(purrr,map_int) importFrom(purrr,map_vec) importFrom(purrr,modify2) importFrom(purrr,reduce) importFrom(purrr,set_names) +importFrom(readr,parse_number) importFrom(readr,write_lines) importFrom(stats,approx) +importFrom(stats,cor) importFrom(stats,quantile) importFrom(stringr,str_remove) importFrom(stringr,str_replace) diff --git a/R/globals.R b/R/globals.R index c256e0d..61a17cd 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,3 +1,3 @@ utils::globalVariables(c(".", "hru", "plant_name", "name", "value", "yield_mean", "run", "parameter", "values", "yld_50", "yld_25", "yld_75", - "model_path", "abs_min", "abs_max")) + "model_path", "abs_min", "abs_max", "p", "run_id")) diff --git a/R/helper.R b/R/helper.R index f6f8f7c..c255f4a 100644 --- a/R/helper.R +++ b/R/helper.R @@ -248,6 +248,7 @@ truncate <- function(x, n, side = 'left') { #' #' @return A vector of HRU ID text strings separated into groups according to #' provided values. +#' @export #' @examples #' \dontrun{ #' hyd_hyd <- read_tbl(paste0(model_path, '/hydrology.hyd')) @@ -355,3 +356,109 @@ fix_dates <- function(runr_obj, obs_obj, trim_start = NULL, trim_end = NULL){ } return(list(sim=runr_obj, obs=obs_obj)) } + +# Flow duration curve calculation functions ------------------------------------ + +#' Calculate Flow Duration Curve (FDC) +#' +#' This function calculates the flow duration curve for a given vector or dataframe. +#' +#' @param x a vector or a tibble with flow values. +#' +#' @return a tibble with sorted values and their corresponding exceedance +#' probabilities. +#' +#' @importFrom tibble tibble +#' @importFrom dplyr mutate %>% +#' +#' @examples +#' \dontrun{ +#' fdc <- calc_fdc(c(3, 1, 4, 1, 5, 9, 2, 6, 5)) +#' } +#' @keywords internal + +calc_fdc <- function(x) { + if(is.vector(x)) { + x <- tibble(value = x) + } + + n <- nrow(x) + + x %>% + apply(., 2, sort, decreasing = TRUE) %>% + as_tibble(.) %>% + mutate(p = 100 * 1:n / (n + 1), .before = 1) +} + +#' Calculate RSR for Flow Duration Curve Segments +#' +#' This function calculates the ratio of RMSE and standard deviation for +#' different segments of the flow duration curve (FDC). +#' +#' @param fdc_sim a tibble with simulated flow data. +#' @param fdc_obs a tibble with observed flow data. +#' @param quantile_splits a numeric vector with quantiles for splitting the FDC. +#' @param out_tbl character specifying the output format ('long' or 'wide'). +#' Default \code{out_tbl = 'long'}. +#' +#' @return a tibble with RSR values for the different segments of the FDC. +#' +#' @importFrom dplyr select mutate bind_cols bind_rows %>% +#' @importFrom purrr map2 map_dbl +#' +#' @examples +#' \dontrun{ +#' fdc_sim <- calc_fdc(runif(100)) +#' fdc_obs <- calc_fdc(runif(100)) +#' rsr_values <- calc_fdc_rsr(fdc_sim, fdc_obs, c(5, 20, 70, 95)) +#' } +#' @keywords internal + + +calc_fdc_rsr <- function(fdc_sim, fdc_obs, quantile_splits, out_tbl = 'long') { + if(all(quantile_splits <= 1)) { + quantile_splits <- 100 * quantile_splits + } + quantile_splits <- sort(unique(c(0, 100, quantile_splits))) + p_cuts <- cut(fdc_obs$p, quantile_splits) + obs <- split(select(fdc_obs, -p), p_cuts) + sim <- split(select(fdc_sim, -p), p_cuts) + + rsr_list <- map2(sim, obs, ~ rsr_df(.x, .y[[1]])) + + if(out_tbl == 'long') { + n_col <- length(quantile_splits) - 1 + col_names <- paste0('p_', quantile_splits[1:n_col], + '_', quantile_splits[2:(n_col + 1)]) + rsr <- bind_cols(rsr_list) %>% + set_names(col_names) %>% + mutate(., run = names(fdc_sim)[2:ncol(fdc_sim)], .before = 1) + } else { + rsr <- rsr_list %>% + bind_rows(.) %>% + mutate(p = unique(p_cuts), .before = 1) + } + return(rsr) +} + +#' Calculate RSR for Dataframe Segments +#' +#' This function calculates the RSR values for the different segments of the data. +#' +#' @param df_sim a dataframe with simulated values. +#' @param v_obs a dataframe with observed values. +#' +#' @return a numeric vector with RSR values for each segment. +#' +#' @importFrom purrr map_dbl +#' @importFrom hydroGOF rsr +#' +#' @examples +#' \dontrun{ +#' rsr_values <- rsr_df(df_sim, v_obs) +#' } +#' @keywords internal + +rsr_df <- function(df_sim, v_obs) { + map_dbl(df_sim, ~ rsr(.x, v_obs)) +} diff --git a/R/prepare.R b/R/prepare.R index 83abde3..b7ca11b 100644 --- a/R/prepare.R +++ b/R/prepare.R @@ -124,3 +124,160 @@ calculate_wyr <- function(sim) { return(wyr) } + +#' Calculate All Performance Metrics +#' +#' This function calculates various performance metrics for a given simulation +#' and observed data. +#' +#' @param sim Object from SWATrunR +#' @param obs Dataframe for the observed data with two columns: date and value +#' @param par_name (optional) Character for name of the parameter set to be used. +#' Default \code{par_name = NULL}, which select first variable in sim object. +#' But if you have multiple parameter sets, you can use this argument to select +#' the one you want to use. Example par_name = "flo_day" or par_name = no3_day_conc". +#' @param perf_metrics (optional) Character vector with the names of the performance metrics +#' to used in the rank_tot calculation. Default \code{perf_metrics = NULL}, +#' which means that all performance metrics will be used in calculation. +#' Other example could be perf_metrics = c("kge", "nse"), which means that only +#' KGE and NSE will be used in calculation. +#' @param period (optional) Character describing, which time interval to display. +#' Default \code{period = NULL}, which mean not activated, other examples are +#' "day", "week", "month", etc). +#' See [lubridate::floor_date](https://www.rdocumentation.org/packages/lubridate/versions/1.3.3/topics/floor_date) for details. +#' @param fn_summarize (optional) Function to recalculate to the specified time interval. +#' Default \code{fn_summarize ="mean"}, other examples are "median", "sum". See [dplyr::summarise](https://dplyr.tidyverse.org/reference/summarise.html) for details. +#' @return A dataframe with the performance metrics and ranking. +#' @importFrom dplyr mutate group_by summarize_all left_join select everything row_number %>% +#' @importFrom lubridate month floor_date +#' @importFrom purrr map_dbl map2 reduce +#' @importFrom stats cor +#' @importFrom readr parse_number +#' @importFrom hydroGOF NSE KGE pbias mae rsr +#' @export +#' @examples +#' \dontrun{ +#' obj_tbl <- calculate_performance(sim = sim_flow, obs, "flo_day", c("kge", "nse"), "month", "sum") +#' } +#' @keywords calculate + +calculate_performance <- function(sim, obs, par_name = NULL, perf_metrics = NULL, + period = NULL, + fn_summarize = 'mean') { + + if(is.null(par_name)) { + if(length(sim$simulation) > 1) { + warning(paste0("You have multiple variable sets in the simulation object.\n + They are ", paste(names(sim$simulation), collapse = ", "), + "\nCurrently, the first one is used, which is ", names(sim$simulation)[1], + ".\n If you want to use another one, please specify, which one you want to use with 'par_name' argument.")) + } + # Filter to parameter set of interest. + sim <- sim$simulation[[1]] + } else { + sim <- sim$simulation[[par_name]] + } + + # Adding list of metric if not provided + if(is.null(perf_metrics)) perf_metrics <- c("nse", "kge", "pbias", "r2", "mae", "rsr") + + # Initializing result table + t <- data.frame(run_id = parse_number(names(sim[-1]))) + + # Calculate the mean absolute error (MAE) for the average monthly values. + if("mae" %in% perf_metrics){ + sim_m <- mutate(sim, date = month(date)) %>% + group_by(date) %>% + summarize_all(get(fn_summarize)) + obs_m <- mutate(obs, date = month(date)) %>% + group_by(date) %>% + summarize_all(get(fn_summarize)) + t$mae <- map_dbl(select(sim_m, - date), ~mae(.x, obs_m$value)) + t$rank_mae <- rank(abs(t$mae)) + + } + + # Change the time step + if(!is.null(period)) { + sim <- mutate(sim, date = floor_date(date , period)) %>% + group_by(date) %>% + summarize_all(get(fn_summarize)) + obs <- mutate(obs, date = floor_date(date , period)) %>% + group_by(date) %>% + summarize_all(get(fn_summarize)) + } + + # Calculate the root square ratio + if("rsr" %in% perf_metrics){ + # Calculate flow duration curves (FDC) for observed data and the simulations. + fdc_obs <- calc_fdc(obs$value) + fdc_sim <- calc_fdc(select(sim, -date)) + + # Splitting intervals for the flow duration curve (FDC) + p <- c(5, 20, 70, 95) + p_lbl <- c('p_0_5', 'p_5_20', 'p_20_70', 'p_70_95', 'p_95_100') + + perf_metrics <- c(perf_metrics, gsub("p", "rsr", p_lbl)) + + # Calculate the ratio of RSME and standard deviation for different segments + # of the FDC (same as in the publications of the Kiel working group). + rsr_fdc <- calc_fdc_rsr(fdc_sim, fdc_obs, p) + + + fdc_thrs <- c(max(fdc_obs$value), + approx(fdc_obs$p, fdc_obs$value, p)$y, + -0.1) + # Separate the hydrograph into high medium and low flows. + obs_sep <- map2(fdc_thrs[1:(length(fdc_thrs) - 1)], + fdc_thrs[2:length(fdc_thrs)], + ~ mutate(obs, value = ifelse(value <= .x & value > .y, value, NA))) %>% + map2(., p_lbl, ~ set_names(.x, c('date', .y))) %>% + reduce(., left_join, by = 'date') + + t$rsr_vh <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_0_5)) + t$rsr_h <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_5_20)) + t$rsr_m <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_20_70)) + t$rsr_l <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_70_95)) + t$rsr_vl <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_95_100)) + + t$rsr_0_5 <- -rsr_fdc$p_0_5 + t$rsr_5_20 <- -rsr_fdc$p_5_20 + t$rsr_20_70 <- -rsr_fdc$p_20_70 + t$rsr_70_95 <- -rsr_fdc$p_70_95 + t$rsr_95_100 <- -rsr_fdc$p_95_100 + + t$rank_rsr_0_5 <- rank(-t$rsr_0_5) + t$rank_rsr_5_20 <- rank(-t$rsr_5_20) + t$rank_rsr_20_70 <- rank(-t$rsr_20_70) + t$rank_rsr_70_95 <- rank(-t$rsr_70_95) + t$rank_rsr_95_100 <- rank(-t$rsr_95_100) + } + # Calculate the Nash-Sutcliffe efficiency (NSE) + if ("nse" %in% perf_metrics){ + t$nse <- map_dbl(select(sim, - date), ~NSE(.x, obs$value)) + t$rank_nse <- rank(-t$nse) + } + # Calculate the Kling-Gupta efficiency (KGE) + if("kge" %in% perf_metrics){ + t$kge <- map_dbl(select(sim, - date), ~KGE(.x, obs$value)) + t$rank_kge <- rank(-t$kge) + } + # Calculate the percent bias (PBIAS) + if("pbias" %in% perf_metrics){ + t$pbias <- map_dbl(select(sim, - date), ~pbias(.x, obs$value)) + t$rank_pbias <- rank(abs(-abs(t$pbias))) + } + # Calculate the coefficient of determination (R2) + if("r2" %in% perf_metrics){ + t$r2 <- map_dbl(select(sim, - date), ~cor(.x, obs$value)^2) + t$rank_r2 <- rank(-t$r2) + } + + # Calculate rank_tot. + t <- t %>% mutate(rank_tot = as.integer(rank(rowSums( + select(., starts_with(paste0('rank_', tolower(perf_metrics)))))))) %>% + select(run_id, everything()) + + return(t) +} + diff --git a/README.md b/README.md index f6e5d36..62ccf69 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Introduction to SWATtunR # SWATtunR -[![](https://img.shields.io/badge/devel%20version-0.0.1.9006-blue.svg)](https://github.com/biopsichas/SWATtunR) +[![](https://img.shields.io/badge/devel%20version-0.0.1.9008-blue.svg)](https://github.com/biopsichas/SWATtunR) [![](https://img.shields.io/github/last-commit/biopsichas/SWATtunR.svg)](https://github.com/biopsichas/SWATtunR/commits/green) [![](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![Project Status: Active - The project has reached a stable, usable diff --git a/docs/404.html b/docs/404.html index 7306a17..ac4b27f 100644 --- a/docs/404.html +++ b/docs/404.html @@ -31,7 +31,7 @@ SWATtunR - 0.0.1.9006 + 0.0.1.9008 + + + + + +
+
+
+ +
+

This function calculates the flow duration curve for a given vector or dataframe.

+
+ +
+

Usage

+
calc_fdc(x)
+
+ +
+

Arguments

+
x
+

a vector or a tibble with flow values.

+ +
+
+

Value

+ + +

a tibble with sorted values and their corresponding exceedance +probabilities.

+
+ +
+

Examples

+
if (FALSE) {
+fdc <- calc_fdc(c(3, 1, 4, 1, 5, 9, 2, 6, 5))
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/calc_fdc_rsr.html b/docs/reference/calc_fdc_rsr.html new file mode 100644 index 0000000..599158f --- /dev/null +++ b/docs/reference/calc_fdc_rsr.html @@ -0,0 +1,125 @@ + +Calculate RSR for Flow Duration Curve Segments — calc_fdc_rsr • SWATtunR + Skip to contents + + +
+
+
+ +
+

This function calculates the ratio of RMSE and standard deviation for +different segments of the flow duration curve (FDC).

+
+ +
+

Usage

+
calc_fdc_rsr(fdc_sim, fdc_obs, quantile_splits, out_tbl = "long")
+
+ +
+

Arguments

+
fdc_sim
+

a tibble with simulated flow data.

+ + +
fdc_obs
+

a tibble with observed flow data.

+ + +
quantile_splits
+

a numeric vector with quantiles for splitting the FDC.

+ + +
out_tbl
+

character specifying the output format ('long' or 'wide'). +Default out_tbl = 'long'.

+ +
+
+

Value

+ + +

a tibble with RSR values for the different segments of the FDC.

+
+ +
+

Examples

+
if (FALSE) {
+fdc_sim <- calc_fdc(runif(100))
+fdc_obs <- calc_fdc(runif(100))
+rsr_values <- calc_fdc_rsr(fdc_sim, fdc_obs, c(5, 20, 70, 95))
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/calculate_performance.html b/docs/reference/calculate_performance.html new file mode 100644 index 0000000..ff6795d --- /dev/null +++ b/docs/reference/calculate_performance.html @@ -0,0 +1,148 @@ + +Calculate All Performance Metrics — calculate_performance • SWATtunR + Skip to contents + + +
+
+
+ +
+

This function calculates various performance metrics for a given simulation +and observed data.

+
+ +
+

Usage

+
calculate_performance(
+  sim,
+  obs,
+  par_name = NULL,
+  perf_metrics = NULL,
+  period = NULL,
+  fn_summarize = "mean"
+)
+
+ +
+

Arguments

+
sim
+

Object from SWATrunR

+ + +
obs
+

Dataframe for the observed data with two columns: date and value

+ + +
par_name
+

(optional) Character for name of the parameter set to be used. +Default par_name = NULL, which select first variable in sim object. +But if you have multiple parameter sets, you can use this argument to select +the one you want to use. Example par_name = "flo_day" or par_name = no3_day_conc".

+ + +
perf_metrics
+

(optional) Character vector with the names of the performance metrics +to used in the rank_tot calculation. Default perf_metrics = NULL, +which means that all performance metrics will be used in calculation. +Other example could be perf_metrics = c("kge", "nse"), which means that only +KGE and NSE will be used in calculation.

+ + +
period
+

(optional) Character describing, which time interval to display. +Default period = NULL, which mean not activated, other examples are +"day", "week", "month", etc). +See lubridate::floor_date for details.

+ + +
fn_summarize
+

(optional) Function to recalculate to the specified time interval. +Default fn_summarize ="mean", other examples are "median", "sum". See dplyr::summarise for details.

+ +
+
+

Value

+ + +

A dataframe with the performance metrics and ranking.

+
+ +
+

Examples

+
if (FALSE) {
+obj_tbl <- calculate_performance(sim = sim_flow, obs, "flo_day", c("kge", "nse"), "month", "sum")
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/calculate_wyr.html b/docs/reference/calculate_wyr.html index a10597f..117e7ca 100644 --- a/docs/reference/calculate_wyr.html +++ b/docs/reference/calculate_wyr.html @@ -16,7 +16,7 @@ SWATtunR - 0.0.1.9006 + 0.0.1.9008 + + + + + +
+
+
+ +
+

This function calculates the RSR values for the different segments of the data.

+
+ +
+

Usage

+
rsr_df(df_sim, v_obs)
+
+ +
+

Arguments

+
df_sim
+

a dataframe with simulated values.

+ + +
v_obs
+

a dataframe with observed values.

+ +
+
+

Value

+ + +

a numeric vector with RSR values for each segment.

+
+ +
+

Examples

+
if (FALSE) {
+rsr_values <- rsr_df(df_sim, v_obs)
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/sample_lhs.html b/docs/reference/sample_lhs.html index c47562e..57e3cd0 100644 --- a/docs/reference/sample_lhs.html +++ b/docs/reference/sample_lhs.html @@ -10,7 +10,7 @@ SWATtunR - 0.0.1.9006 + 0.0.1.9008