6a step added

biopsichas · Jun 24, 2024 · f247ff2 · f247ff2
1 parent d9dba0f
commit f247ff2
Show file tree

Hide file tree

Showing 48 changed files with 1,072 additions and 47 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SWATtunR
 Type: Package
 Title: Soft & Hard Calibration, Validation Package for SWAT+ models
-Version: 0.0.1.9006
+Version: 0.0.1.9008
 Authors@R: c(
     person("Svajunas", "Plunge", , "svajunas.plunge@gmail.com", role = c("aut", "cre"),
         comment = c(ORCID = '0000-0001-8897-3349')),
@@ -23,6 +23,7 @@ Imports:
     dplyr,
     gridExtra,
     ggplot2,
+    hydroGOF,
     lhs,
     lubridate,
     tibble, 

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,9 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
+export(calculate_performance)
 export(calculate_wyr)
 export(copy_file_version)
 export(fix_dates)
 export(get_conc)
+export(id_text_strings)
 export(plot_dotty)
 export(plot_dotty_yields)
 export(plot_esco_range)
@@ -25,8 +27,10 @@ importFrom(dplyr,group_by)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,rename)
+importFrom(dplyr,row_number)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
+importFrom(dplyr,summarize_all)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,coord_cartesian)
 importFrom(ggplot2,element_blank)
@@ -52,19 +56,29 @@ importFrom(ggplot2,theme_bw)
 importFrom(ggplot2,vars)
 importFrom(ggplot2,ylim)
 importFrom(gridExtra,grid.arrange)
+importFrom(hydroGOF,KGE)
+importFrom(hydroGOF,NSE)
+importFrom(hydroGOF,mae)
+importFrom(hydroGOF,pbias)
+importFrom(hydroGOF,rsr)
 importFrom(lhs,randomLHS)
+importFrom(lubridate,floor_date)
+importFrom(lubridate,month)
 importFrom(lubridate,year)
 importFrom(purrr,map)
 importFrom(purrr,map2)
 importFrom(purrr,map2_chr)
 importFrom(purrr,map2_df)
+importFrom(purrr,map_dbl)
 importFrom(purrr,map_int)
 importFrom(purrr,map_vec)
 importFrom(purrr,modify2)
 importFrom(purrr,reduce)
 importFrom(purrr,set_names)
+importFrom(readr,parse_number)
 importFrom(readr,write_lines)
 importFrom(stats,approx)
+importFrom(stats,cor)
 importFrom(stats,quantile)
 importFrom(stringr,str_remove)
 importFrom(stringr,str_replace)

diff --git a/R/globals.R b/R/globals.R
@@ -1,3 +1,3 @@
 utils::globalVariables(c(".", "hru", "plant_name", "name", "value", "yield_mean",
                          "run", "parameter", "values", "yld_50", "yld_25", "yld_75",
-                         "model_path", "abs_min", "abs_max"))
+                         "model_path", "abs_min", "abs_max", "p", "run_id"))
diff --git a/R/helper.R b/R/helper.R
@@ -248,6 +248,7 @@ truncate <- function(x, n, side = 'left') {
 #'
 #' @return A vector of HRU ID text strings separated into groups according to
 #' provided values.
+#' @export
 #' @examples
 #' \dontrun{
 #' hyd_hyd <- read_tbl(paste0(model_path, '/hydrology.hyd'))
@@ -355,3 +356,109 @@ fix_dates <- function(runr_obj, obs_obj, trim_start = NULL, trim_end = NULL){
   }
   return(list(sim=runr_obj, obs=obs_obj))
 }
+
+# Flow duration curve calculation functions ------------------------------------
+
+#' Calculate Flow Duration Curve (FDC)
+#'
+#' This function calculates the flow duration curve for a given vector or dataframe.
+#'
+#' @param x a vector or a tibble with flow values.
+#'
+#' @return a tibble with sorted values and their corresponding exceedance
+#' probabilities.
+#'
+#' @importFrom tibble tibble
+#' @importFrom dplyr mutate %>%
+#'
+#' @examples
+#' \dontrun{
+#' fdc <- calc_fdc(c(3, 1, 4, 1, 5, 9, 2, 6, 5))
+#' }
+#' @keywords internal
+
+calc_fdc <- function(x) {
+  if(is.vector(x)) {
+    x <- tibble(value = x)
+  }
+
+  n <- nrow(x)
+
+  x %>%
+    apply(., 2, sort, decreasing = TRUE) %>%
+    as_tibble(.) %>%
+    mutate(p = 100 * 1:n / (n + 1), .before = 1)
+}
+
+#' Calculate RSR for Flow Duration Curve Segments
+#'
+#' This function calculates the ratio of RMSE and standard deviation for
+#' different segments of the flow duration curve (FDC).
+#'
+#' @param fdc_sim a tibble with simulated flow data.
+#' @param fdc_obs a tibble with observed flow data.
+#' @param quantile_splits a numeric vector with quantiles for splitting the FDC.
+#' @param out_tbl character specifying the output format ('long' or 'wide').
+#' Default \code{out_tbl = 'long'}.
+#'
+#' @return a tibble with RSR values for the different segments of the FDC.
+#'
+#' @importFrom dplyr select mutate bind_cols bind_rows %>%
+#' @importFrom purrr map2 map_dbl
+#'
+#' @examples
+#' \dontrun{
+#' fdc_sim <- calc_fdc(runif(100))
+#' fdc_obs <- calc_fdc(runif(100))
+#' rsr_values <- calc_fdc_rsr(fdc_sim, fdc_obs, c(5, 20, 70, 95))
+#' }
+#' @keywords internal
+
+
+calc_fdc_rsr <- function(fdc_sim, fdc_obs, quantile_splits, out_tbl = 'long') {
+  if(all(quantile_splits <= 1)) {
+    quantile_splits <- 100 * quantile_splits
+  }
+  quantile_splits <- sort(unique(c(0, 100, quantile_splits)))
+  p_cuts <- cut(fdc_obs$p, quantile_splits)
+  obs <- split(select(fdc_obs, -p), p_cuts)
+  sim <- split(select(fdc_sim, -p), p_cuts)
+
+  rsr_list <- map2(sim, obs, ~ rsr_df(.x, .y[[1]]))
+
+  if(out_tbl == 'long') {
+    n_col <- length(quantile_splits) - 1
+    col_names <- paste0('p_', quantile_splits[1:n_col],
+                        '_',  quantile_splits[2:(n_col + 1)])
+    rsr <- bind_cols(rsr_list) %>%
+      set_names(col_names) %>%
+      mutate(., run = names(fdc_sim)[2:ncol(fdc_sim)], .before = 1)
+  } else {
+    rsr <- rsr_list %>%
+      bind_rows(.) %>%
+      mutate(p = unique(p_cuts), .before = 1)
+  }
+  return(rsr)
+}
+
+#' Calculate RSR for Dataframe Segments
+#'
+#' This function calculates the RSR values for the different segments of the data.
+#'
+#' @param df_sim a dataframe with simulated values.
+#' @param v_obs a dataframe with observed values.
+#'
+#' @return a numeric vector with RSR values for each segment.
+#'
+#' @importFrom purrr map_dbl
+#' @importFrom hydroGOF rsr
+#'
+#' @examples
+#' \dontrun{
+#' rsr_values <- rsr_df(df_sim, v_obs)
+#' }
+#' @keywords internal
+
+rsr_df <- function(df_sim, v_obs) {
+  map_dbl(df_sim, ~ rsr(.x, v_obs))
+}
diff --git a/R/prepare.R b/R/prepare.R
@@ -124,3 +124,160 @@ calculate_wyr <- function(sim) {
 
   return(wyr)
 }
+
+#' Calculate All Performance Metrics
+#'
+#' This function calculates various performance metrics for a given simulation
+#' and observed data.
+#'
+#' @param sim Object from SWATrunR
+#' @param obs Dataframe for the observed data with two columns: date and value
+#' @param par_name (optional) Character for name of the parameter set to be used.
+#' Default \code{par_name = NULL}, which select first variable in sim object.
+#' But if you have multiple parameter sets, you can use this argument to select
+#' the one you want to use. Example par_name = "flo_day" or par_name = no3_day_conc".
+#' @param perf_metrics (optional) Character vector with the names of the performance metrics
+#' to used in the rank_tot calculation. Default \code{perf_metrics = NULL},
+#' which means that all performance metrics will be used in calculation.
+#' Other example could be perf_metrics = c("kge", "nse"), which means that only
+#' KGE and NSE will be used in calculation.
+#' @param period (optional) Character describing, which time interval to display.
+#' Default \code{period = NULL}, which mean not activated, other examples are
+#' "day", "week", "month", etc).
+#' See [lubridate::floor_date](https://www.rdocumentation.org/packages/lubridate/versions/1.3.3/topics/floor_date) for details.
+#' @param fn_summarize (optional) Function to recalculate to the specified time interval.
+#' Default \code{fn_summarize ="mean"}, other examples are "median", "sum". See [dplyr::summarise](https://dplyr.tidyverse.org/reference/summarise.html) for details.
+#' @return A dataframe with the performance metrics and ranking.
+#' @importFrom dplyr mutate group_by summarize_all left_join select everything row_number %>%
+#' @importFrom lubridate month floor_date
+#' @importFrom purrr map_dbl map2 reduce
+#' @importFrom stats cor
+#' @importFrom readr parse_number
+#' @importFrom hydroGOF NSE KGE pbias mae rsr
+#' @export
+#' @examples
+#' \dontrun{
+#' obj_tbl <- calculate_performance(sim = sim_flow, obs, "flo_day", c("kge", "nse"), "month", "sum")
+#' }
+#' @keywords calculate
+
+calculate_performance <- function(sim, obs, par_name = NULL, perf_metrics = NULL,
+                                  period = NULL,
+                                  fn_summarize = 'mean') {
+
+  if(is.null(par_name)) {
+    if(length(sim$simulation) > 1) {
+      warning(paste0("You have multiple variable sets in the simulation object.\n
+      They are ", paste(names(sim$simulation), collapse = ", "),
+                     "\nCurrently, the first one is used, which is ", names(sim$simulation)[1],
+                     ".\n If you want to use another one, please specify, which one you want to use with 'par_name' argument."))
+    }
+    # Filter to parameter set of interest.
+    sim <- sim$simulation[[1]]
+  } else {
+    sim <- sim$simulation[[par_name]]
+  }
+
+  # Adding list of metric if not provided
+  if(is.null(perf_metrics)) perf_metrics <- c("nse", "kge", "pbias", "r2", "mae", "rsr")
+
+  # Initializing result table
+  t <- data.frame(run_id = parse_number(names(sim[-1])))
+
+  # Calculate the mean absolute error (MAE) for the average monthly values.
+  if("mae" %in% perf_metrics){
+    sim_m <- mutate(sim, date = month(date)) %>%
+      group_by(date) %>%
+      summarize_all(get(fn_summarize))
+    obs_m <- mutate(obs, date = month(date)) %>%
+      group_by(date) %>%
+      summarize_all(get(fn_summarize))
+    t$mae <- map_dbl(select(sim_m, - date), ~mae(.x, obs_m$value))
+    t$rank_mae <- rank(abs(t$mae))
+
+  }
+
+  # Change the time step
+  if(!is.null(period)) {
+    sim <- mutate(sim, date = floor_date(date , period)) %>%
+      group_by(date) %>%
+      summarize_all(get(fn_summarize))
+    obs <- mutate(obs, date = floor_date(date , period)) %>%
+      group_by(date) %>%
+      summarize_all(get(fn_summarize))
+  }
+
+  # Calculate the root square ratio
+  if("rsr" %in% perf_metrics){
+    # Calculate flow duration curves (FDC) for observed data and the simulations.
+    fdc_obs <- calc_fdc(obs$value)
+    fdc_sim <- calc_fdc(select(sim, -date))
+
+    # Splitting intervals for the flow duration curve (FDC)
+    p <- c(5, 20, 70, 95)
+    p_lbl <- c('p_0_5', 'p_5_20', 'p_20_70', 'p_70_95', 'p_95_100')
+
+    perf_metrics <- c(perf_metrics, gsub("p", "rsr", p_lbl))
+
+    # Calculate the ratio of RSME and standard deviation for different segments
+    # of the FDC (same as in the publications of the Kiel working group).
+    rsr_fdc <- calc_fdc_rsr(fdc_sim, fdc_obs, p)
+
+
+    fdc_thrs <- c(max(fdc_obs$value),
+                  approx(fdc_obs$p, fdc_obs$value, p)$y,
+                  -0.1)
+    # Separate the hydrograph into high medium and low flows.
+    obs_sep <- map2(fdc_thrs[1:(length(fdc_thrs) - 1)],
+                    fdc_thrs[2:length(fdc_thrs)],
+                    ~ mutate(obs, value = ifelse(value <= .x & value > .y, value, NA))) %>%
+      map2(., p_lbl, ~ set_names(.x, c('date', .y))) %>%
+      reduce(., left_join, by = 'date')
+
+    t$rsr_vh <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_0_5))
+    t$rsr_h <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_5_20))
+    t$rsr_m <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_20_70))
+    t$rsr_l <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_70_95))
+    t$rsr_vl <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_95_100))
+
+    t$rsr_0_5 <- -rsr_fdc$p_0_5
+    t$rsr_5_20 <- -rsr_fdc$p_5_20
+    t$rsr_20_70 <- -rsr_fdc$p_20_70
+    t$rsr_70_95 <- -rsr_fdc$p_70_95
+    t$rsr_95_100 <- -rsr_fdc$p_95_100
+
+    t$rank_rsr_0_5 <- rank(-t$rsr_0_5)
+    t$rank_rsr_5_20 <- rank(-t$rsr_5_20)
+    t$rank_rsr_20_70 <- rank(-t$rsr_20_70)
+    t$rank_rsr_70_95 <- rank(-t$rsr_70_95)
+    t$rank_rsr_95_100 <- rank(-t$rsr_95_100)
+  }
+  # Calculate the Nash-Sutcliffe efficiency (NSE)
+  if ("nse" %in% perf_metrics){
+    t$nse <- map_dbl(select(sim, - date), ~NSE(.x, obs$value))
+    t$rank_nse <- rank(-t$nse)
+  }
+  # Calculate the Kling-Gupta efficiency (KGE)
+  if("kge" %in% perf_metrics){
+    t$kge <- map_dbl(select(sim, - date), ~KGE(.x, obs$value))
+    t$rank_kge <- rank(-t$kge)
+  }
+  # Calculate the percent bias (PBIAS)
+  if("pbias" %in% perf_metrics){
+    t$pbias <- map_dbl(select(sim, - date), ~pbias(.x, obs$value))
+    t$rank_pbias <- rank(abs(-abs(t$pbias)))
+  }
+  # Calculate the coefficient of determination (R2)
+  if("r2" %in% perf_metrics){
+    t$r2 <- map_dbl(select(sim, - date), ~cor(.x, obs$value)^2)
+    t$rank_r2 <- rank(-t$r2)
+  }
+
+  # Calculate rank_tot.
+  t <- t %>% mutate(rank_tot = as.integer(rank(rowSums(
+    select(., starts_with(paste0('rank_', tolower(perf_metrics)))))))) %>%
+    select(run_id, everything())
+
+  return(t)
+}
+
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@ Introduction to SWATtunR
 
 # SWATtunR
 
-[![](https://img.shields.io/badge/devel%20version-0.0.1.9006-blue.svg)](https://github.com/biopsichas/SWATtunR)
+[![](https://img.shields.io/badge/devel%20version-0.0.1.9008-blue.svg)](https://github.com/biopsichas/SWATtunR)
 [![](https://img.shields.io/github/last-commit/biopsichas/SWATtunR.svg)](https://github.com/biopsichas/SWATtunR/commits/green)
 [![](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
 [![Project Status: Active - The project has reached a stable, usable

diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html