Skip to content

Commit

Permalink
6a step added
Browse files Browse the repository at this point in the history
  • Loading branch information
biopsichas committed Jun 24, 2024
1 parent d9dba0f commit f247ff2
Show file tree
Hide file tree
Showing 48 changed files with 1,072 additions and 47 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SWATtunR
Type: Package
Title: Soft & Hard Calibration, Validation Package for SWAT+ models
Version: 0.0.1.9006
Version: 0.0.1.9008
Authors@R: c(
person("Svajunas", "Plunge", , "svajunas.plunge@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = '0000-0001-8897-3349')),
Expand All @@ -23,6 +23,7 @@ Imports:
dplyr,
gridExtra,
ggplot2,
hydroGOF,
lhs,
lubridate,
tibble,
Expand Down
14 changes: 14 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Generated by roxygen2: do not edit by hand

export(calculate_performance)
export(calculate_wyr)
export(copy_file_version)
export(fix_dates)
export(get_conc)
export(id_text_strings)
export(plot_dotty)
export(plot_dotty_yields)
export(plot_esco_range)
Expand All @@ -25,8 +27,10 @@ importFrom(dplyr,group_by)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
importFrom(dplyr,row_number)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(dplyr,summarize_all)
importFrom(ggplot2,aes)
importFrom(ggplot2,coord_cartesian)
importFrom(ggplot2,element_blank)
Expand All @@ -52,19 +56,29 @@ importFrom(ggplot2,theme_bw)
importFrom(ggplot2,vars)
importFrom(ggplot2,ylim)
importFrom(gridExtra,grid.arrange)
importFrom(hydroGOF,KGE)
importFrom(hydroGOF,NSE)
importFrom(hydroGOF,mae)
importFrom(hydroGOF,pbias)
importFrom(hydroGOF,rsr)
importFrom(lhs,randomLHS)
importFrom(lubridate,floor_date)
importFrom(lubridate,month)
importFrom(lubridate,year)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map2_chr)
importFrom(purrr,map2_df)
importFrom(purrr,map_dbl)
importFrom(purrr,map_int)
importFrom(purrr,map_vec)
importFrom(purrr,modify2)
importFrom(purrr,reduce)
importFrom(purrr,set_names)
importFrom(readr,parse_number)
importFrom(readr,write_lines)
importFrom(stats,approx)
importFrom(stats,cor)
importFrom(stats,quantile)
importFrom(stringr,str_remove)
importFrom(stringr,str_replace)
Expand Down
2 changes: 1 addition & 1 deletion R/globals.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
utils::globalVariables(c(".", "hru", "plant_name", "name", "value", "yield_mean",
"run", "parameter", "values", "yld_50", "yld_25", "yld_75",
"model_path", "abs_min", "abs_max"))
"model_path", "abs_min", "abs_max", "p", "run_id"))
107 changes: 107 additions & 0 deletions R/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ truncate <- function(x, n, side = 'left') {
#'
#' @return A vector of HRU ID text strings separated into groups according to
#' provided values.
#' @export
#' @examples
#' \dontrun{
#' hyd_hyd <- read_tbl(paste0(model_path, '/hydrology.hyd'))
Expand Down Expand Up @@ -355,3 +356,109 @@ fix_dates <- function(runr_obj, obs_obj, trim_start = NULL, trim_end = NULL){
}
return(list(sim=runr_obj, obs=obs_obj))
}

# Flow duration curve calculation functions ------------------------------------

#' Calculate Flow Duration Curve (FDC)
#'
#' This function calculates the flow duration curve for a given vector or dataframe.
#'
#' @param x a vector or a tibble with flow values.
#'
#' @return a tibble with sorted values and their corresponding exceedance
#' probabilities.
#'
#' @importFrom tibble tibble
#' @importFrom dplyr mutate %>%
#'
#' @examples
#' \dontrun{
#' fdc <- calc_fdc(c(3, 1, 4, 1, 5, 9, 2, 6, 5))
#' }
#' @keywords internal

calc_fdc <- function(x) {
if(is.vector(x)) {
x <- tibble(value = x)
}

n <- nrow(x)

x %>%
apply(., 2, sort, decreasing = TRUE) %>%
as_tibble(.) %>%
mutate(p = 100 * 1:n / (n + 1), .before = 1)
}

#' Calculate RSR for Flow Duration Curve Segments
#'
#' This function calculates the ratio of RMSE and standard deviation for
#' different segments of the flow duration curve (FDC).
#'
#' @param fdc_sim a tibble with simulated flow data.
#' @param fdc_obs a tibble with observed flow data.
#' @param quantile_splits a numeric vector with quantiles for splitting the FDC.
#' @param out_tbl character specifying the output format ('long' or 'wide').
#' Default \code{out_tbl = 'long'}.
#'
#' @return a tibble with RSR values for the different segments of the FDC.
#'
#' @importFrom dplyr select mutate bind_cols bind_rows %>%
#' @importFrom purrr map2 map_dbl
#'
#' @examples
#' \dontrun{
#' fdc_sim <- calc_fdc(runif(100))
#' fdc_obs <- calc_fdc(runif(100))
#' rsr_values <- calc_fdc_rsr(fdc_sim, fdc_obs, c(5, 20, 70, 95))
#' }
#' @keywords internal


calc_fdc_rsr <- function(fdc_sim, fdc_obs, quantile_splits, out_tbl = 'long') {
if(all(quantile_splits <= 1)) {
quantile_splits <- 100 * quantile_splits
}
quantile_splits <- sort(unique(c(0, 100, quantile_splits)))
p_cuts <- cut(fdc_obs$p, quantile_splits)
obs <- split(select(fdc_obs, -p), p_cuts)
sim <- split(select(fdc_sim, -p), p_cuts)

rsr_list <- map2(sim, obs, ~ rsr_df(.x, .y[[1]]))

if(out_tbl == 'long') {
n_col <- length(quantile_splits) - 1
col_names <- paste0('p_', quantile_splits[1:n_col],
'_', quantile_splits[2:(n_col + 1)])
rsr <- bind_cols(rsr_list) %>%
set_names(col_names) %>%
mutate(., run = names(fdc_sim)[2:ncol(fdc_sim)], .before = 1)
} else {
rsr <- rsr_list %>%
bind_rows(.) %>%
mutate(p = unique(p_cuts), .before = 1)
}
return(rsr)
}

#' Calculate RSR for Dataframe Segments
#'
#' This function calculates the RSR values for the different segments of the data.
#'
#' @param df_sim a dataframe with simulated values.
#' @param v_obs a dataframe with observed values.
#'
#' @return a numeric vector with RSR values for each segment.
#'
#' @importFrom purrr map_dbl
#' @importFrom hydroGOF rsr
#'
#' @examples
#' \dontrun{
#' rsr_values <- rsr_df(df_sim, v_obs)
#' }
#' @keywords internal

rsr_df <- function(df_sim, v_obs) {
map_dbl(df_sim, ~ rsr(.x, v_obs))
}
157 changes: 157 additions & 0 deletions R/prepare.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,160 @@ calculate_wyr <- function(sim) {

return(wyr)
}

#' Calculate All Performance Metrics
#'
#' This function calculates various performance metrics for a given simulation
#' and observed data.
#'
#' @param sim Object from SWATrunR
#' @param obs Dataframe for the observed data with two columns: date and value
#' @param par_name (optional) Character for name of the parameter set to be used.
#' Default \code{par_name = NULL}, which select first variable in sim object.
#' But if you have multiple parameter sets, you can use this argument to select
#' the one you want to use. Example par_name = "flo_day" or par_name = no3_day_conc".
#' @param perf_metrics (optional) Character vector with the names of the performance metrics
#' to used in the rank_tot calculation. Default \code{perf_metrics = NULL},
#' which means that all performance metrics will be used in calculation.
#' Other example could be perf_metrics = c("kge", "nse"), which means that only
#' KGE and NSE will be used in calculation.
#' @param period (optional) Character describing, which time interval to display.
#' Default \code{period = NULL}, which mean not activated, other examples are
#' "day", "week", "month", etc).
#' See [lubridate::floor_date](https://www.rdocumentation.org/packages/lubridate/versions/1.3.3/topics/floor_date) for details.
#' @param fn_summarize (optional) Function to recalculate to the specified time interval.
#' Default \code{fn_summarize ="mean"}, other examples are "median", "sum". See [dplyr::summarise](https://dplyr.tidyverse.org/reference/summarise.html) for details.
#' @return A dataframe with the performance metrics and ranking.
#' @importFrom dplyr mutate group_by summarize_all left_join select everything row_number %>%
#' @importFrom lubridate month floor_date
#' @importFrom purrr map_dbl map2 reduce
#' @importFrom stats cor
#' @importFrom readr parse_number
#' @importFrom hydroGOF NSE KGE pbias mae rsr
#' @export
#' @examples
#' \dontrun{
#' obj_tbl <- calculate_performance(sim = sim_flow, obs, "flo_day", c("kge", "nse"), "month", "sum")
#' }
#' @keywords calculate

calculate_performance <- function(sim, obs, par_name = NULL, perf_metrics = NULL,
period = NULL,
fn_summarize = 'mean') {

if(is.null(par_name)) {
if(length(sim$simulation) > 1) {
warning(paste0("You have multiple variable sets in the simulation object.\n
They are ", paste(names(sim$simulation), collapse = ", "),
"\nCurrently, the first one is used, which is ", names(sim$simulation)[1],
".\n If you want to use another one, please specify, which one you want to use with 'par_name' argument."))
}
# Filter to parameter set of interest.
sim <- sim$simulation[[1]]
} else {
sim <- sim$simulation[[par_name]]
}

# Adding list of metric if not provided
if(is.null(perf_metrics)) perf_metrics <- c("nse", "kge", "pbias", "r2", "mae", "rsr")

# Initializing result table
t <- data.frame(run_id = parse_number(names(sim[-1])))

# Calculate the mean absolute error (MAE) for the average monthly values.
if("mae" %in% perf_metrics){
sim_m <- mutate(sim, date = month(date)) %>%
group_by(date) %>%
summarize_all(get(fn_summarize))
obs_m <- mutate(obs, date = month(date)) %>%
group_by(date) %>%
summarize_all(get(fn_summarize))
t$mae <- map_dbl(select(sim_m, - date), ~mae(.x, obs_m$value))
t$rank_mae <- rank(abs(t$mae))

}

# Change the time step
if(!is.null(period)) {
sim <- mutate(sim, date = floor_date(date , period)) %>%
group_by(date) %>%
summarize_all(get(fn_summarize))
obs <- mutate(obs, date = floor_date(date , period)) %>%
group_by(date) %>%
summarize_all(get(fn_summarize))
}

# Calculate the root square ratio
if("rsr" %in% perf_metrics){
# Calculate flow duration curves (FDC) for observed data and the simulations.
fdc_obs <- calc_fdc(obs$value)
fdc_sim <- calc_fdc(select(sim, -date))

# Splitting intervals for the flow duration curve (FDC)
p <- c(5, 20, 70, 95)
p_lbl <- c('p_0_5', 'p_5_20', 'p_20_70', 'p_70_95', 'p_95_100')

perf_metrics <- c(perf_metrics, gsub("p", "rsr", p_lbl))

# Calculate the ratio of RSME and standard deviation for different segments
# of the FDC (same as in the publications of the Kiel working group).
rsr_fdc <- calc_fdc_rsr(fdc_sim, fdc_obs, p)


fdc_thrs <- c(max(fdc_obs$value),
approx(fdc_obs$p, fdc_obs$value, p)$y,
-0.1)
# Separate the hydrograph into high medium and low flows.
obs_sep <- map2(fdc_thrs[1:(length(fdc_thrs) - 1)],
fdc_thrs[2:length(fdc_thrs)],
~ mutate(obs, value = ifelse(value <= .x & value > .y, value, NA))) %>%
map2(., p_lbl, ~ set_names(.x, c('date', .y))) %>%
reduce(., left_join, by = 'date')

t$rsr_vh <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_0_5))
t$rsr_h <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_5_20))
t$rsr_m <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_20_70))
t$rsr_l <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_70_95))
t$rsr_vl <- -map_dbl(select(sim, - date), ~rsr(.x, obs_sep$p_95_100))

t$rsr_0_5 <- -rsr_fdc$p_0_5
t$rsr_5_20 <- -rsr_fdc$p_5_20
t$rsr_20_70 <- -rsr_fdc$p_20_70
t$rsr_70_95 <- -rsr_fdc$p_70_95
t$rsr_95_100 <- -rsr_fdc$p_95_100

t$rank_rsr_0_5 <- rank(-t$rsr_0_5)
t$rank_rsr_5_20 <- rank(-t$rsr_5_20)
t$rank_rsr_20_70 <- rank(-t$rsr_20_70)
t$rank_rsr_70_95 <- rank(-t$rsr_70_95)
t$rank_rsr_95_100 <- rank(-t$rsr_95_100)
}
# Calculate the Nash-Sutcliffe efficiency (NSE)
if ("nse" %in% perf_metrics){
t$nse <- map_dbl(select(sim, - date), ~NSE(.x, obs$value))
t$rank_nse <- rank(-t$nse)
}
# Calculate the Kling-Gupta efficiency (KGE)
if("kge" %in% perf_metrics){
t$kge <- map_dbl(select(sim, - date), ~KGE(.x, obs$value))
t$rank_kge <- rank(-t$kge)
}
# Calculate the percent bias (PBIAS)
if("pbias" %in% perf_metrics){
t$pbias <- map_dbl(select(sim, - date), ~pbias(.x, obs$value))
t$rank_pbias <- rank(abs(-abs(t$pbias)))
}
# Calculate the coefficient of determination (R2)
if("r2" %in% perf_metrics){
t$r2 <- map_dbl(select(sim, - date), ~cor(.x, obs$value)^2)
t$rank_r2 <- rank(-t$r2)
}

# Calculate rank_tot.
t <- t %>% mutate(rank_tot = as.integer(rank(rowSums(
select(., starts_with(paste0('rank_', tolower(perf_metrics)))))))) %>%
select(run_id, everything())

return(t)
}

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Introduction to SWATtunR

# SWATtunR

[![](https://img.shields.io/badge/devel%20version-0.0.1.9006-blue.svg)](https://github.com/biopsichas/SWATtunR)
[![](https://img.shields.io/badge/devel%20version-0.0.1.9008-blue.svg)](https://github.com/biopsichas/SWATtunR)
[![](https://img.shields.io/github/last-commit/biopsichas/SWATtunR.svg)](https://github.com/biopsichas/SWATtunR/commits/green)
[![](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
[![Project Status: Active - The project has reached a stable, usable
Expand Down
2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/LICENSE-text.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f247ff2

Please sign in to comment.