diff --git a/% b/% new file mode 100644 index 0000000..e69de29 diff --git a/DESCRIPTION b/DESCRIPTION index 995d24e..f1b817d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,7 @@ Suggests: Imports: magrittr, tibble, - vroom, + readr, dplyr, lubridate, nloptr, diff --git a/NAMESPACE b/NAMESPACE index b4b9a5d..18336b5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,10 @@ # Generated by roxygen2: do not edit by hand export("%>%") -export(convert_parameters_to_storage_targets) +export(convert_parameters_to_targets) export(extrapolate_targets_to_GRanD) export(find_closest_dam) +export(fit_release_function) export(fit_targets) export(read_GRanD_HUC8) export(read_reservoir_attributes) @@ -18,6 +19,7 @@ importFrom(dplyr,first) importFrom(dplyr,group_by) importFrom(dplyr,if_else) importFrom(dplyr,last) +importFrom(dplyr,lead) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,pull) @@ -29,7 +31,7 @@ importFrom(lubridate,year) importFrom(magrittr,"%>%") importFrom(nloptr,nloptr) importFrom(purrr,map_dfr) +importFrom(readr,cols) +importFrom(readr,read_csv) importFrom(sp,spDistsN1) importFrom(tibble,tibble) -importFrom(vroom,cols) -importFrom(vroom,vroom) diff --git a/R/constants.R b/R/constants.R index 9814c5f..b5740df 100644 --- a/R/constants.R +++ b/R/constants.R @@ -13,3 +13,12 @@ n_points <- 3 # minimum allowable days of storage data (three years ~ 1095 days) min_allowable_days_of_storage <- 1095 + +# minimum allowable data points to use release and inflow without any back-calculating +min_r_i_datapoints <- 260 # 5 years + + +# unit conversion +m3_to_Mm3 <- 1e-6 +seconds_per_day <- 86400 +weeks_per_year <- 365.25 / 7 diff --git a/R/extrapolate_targets_to_GRanD.R b/R/extrapolate_targets_to_GRanD.R index 2dffe74..35e0e44 100644 --- a/R/extrapolate_targets_to_GRanD.R +++ b/R/extrapolate_targets_to_GRanD.R @@ -4,7 +4,6 @@ #' @param USRDATS_path path to USRDATS data #' @param targets_path path to fitted targets (generated using fit_targets()) #' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join -#' @importFrom vroom vroom cols #' @importFrom purrr map_dfr #' @return tibble dam ids to copy #' @export diff --git a/R/fit_release_function.R b/R/fit_release_function.R new file mode 100644 index 0000000..6da0f5b --- /dev/null +++ b/R/fit_release_function.R @@ -0,0 +1,150 @@ +#' fit_release_function +#' +#' @description fit parameters of weekly-varying release function +#' @param USRDATS_path path to USRDATS data +#' @param dam_id integer id of dam; same as GRanD ID +#' @param targets_path path to fitted targets. If NULL, fit_targets() will be run. +#' @importFrom lubridate year epiweek +#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join lead +#' @importFrom readr read_csv cols +#' @return tibble of observed dam data (storage, inflow, release) +#' @export +#' +fit_release_function <- function(USRDATS_path, dam_id, targets_path){ + + read_reservoir_attributes(USRDATS_path, dam_id) -> + reservoir_attributes + + info(paste0("Fitting release function for dam ", dam_id, ": ", + reservoir_attributes[["DAM_NAME"]])) + + if(missing(targets_path)){ + + info("targets_path not supplied; fitting storage targets...") + + fit_targets(USRDATS_path, dam_id) -> fitted_targets + + tibble(pf = fitted_targets[["flood target parameters"]], + pm = fitted_targets[["conservation target parameters"]]) -> + storage_target_parameters + + }else{ + # read storage target parameters straight from file if already fitted + read_csv(paste0(targets_path, "/", dam_id, ".csv"), + col_types = cols()) -> + storage_target_parameters + + } + + if(all(is.na(storage_target_parameters))){ + problem("Storage targets unavailable due to lack of data!") + return( + list() + ) + } + + reservoir_attributes[[capacity_variable]] -> + storage_capacity_MCM + + if(fitted_targets$`weekly storage` %>% .[["year"]] %>% last() < cutoff_year){ + cutoff_year <- fitted_targets$`weekly storage` %>% .[["year"]] %>% first() + #problem(paste0("dam ", dam_id, "cutoff year set back to ", first_year_of_data)) + } + + read_reservoir_data(USRDATS_path, dam_id) %>% + mutate(i = i_cumecs * m3_to_Mm3 * seconds_per_day, + r = r_cumecs * m3_to_Mm3 * seconds_per_day) %>% + select(date, s = s_MCM, i, r) %>% + mutate(year = year(date), epiweek = epiweek(date)) %>% + filter(year >= cutoff_year) %>% + aggregate_to_epiweeks() %>% + back_calc_missing_flows() %>% + filter(!is.na(i) & !is.na(r), + i >= 0, r >= 0) -> weekly_ops_NA_removed + + + # RETURN BLANK IF INSUFFICIENT RELEASE/INFOW DATA + if(nrow(weekly_ops_NA_removed) <= min_r_i_datapoints){ + problem("Insufficient data to build release function") + fitted_targets[["mean inflow from GRAND. (MCM / wk)"]] <- reservoir_attributes[["i_MAF_MCM"]] / weeks_per_year + fitted_targets[["mean inflow from obs. (MCM / wk)"]] <- NA_real_ + fitted_targets[["release harmonic parameters"]] <- rep(NA_real_, 4) + fitted_targets[["release residual model coefficients"]] <- rep(NA_real_, 3) + return( + fitted_targets + ) + } + + weekly_ops_NA_removed %>% + left_join(convert_parameters_to_targets(storage_target_parameters[["pf"]], + "upper"), by = "epiweek") %>% + left_join(convert_parameters_to_targets(storage_target_parameters[["pm"]], + "lower"), by = "epiweek") %>% + mutate(avail_pct = 100 * ((s_start) / storage_capacity_MCM)) %>% + mutate(availability_status = (avail_pct - lower) / (upper - lower)) %>% + filter(availability_status <= 1, + availability_status > 0) %>% + mutate( + i_st = (i / mean(i)) - 1, + r_st = (r / mean(i)) - 1 + ) -> + training_data + + + ### harmonic regression (two harmonics) for standardized release + lm( + data = training_data, + r_st ~ 0 + + # first harmonic + sin(2 * pi * epiweek / 52) + + cos(2 * pi * epiweek / 52) + + # second harmonic + sin(4 * pi * epiweek / 52) + + cos(4 * pi * epiweek / 52) + ) %>% .[["coefficients"]] %>% unname() %>% + round(4) -> + st_r_harmonic + + training_data %>% + mutate(st_r_harmonic = + st_r_harmonic[1] * sin(2 * pi * epiweek / 52) + + st_r_harmonic[2] * cos(2 * pi * epiweek / 52) + + st_r_harmonic[3] * sin(4 * pi * epiweek / 52) + + st_r_harmonic[4] * cos(4 * pi * epiweek / 52)) %>% + # ggplot(aes(epiweek, r_st)) + geom_point() + + # geom_point(aes(y = st_r_harmonic), col = "blue") + mutate(r_st_resid = r_st - st_r_harmonic) -> + data_for_linear_model_of_release_residuals + + lm( + data = data_for_linear_model_of_release_residuals, + r_st_resid ~ availability_status + i_st + ) -> st_r_residual_model + + st_r_residual_model[["coefficients"]] %>% unname() %>% + round(3) -> + st_r_residual_model_coef + + if(summary(st_r_residual_model) %>% .[["adj.r.squared"]] < 0.1){ + info("Release residual model has low r-squared; (release will be based harmonic function only)") + st_r_residual_model_coef <- c(0, 0, 0) + } + + # data_for_linear_model_of_release_residuals %>% + # mutate(r_st_predicted = + # st_r_residual_model_coef[1] + + # st_r_residual_model_coef[2] * availability_status + + # st_r_residual_model_coef[3] * i_st, + # r_pred = (1 + (r_st_predicted + st_r_harmonic)) * mean(i)) + # ggplot(aes(r, r_pred)) + geom_point() + geom_abline(slope = 1) + + # scale_x_continuous(trans = "log10") + scale_y_continuous(trans = "log10") %>% + # NULL + + fitted_targets[["mean inflow from GRAND. (MCM / wk)"]] <- reservoir_attributes[["i_MAF_MCM"]] / weeks_per_year + fitted_targets[["mean inflow from obs. (MCM / wk)"]] <- training_data[["i"]] %>% mean() + fitted_targets[["release harmonic parameters"]] <- st_r_harmonic + fitted_targets[["release residual model coefficients"]] <- st_r_residual_model_coef + + return(fitted_targets) + +} diff --git a/R/fit_targets.R b/R/fit_targets.R index 7d7a327..2f14b81 100644 --- a/R/fit_targets.R +++ b/R/fit_targets.R @@ -5,7 +5,6 @@ #' @param dam_id integer id of dam; same as GRanD ID #' @importFrom lubridate year epiweek #' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join -#' @importFrom vroom vroom cols #' @return tibble of observed dam data (storage, inflow, release) #' @export #' @@ -111,10 +110,10 @@ fit_targets <- function(USRDATS_path, dam_id){ p_conservation_harmonic # evaluate targets to remove any superfluous constraints - convert_parameters_to_storage_targets(p_flood_harmonic, - constrain = FALSE) -> targets_flood - convert_parameters_to_storage_targets(p_conservation_harmonic, - constrain = FALSE) -> targets_cons + convert_parameters_to_targets(p_flood_harmonic, + constrain = FALSE) -> targets_flood + convert_parameters_to_targets(p_conservation_harmonic, + constrain = FALSE) -> targets_cons targets_flood[["target"]] %>% max() -> max_flood_target targets_flood[["target"]] %>% min() -> min_flood_target diff --git a/R/functions.R b/R/functions.R index dd05034..92dbfc9 100644 --- a/R/functions.R +++ b/R/functions.R @@ -9,7 +9,7 @@ #' @importFrom dplyr mutate if_else #' @export #' -convert_parameters_to_storage_targets <- function(parameters, target_name, constrain = TRUE){ +convert_parameters_to_targets <- function(parameters, target_name, constrain = TRUE){ parameters[1] -> p1 parameters[2] -> p2 @@ -21,11 +21,11 @@ convert_parameters_to_storage_targets <- function(parameters, target_name, const mutate(target = p1 + p2 * sin(2 * pi * epiweek / 52) + p3 * cos(2 * pi * epiweek / 52)) %>% mutate(target = if_else(target > p4, p4, target)) %>% mutate(target = if_else(target < p5, p5, target)) -> - storage_targets + targets - if(!missing(target_name)) names(storage_targets) <- c("epiweek", target_name) + if(!missing(target_name)) names(targets) <- c("epiweek", target_name) - return(storage_targets) + return(targets) } @@ -62,7 +62,6 @@ fit_constrained_harmonic <- function(data_for_harmonic_fitting){ ) } - # function to evaluate goodness-of-fit of fitted harmonic... # ... (used as objective function in optimization of constrained harmonic) evaluate_harmonic <- function(x){ @@ -141,3 +140,88 @@ find_closest_dam <- function(dam_attr, other_dams){ return(select(best_match, GRAND_ID, matches)) } + + +#' aggregate_to_epiweeks +#' +#' Aggregate data to epiweek and back-calculate release and inflow using mass balance +aggregate_to_epiweeks <- function(x){ + + which(x$epiweek %>% diff() != 0) %>% first() -> start_snip + + if(!(start_snip %in% 1:8)) stop("first water week duration > 8 days!") + + if(start_snip < 7) { + x_snipped <- x[-(1:start_snip), ] + }else{ + x_snipped <- x + } + + x_snipped %>% + mutate(s_end = lead(s, 8)) %>% + group_by(year, epiweek) %>% + summarise(i = sum(i), + r = sum(r), + s_start = first(s), + s_end = first(s_end)) %>% + ungroup() %>% + filter(epiweek != 53, + !is.na(s_end)) +} + +#' back_calc_missing_flows +#' +#' Compute i or r from mass balance (if either is missing) +back_calc_missing_flows <- function(x){ + x %>% + mutate(s_change = s_end - s_start, + r_ = if_else((i - s_change) < 0, 0, i - s_change), + i_ = r + s_change) -> x_ + + x_ %>% + filter(!is.na(r) & !is.na(i)) -> full_data_points + + if(nrow(full_data_points) == 0){ + data_points_on_most_data_scarce_epiweek <- -Inf + }else{ + full_data_points %>% + group_by(epiweek) %>% + count() %>% .[["n"]] %>% min() -> + data_points_on_most_data_scarce_epiweek + } + + + + if(data_points_on_most_data_scarce_epiweek >= min_r_i_datapoints){ + return( + x_ %>% + select(year, epiweek, s_start, i, r) + ) + } + + sum(is.na(x_$i)) -> missing_i + sum(is.na(x_$r)) -> missing_r + + + if(missing_i <= missing_r){ + return( + x_ %>% + mutate(i = if_else(is.na(i) & !is.na(r), i_, i), + r = if_else(is.na(r_), r, r_)) %>% + select(year, epiweek, s_start, i, r) + ) + } + + if(missing_i > missing_r){ + return( + x_ %>% + mutate(r = if_else(is.na(r) & !is.na(i), r_, r), + i = if_else(is.na(i_), i, i_)) %>% + select(year, epiweek, s_start, i, r) + ) + + } + + + +} diff --git a/R/inputs.R b/R/inputs.R index b00abf0..464b669 100644 --- a/R/inputs.R +++ b/R/inputs.R @@ -3,22 +3,22 @@ #' @description reads raw reservoir time series data #' @param USRDATS_path directory containing reservoir input time series #' @param dam_id integer id of dam; same as GRanD ID -#' @importFrom vroom vroom cols +#' @importFrom readr read_csv cols #' @importFrom dplyr select #' @return tibble of observed dam data (storage, inflow, release) #' @export #' read_reservoir_data <- function(USRDATS_path, dam_id){ - vroom(paste0(USRDATS_path, "/TimeSeriesv2/", - dam_id, ".csv"), - col_types = cols(date = "D", - storage = "d", - inflow = "d", - outflow = "d", - elevation = "d", - evaporation = "d"), - progress = FALSE) %>% + read_csv(paste0(USRDATS_path, "/TimeSeriesv2/", + dam_id, ".csv"), + col_types = cols(date = "D", + storage = "d", + inflow = "d", + outflow = "d", + elevation = "d", + evaporation = "d"), + progress = FALSE) %>% # stamp units into column names select(date, s_MCM = storage, i_cumecs = inflow, r_cumecs = outflow) @@ -29,16 +29,16 @@ read_reservoir_data <- function(USRDATS_path, dam_id){ #' @description reads reservoir time series data #' @param USRDATS_path directory containing reservoir input time series #' @param dam_id integer id of dam; same as GRanD ID. If NULL, all attributes are returned. -#' @importFrom vroom vroom cols +#' @importFrom readr read_csv cols #' @importFrom dplyr select #' @return tibble of reservoir attributes for selected dams #' @export #' read_reservoir_attributes <- function(USRDATS_path, dam_id = NULL){ - vroom(paste0(USRDATS_path, "/attributes/", - "Reservoir_Attributes.csv"), - col_types = cols(), progress = FALSE) -> attributes_all + read_csv(paste0(USRDATS_path, "/attributes/", + "Reservoir_Attributes_Publishable.csv"), + col_types = cols(), progress = FALSE) -> attributes_all if(is.null(dam_id)){ return(attributes_all) @@ -58,13 +58,13 @@ read_reservoir_attributes <- function(USRDATS_path, dam_id = NULL){ #' read_GRanD_HUC8 #' #' @description gets HUC8 for all US GRanD IDs -#' @importFrom vroom vroom cols +#' @importFrom readr read_csv cols #' @return tibble of HUC8s #' @export #' read_GRanD_HUC8 <- function(){ - vroom( + read_csv( paste0(system.file("extdata/", package = "rulecurve"), "GRAND_HUC8.csv"), comment = "#", col_types = cols()) diff --git a/man/aggregate_to_epiweeks.Rd b/man/aggregate_to_epiweeks.Rd new file mode 100644 index 0000000..9051a76 --- /dev/null +++ b/man/aggregate_to_epiweeks.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions.R +\name{aggregate_to_epiweeks} +\alias{aggregate_to_epiweeks} +\title{aggregate_to_epiweeks} +\usage{ +aggregate_to_epiweeks(x) +} +\description{ +Aggregate data to epiweek and back-calculate release and inflow using mass balance +} diff --git a/man/back_calc_missing_flows.Rd b/man/back_calc_missing_flows.Rd new file mode 100644 index 0000000..21a24d4 --- /dev/null +++ b/man/back_calc_missing_flows.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions.R +\name{back_calc_missing_flows} +\alias{back_calc_missing_flows} +\title{back_calc_missing_flows} +\usage{ +back_calc_missing_flows(x) +} +\description{ +Compute i or r from mass balance (if either is missing) +} diff --git a/man/convert_parameters_to_storage_targets.Rd b/man/convert_parameters_to_targets.Rd similarity index 76% rename from man/convert_parameters_to_storage_targets.Rd rename to man/convert_parameters_to_targets.Rd index 166672d..ab476ca 100644 --- a/man/convert_parameters_to_storage_targets.Rd +++ b/man/convert_parameters_to_targets.Rd @@ -1,14 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/functions.R -\name{convert_parameters_to_storage_targets} -\alias{convert_parameters_to_storage_targets} +\name{convert_parameters_to_targets} +\alias{convert_parameters_to_targets} \title{convert_parameters_to_storage_targets} \usage{ -convert_parameters_to_storage_targets( - parameters, - target_name, - constrain = TRUE -) +convert_parameters_to_targets(parameters, target_name, constrain = TRUE) } \arguments{ \item{parameters}{vector of length 5 giving, in order, intercept, sine term, cosine term, and upper and lower constraints of the harmonic.} diff --git a/man/fit_release_function.Rd b/man/fit_release_function.Rd new file mode 100644 index 0000000..a2f89a5 --- /dev/null +++ b/man/fit_release_function.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_release_function.R +\name{fit_release_function} +\alias{fit_release_function} +\title{fit_release_function} +\usage{ +fit_release_function(USRDATS_path, dam_id, targets_path) +} +\arguments{ +\item{USRDATS_path}{path to USRDATS data} + +\item{dam_id}{integer id of dam; same as GRanD ID} + +\item{targets_path}{path to fitted targets. If NULL, fit_targets() will be run.} +} +\value{ +tibble of observed dam data (storage, inflow, release) +} +\description{ +fit parameters of weekly-varying release function +}