minor fixes

IMMM-SFA · Aug 27, 2021 · df3a5fa · df3a5fa
1 parent de6e37f
commit df3a5fa
Show file tree

Hide file tree

Showing 14 changed files with 175 additions and 51 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -24,5 +24,6 @@ Imports:
     nloptr,
     purrr,
     sp,
+    sf,
     clisymbols,
     crayon
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
+export(convert_parameters_to_release_harmonic)
 export(convert_parameters_to_targets)
 export(extrapolate_targets_to_GRanD)
 export(find_closest_dam)
@@ -14,6 +15,9 @@ importFrom(crayon,blue)
 importFrom(crayon,green)
 importFrom(crayon,red)
 importFrom(dplyr,arrange)
+importFrom(dplyr,as_tibble)
+importFrom(dplyr,bind_cols)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,count)
 importFrom(dplyr,filter)
 importFrom(dplyr,first)
@@ -24,6 +28,7 @@ importFrom(dplyr,lead)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,pull)
+importFrom(dplyr,rename)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
 importFrom(dplyr,ungroup)
@@ -34,5 +39,6 @@ importFrom(nloptr,nloptr)
 importFrom(purrr,map_dfr)
 importFrom(readr,cols)
 importFrom(readr,read_csv)
+importFrom(sf,st_read)
 importFrom(sp,spDistsN1)
 importFrom(tibble,tibble)
diff --git a/R/constants.R b/R/constants.R
@@ -2,7 +2,12 @@
 
 # storage capacity variable used
 # "MAXIMUM_STORAGE_CAPACITY" or "REPRESENTATIVE_STORAGE_CAPACITY"
-capacity_variable <- "MAXIMUM_STORAGE_CAPACITY"
+capacity_variable <- "CAP_MCM"
+
+# Canada dams to be added (for Canadian dams in watersheds flowing into United States)
+Canada_GRanD_IDs <- c(250, 257, 262, 265, 268, 273, 274, 279,
+                      282, 283, 286, 287, 288, 289, 723, 724,
+                      725, 726, 1485, 1518, 1520, 1527, 2078, 6866)
 
 # cutoff_year; defines earliest year of data to use...
 #... in target and release rule inferrence
@@ -11,19 +16,22 @@ cutoff_year <- 1995
 # number of points per week for fitting storage curve harmonic
 n_points <- 3
 
-# minimum allowable days of storage data (three years ~ 1095 days)
-min_allowable_days_of_storage <- 1095
+# minimum allowable days of storage data (ten years ~ 3650 days)
+min_allowable_days_of_storage <- 3650
 
 # minimum allowable data points to use release and inflow without any back-calculating
 min_r_i_datapoints <- 260 # 5 years
 
+# minimum allowable number of days of data to define release max min
+min_r_maxmin_days <- 365
+
 # tolerance for r-squared value of release residual model.
 # Models with lower r-squared value than r_sq_tol are discarded.
 r_sq_tol <- 0.2
 
 # release constraint quantile
-r_st_min_quantile <- 0.005
-r_st_max_quantile <- 0.995
+r_st_min_quantile <- 0.05
+r_st_max_quantile <- 0.95
 
 
 # unit conversion

diff --git a/R/extrapolate_targets_to_GRanD.R b/R/extrapolate_targets_to_GRanD.R
@@ -1,29 +1,40 @@
 #' extrapolate_targets_to_GRanD
 #'
 #' @description use an set of inferred storage targets to extrapolate storage targets to all dams in GRanD
-#' @param USRDATS_path path to USRDATS data
+#' @param GRanD_path path to v1.3 of GRanD database
 #' @param targets_path path to fitted targets (generated using fit_targets())
 #' @param include_all T/F (if T, returns results for dams in the trained set)
 #' @param HUC_04_correction T/F (if T, replaces Great Lakes HUC2 with neighboring HUC2s...(deals with missing data for all of Great Lakes))
-#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join
+#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join bind_rows bind_cols rename
 #' @importFrom purrr map_dfr
 #' @return tibble dam ids to copy
 #' @export
 #'
-extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path, include_all = FALSE, HUC_04_correction = TRUE){
+extrapolate_targets_to_GRanD <- function(GRanD_path, targets_path,
+                                         include_all = FALSE, HUC_04_correction = TRUE,
+                                         distance_only = FALSE){
 
   # read GRanD data and join HUCs
-  read_reservoir_attributes(USRDATS_path) %>%
-    select(GRAND_ID, DAM_NAME, STATE, S_RATIO,
-           flood = P_FLC, hydro = P_HYD, supply = P_SUP, irr = P_IRR,
-           lon = LONG, lat = LAT) %>%
-    filter(!is.na(GRAND_ID),
-           !grepl("CAN", STATE),
-           !grepl("MEX", STATE)) %>%
+  read_reservoir_attributes(GRanD_path) %>%
+    select(GRAND_ID, DAM_NAME, STATE = ADMIN_UNIT,
+           flood = USE_FCON, hydro = USE_ELEC,
+           supply = USE_SUPP, irr = USE_IRRI,
+           lon = LONG_DD, lat = LAT_DD) %>%
     left_join(read_GRanD_HUC8(), by = "GRAND_ID") %>%
+    mutate(HUC8 = if_else(STATE == "British Columbia", "1702XXXX", HUC8),
+           HUC8 = if_else(STATE == "Ontario", "09XXXXXX", HUC8),
+           HUC8 = if_else(STATE == "Saskatchewan", "0901XXXX", HUC8),
+           HUC8 = if_else(STATE == "Quebec", "02XXXXXX", HUC8)) %>%
+    filter(!is.na(HUC8)) %>%
     mutate(GRAND_ID = as.character(GRAND_ID),
            HUC8 = as.character(HUC8)) %>%
-    mutate(HUC4 = substr(HUC8, 1, 4)) ->
+    mutate(HUC4 = substr(HUC8, 1, 4)) %>%
+    # ADJUSTMENT FOR HUC 0904 (distal from most HUC09)
+    mutate(HUC4 = if_else(HUC4 == "0904", "1005", HUC4)) %>%
+    mutate(flood = if_else(is.na(flood), FALSE, TRUE),
+           hydro = if_else(is.na(hydro), FALSE, TRUE),
+           supply = if_else(is.na(supply), FALSE, TRUE),
+           irr = if_else(is.na(irr), FALSE, TRUE)) ->
     dam_attributes_and_HUCs
 
   # get list of dams that are already fitted with targets
@@ -73,18 +84,33 @@ extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path, include_all
                GRAND_ID %in% fitted_dams) ->
         dams_same_HUC2
 
-      find_closest_dam(dam_attr, dams_same_HUC4) -> huc4_match
-      find_closest_dam(dam_attr, dams_same_HUC2) -> huc2_match
+      if(distance_only == FALSE){
 
-      if(huc4_match[["matches"]] >= 2) return(tibble(dam, match = huc4_match[["GRAND_ID"]]))
+        find_closest_dam(dam_attr, dams_same_HUC4) -> huc4_match
+        find_closest_dam(dam_attr, dams_same_HUC2) -> huc2_match
 
-      if(huc4_match[["matches"]] < 2 & huc2_match[["matches"]] >= 2) return(tibble(dam, match = huc2_match[["GRAND_ID"]]))
+        if(huc4_match[["matches"]] >= 1) return(tibble(dam, HUC4_match = T) %>% bind_cols(huc4_match) %>% rename(match = GRAND_ID))
 
-      if(huc4_match[["matches"]] > 0) return(tibble(dam, match = huc4_match[["GRAND_ID"]]))
+        if(huc4_match[["matches"]] < 1 & huc2_match[["matches"]] >= 1) return(tibble(dam, HUC4_match = F) %>% bind_cols(huc2_match) %>% rename(match = GRAND_ID))
+
+        if(huc4_match[["matches"]] > 0) return(tibble(dam, HUC4_match = T) %>% bind_cols(huc4_match) %>% rename(match = GRAND_ID))
+
+        if(huc2_match[["matches"]] > 0) return(tibble(dam, HUC4_match = F) %>% bind_cols(huc2_match) %>% rename(match = GRAND_ID))
+
+        return(tibble(dam, HUC4_match = F) %>% bind_cols(huc2_match) %>% rename(match = GRAND_ID))
+
+      }else{
+
+        find_closest_dam(dam_attr,
+                         bind_rows(dams_same_HUC4, dams_same_HUC2),
+                         distance_only = TRUE) -> distance_match
+
+        return(tibble(dam, match = distance_match[["GRAND_ID"]]))
+
+
+      }
 
-      if(huc2_match[["matches"]] > 0) return(tibble(dam, match = huc2_match[["GRAND_ID"]]))
 
-      return(tibble(dam, match = huc2_match[["GRAND_ID"]]))
 
     })
 

diff --git a/R/fit_release_function.R b/R/fit_release_function.R
@@ -2,6 +2,7 @@
 #'
 #' @description fit parameters of weekly-varying release function
 #' @param USRDATS_path path to USRDATS data
+#' @param GRanD_path path to v1.3 of GRanD database
 #' @param dam_id integer id of dam; same as GRanD ID
 #' @param targets_path path to fitted targets. If NULL, fit_targets() will be run.
 #' @importFrom lubridate year epiweek
@@ -10,9 +11,9 @@
 #' @return tibble of observed dam data (storage, inflow, release)
 #' @export
 #'
-fit_release_function <- function(USRDATS_path, dam_id, targets_path){
+fit_release_function <- function(USRDATS_path, GRanD_path, dam_id, targets_path){
 
-  read_reservoir_attributes(USRDATS_path, dam_id) ->
+  read_reservoir_attributes(GRanD_path, dam_id) ->
     reservoir_attributes
 
   info(paste0("Fitting release function for dam ", dam_id, ": ",
@@ -22,7 +23,7 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
 
     #info("targets_path not supplied; fitting storage targets...")
 
-    fit_targets(USRDATS_path, dam_id) -> fitted_targets
+    fit_targets(USRDATS_path, GRanD_path, dam_id, reservoir_attributes) -> fitted_targets
 
     tibble(pf = fitted_targets[["NSR upper bound"]],
            pm = fitted_targets[["NSR lower bound"]]) ->
@@ -110,14 +111,14 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
   daily_ops_non_spill_periods %>% filter(!is.na(r)) %>% .[["r"]] -> r_daily
 
   # use daily release data to define max release (if possible)
-  if(length(r_daily) > min_r_i_datapoints * 7){
+  if(length(r_daily) > min_r_maxmin_days){
     r_st_max <- ((quantile(r_daily, r_st_max_quantile) %>% unname() %>% round(4) * 7) / i_mean) - 1
     r_st_min <- ((quantile(r_daily, r_st_min_quantile) %>% unname() %>% round(4) * 7) / i_mean) - 1
   }else{
     training_data_unfiltered %>%
       filter(s_start + i < storage_capacity_MCM) %>% .[["r_st"]] -> r_st_vector
-    quantile(r_st_vector, r_st_min_quantile) %>% unname() %>% round(4) -> r_st_min
-    quantile(r_st_vector, r_st_max_quantile) %>% unname() %>% round(4) -> r_st_max
+    quantile(r_st_vector, r_st_min_quantile, na.rm = TRUE) %>% unname() %>% round(4) -> r_st_min
+    quantile(r_st_vector, r_st_max_quantile, na.rm = TRUE) %>% unname() %>% round(4) -> r_st_max
   }
 
   # create final training data for normal operating period

diff --git a/R/fit_targets.R b/R/fit_targets.R
@@ -2,16 +2,20 @@
 #'
 #' @description fit parameters of storage targets
 #' @param USRDATS_path path to USRDATS data
+#' @param GRanD_path path to v1.3 of GRanD database
 #' @param dam_id integer id of dam; same as GRanD ID
+#' @param reservoir_attributes tibble of GRanD attributes for selected dam
 #' @importFrom lubridate year epiweek
 #' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join
 #' @return tibble of observed dam data (storage, inflow, release)
 #' @export
 #'
-fit_targets <- function(USRDATS_path, dam_id){
+fit_targets <- function(USRDATS_path, GRanD_path, dam_id, reservoir_attributes){
 
-  read_reservoir_attributes(USRDATS_path, dam_id) ->
-    reservoir_attributes
+  if(missing(reservoir_attributes)){
+    read_reservoir_attributes(GRanD_path, dam_id) ->
+      reservoir_attributes
+  }
 
   info(paste0("Fitting targets for dam ", dam_id, ": ",
                  reservoir_attributes[["DAM_NAME"]]))

diff --git a/R/functions.R b/R/functions.R
@@ -29,6 +29,36 @@ convert_parameters_to_targets <- function(parameters, target_name, constrain = T
 
 }
 
+#' convert_parameters_to_release_harmonic
+#'
+#' @description fit parameters of a constrained harmonic
+#' @param parameters vector of length 4 giving, in order, first sine term, first cosine term, second sine term, second cosine term.
+#' @return a tibble of storage target levels by week
+#' @importFrom tibble tibble
+#' @importFrom dplyr mutate
+#' @export
+#'
+convert_parameters_to_release_harmonic <- function(parameters){
+
+  parameters[1] -> p1
+  parameters[2] -> p2
+  parameters[3] -> p3
+  parameters[4] -> p4
+
+
+  tibble(epiweek = 1:52) %>%
+    mutate(
+      release_harmonic =
+        p1 * sin(2 * pi * epiweek / 52) +
+        p2 * cos(2 * pi * epiweek / 52) +
+        p3 * sin(4 * pi * epiweek / 52) +
+        p4 * cos(4 * pi * epiweek / 52)
+    ) -> release_harmonic
+
+  return(release_harmonic)
+
+}
+
 
 #' fit_constrained_harmonic
 #'
@@ -97,16 +127,34 @@ fit_constrained_harmonic <- function(data_for_harmonic_fitting){
 #' @description finds the dam that is closest in terms of purposes served and Euclidean distance
 #' @param dam_attr attributes of target dam
 #' @param other_dams table of attributes for possible canditate dams to replicate
+#' @param distance_only allows for use of closest distance only (disregarding purpose)
 #' @return GRAND_ID of the target dam
 #' @importFrom tibble tibble
 #' @importFrom dplyr mutate if_else arrange select first
 #' @importFrom sp spDistsN1
 #' @export
 #'
-find_closest_dam <- function(dam_attr, other_dams){
+find_closest_dam <- function(dam_attr, other_dams, distance_only = FALSE){
 
   if(nrow(other_dams) == 0) return(tibble(GRAND_ID = NA_character_, matches = -Inf))
 
+  if(distance_only == TRUE){
+
+    other_dams %>%
+      mutate(
+        euc_dist = spDistsN1(
+          as.matrix(select(other_dams, lon, lat)),
+          as.matrix(select(dam_attr, lon, lat)),
+          longlat = TRUE
+        )
+      ) %>%
+      arrange(euc_dist) %>% .[1,] -> best_match
+
+    return(select(best_match, GRAND_ID) %>%
+             mutate(clear_winner = NA, matches = NA))
+
+  }
+
   dam_attr[["flood"]] -> fl
   dam_attr[["hydro"]] -> hy
   dam_attr[["supply"]] -> su
@@ -128,20 +176,23 @@ find_closest_dam <- function(dam_attr, other_dams){
     filter(matches == first(.[["matches"]])) -> best_matches
 
   # return if there is a clear winning match
-  if(nrow(best_matches) == 1) return(select(best_matches, GRAND_ID, matches))
+  if(nrow(best_matches) == 1) return(select(best_matches, GRAND_ID, matches) %>%
+                                       mutate(clear_winner = TRUE))
 
   # otherwise find closest distance
 
   best_matches %>%
     mutate(
       euc_dist = spDistsN1(
         as.matrix(select(best_matches, lon, lat)),
-        as.matrix(select(dam_attr, lon, lat))
+        as.matrix(select(dam_attr, lon, lat)),
+        longlat = TRUE
         )
       ) %>%
-    arrange(-euc_dist) %>% .[1,] -> best_match
+    arrange(euc_dist) %>% .[1,] -> best_match
 
-  return(select(best_match, GRAND_ID, matches))
+  return(select(best_match, GRAND_ID, matches) %>%
+           mutate(clear_winner = FALSE))
 
 }
 

diff --git a/R/inputs.R b/R/inputs.R
@@ -30,15 +30,16 @@ read_reservoir_data <- function(USRDATS_path, dam_id){
 #' @param USRDATS_path directory containing reservoir input time series
 #' @param dam_id integer id of dam; same as GRanD ID. If NULL, all attributes are returned.
 #' @importFrom readr read_csv cols
-#' @importFrom dplyr select
+#' @importFrom dplyr select as_tibble
+#' @importFrom sf st_read
 #' @return tibble of reservoir attributes for selected dams
 #' @export
 #'
-read_reservoir_attributes <- function(USRDATS_path, dam_id = NULL){
+read_reservoir_attributes <- function(GRanD_path, dam_id = NULL){
 
-  read_csv(paste0(USRDATS_path, "/attributes/",
-                  "Reservoir_Attributes_Publishable.csv"),
-           col_types = cols(), progress = FALSE) -> attributes_all
+  st_read(paste0(GRanD_path, "GRanD_dams_v1_3.shp")) %>%
+    as_tibble() %>%
+    filter(COUNTRY %in% "United States") -> attributes_all
 
   if(is.null(dam_id)){
     return(attributes_all)

diff --git a/man/convert_parameters_to_release_harmonic.Rd b/man/convert_parameters_to_release_harmonic.Rd
-Original file line number
+Diff line change
@@ Expand Up / @@ -24,5 +24,6 @@ Imports: @@
         nloptr,
         purrr,
         sp,
+        sf,
         clisymbols,
         crayon