-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgage_characterization.Rmd
683 lines (528 loc) · 28.5 KB
/
gage_characterization.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
---
title: "Delineating Watersheds, Grabbing Data"
author: "Katie Willi"
date: "2024-12-09"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(tidyverse)
library(sf)
library(terra)
library(elevatr)
library(dataRetrieval)
library(nhdplusTools)
library(StreamCatTools)
library(tmap)
library(climateR)
library(data.table)
library(mapview)
```
## Grab all USGS gages
Using the dataRetrieval package in R, locate all USGS stream gages under 1500 square kilometers that measure discharge.
```{r}
states_oi <- c("Colorado")#, "Wyoming", "Utah", "Kansas")
us_sf_object <- tigris::states() %>%
filter(NAME %in% states_oi)
# Get a list of NWIS sites for all of the states
nwis_sites_by_state <- map(us_sf_object$STUSPS,
~{
discharge_sites <- whatNWISsites(stateCd = .x, parameterCd = "00060") %>%
filter(site_tp_cd == 'ST')
# Only use gages under 1500 square kilometers (as defined by the USGS):
small_enough <- readNWISsite(discharge_sites$site_no) %>%
mutate(drain_area_km = drain_area_va * 2.58999) %>%
filter(drain_area_km <= 1500) %>%
# For future tracking with the NLDI:
mutate(site_pretty=paste0("USGS-",site_no))
return(small_enough)
}
)
nwis_sites <- bind_rows(nwis_sites_by_state) %>%
st_as_sf(coords = c("dec_long_va", "dec_lat_va"), crs = 4269)
mapview(nwis_sites)
```
## Subset to gages with data since 1980 with at least 30 years of data
This is a Katie choice that hasn't been run by MR of SK. But! To reduce our gages to only those with data during the time period we want (... which, I personally don't know what that is - you will need to confirm this with MR and SK) will reduce the number of watersheds you'll have to manually verify substantially. So, here I am filtering our gages to only sites that have at least 30 years worth of daily data starting in 1980. Meaning, if a gage only measured discharge before 1980, we are removing them as candidate gages. We are also removing any gages that don't have at least 30 years of data since 1980.
```{r}
gage_meta <- dataRetrieval::whatNWISdata(siteNumber = nwis_sites$site_no, parameterCd = "00060")
tables <- rvest::read_html('https://help.waterdata.usgs.gov/parameter_cd?group_cd=%') %>%
rvest::html_nodes('table') %>%
rvest::html_table()
pcodes <- tables[[1]] %>%
janitor::clean_names() %>%
dplyr::mutate(parm_cd = stringr::str_pad(as.character(parameter_code), 5, pad = "0"))
inventory <- gage_meta %>%
dplyr::left_join(pcodes,by="parm_cd") %>%
dplyr::select(c(site_name = station_nm,
site_no,
data_type_cd,
site_type_cd = site_tp_cd,
n_obs=count_nu,
begin_date,
end_date,
parameter=parameter_name_description,
code=parm_cd))
site_url <- 'https://maps.waterdata.usgs.gov/mapper/help/sitetype.html'
table <- rvest::read_html(site_url) %>%
rvest::html_nodes('table') %>%
rvest::html_table() #%>%
table <- rbind(table[[1]],table[[2]],table[[3]],table[[4]],table[[5]]) %>%
dplyr::select(site_type_cd = 1,
site_type = 2)
inventory <- left_join(inventory,table,by='site_type_cd') %>%
mutate(data_type=case_when(data_type_cd=="dv" ~ "Daily",
data_type_cd=="uv" ~ "Unit",
data_type_cd=="qw" ~ "Water Quality",
data_type_cd=="gw" ~ "Groundwater Levels",
data_type_cd=="iv" ~ "Unit",
data_type_cd=="sv" ~ "Site Visits",
data_type_cd=="pk" ~ "Peak Measurements",
data_type_cd=="ad" ~ "USGS Annual Water Data Report",
data_type_cd=="aw" ~ "Active Groundwater Level Network",
data_type_cd=="id" ~ "Historic Instantaneous"))
new_data_gages <- inventory %>%
filter(year(end_date) >= "1980",
data_type == "Daily") %>%
# and only sites with at least 30 years worth of data -ish
filter(n_obs >= 365*30)
# Only keep gages with data during or after 1980:
nwis_sites <- nwis_sites %>%
filter(site_no %in% new_data_gages$site_no)
```
Grab CO DWR gages... not currently running this but here's the code for when you want to begin including those gages as well.
```{r, eval = FALSE}
cdwr_sites <- httr::GET(url = "https://dwr.state.co.us/Rest/GET/api/v2/surfacewater/surfacewaterstations/?format=json&fields=stationNum%2Cabbrev%2CusgsSiteId%2CstationName%2CutmX%2CutmY%2Clatitude%2Clongitude%2CstartDate%2CendDate%2CmeasUnit") %>%
httr::content(., as = "text", encoding = "UTF-8") %>%
jsonlite::fromJSON() %>%
.[["ResultList"]] %>%
filter(year(endDate) >= "1980") %>%
filter(is.na(usgsSiteId)) %>%
filter(!is.na(longitude) & !is.na(latitude)) %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4269) %>%
# Obnoxiously station type cannot be accessed on API only GUI
filter(abbrev %in% c(read_csv("data/cdwr.csv") %>%.$Abbrev)) %>%
select(site_no = abbrev,
station_nm = stationName)
# Bind CO DWR and USGS sites together
nwis_sites <- nwis_sites %>% bind_rows(cdwr_sites)}
```
## Delineate stream gage watersheds
For this analysis, we are relying on NHDPlus Version 2. We are able to delineate a stream gage's upstream contributing area (i.e., its watershed) by leveraging the NHD's network index that, for every stream feature in the NHD, identifies all other stream features upstream of it. So, our first task is to find out which NHD stream feature each gage is associated with. All stream features are given a unique ID, called a comid. Every stream feature also has an associated "catchment", or direct contributing area, with the same comid. So here, we are identifying the comid for the stream feature each gage is associated with. **DANGER: YOU WILL NEED TO CONFIRM THE APPROPRIATE FLOWLINE IS SELECTED FOR EVERY GAGE. THERE IS NO WAY TO CONFIRM THEY ARE RIGHT WITHOUT EYEBALLING THEM!**
```{r}
nwis_sites$comid <- NA # attempt 'get_nldi_feature()' first
nwis_sites$comid_coords <- NA # if that doesn't work for all gages, do 'discover_nhdplus_id()'
# first try to get comid using nldi ("verified" correct comid - or at least what USGS says it is)
for(i in 1:nrow(nwis_sites)){
try(nwis_sites$comid[i] <- get_nldi_feature(list("featureSource" = "nwissite", featureID = nwis_sites[i,]$site_pretty))$comid, silent = T)
}
# get the comid using the weirdos' coordinates instead of their gage name
for(i in 1:nrow(nwis_sites)){
nwis_sites$comid_coords[i] <- discover_nhdplus_id(nwis_sites[i,])
}
# back it up if you want:
# saveRDS(nwis_sites, 'data/nwis_gages_comid.RDS')
# Ones where the USGS says they fall on a comid they don't technically fall on. For these, it is highly likely that you will need to go
# one-by-one to identify which COMID is actually appropriate to attribute to them:
weirdos <- nwis_sites %>% filter(comid_coords != comid)
mapview(weirdos) + mapview(get_nhdplus(comid = weirdos$comid), color = "blue", layer.name = "By NLDI") + mapview(get_nhdplus(comid = weirdos$comid_coords), color = "red", layer.name = "By coordinates")
nwis_sites <- nwis_sites %>%
# UPDATE THIS SECTION - This is where the exercise above will dictate which of the comids is most appropriate to use for each gage.
# For now, just removing the weirdos.
mutate(comid_new = ifelse(is.na(comid), comid_coords, comid)) %>%
filter(comid_coords == comid) %>%
select(site_no, station_nm, comid = comid_new) %>%
mutate(comid = as.numeric(comid))
```
##### Delineate each gage's watershed
Now that we have a list of our gages and their associated NHDPlus V2 stream features, we can use the NHD indexing to "crawl" upstream of each gage's flowline, then grab each flowline's catchment, and lastly dissolve those catchments into a single polygon the represents the gage's upstream contributing area (i.e., its watershed):
```{r}
# load in the NHD as a table. This table lists all COMIDs in CONUS and allows you to "navigate" the NHD.
nhd <- read_csv("data/nhd_flow_network.csv")
# function to delineate each gage's watershed:
watershed_delineator <- function(site_list){
# filter our master list to just the gage we are iterating over
site <- nwis_sites %>%
filter(site_no == site_list)
# use get_UT to list all comids that are upstream of our gage using the comid the
# gage falls on:
upstream <- nhdplusTools::get_UT(nhd, site$comid)
# grab all the catchments associated with the upstream comids:
nhd_catch <- nhdplusTools::get_nhdplus(comid = upstream,
realization = 'catchment',
t_srs = 4269) %>%
# remove dupes (precautionary step, not likely necessary)
dplyr::distinct(featureid, .keep_all=TRUE) %>%
# "dissolve" all the catchments into a single polygon
dplyr::summarize() %>%
# remove weird hole by-products that exist if the catchment boundaries don't
# line up perfectly:
nngeo::st_remove_holes() %>%
# tack on the site name and comid to the watershed
dplyr::mutate(site_no = site$site_no,
comid = site$comid)
# back it up:
saveRDS(nhd_catch, paste0("data/watersheds/", site$site_no, ".RDS"))
print(paste0(site$station_nm, " delineated!"))
# return the delineated watershed
return(nhd_catch)
}
# Create a vector of nwis sites to iterate over
watersheds <- nwis_sites$site_no %>%
#... then delineate each site's watershed:
map(~watershed_delineator(.)) %>%
bind_rows()
mapview::mapview(watersheds)
```
## Grab explanatory variables
Link up gages with streamcat variables. StreamCat watershed statistics are available for every stream feature in the NHDPlusV2. StreamCat uses the comid as the identifier so we can link up information that way. A complete list of available variables you can pull is found in the vars table below. I am NOT pulling in all the available info that you can because these soooo much!
```{r}
# Grab a list of all available streamcat variables:
download.file("https://java.epa.gov/StreamCAT/metrics/variable_info.csv",
destfile = paste0(getwd(), "/data/StreamCatVars.csv"))
# This table describes all the available variables in streamcat. Look here
# if you want to explore other vars, or the descriptions of the ones I've
# selected here:
vars <- read_csv("data/StreamCatVars.csv")
# This is what's available on StreamCat related to lithology. Likely not identical to what
# was used by Abby but hopefully good swap:
lithology_vars <- c("PctAlkIntruVol", "PctWater",
"PctSilicic", "PctSalLake",
"PctNonCarbResid", "PctHydric",
"PctGlacTilLoam", "PctGlacTilCrs",
"PctGlacTilClay", "PctGlacLakeFine",
"PctGlacLakeCrs", "PctExtruVol",
"PctEolFine", "PctEolCrs",
"PctColluvSed", "PctCoastCrs",
"PctCarbResid", "PctAlluvCoast")
# Urban cover (add all together to get percent of total urban cover):
# Many available years. Which do we want to use? For now using 2011:
urban_cover <- c("PctUrbOp2019", "PctUrbMd2019", "PctUrbLo2019", "PctUrbHi2019")
# PRISM mean precip for 1981-2010 OR 1991-2020
prism_precip <- c("Precip8110", "Precip9120")
# These are all the variables Fred was interested in for describing flow
# in his work. Likely a good starting point for our needs, too.
fred_vars <- c("CanalDens",
# BFI
"BFI",
#NLCD 2019
"PctOw2019", "PctIce2019", "PctUrbOp2019", "PctUrbLo2019", "PctUrbMd2019", "PctUrbHi2019",
"PctBl2019", "PctDecid2019", "PctConif2019", "PctMxFst2019", "PctShrb2019", "PctGrs2019",
"PctHay2019", "PctCrop2019", "PctWdWet2019", "PctHbWet2019",
# Dam Info
"DamDens", "DamNIDStor", "DamNrmStor",
# Elevation
"Elev",
# Impervious Surfaces across a bunch of years:
"PctImp2006", "PctImp2008", "PctImp2011", "PctImp2001",
"PctImp2013", "PctImp2019", "PctImp2016", "PctImp2004",
# PRISM 1991-2020
"Precip9120", "Tmax9120", "Tmean9120", "Tmin9120",
# STATSGO variables:
"Clay", "Sand", "Silt", "WtDep", "Om", "Perm", "RckDep")
streamcat_vars <- StreamCatTools::sc_get_data(metric = paste(c(lithology_vars, urban_cover, prism_precip, fred_vars), collapse = ","),
aoi = 'watershed',
comid = nwis_sites$comid) %>%
# remove variables we don't particularly care about that get returned:
select(-contains("AREASQKM"))
watersheds_streamcat <- watersheds %>%
left_join(., streamcat_vars, by = c("comid" = "COMID")) %>%
mutate(PCTURB2019WS = PCTURBHI2019WS + PCTURBMD2019WS + PCTURBLO2019WS + PCTURBOP2019WS)
```
## Find reference quality gages:
Using the StreamCat variables, we can drop any gages who have characteristics that make them unsuitable as reference gages. For example, we can remove any gages whose watersheds have \>= 10% urban landcover and watersheds that have dam storage densities larger than 100 megaliters/square kilometer:
```{r}
ref_watersheds <- watersheds_streamcat %>%
filter(PCTURB2019WS < 10) %>%
filter(DAMNIDSTORWS < 100000)
# REMOVE THIS STEP ONCE "GOOD" WATERSHEDS HAVE BEEN IDENTIFIED:
# But for now, this is a quick and lazy way of getting rid of watersheds
# we know were delineated incorrectly. They were delineated incorrectly
# because their attributed comids are wrong. (See comment about the weirdos
# object above):
ref_watersheds <- ref_watersheds %>% mutate(area = sf::st_area(ref_watersheds)) %>%
# remove any watersheds larger than 1600 km (for some conservative wiggle
# with projection differences)
filter(as.numeric(area) <= 1.6e+9)
```
And, we can drop any gages whose watersheds contain a transbasin diversion. We identify watersheds that have a transbasin diversion by grabbing NHD HR flowlines features that intersect the watershed. We are using NHD HR instead of NHDPlusV2 because the NHDHR has flowline features that are identified as being canals, ditches, etc. With that info, we identify watersheds where any of those "unnatural" features cross over the watershed boundary. If a canal/ditch crosses over a watershed boundary, that means that water is being moved in or out of the watershed unnaturally.
```{r}
fetch_flowlines <- function(site_list){
site <- watersheds %>%
filter(site_no == site_list)
# open the nhd_hr - which contains a bunch of layers
nhd_hr <- arcgislayers::arc_open("https://hydro.nationalmap.gov/arcgis/rest/services/NHDPlus_HR/MapServer")
# arcgislayers::list_items(nhd_hr)
nhd_hr_flowlines <- arcgislayers::get_layer(nhd_hr, 3)
# use bbox to return associated flowlines
geospatial_aoi <- site %>%
# add a buffer around the watershed for visualization later on
st_buffer(1000) %>%
# Convert sf object to sfc object (required for downloading from the map server)
st_as_sfc(.)
nhd_flowlines <- vector("list", length = length(geospatial_aoi))
tryCatch({
nhd_flowlines <- arcgislayers::arc_select(nhd_hr_flowlines,
# where = query,
filter_geom = geospatial_aoi,
crs = st_crs(geospatial_aoi)) %>%
st_make_valid()},
error = function(e){
cat("Index ", i, " from input data failed.")
})
nhd_flowlines <- nhd_flowlines %>%
keep(~!is.null(.))
try(nhd_flowlines <- nhd_flowlines %>%
dplyr::bind_rows() %>%
dplyr::distinct() %>%
mutate(#natural = ifelse(ftype == 460, T, F),
flowline_type = case_when(ftype == 460 ~ "natural",
ftype == 558 ~ "artificial path",
ftype == 468 ~ "drainageway",
ftype == 336 ~ "canal ditch",
ftype == 566 ~ "coastline",
ftype == 334 ~ "connector",
ftype == 428 ~ "pipeline",
ftype == 420 ~ "underground conduit",
.default = "unnatural")),
silent = TRUE)
saveRDS(nhd_flowlines, paste0("data/flowlines/", site_list, ".RDS"))
print(paste0(site_list, " done!"))
return(nhd_flowlines)
}
all_flowlines <- ref_watersheds$site_no %>%
map(~fetch_flowlines(.)) %>%
bind_rows() %>%
distinct()
# in fetchNHD_flowlines, we have categorized each flowline as being natural or unnatural.
# So, we can subset the flowlines to just the unnatural ones.
all_flowlines_unnatural <- all_flowlines %>%
filter(flowline_type != "natural")
transbasin_finder <- function(site_list){
# filter our master list to just the gage's watershed we are iterating over
site <- ref_watersheds %>%
filter(site_no == site_list)
flowlines <- readRDS(paste0("data/flowlines/", site$site_no, ".RDS"))
flowlines_unnatural <- flowlines %>%
filter(flowline_type != "natural")
# For linestring transformation step to work, need the watershed to be a polygon object:
if (st_geometry_type(site) != "POLYGON") {
# If not, cast to a Polygon... which will "explode" it into multiple.
# This is a rare thing... I think...
site <- st_cast(site, "POLYGON")
}
polyline <- site %>% st_cast("LINESTRING")
crossovers <- flowlines_unnatural %>%
.[polyline,] %>%
nrow()
# Some watersheds are multipolygons and therefore need to be put back together here:
site <- site %>% group_by(site_no, comid) %>% summarize() %>%
mutate(transbasin = ifelse(crossovers > 0, "TRANSBASIN DIVERSION", "NATURAL"))
# Extract the bounding box of the site_data
bbox_site <- st_bbox(site)
# Create the ggplot map, zoomed to the bounding box of site_data
gg_map <- ggplot() +
# Plot the site data
geom_sf(data = site, color = "black", fill = "white", size = 1) +
# Plot the site data
geom_sf(data = filter(nwis_sites, site_no == site$site_no), color = "lightblue", size = 5.5) +
# Plot the natural flowlines in blue
geom_sf(data = flowlines, color = "blue", size = 0.5) +
# Plot the unnatural flowlines in red
geom_sf(data = flowlines_unnatural, color = "red", size = 2) +
# Set the xlim and ylim based on the bounding box of site_data
xlim(bbox_site["xmin"], bbox_site["xmax"]) +
ylim(bbox_site["ymin"], bbox_site["ymax"]) +
coord_sf() +
theme_void() +
labs(title = paste0(site$site_no, " ", site$transbasin)) +
theme(
plot.title = element_text(size = 14, hjust = 0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
# Save the map as an image
ggsave(gg_map, filename = paste0("data/transbasin_confirm/", site$transbasin, "_", site$site_no, ".png"))
print(site$site_no)
return(site)
}
watersheds_div <- ref_watersheds$site_no %>%
map(~transbasin_finder(.)) %>%
bind_rows() %>%
st_make_valid()
```
Reduce our reference gages to only gages without a transbasin diversion:
```{r}
ref_watersheds <- watersheds_div %>%
filter(transbasin == "NATURAL")
```
### Grab other variables not found in stream cat:
#### Dominant watershed aspect
Dominant aspect requires a bit of a wonky workflow - I'm not quite sure if there's an easier approach than what I'm presenting here. I grab raw elevation DEMs, get the aspect (in degrees) for each grid cell, then convert those raw aspects (in degrees) into categorical N, E, S, W cardinal directions as displayed in this image below:
![](data/compassrose.jpg)
```{r}
aspect_finder <- function(site_list){
# create numerical representation for each cardinal aspect:
aspect_lookup <- tibble(val = c(1, 2, 3, 4),
aspect = c("North", "East", "South", "West"))
# filter our master list to just the gage's watershed we are iterating over
site <- ref_watersheds %>%
filter(site_no == site_list)
# grab elevation data
elev <- elevatr::get_elev_raster(summarize(site), z = 12, clip = "locations") %>% # zoom of 12 is close-ish(?) to 30 meters... JD should confirm!
terra::rast() %>%
# clip to extent of the watershed
terra::mask(., site, touches = FALSE)
# calculate aspect from the masked elevation
aspect <- terra::terrain(elev, v = 'aspect')
# convert aspect values to cardinal directions
convert_to_direction <- function(aspect) {
direction <- rep(NA, length(aspect))
direction[aspect >= 0 & aspect <= 45 | aspect > 315 & aspect <= 360] <- 1 # North
direction[aspect > 45 & aspect <= 135] <- 2 # East
direction[aspect > 135 & aspect <= 225] <- 3 # South
direction[aspect > 225 & aspect <= 315] <- 4 # West
return(direction)
}
# apply the conversion directly to the raster values
aspect_cardinal_raster <- terra::app(aspect, fun = convert_to_direction)
#Map showing what this aspect layer looks like geospatially:
plot(aspect_cardinal_raster)
# Calculate the mode (dom aspect) in each watershed
dominant_aspect <- as.data.table(aspect_cardinal_raster) %>%
rename(val = lyr.1) %>%
group_by(val) %>%
summarize(count = n()) %>%
filter(count == max(count)) %>%
left_join(aspect_lookup, by = "val") %>%
mutate(site_no = site$site_no)
}
watershed_aspects <- ref_watersheds$site_no %>%
map(~aspect_finder(.)) %>%
bind_rows()
```
#### GridMET climate data
gridMET is DAILY gridded climate data. I am pulling in daily data for all grid cells that overlap each watershed for days in 2001-2020. Namely, I'm downloading max temperature, min temperature, PET, and precipitation. Then, I'm averaging that data across the watershed to get a single, average value for the watershed. See the function `get_climate_historic()` if you want to see how the raw gridMET data is pulled in.
```{r}
get_climate_historic <- function(sf,
col_name,
start = "1979-01-01",
end = "2023-12-31",
vars = c("tmmx", "tmmn", "pr", "pet")) {
sf <- sf %>%
dplyr::rename("join_index" = {{col_name}})
all_climate_data <- vector("list", length = nrow(sf))
if(any(unique(sf::st_geometry_type(sf)) %in% c("POLYGON", "MULTIPOLYGON"))){
for (i in 1:nrow(sf)) {
aoi <- sf[i,]
print(paste0('Downloading GridMET for ', aoi$join_index, "."))
clim <- climateR::getGridMET(AOI = aoi,
varname = vars,
startDate = start,
endDate = end)
if(inherits(clim[[1]], "SpatRaster")){
clim_crs <- crs(clim[[1]])
if(st_crs(clim[[1]]) != st_crs(sf)){
clim <- clim %>%
purrr::map(
# getGridMET defaults AOI to bbox - so crop / mask results to sf extent
~terra::crop(., st_transform(aoi, crs = clim_crs), mask = TRUE),
crs = clim_crs)
} else {
clim <- clim %>%
purrr::map(
# getGridMET defaults AOI to bbox - so crop / mask results to sf extent
~terra::crop(., aoi, mask = TRUE),
crs = clim_crs)
}
all_climate_data[[i]] <- clim %>%
purrr::map_dfr(~ as.data.frame(., xy = TRUE)) %>%
data.table() %>%
pivot_longer(-(x:y),
names_to = "var_temp",
values_to = "val") %>%
separate_wider_delim(var_temp, "_", names = c("var", "date")) %>%
drop_na(val) %>%
group_by(x, y, date) %>%
pivot_wider(names_from = "var", values_from = "val") %>%
dplyr::mutate(date = as.Date(date),
pet_mm = pet,
ppt_mm = pr,
tmax_C = tmmx - 273.15,
tmin_C = tmmn - 273.15,
tmean_C = (tmax_C + tmin_C)/2,
join_index = aoi$join_index) %>%
dplyr::select(-c("tmmx", "tmmn", "pr", "pet"))
saveRDS(all_climate_data[[i]], paste0("data/climate/", aoi$join_index, ".RDS"))
} else {
all_climate_data[[i]] <- clim %>%
data.table() %>%
# names of columns include va_mode_rcp so must rename
rename_with(~ str_split(.x, "_", n = 2) %>% map_chr(1)) %>%
# since polygon grabbed a single grid, gridMET does not provide the coordinates
# of the gridMET cell, so we fill in x and y with the coordinates
# of the sf object:
dplyr::mutate(x = sf::st_coordinates(aoi)[[1]],
y = sf::st_coordinates(aoi)[[2]]) %>%
# Then do all other cleaning steps done for polygon sf objects:
dplyr::mutate(date = as.Date(date),
pet_mm = pet,
ppt_mm = pr,
tmax_C = tmmx - 273.15,
tmin_C = tmmn - 273.15,
tmean_C = (tmax_C + tmin_C)/2,
join_index = aoi$join_index) %>%
dplyr::select(-c("tmmx", "tmmn", "pr", "pet"))
saveRDS(all_climate_data[[i]], paste0("data/climate/", aoi$join_index, ".RDS"))
}
}
all_climate_data <- all_climate_data %>%
bind_rows()
# Rename the join_index column
colnames(all_climate_data)[colnames(all_climate_data) == "join_index"] <- {{col_name}}
return(all_climate_data)
} else {stop("Your sf feature is neither a polygon nor point feature, or it needs to be made valid.")}
}
watershed_climate <- get_climate_historic(sf = ref_watersheds,
col_name = "site_no",
# Snow persistence start
start = "2001-01-01",
# Snow persistence end
end = "2020-12-31",
vars = c("pet", "pr", "tmmn", "tmmx")) %>%
group_by(site_no) %>%
summarize(pet_mm_2001_2020 = mean(pet_mm, na.rm = TRUE),
ppt_mm_2001_2020 = mean(ppt_mm, na.rm = TRUE),
tmax_C_2001_2020 = mean(tmax_C, na.rm = TRUE),
tmin_C_2001_2020 = mean(tmin_C, na.rm = TRUE))
```
#### Snow persistence
Here I am loading in all the snow persistence data I downloaded from: <https://www.sciencebase.gov/catalog/item/5f63790982ce38aaa23a3930>. There is annual snow persistence data from 2001-2020. Using the {terra} package, I get the area-weighted annual average snow persistence value for each watershed, then average each year's data together to get a single time- and area-weighted average for each watershed.
```{r}
# sp_preview <- rast("data/snow_persistence_hammond/MOD10A2_SCI_2020.tif")
#
# tm_shape(sp_preview) +
# tm_raster(palette = "viridis", title = "Snow Persistence") +
# tm_layout(frame = FALSE)
# load all the .tif files into a list
tif_files <- list.files("data/snow_persistence_hammond/", pattern = "\\.tif$", full.names = TRUE)
# stack the snow persistence .tif files into a single raster stack
raster_stack <- rast(tif_files)
# convert the shapefile to a 'terra' object (if necessary)
polygon <- st_transform(ref_watersheds, crs(raster_stack)) # Align CRS
# convert the polygons to 'terra' vector format
polygon_terra <- vect(polygon)
# mask the raster stack to the watershed polygons
masked_stack <- mask(raster_stack, polygon_terra)
# extract mean SP across each watershed. weights = TRUE means get the area-weighted average
mean_sp <- extract(masked_stack, polygon_terra, fun = mean, weights = TRUE)
# convert the results to a data frame listing each gage's SP
watershed_sp <- as_tibble(mean_sp) %>%
bind_cols(st_drop_geometry(ref_watersheds)) %>%
select(-ID) %>%
pivot_longer(cols = c(contains("MOD"))) %>%
group_by(site_no, comid, transbasin) %>%
summarize(mean_sp_2001_2020 = mean(value))
```