Skip to content

Commit

Permalink
Merge pull request #161 from CIROH-UA/name_change
Browse files Browse the repository at this point in the history
change name to datastreamcli
  • Loading branch information
JordanLaserGit authored Feb 18, 2025
2 parents 08de524 + 9074fb3 commit 8d5569d
Show file tree
Hide file tree
Showing 28 changed files with 84 additions and 49 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_test_docker_x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ jobs:
- name: Test docker containers
run : |
curl -L -O https://ngen-datastream.s3.us-east-2.amazonaws.com/palisade.gpkg
./scripts/ngen-datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
2 changes: 1 addition & 1 deletion .github/workflows/build_test_push_docker_x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
- name: Test docker containers
run : |
curl -L -O https://ngen-datastream.s3.us-east-2.amazonaws.com/palisade.gpkg
./scripts/ngen-datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -n 4
./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -n 4
- name: Login to Docker Hub
uses: docker/login-action@v3
Expand Down
30 changes: 15 additions & 15 deletions .github/workflows/test_datastream_options.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
- name: Base test and NWM_RETRO_V3
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
- name: Cache resource directory
run: |
Expand All @@ -66,93 +66,93 @@ jobs:
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -g $(pwd)/palisade.gpkg -R $(pwd)/data/cache/datastream-resources/config/realization_sloth_nom_cfe_pet.json -F $(pwd)/data/cache/datastream-resources/ngen-forcings/1_forcings.nc -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test
./scripts/datastream -g $(pwd)/palisade.gpkg -R $(pwd)/data/cache/datastream-resources/config/realization_sloth_nom_cfe_pet.json -F $(pwd)/data/cache/datastream-resources/ngen-forcings/1_forcings.nc -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test
- name: Resource directory test missing all
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
sudo rm -rf ./data/cache/datastream-resources-missing/ngen-forcings
sudo rm -rf ./data/cache/datastream-resources-missing/config/*
./scripts/ngen-datastream -r ./data/cache/datastream-resources-missing -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg
./scripts/datastream -r ./data/cache/datastream-resources-missing -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg
- name: Resource directory test
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -r ./data/cache/datastream-resources -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test
./scripts/datastream -r ./data/cache/datastream-resources -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test
- name: Forcings sources option test NWM_RETRO_V2
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -r ./data/cache/datastream-resources-no-forcings -s 201906200100 -e 201906200200 -C NWM_RETRO_V2 -d $(pwd)/data/datastream_test
./scripts/datastream -r ./data/cache/datastream-resources-no-forcings -s 201906200100 -e 201906200200 -C NWM_RETRO_V2 -d $(pwd)/data/datastream_test
- name: Forcings sources option test NWM_RETRO_V3
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -r ./data/cache/datastream-resources-no-forcings -s 201906200100 -e 201906200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test
./scripts/datastream -r ./data/cache/datastream-resources-no-forcings -s 201906200100 -e 201906200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test
- name: Forcings sources option test NWM_V3
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
TODAY=$(env TZ=US/Eastern date +'%Y%m%d')
./scripts/ngen-datastream -r ./data/cache/datastream-resources-no-forcings -s $TODAY"0100" -e $TODAY"0200" -C NWM_V3 -d $(pwd)/data/datastream_test
./scripts/datastream -r ./data/cache/datastream-resources-no-forcings -s $TODAY"0100" -e $TODAY"0200" -C NWM_V3 -d $(pwd)/data/datastream_test
- name: Forcings sources option test NOMADS
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
TODAY=$(env TZ=US/Eastern date +'%Y%m%d')
./scripts/ngen-datastream -r ./data/cache/datastream-resources-no-forcings -s $TODAY"0100" -e $TODAY"0200" -C NOMADS -d $(pwd)/data/datastream_test
./scripts/datastream -r ./data/cache/datastream-resources-no-forcings -s $TODAY"0100" -e $TODAY"0200" -C NOMADS -d $(pwd)/data/datastream_test
- name: Test hfsubset options
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -I "Gages-09106150" -i hl -v 2.1.1 -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -I "Gages-09106150" -i hl -v 2.1.1 -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
- name: S3 write out test
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -S ciroh-community-ngen-datastream -o git_actions_test
./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -S ciroh-community-ngen-datastream -o git_actions_test
aws s3api wait object-exists --bucket ciroh-community-ngen-datastream --key git_actions_test/ngen-run.tar.gz
aws s3 rm s3://ciroh-community-ngen-datastream/git_actions_test --recursive
- name: DAILY today test
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -s DAILY -C NWM_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
./scripts/datastream -s DAILY -C NWM_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
- name: DAILY pick day test
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -s DAILY -C NWM_V3 -e $(date -d '-15 day' '+%Y%m%d0000') -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
./scripts/datastream -s DAILY -C NWM_V3 -e $(date -d '-15 day' '+%Y%m%d0000') -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
- name: DAILY short_range today test
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -s DAILY -C NWM_SHORT_RANGE -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
./scripts/datastream -s DAILY -C NWM_SHORT_RANGE -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
- name: DAILY medium_range today test
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -s DAILY -C NWM_MEDIUM_RANGE -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
./scripts/datastream -s DAILY -C NWM_MEDIUM_RANGE -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
- name: DAILY analysis assim extend today test
if: always()
run: |
sudo rm -rf $(pwd)/data/datastream_test
./scripts/ngen-datastream -s DAILY -C NWM_ANALYSIS_ASSIM_EXTEND -e $(date -d '-2 day' '+%Y%m%d0000') -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
./scripts/datastream -s DAILY -C NWM_ANALYSIS_ASSIM_EXTEND -e $(date -d '-2 day' '+%Y%m%d0000') -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
- name: TEEHR integration test
if: always()
Expand Down
32 changes: 21 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,30 @@
# NextGen Water Modeling Framework Datastream
`ngen-datastream` automates the process of collecting and formatting input data for NextGen, orchestrating the NextGen run through NextGen In a Box (NGIAB), and handling outputs. This software allows users to run NextGen in an efficient, _relatively_ painless, and reproducible fashion.
# Research DataStream
The Research DataStream is an array of daily [NextGen](https://github.com/NOAA-OWP/ngen)-based hydrologic simulations in the AWS cloud. An exciting aspect of the Research DataStream is that the NextGen configuration is [open-sourced](https://github.com/CIROH-UA/ngen-datastream/tree/main/research_datastream/configuration) and [community editable](https://github.com/CIROH-UA/ngen-datastream/blob/main/research_datastream/CONTRIBUTE.md), which allows any member of the hydrologic community to contribute to improving streamflow predictions. By making the NextGen forcings, outputs, and configuration publicly available, it is now possible to leverage regional expertise and incrementally improve streamflow predictions configured with the NextGen Framework.
See the Research DataStream related documentation:
* **Find daily output data at:** https://datastream.ciroh.org/index.html
* **Make improvements to NextGen configuration:**
Find out how you can contribute [here](https://github.com/CIROH-UA/ngen-datastream/blob/main/research_datastream/CONTRIBUTE.md)!
* **Current status and configuration:** Read [here](https://github.com/CIROH-UA/ngen-datastream/blob/main/research_datastream/STATUS_AND_METADATA.md)!
* **Infrastructure as Code:** See the template AWS architecture [here](https://github.com/CIROH-UA/ngen-datastream/blob/main/research_datastream/terraform/ARCHITECTURE.md), which users can deploy within their own AWS account to issue and manage AWS server-based jobs.
* The actual research datastream deployment, which builds upon the template AWS infra, exists [here](https://github.com/CIROH-UA/ngen-datastream/tree/main/research_datastream/terraform_community) and is available for reference only.

![ngen-datastream](docs/images/ngen-datastream.jpg)
# DataStreamCLI
The software backend of the Research DataStream is DataStreamCLI, which is a standalone tool that automates the process of collecting and formatting input data for NextGen, orchestrating the NextGen run through NextGen In a Box (NGIAB), and handling outputs. This software allows users to run NextGen in an efficient, _relatively_ painless, and reproducible fashion while providing flexibility and integrations like hfsubset, NextGen In A Box, and TEEHR.

![datastream](docs/images/datastreamcli.jpg)

## Getting Started
* **Installation:** Follow the [Installation Guide](https://github.com/CIROH-UA/ngen-datastream/blob/main/INSTALL.md) to set up `ngen-datastream` on your system.
* **Guide:** Start by running the [ngen-datastream guide](https://github.com/CIROH-UA/ngen-datastream/blob/main/scripts/datastream_guide)! It is an interactive script that will provide a tour of the repo as well as help you form a command with `ngen-datastream`.
* **Installation:** Follow the [Installation Guide](https://github.com/CIROH-UA/ngen-datastream/blob/main/INSTALL.md) to prepare your environment for `DataStreamCLI`.
* **Guide:** Start by running the [DataStreamCLI guide](https://github.com/CIROH-UA/ngen-datastream/blob/main/scripts/datastream_guide)! It is an interactive script that will provide a tour of the repo as well as help you form a command with `DataStreamCLI`.
* **Docs**: Make sure to review the [documentation](https://github.com/CIROH-UA/ngen-datastream/blob/main/docs/) for
* Available [NextGen models](https://github.com/CIROH-UA/ngen-datastream/blob/main/docs/NGEN_MODELS.md) and automated BMI configuration generation
* [Datastream options](https://github.com/CIROH-UA/ngen-datastream/blob/main/docs/DATASTREAM_OPTIONS.md)
* Input and output [directory structure](https://github.com/CIROH-UA/ngen-datastream/blob/main/docs/STANDARD_DIRECTORIES.md)
* A [usage guide](https://github.com/CIROH-UA/ngen-datastream/blob/main/docs/USAGE.md) for executing `ngen-datastream` effectively
* A step-by-step [breakdown](https://github.com/CIROH-UA/ngen-datastream/blob/main/docs/BREAKDOWN.md) of `ngen-datastream`'s internal workflow
* A [usage guide](https://github.com/CIROH-UA/ngen-datastream/blob/main/docs/USAGE.md) for executing `DataStreamCLI` effectively
* A step-by-step [breakdown](https://github.com/CIROH-UA/ngen-datastream/blob/main/docs/BREAKDOWN.md) of `DataStreamCLI`'s internal workflow
* An explanation of the [Research DataStream](https://github.com/CIROH-UA/ngen-datastream/blob/main/research_datastream/README.md)

## Run it
## Run DataStreamCLI
This example will execute a 24 hour NextGen simulation over the Palisade, Colorado watershed with CFE, SLOTH, PET, NOM, and t-route configuration distributed over 4 processes. The forcings used are the National Water Model v3 Retrospective.

First, obtain a hydrofabric file for the gage you wish to model. Check out [hfsubset](https://github.com/lynker-spatial/hfsubsetCLI) for a handy cli tool for generating geopackages. For Palisade, Colorado:
Expand All @@ -27,9 +37,9 @@ hfsubset -w medium_range \
-t hl "Gages-09106150"
```

Then feed the hydrofabric file to ngen-datastream along with a few cli args to define the time domain and NextGen configuration
Then feed the hydrofabric file to DataStreamCLI along with a few cli args to define the time domain and NextGen configuration
```
./scripts/ngen-datastream -s 202006200100 \
./scripts/datastream -s 202006200100 \
-e 202006210000 \
-C NWM_RETRO_V3 \
-d $(pwd)/data/datastream_test \
Expand All @@ -41,4 +51,4 @@ Then feed the hydrofabric file to ngen-datastream along with a few cli args to d
And that's it! Outputs will exist at `$(pwd)/data/datastream_test/ngen-run/outputs`

## License
`ngen-datastream` is distributed under [GNU General Public License v3.0 or later](LICENSE.md)
The entirety of `ngen-datastream` is distributed under [GNU General Public License v3.0 or later](LICENSE.md)
4 changes: 2 additions & 2 deletions docs/DATASTREAM_OPTIONS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Below is the output of `./scripts/ngen-datastream -h`. A more in depth of each option is given in the table below.
Below is the output of `./scripts/datastream -h`. A more in-depth description of each option is given in the table below.

```
Usage: ./scripts/ngen-datastream [options]
Usage: ./scripts/datastream [options]
Either provide a datastream configuration file
-c, --CONF_FILE <Path to datastream configuration file>
or run with cli args
Expand Down
1 change: 1 addition & 0 deletions docs/NGEN_MODELS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ NextGen is a framework in which physical models can be coupled in numerical simu
* [SoilFreezeThaw](https://github.com/NOAA-OWP/SoilFreezeThaw)
* [SoilMoistureProfiles](https://github.com/NOAA-OWP/SoilMoistureProfiles)
* [TOPMODEL](https://github.com/NOAA-OWP/topmodel)
* [LSTM](https://github.com/NOAA-OWP/lstm)

## Coming Soon to NGIAB and `ngen-datastream`
* [Sac-SMA](https://github.com/NOAA-OWP/sac-sma)
Expand Down
7 changes: 5 additions & 2 deletions docs/STANDARD_DIRECTORIES.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ datastream-metadata/
|
├── filenamelist.txt
|
├── realization.json
├── realization_datastream.json
|
├── realization_user.json
```
| File Type | Path in Resource Directory | Description | Naming |
|-------------|--------|----------|-----|
Expand All @@ -40,7 +42,8 @@ datastream-metadata/
| PROFILE | datastream-metadata/profile_fp.txt | Datetime print statements that allow for profiling each step in forcingprocessor| profile_fp.txt |
| PROFILE | datastream-metadata/profile.txt | Datetime print statements that allow for profiling each step in datastream | profile.txt |
| FILENAME LIST | datastream-metadata/filenamelist.txt | Local file paths or URLs to NWM forcings. Generated by [nwmurl](https://github.com/CIROH-UA/nwmurl). | filenamelist.txt |
| REALIZATION | datastream-metadata/realization.json | NextGen configuration file. See [here](https://github.com/CIROH-UA/ngen-datastream/blob/main/configs/ngen/realization_cfe_sloth_pet_nom.json) | realization.json |
| REALIZATION | datastream-metadata/realization_datastream.json | NextGen configuration file. See [here](https://github.com/CIROH-UA/ngen-datastream/blob/main/configs/ngen/realization_cfe_sloth_pet_nom.json). DataStreamCLI can edit the user supplied realization. This is the realization file that is used for the NextGen execution. | realization_datastream.json |
| REALIZATION | datastream-metadata/realization_user.json | NextGen configuration file. See [here](https://github.com/CIROH-UA/ngen-datastream/blob/main/configs/ngen/realization_cfe_sloth_pet_nom.json). DataStreamCLI can edit the user supplied realization. This is the exact copy of the user supplied realization file and may differ from the file used for the NextGen execution. | realization_user.json |

### `RESOURCE_DIR` (`datastream-resources/`)
`datastream-resources/` holds all the input data files required to perform the various computations `ngen-datastream` performs. This folder is not required as input, but will be a faster method for running ngen-datastream repeatedly over a given spatial or time domain.
Expand Down
Binary file modified docs/images/AWS_diagram.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/datastreamcli.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed docs/images/ngen-datastream.jpg
Binary file not shown.
Loading

0 comments on commit 8d5569d

Please sign in to comment.