Generalise Spinup-NEMO to work with DINO data #30

Merged: 25 commits, Feb 25, 2025

Changes from all commits (25 commits)
cf16563
Added requirements.txt dependencies
surbhigoel77 Aug 22, 2024
a61300d
Add additional packages to requirements.txt
ma595 Sep 11, 2024
4bf3eb4
Notebook to resample data from monthly to yearly
isaacaka Nov 6, 2024
797bc9a
Altered code to accept a tuple of (term_name, filenames_identifier)
isaacaka Nov 6, 2024
31826ae
Changed start, end, steps and replaced hardcoded step values
isaacaka Nov 7, 2024
014ad06
Notebook unchanged
isaacaka Nov 7, 2024
bd7f8bb
Changed file path names and updated restart.py to extract a variable …
isaacaka Nov 19, 2024
4908c97
Removed superfluous setting of path
isaacaka Jan 15, 2025
386db49
Added steps for running the spin-up nemo tool
isaacaka Jan 15, 2025
98d2944
Update README.md
Etienne-Meunier Jan 16, 2025
2f0dd74
Updated instructions for running NEMO/DINO with the Spin-UP tool
isaacaka Jan 29, 2025
3114cb6
Add more content on restart toolbox to README.md
ma595 Jan 29, 2025
21da2b5
Minor changes to README.md
ma595 Jan 29, 2025
0edc050
Formatting fixes
ma595 Feb 5, 2025
deb1768
initial attempt at restart toolbox in bash
ma595 Feb 5, 2025
204dc92
Amend main_forecast.py to enable it to run for DINO data
ma595 Feb 12, 2025
1876459
Update restart_toolbox.sh - remove hardcoded paths
ma595 Feb 14, 2025
3bd6fb1
Update main_forecast.py - remove hardcoded paths
ma595 Feb 14, 2025
26a1815
Fix random seed in Gaussian Process
ma595 Feb 14, 2025
462375b
Added additional comments
isaacaka Feb 14, 2025
bcdcde5
Added missing parameter to jump function call
isaacaka Feb 17, 2025
31f90ca
Added up-to-date Jumper notebook
isaacaka Feb 17, 2025
682a90e
pre-commit formatting
isaacaka Feb 18, 2025
32738f3
Apply ruff reformatting
isaacaka Feb 18, 2025
44e631a
Update restart path in restart_toolbox.sh
ma595 Feb 18, 2025
259 changes: 116 additions & 143 deletions Notebooks/Jumper.ipynb

Large diffs are not rendered by default.

118 changes: 118 additions & 0 deletions Notebooks/Resample_ssh.ipynb
@@ -0,0 +1,118 @@

The new notebook (kernel: `Python [conda env:netcdf]`, Python 3.10.15) contains two code cells, shown here with their output.

Cell 1 — resample the monthly dataset to yearly means:

```python
import xarray as xr
import numpy as np
import cftime

# Load the dataset with time decoding
ds_monthly = xr.open_dataset("DINO_1m_grid_T.nc", decode_times=True, use_cftime=True)

# Resample the dataset to yearly frequency and compute the mean
ds_yearly = ds_monthly.resample(time_counter="YE").mean()

# Save the resampled dataset to a new NetCDF file
ds_yearly.to_netcdf("DINO_1m_To_1y_grid_T.nc")
```

Cell 2 — re-open the resampled file and inspect it:

```python
ds_yearly_from_file = xr.open_dataset("DINO_1m_To_1y_grid_T.nc", decode_times=False)

print(ds_yearly_from_file)
print("\n---------------\n")
print(ds_yearly_from_file["time_counter"])
```

Output:

```
<xarray.Dataset> Size: 20MB
Dimensions:       (time_counter: 50, y: 199, x: 62)
Coordinates:
    nav_lat       (y, x) float32 49kB ...
    nav_lon       (y, x) float32 49kB ...
  * time_counter  (time_counter) int64 400B 0 360 720 1080 ... 16920 17280 17640
Dimensions without coordinates: y, x
Data variables:
    ssh           (time_counter, y, x) float32 2MB ...
    sst           (time_counter, y, x) float32 2MB ...
    sss           (time_counter, y, x) float32 2MB ...
    saltflx       (time_counter, y, x) float32 2MB ...
    qns           (time_counter, y, x) float32 2MB ...
    qsr           (time_counter, y, x) float32 2MB ...
    mldr10_1      (time_counter, y, x) float32 2MB ...
    mldr10_1max   (time_counter, y, x) float32 2MB ...
Attributes:
    name:         DINO_1m_grid_T
    description:  ocean T grid variables monthly
    title:        ocean T grid variables monthly
    Conventions:  CF-1.6
    timeStamp:    2024-Nov-02 12:56:48 GMT
    uuid:         4580aece-1ad3-4429-9fa2-2417c992e848

---------------

<xarray.DataArray 'time_counter' (time_counter: 50)> Size: 400B
array([    0,   360,   720,  1080,  1440,  1800,  2160,  2520,  2880,  3240,
        3600,  3960,  4320,  4680,  5040,  5400,  5760,  6120,  6480,  6840,
        7200,  7560,  7920,  8280,  8640,  9000,  9360,  9720, 10080, 10440,
       10800, 11160, 11520, 11880, 12240, 12600, 12960, 13320, 13680, 14040,
       14400, 14760, 15120, 15480, 15840, 16200, 16560, 16920, 17280, 17640])
Coordinates:
  * time_counter  (time_counter) int64 400B 0 360 720 1080 ... 16920 17280 17640
Attributes:
    axis:           T
    standard_name:  time
    long_name:      Time axis
    time_origin:    1900-01-01 00:00:00
    bounds:         time_counter_bounds
    units:          days since 0001-12-30 00:00:00.000000
    calendar:       360_day
```
58 changes: 58 additions & 0 deletions README.md
@@ -107,3 +107,61 @@ NB: upstream, Guillaume's code to obtain annual means
- comp: number/ratio of components to accelerate
- steps: size of the jump (in years if ye = True, otherwise in months)
- path: path to the simulation file

## Steps for running Spin-Up NEMO

1. Run DINO for 50-100 years. A Slurm script is provided in the NEMO [notes](https://github.com/m2lines/Spinup-NEMO-notes/blob/main/nemo/buildandrun_NEMODINO.md). If more training data is needed, concatenate the simulation outputs `*grid_T.nc` using `ncrcat`, as sketched below.
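
A minimal sketch of the concatenation step (`ncrcat` is part of the NCO toolkit; the input file names are illustrative):

```bash
# Concatenate T-grid outputs from consecutive runs along the record (time) dimension
ncrcat DINO_1m_grid_T_part1.nc DINO_1m_grid_T_part2.nc DINO_1m_grid_T.nc
```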

2. Create a virtual environment, for example with conda, and install the requirements into it using `pip install -r requirements.txt`:
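
A minimal example, assuming conda (the environment name is arbitrary; the notebooks here ran on Python 3.10):

```bash
conda create -n spinup-nemo python=3.10
conda activate spinup-nemo
pip install -r requirements.txt
```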

3. Run the resampling **notebook** on `DINO_1m_grid_T.nc` (see the [run_with_DINO_data](https://github.com/m2lines/Spinup-NEMO/tree/run_with_DINO_data) branch); the notebook is [Resample_ssh.ipynb](https://github.com/m2lines/Spinup-NEMO/blob/resample_dino_data/Notebooks/Resample_ssh.ipynb).
It converts the DINO 2D monthly SSH output `DINO_1m_grid_T.nc` to the annual `DINO_1m_To_1y_grid_T.nc`. Temperature and salinity (3D) are already sampled annually and live in `DINO_1y_grid_T.nc`. These files can then be read in the updated notebook for DINO.
4. Run the updated `Jumper.ipynb` **notebook** to create the projected state.
1. In the Jumper notebook, set `path` to the directory of the NEMO/DINO (grid) data:
![image](https://hackmd.io/_uploads/HkODLLHPyl.png)


5. Prepare the restart file:

Recombine the per-process `mesh_mask_[0000].nc` files and the latest `DINO_[<time>]_restart_[<process>].nc` files using the **[REBUILD_NEMO](https://forge.nemo-ocean.eu/nemo/nemo/-/tree/4.2.0/tools/REBUILD_NEMO)** tools.
The command looks something like:
```
./rebuild_nemo -n ./nam_rebuild /path/to/DINO/restart/file/DINO_00576000_restart 36
./rebuild_nemo -n ./nam_rebuild /path/to/DINO/mesh_mask 36
```
Where 36 in the above corresponds to the number of MPI processes.

6. In the directory that holds your NEMO data, create a new restart file by running `main_restart.py`:
```bash
python main_restart.py \
    --restart_path /path/to/EXP00/ \
    --radical DINO_[<time>]_restart \
    --mask_file /full/path/to/EXP00/mesh_mask.nc \
    --prediction_path /full/path/to/directory/simus_predicted/
```
where:
```
--radical : refers to the prefix of the restart file.
<time> : corresponds to the most recent restart file time.
```
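
For example, if the latest rebuilt restart is `DINO_00576000_restart.nc` (the time stamp here is illustrative, reusing the rebuild example above):

```bash
python main_restart.py \
    --restart_path /path/to/EXP00/ \
    --radical DINO_00576000_restart \
    --mask_file /full/path/to/EXP00/mesh_mask.nc \
    --prediction_path /full/path/to/directory/simus_predicted/
```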


The `main_restart.py` script has been modified to work on DINO data (see the [run_with_DINO_data](https://github.com/m2lines/Spinup-NEMO/tree/run_with_DINO_data) branch). It creates an updated restart file with the same name as the original, but with `NEW` prepended.


7. Within the NEMO repository, make a copy of the DINO experiment directory, then delete the old NEMO output files (mesh mask, restart files, grid files, etc.) from the original experiment directory. The copy serves as a backup, since data is overwritten on each run.
8. Copy `mesh_mask_<proc_id>.nc` and `DINO_[<time>]_restart_<proc_id>.nc` to the original experiment directory.
9. Modify the `namelist_cfg` file: under `&namrun`, amend the following (an illustrative snippet follows the list):

- `nn_it000` (the first timestep of the new run, i.e. the last timestep of the previous run + 1)
- `nn_itend` (the final timestep)
- `cn_ocerst_in` (the restart-file name syntax; set it to match the latest restart file)
- `ln_rstart` (start from rest (`.false.`) or from a restart file (`.true.`); therefore `.true.`)
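
A minimal sketch of the amended `&namrun` block (the timestep values and restart prefix are illustrative; `00576000` matches the rebuild example above):

```
&namrun
   nn_it000     = 576001                    ! first timestep = last timestep of previous run + 1
   nn_itend     = 1152000                   ! final timestep
   cn_ocerst_in = "DINO_00576000_restart"   ! prefix of the latest (rebuilt) restart file
   ln_rstart    = .true.                    ! start from a restart file
/
```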

![image](https://hackmd.io/_uploads/HJtsvsCPJe.png)

10. Restart DINO with the updated restart file.
21 changes: 12 additions & 9 deletions lib/forecast.py
@@ -43,7 +43,7 @@ def load_ts(file, var):
dico = np.load(file + f"/{var}.npz", allow_pickle=True)
dico = {key: dico[key] for key in dico.keys()}
df = pd.DataFrame(
dico["ts"], columns=[f"{var}-{i+1}" for i in range(np.shape(dico["ts"])[1])]
dico["ts"], columns=[f"{var}-{i + 1}" for i in range(np.shape(dico["ts"])[1])]
)
with open(file + f"/pca_{var}", "rb") as file:
dico["pca"] = pickle.load(file)
@@ -92,7 +92,7 @@ def __init__(
term (str) : The term for the simulation.
start (int, optional) : The start index for data slicing. Defaults to 0.
end (int, optional) : The end index for data slicing. Defaults to None.
comp (float, optional) : The comp value for the simulation. Defaults to 0.9.
comp (float, optional) : The comp value for the simulation. Defaults to 0.9. Percentage of explained variance.
ye (bool, optional) : Flag indicating whether to use ye or not. Defaults to True.
ssca (bool, optional) : Flag indicating whether ssca is used. Defaults to False. #Not used in this class
"""
@@ -126,7 +126,7 @@ def getData(path, term):
"""
grid = []
for file in sorted(os.listdir(path)):
if term + "." in file: # add param!=""
if term[1] in file: # add param!=""
grid.append(path + "/" + file)
return grid

@@ -137,13 +137,15 @@ def getAttributes(self):
array = xr.open_dataset(
self.files[-1], decode_times=False, chunks={"time": 200, "x": 120}
)
self.time_dim = list(array.dims)[0]
self.time_dim = list(array.dims)[
0
] # Seems that index at 0 is 'nav_lat' TODO: Investigate this
self.y_size = array.sizes["y"]
self.x_size = array.sizes["x"]
if "deptht" in array[self.term].dims:
if "deptht" in array[self.term[0]].dims:
self.z_size = array.sizes["deptht"]
self.shape = (self.z_size, self.y_size, self.x_size)
elif "olevel" in array[self.term].dims:
elif "olevel" in array[self.term[0]].dims:
self.z_size = array.sizes["olevel"]
self.shape = (self.z_size, self.y_size, self.x_size)
else:
@@ -183,7 +185,7 @@ def loadFile(self, file):
array = xr.open_dataset(
file, decode_times=False, chunks={"time": 200, "x": 120}
)
array = array[self.term]
array = array[self.term[0]]
self.len = self.len + array.sizes[self.time_dim]
# print(self.len)
# if self.ye:
@@ -232,7 +234,7 @@ def getSSCA(self, array):
Parameters:
array (xarray.Dataset): The last dataset containing simulation data in the simulation file.
"""
array = array[self.term].values
array = array[self.term[0]].values
n = np.shape(array)[0] // 12 * 12
array = array[-n:]
ssca = np.array(array).reshape(
@@ -296,6 +298,7 @@ def getPC(self, n):
Returns:
(numpy.ndarray): The principal component map.
"""
# map_ = np.zeros((np.product(self.shape)), dtype=float)
map_ = np.zeros((np.prod(self.shape)), dtype=float)
map_[~self.bool_mask] = np.nan
map_[self.bool_mask] = self.pca.components_[n]
@@ -464,7 +467,7 @@ def defineGP():
) # 0.1**2*RBF(length_scale=0.01) + 2*WhiteKernel(noise_level=1)
kernel = irregularities_kernel + noise_kernel + long_term_trend_kernel
return GaussianProcessRegressor(
kernel=kernel, normalize_y=False, n_restarts_optimizer=8
kernel=kernel, normalize_y=True, n_restarts_optimizer=8, random_state=42
)

def Forecast(self, train_len, steps, jobs=1):
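
For context, a minimal runnable sketch of the reproducible GP configuration introduced in `defineGP()` above (scikit-learn; the kernel hyperparameters and data here are illustrative, not the toolbox's exact setup):

```python
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, DotProduct

# Kernel shape mirrors the commented hint in defineGP():
# irregularities (RBF) + noise (WhiteKernel) + long-term trend (DotProduct)
kernel = 0.1**2 * RBF(length_scale=1.0) + WhiteKernel(noise_level=1.0) + DotProduct()

gp = GaussianProcessRegressor(
    kernel=kernel,
    normalize_y=True,         # standardise targets before fitting
    n_restarts_optimizer=8,   # re-run hyperparameter optimisation from 8 starts
    random_state=42,          # fixed seed makes those restarts reproducible
)

# Illustrative yearly time series: fit on 50 years, forecast 10 more
t = np.arange(50, dtype=float).reshape(-1, 1)
y = 0.01 * t.ravel() + np.random.default_rng(0).normal(0.0, 0.05, 50)
gp.fit(t, y)
mean, std = gp.predict(np.arange(50, 60, dtype=float).reshape(-1, 1), return_std=True)
```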
14 changes: 9 additions & 5 deletions lib/restart.py
@@ -26,6 +26,8 @@ def getRestartFiles(path, radical, puzzled=False):
return sorted(glob.glob(path + radical + "_*.nc"))  # lists have no .sorted() method; use sorted()
else:
try:
print("Path: ", path)
print(path + radical + ".nc")
return glob.glob(path + radical + ".nc")[0]
except IndexError:
print(
@@ -47,7 +49,8 @@ def getMaskFile(maskpath, restart):
"""
mask = xr.open_dataset(maskpath, decode_times=False)
# Harmonizing the structure of mask with that of restart
mask = mask.swap_dims(dims_dict={"z": "nav_lev", "t": "time_counter"})
# Isaac, default configuration for DINO mask is the same as the restart file
# mask = mask.swap_dims(dims_dict={"z": "nav_lev","t":"time_counter"})
mask["time_counter"] = restart["time_counter"]
return mask

@@ -132,7 +135,7 @@ def load_predictions(restart, dirpath="/data/mtissot/simus_predicted"):
## Loading new SSH in directly affected variables
## (loading zos.npy, selecting last snapshot, then converting to fitting xarray.DataArray, and cleaning the nans)
try:
zos = np.load(dirpath + "/zos.npy")[-1:]
zos = np.load(dirpath + "/pred_zos.npy")[-1:]
restart["sshn"] = xr.DataArray(
zos, dims=("time_counter", "y", "x"), name="sshn"
).fillna(0)
@@ -144,7 +147,7 @@ def load_predictions(restart, dirpath="/data/mtissot/simus_predicted"):
## Loading new SO in directly affected variables
## (loading so.npy, selecting last snapshot, then converting to fitting xarray.DataArray, and cleaning the nans)
try:
so = np.load(dirpath + "/so.npy")[-1:]
so = np.load(dirpath + "/pred_so.npy")[-1:]
restart["sn"] = xr.DataArray(
so, dims=("time_counter", "nav_lev", "y", "x"), name="sn"
).fillna(0)
@@ -156,7 +159,7 @@ def load_predictions(restart, dirpath="/data/mtissot/simus_predicted"):
## Loading new THETAO in directly affected variables
## (loading thetao.npy, selecting last snapshot, then converting to fitting xarray.DataArray, and cleaning the nans)
try:
thetao = np.load(dirpath + "/thetao.npy")[-1:]
thetao = np.load(dirpath + "/pred_thetao.npy")[-1:]
restart["tn"] = xr.DataArray(
thetao, dims=("time_counter", "nav_lev", "y", "x"), name="tn"
).fillna(0)
@@ -265,7 +268,8 @@ def update_e3t(restart, mask):
Returns:
e3t (numpy.ndarray) : Updated array of z-axis cell thicknesses.
"""
e3t_ini = restart.e3t_ini # initial z axis cell's thickness on grid T - (t,z,y,x)
# e3t_ini = restart.e3t_ini #Changed
e3t_ini = mask.e3t_0 # initial z axis cell's thickness on grid T - (t,z,y,x)
ssmask = mask.tmask.max(
dim="nav_lev"
) # continent mask - (t,y,x)
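
For reference, a minimal sketch of the renamed prediction-loading pattern above (the `pred_<var>.npy` prefix is the convention introduced in this PR; the paths and restart file name are illustrative):

```python
import numpy as np
import xarray as xr

# Keep only the last predicted snapshot, shape (1, y, x)
zos = np.load("simus_predicted/pred_zos.npy")[-1:]

restart = xr.open_dataset("DINO_00576000_restart.nc", decode_times=False)

# Write it into the restart field; NaNs over land become zeros,
# matching the .fillna(0) calls in load_predictions()
restart["sshn"] = xr.DataArray(
    zos, dims=("time_counter", "y", "x"), name="sshn"
).fillna(0)
```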