From 71e8c3762286c5a2e1a391d6bb923928533aae11 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 29 Nov 2024 09:47:13 +0100 Subject: [PATCH 1/5] add bofek module --- nlmod/read/__init__.py | 1 + nlmod/read/ahn.py | 6 ++-- nlmod/read/bofek.py | 57 +++++++++++++++++++++++++++++++++ nlmod/util.py | 2 ++ pyproject.toml | 10 +++++- tests/test_005_external_data.py | 11 +++++++ 6 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 nlmod/read/bofek.py diff --git a/nlmod/read/__init__.py b/nlmod/read/__init__.py index 5e605993..4f273018 100644 --- a/nlmod/read/__init__.py +++ b/nlmod/read/__init__.py @@ -3,6 +3,7 @@ administrative, ahn, bgt, + bofek, bro, brp, geotop, diff --git a/nlmod/read/ahn.py b/nlmod/read/ahn.py index 3c43459d..8922bf50 100644 --- a/nlmod/read/ahn.py +++ b/nlmod/read/ahn.py @@ -32,8 +32,8 @@ def get_ahn(ds=None, identifier="AHN4_5M_M", method="average", extent=None, **kw Parameters ---------- - ds : xr.Dataset - dataset with the model information. + ds : xr.Dataset, optional + dataset with the model information. If None the extent is used. identifier : str, optional Possible values for the different AHN-versions are (casing is important): AHN1: 'AHN1_5M' @@ -49,7 +49,7 @@ def get_ahn(ds=None, identifier="AHN4_5M_M", method="average", extent=None, **kw nlmod.resample.structured_da_to_ds for possible values. The default is 'average'. extent : list, tuple or np.array, optional - extent. The default is None. + extent xmin, xmax, ymin, ymax. Only used if ds is None. The default is None. Returns ------- diff --git a/nlmod/read/bofek.py b/nlmod/read/bofek.py new file mode 100644 index 00000000..76273bb4 --- /dev/null +++ b/nlmod/read/bofek.py @@ -0,0 +1,57 @@ +import requests, zipfile, io +import geopandas as gpd +from pathlib import Path +from nlmod import NLMOD_DATADIR, cache, dims, util + + +@cache.cache_pickle +def get_gdf_bofek(ds=None, extent=None): + """get geodataframe of bofek 2020 wihtin the extent of the model. It does so by + downloading a zip file (> 100 MB) and extracting the relevant geodatabase. Therefore + the function can be slow, ~35 seconds depending on your internet connection. + + Parameters + ---------- + ds : xr.DataSet, optional + dataset containing relevant model information. The default is None. + extent : list, tuple or np.array, optional + extent xmin, xmax, ymin, ymax. Only used if ds is None. The default is None. + + Returns + ------- + gdf_bofek : GeoDataframe + Bofek2020 geodataframe with a column 'BOFEK2020' containing the bofek cluster + codes + """ + + import py7zr + + if extent is None and ds is not None: + extent = dims.get_extent(ds) + + # set paths + tmpdir = Path(NLMOD_DATADIR) + fname_7z = tmpdir / 'BOFEK2020_GIS.7z' + fname_bofek = tmpdir / 'GIS' / 'BOFEK2020_bestanden' / 'BOFEK2020.gdb' + bofek_zip_url = 'https://www.wur.nl/nl/show/bofek-2020-gis-1.htm' + + if not fname_bofek.exists(): + # download zip + r = requests.get(bofek_zip_url) + z = zipfile.ZipFile(io.BytesIO(r.content)) + + # extract 7z + z.extractall(tmpdir) + with py7zr.SevenZipFile(fname_7z, mode='r') as z: + z.extract(targets=['GIS/BOFEK2020_bestanden/BOFEK2020.gdb'], path=tmpdir, recursive=True) + + # clean up + Path(fname_7z).unlink() + + # read geodatabase + gdf_bofek = gpd.read_file(fname_bofek) + + # slice to extent + gdf_bofek = util.gdf_within_extent(gdf_bofek, extent) + + return gdf_bofek \ No newline at end of file diff --git a/nlmod/util.py b/nlmod/util.py index a50bf021..bbc3113b 100644 --- a/nlmod/util.py +++ b/nlmod/util.py @@ -719,6 +719,8 @@ def gdf_within_extent(gdf, extent): raise TypeError(f"Only accepts single geometry type not {geom_types}") elif geom_types[0] == "Polygon": gdf = gpd.overlay(gdf, gdf_extent) + elif geom_types[0] == "MultiPolygon": + gdf = gpd.overlay(gdf, gdf_extent) elif geom_types[0] == "LineString": gdf = gpd.sjoin(gdf, gdf_extent) elif geom_types[0] == "Point": diff --git a/pyproject.toml b/pyproject.toml index 8e46547b..6c9cc08b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,15 @@ repository = "https://github.com/gwmod/nlmod" documentation = "https://nlmod.readthedocs.io/en/latest/" [project.optional-dependencies] -full = ["nlmod[knmi]", "gdown", "geocube", "rasterstats", "contextily", "scikit-image"] +full = [ + "nlmod[knmi]", + "gdown", + "geocube", + "rasterstats", + "contextily", + "scikit-image", + "py7zr" +] knmi = ["h5netcdf", "nlmod[grib]"] grib = ["cfgrib", "ecmwflibs"] test = ["pytest>=7", "pytest-cov", "pytest-dependency"] diff --git a/tests/test_005_external_data.py b/tests/test_005_external_data.py index f185c237..5bf2f1e9 100644 --- a/tests/test_005_external_data.py +++ b/tests/test_005_external_data.py @@ -131,3 +131,14 @@ def test_get_surface_water_ghb(): def test_get_brp(): extent = [116500, 120000, 439000, 442000] nlmod.read.brp.get_percelen(extent) + + +# disable because slow (~35 seconds depending on internet connection) +# def test_get_bofek(): +# # model with sea +# ds = test_001_model.get_ds_from_cache("basic_sea_model") + +# # add knmi recharge to the model dataset +# gdf_bofek = nlmod.read.bofek.get_gdf_bofek(ds) + +# assert not gdf_bofek.empty, "Bofek geodataframe is empty" From de35527f435c96de54e2166a6227eec33c6b9753 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 29 Nov 2024 16:24:50 +0100 Subject: [PATCH 2/5] add bofek to gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 52f5aa9e..3d4dd62d 100644 --- a/.gitignore +++ b/.gitignore @@ -145,7 +145,7 @@ cython_debug/ nlmod/bin/* !nlmod/bin/ !nlmod/bin/mp7_2_002_provisional -flowchartnlmod.pptx +nlmod/data/GIS tests/data/* !tests/data/**/ From 4d75a492589d2faff9e5e560bf2e58f0c7fcee48 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 16 Dec 2024 10:54:52 +0100 Subject: [PATCH 3/5] Download bofek2020 --- nlmod/data/bofek/.gitkeep | 0 nlmod/gwf/recharge.py | 7 +++++-- nlmod/read/bofek.py | 25 ++++++++++++++++++++++--- 3 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 nlmod/data/bofek/.gitkeep diff --git a/nlmod/data/bofek/.gitkeep b/nlmod/data/bofek/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/nlmod/gwf/recharge.py b/nlmod/gwf/recharge.py index 7f54708b..ba85167a 100644 --- a/nlmod/gwf/recharge.py +++ b/nlmod/gwf/recharge.py @@ -1,4 +1,5 @@ import logging +import numbers import flopy import numpy as np @@ -370,7 +371,7 @@ def ds_to_uzf( # generate packagedata surfdep = _get_value_from_ds_datavar(ds, "surfdep", surfdep, return_da=True) - vks = _get_value_from_ds_datavar(ds, "vk", vks, return_da=True) + vks = _get_value_from_ds_datavar(ds, "vks", vks, return_da=True) thtr = _get_value_from_ds_datavar(ds, "thtr", thtr, return_da=True) thts = _get_value_from_ds_datavar(ds, "thts", thts, return_da=True) thti = _get_value_from_ds_datavar(ds, "thti", thti, return_da=True) @@ -496,7 +497,9 @@ def ds_to_uzf( # account for surfdep, as this decreases the height of the top of the upper cell # otherwise modflow may return an error thickness = calculate_thickness(ds) - thickness = [thickness[x] - landflag[x] * surfdep / 2 for x in cellids_obs] + if isinstance(surfdep, numbers.Number): + surfdep = xr.ones_like(thickness) * surfdep + thickness = [thickness[x] - landflag[x] * surfdep[x] / 2 for x in cellids_obs] # create observations list obsdepths = [] diff --git a/nlmod/read/bofek.py b/nlmod/read/bofek.py index 76273bb4..95d59541 100644 --- a/nlmod/read/bofek.py +++ b/nlmod/read/bofek.py @@ -2,6 +2,10 @@ import geopandas as gpd from pathlib import Path from nlmod import NLMOD_DATADIR, cache, dims, util +import shutil +import logging + +logger = logging.getLogger(__name__) @cache.cache_pickle @@ -33,23 +37,38 @@ def get_gdf_bofek(ds=None, extent=None): tmpdir = Path(NLMOD_DATADIR) fname_7z = tmpdir / 'BOFEK2020_GIS.7z' fname_bofek = tmpdir / 'GIS' / 'BOFEK2020_bestanden' / 'BOFEK2020.gdb' + fname_bofek_geojson = tmpdir / 'bofek' / 'BOFEK2020.geojson' bofek_zip_url = 'https://www.wur.nl/nl/show/bofek-2020-gis-1.htm' - if not fname_bofek.exists(): + if not fname_bofek_geojson.exists(): # download zip + logger.info('Downloading BOFEK2020 GIS data (~35 seconds)') r = requests.get(bofek_zip_url) z = zipfile.ZipFile(io.BytesIO(r.content)) # extract 7z + logger.debug('Extracting zipped BOFEK2020 GIS data') z.extractall(tmpdir) with py7zr.SevenZipFile(fname_7z, mode='r') as z: z.extract(targets=['GIS/BOFEK2020_bestanden/BOFEK2020.gdb'], path=tmpdir, recursive=True) # clean up + logger.debug('Remove zip files') Path(fname_7z).unlink() - # read geodatabase - gdf_bofek = gpd.read_file(fname_bofek) + # read geodatabase + logger.debug('convert geodatabase to geojson') + gdf_bofek = gpd.read_file(fname_bofek) + + # save to geojson + gdf_bofek.to_file(fname_bofek_geojson, driver='GeoJSON') + + # remove geodatabase + shutil.rmtree(fname_bofek) + + # read geojson + logger.debug(f'read bofek2020 geojson from {fname_bofek_geojson}') + gdf_bofek = gpd.read_file(fname_bofek_geojson) # slice to extent gdf_bofek = util.gdf_within_extent(gdf_bofek, extent) From e3164a2ffe110a9fd922762418830684a0856c9d Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 16 Dec 2024 11:00:18 +0100 Subject: [PATCH 4/5] add downloaded bofek data to gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3d4dd62d..5edf9e93 100644 --- a/.gitignore +++ b/.gitignore @@ -145,7 +145,7 @@ cython_debug/ nlmod/bin/* !nlmod/bin/ !nlmod/bin/mp7_2_002_provisional -nlmod/data/GIS +nlmod/data/bofek/* tests/data/* !tests/data/**/ From 36d0418fc936b1a4ef2e5f21e58625288a8c5c5a Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 16 Dec 2024 11:22:57 +0100 Subject: [PATCH 5/5] codacy --- nlmod/read/bofek.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/nlmod/read/bofek.py b/nlmod/read/bofek.py index 95d59541..65b06eb1 100644 --- a/nlmod/read/bofek.py +++ b/nlmod/read/bofek.py @@ -1,18 +1,24 @@ -import requests, zipfile, io -import geopandas as gpd +import io +import logging +import shutil +import zipfile from pathlib import Path + +import geopandas as gpd +import requests + from nlmod import NLMOD_DATADIR, cache, dims, util -import shutil -import logging logger = logging.getLogger(__name__) @cache.cache_pickle def get_gdf_bofek(ds=None, extent=None): - """get geodataframe of bofek 2020 wihtin the extent of the model. It does so by - downloading a zip file (> 100 MB) and extracting the relevant geodatabase. Therefore - the function can be slow, ~35 seconds depending on your internet connection. + """Get geodataframe of bofek 2020 wihtin the extent of the model. + + It does so by downloading a zip file (> 100 MB) and extracting the relevant + geodatabase. Therefore the function can be slow, ~35 seconds depending on your + internet connection. Parameters ---------- @@ -27,7 +33,6 @@ def get_gdf_bofek(ds=None, extent=None): Bofek2020 geodataframe with a column 'BOFEK2020' containing the bofek cluster codes """ - import py7zr if extent is None and ds is not None: @@ -43,14 +48,16 @@ def get_gdf_bofek(ds=None, extent=None): if not fname_bofek_geojson.exists(): # download zip logger.info('Downloading BOFEK2020 GIS data (~35 seconds)') - r = requests.get(bofek_zip_url) + r = requests.get(bofek_zip_url, timeout=3600) z = zipfile.ZipFile(io.BytesIO(r.content)) # extract 7z logger.debug('Extracting zipped BOFEK2020 GIS data') z.extractall(tmpdir) with py7zr.SevenZipFile(fname_7z, mode='r') as z: - z.extract(targets=['GIS/BOFEK2020_bestanden/BOFEK2020.gdb'], path=tmpdir, recursive=True) + z.extract(targets=['GIS/BOFEK2020_bestanden/BOFEK2020.gdb'], + path=tmpdir, + recursive=True) # clean up logger.debug('Remove zip files')