Skip to content

Commit

Permalink
CI: GDAL performance tuning by tweaking rasterio's env
Browse files Browse the repository at this point in the history
  • Loading branch information
remi-braun committed Dec 17, 2024
1 parent 5b6f3c4 commit 9cdc2b8
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- FIX: Fix the computation of parametric spectral indices [#193](https://github.com/sertit/eoreader/issues/193)
- OPTIM: Cache the access to any archived file list, as this operation is expensive when done with large archives stored on the cloud (and thus better done only once).
- CI: Remove useless verbosity in CI
- CI: GDAL performance tuning by tweaking `rasterio`'s env
- INTERNAL: Switch from `setup.py` to `pyproject.toml` [#109](https://github.com/sertit/eoreader/issues/109)
- INTERNAL: Use `ruff` instead of `black` + `flake8` + `isort`
- DOC: Update `conf.py` (remove useless hunks and set Sphinx 7 as base)
Expand Down
37 changes: 35 additions & 2 deletions ci/scripts_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,12 @@ def set_env():
os.environ["CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD"] = "1"

LOGGER.info("Using Dask and creating Dask client.")
with dask.get_or_create_dask_client(processes=False) as client:
with (
tempenv.TemporaryEnvironment(
{"CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD": "1"}
),
dask.get_or_create_dask_client(processes=False) as client,
):
# TODO: also test with processes=True
client.run(set_env)
function(*_args, **_kwargs)
Expand Down Expand Up @@ -148,7 +153,35 @@ def broken_s2_path():


def s3_env(*args, **kwargs):
    """
    Call ``unistra.s3_env`` with the CI S3 switch, inside a GDAL-tuned ``rasterio.Env``.

    The GDAL/CPL options set below follow the cloud-access recommendations linked
    in the comments underneath this docstring.

    Args:
        *args: Positional arguments forwarded unchanged to ``unistra.s3_env``.
        **kwargs: Keyword arguments forwarded unchanged to ``unistra.s3_env``.
            ``use_s3_env_var`` is always set to ``CI_EOREADER_S3`` by this wrapper.

    Returns:
        Whatever ``unistra.s3_env`` returns.
    """
    # See https://developmentseed.org/titiler/advanced/performance_tuning/#recommended-configuration-for-dynamic-tiling
    # And https://gdalcubes.github.io/source/concepts/config.html#recommended-settings-for-cloud-access

    # Kept as function-local imports, as in the original block
    import psutil
    import rasterio

    def ko_to_bytes(value):
        """Convert kilobytes (10^3 bytes) into a whole number of bytes."""
        return int(value * 1e3)

    def mo_to_bytes(value):
        """Convert megabytes (10^6 bytes) into a whole number of bytes."""
        return int(value * 1e6)

    # 30% of the currently available RAM, expressed in MiB
    ram_info = psutil.virtual_memory()
    gdal_cachemax_30 = int(0.3 * ram_info.available / 1024 / 1024)

    # GDAL reads GDAL_CACHEMAX as megabytes only while the value is below 100000;
    # above that it is taken as a number of bytes. Clamp so that a host with a huge
    # amount of free RAM does not silently end up with a cache of a few kilobytes.
    gdal_cachemax_30 = min(gdal_cachemax_30, 99_999)
    LOGGER.debug(f"gdal_cachemax_30={gdal_cachemax_30}")

    # NOTE(review): the Env below is only active while `unistra.s3_env(...)` itself
    # runs. If that call returns a decorator / wrapped function (not visible from
    # here), these options are no longer set when the wrapped function is eventually
    # executed - TODO confirm that this scope is the intended one.
    with rasterio.Env(
        GDAL_DISABLE_READDIR_ON_OPEN=True,
        GDAL_CACHEMAX=gdal_cachemax_30,
        CPL_VSIL_CURL_CACHE_SIZE=mo_to_bytes(10),
        VSI_CACHE=True,
        VSI_CACHE_SIZE=mo_to_bytes(5),
        GDAL_HTTP_MULTIPLEX=True,
        GDAL_INGESTED_BYTES_AT_OPEN=ko_to_bytes(32),
        GDAL_HTTP_VERSION=2,
        GDAL_HTTP_MERGE_CONSECUTIVE_RANGES="YES",
        GDAL_NUM_THREADS="ALL_CPUS",
    ):
        return unistra.s3_env(*args, use_s3_env_var=CI_EOREADER_S3, **kwargs)


def compare(to_be_checked, ref, topic):
Expand Down

0 comments on commit 9cdc2b8

Please sign in to comment.