diff --git a/creodias_finder/creodias_storage.py b/creodias_finder/creodias_storage.py index c5ba6d4..d1a696a 100644 --- a/creodias_finder/creodias_storage.py +++ b/creodias_finder/creodias_storage.py @@ -4,6 +4,7 @@ import errno import pathlib from pathlib import Path +import re from typing import Any, Dict, List, Union import boto3.session @@ -63,6 +64,7 @@ def download_product( bucket: str, product_key: Union[pathlib.Path, str], destination: Union[pathlib.Path, str], + file_filter: str, ) -> None: """Download all files beginning with product_key into directory 'destination'. Downloaded files will have their prefixes stripped. @@ -72,6 +74,7 @@ def download_product( bucket (str): bucket product_key (Union[pathlib.Path, str]): product key - path destination (Union[pathlib.Path, str]): directory to put the product + file_filter (str): regex expression to filter which product files to download """ if isinstance(destination, Path): dest = Path(destination) @@ -92,13 +95,11 @@ def download_product( files: List[Path] = [] for s3_key in objects_list: file_path = s3_key["Key"].replace(product_key, "", 1) - if file_path: - if file_path.endswith("/"): - dest.joinpath(Path(file_path.lstrip("/"))).mkdir( - parents=True, exist_ok=True - ) - else: - files.append(Path(file_path.lstrip("/"))) + if file_path and not file_path.endswith("/"): + if file_filter: + if not re.search(file_filter, file_path): + continue + files.append(Path(file_path.lstrip("/"))) for item in files: dest.joinpath(item).parent.mkdir(parents=True, exist_ok=True) diff --git a/creodias_finder/download.py b/creodias_finder/download.py index 54af6bf..9832719 100644 --- a/creodias_finder/download.py +++ b/creodias_finder/download.py @@ -43,7 +43,7 @@ def download(uid, username, password, outfile, show_progress=True, token=None): _download_raw_data(url, outfile, show_progress) -def download_from_s3(source_path, outdir, s3_client=None): +def download_from_s3(source_path, outdir, s3_client=None, file_filter=""): """Download a file from CreoDIAS S3 storage to the given location (Works only when used from a CreoDIAS vm) @@ -53,6 +53,10 @@ def download_from_s3(source_path, outdir, s3_client=None): CreoDIAS path to S3 object target_path: Path to write the product folder + s3_client: + S3 client, if None the default one is used + file_filter: + Regex expression to filter which product files to download """ import boto3 from botocore.client import Config @@ -77,7 +81,7 @@ def download_from_s3(source_path, outdir, s3_client=None): source_path = source_path.removeprefix("/eodata/") product_folder = source_path.split("/")[-1] storage_client.download_product( - "DIAS", source_path, os.path.join(outdir, product_folder) + "DIAS", source_path, os.path.join(outdir, product_folder), file_filter )