Skip to content

Commit

Permalink
Allow download of individual files when using S3
Browse files Browse the repository at this point in the history
  • Loading branch information
radosuav committed Jan 15, 2025
1 parent bd84e21 commit 942a7f2
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
15 changes: 8 additions & 7 deletions creodias_finder/creodias_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import errno
import pathlib
from pathlib import Path
import re
from typing import Any, Dict, List, Union

import boto3.session
Expand Down Expand Up @@ -63,6 +64,7 @@ def download_product(
bucket: str,
product_key: Union[pathlib.Path, str],
destination: Union[pathlib.Path, str],
file_filter: str,
) -> None:
"""Download all files beginning with product_key into directory 'destination'.
Downloaded files will have their prefixes stripped.
Expand All @@ -72,6 +74,7 @@ def download_product(
bucket (str): bucket
product_key (Union[pathlib.Path, str]): product key - path
destination (Union[pathlib.Path, str]): directory to put the product
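file_filter (str): regular expression (applied with re.search to each file path relative to product_key) selecting which product files to download; an empty string downloads every file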
"""
if isinstance(destination, Path):
dest = Path(destination)
Expand All @@ -92,13 +95,11 @@ def download_product(
files: List[Path] = []
for s3_key in objects_list:
file_path = s3_key["Key"].replace(product_key, "", 1)
if file_path:
if file_path.endswith("/"):
dest.joinpath(Path(file_path.lstrip("/"))).mkdir(
parents=True, exist_ok=True
)
else:
files.append(Path(file_path.lstrip("/")))
if file_path and not file_path.endswith("/"):
if file_filter:
if not re.search(file_filter, file_path):
continue
files.append(Path(file_path.lstrip("/")))

for item in files:
dest.joinpath(item).parent.mkdir(parents=True, exist_ok=True)
Expand Down
8 changes: 6 additions & 2 deletions creodias_finder/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def download(uid, username, password, outfile, show_progress=True, token=None):
_download_raw_data(url, outfile, show_progress)


def download_from_s3(source_path, outdir, s3_client=None):
def download_from_s3(source_path, outdir, s3_client=None, file_filter=""):
"""Download a file from CreoDIAS S3 storage to the given location
(Works only when used from a CreoDIAS vm)
Expand All @@ -53,6 +53,10 @@ def download_from_s3(source_path, outdir, s3_client=None):
CreoDIAS path to S3 object
outdir:
Path to write the product folder
s3_client:
S3 client, if None the default one is used
file_filter:
Regular expression selecting which product files to download; the default empty string downloads all files
"""
import boto3
from botocore.client import Config
Expand All @@ -77,7 +81,7 @@ def download_from_s3(source_path, outdir, s3_client=None):
source_path = source_path.removeprefix("/eodata/")
product_folder = source_path.split("/")[-1]
storage_client.download_product(
"DIAS", source_path, os.path.join(outdir, product_folder)
"DIAS", source_path, os.path.join(outdir, product_folder), file_filter
)


Expand Down

0 comments on commit 942a7f2

Please sign in to comment.