diff --git a/CI/SCRIPTS/test_archives.py b/CI/SCRIPTS/test_archives.py
index 3415618..ddb5d0f 100644
--- a/CI/SCRIPTS/test_archives.py
+++ b/CI/SCRIPTS/test_archives.py
@@ -8,6 +8,7 @@ from sertit import archives, ci, files, path, s3, vectors
 
 
+@s3_env
 def test_archive(tmp_path):
     """Test extracting functions"""
     # Archives
@@ -31,6 +32,11 @@ def test_archive(tmp_path):
 
     # Extract
    extracted_dirs = archives.extract_files(arch, tmp_path, overwrite=True)
+
+    # Test
+    for ex_dir in extracted_dirs:
+        ci.assert_dir_equal(core_dir, ex_dir)
+
     archives.extract_files([zip2_file], tmp_path, overwrite=False)  # Already existing
 
     # Test
@@ -54,7 +60,7 @@ def test_archive(tmp_path):
     # Add to zip
     zip_out = zip2_file if path.is_cloud_path(zip2_file) else archive_base + ".zip"
     core_copy = files.copy(core_dir, os.path.join(tmp_path, "core2"))
-    zip_out = archives.add_to_zip(zip_out, core_copy)
+    zip_out = archives.add_to_zip(s3.download(zip_out, tmp_path), core_copy)
 
     # Extract
     unzip_out = os.path.join(tmp_path, "out")
diff --git a/sertit/archives.py b/sertit/archives.py
index 2115071..b739e5f 100644
--- a/sertit/archives.py
+++ b/sertit/archives.py
@@ -285,10 +285,14 @@ def archive(
     archive_path = AnyPath(archive_path)
     folder_path = AnyPath(folder_path)
 
+    # with zipfile.ZipFile(archive_path, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
+    #     for f in folder_path.glob("**"):
+    #         zipf.write(f, f.relative_to(folder_path.name))
+
     tmp_dir = None
     if path.is_cloud_path(folder_path):
         tmp_dir = tempfile.TemporaryDirectory()
-        folder_path = folder_path.download_to(tmp_dir.name)
+        folder_path = s3.download(folder_path, tmp_dir.name)
 
     # Shutil make_archive needs a path without extension
     archive_base = os.path.splitext(archive_path)[0]
@@ -304,7 +308,12 @@ def archive(
     if tmp_dir is not None:
         tmp_dir.cleanup()
 
-    return AnyPath(archive_fn)
+    try:
+        arch = AnyPath(archive_fn, folder_path.storage_options)
+    except Exception:
+        arch = AnyPath(archive_fn)
+
+    return arch
 
 
 def add_to_zip(
@@ -329,55 +338,54 @@ def add_to_zip(
     """
     zip_path = AnyPath(zip_path)
 
-    # If the zip is on the cloud, cache it (zipfile doesn't like cloud paths)
-    if path.is_cloud_path(zip_path):
-        zip_path = AnyPath(zip_path.fspath)
-
-    # Check if existing zipfile
-    if not zip_path.is_file():
-        raise FileNotFoundError(f"Non existing {zip_path}")
-
-    # Convert to list if needed
-    if not isinstance(dirs_to_add, list):
-        dirs_to_add = [dirs_to_add]
-
-    # Add all folders to the existing zip
-    # Forced to use ZipFile because make_archive only works with one folder and not existing zipfile
-    with open_zipfile(zip_path, "a") as zip_file:
-        progress_bar = tqdm(dirs_to_add)
-        for dir_to_add_path in progress_bar:
-            # Just to be sure, use str instead of Paths
-            if isinstance(dir_to_add_path, Path):
-                dir_to_add = str(dir_to_add_path)
-            elif path.is_cloud_path(dir_to_add_path):
-                dir_to_add = dir_to_add_path.fspath
-            else:
-                dir_to_add = dir_to_add_path
-
-            progress_bar.set_description(
-                f"Adding {os.path.basename(dir_to_add)} to {os.path.basename(zip_path)}"
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # If the zip is on the cloud, cache it (zipfile doesn't like cloud paths)
+        if path.is_cloud_path(zip_path):
+            raise NotImplementedError(
+                "Impossible (for now) to update a zip stored in the cloud!"
             )
-            tmp = tempfile.TemporaryDirectory()
-            if os.path.isfile(dir_to_add):
-                dir_to_add = extract_file(dir_to_add, tmp.name)
-
-            for root, _, files in os.walk(dir_to_add):
-                base_path = os.path.join(dir_to_add, "..")
-
-                # Write dir (in namelist at least)
-                zip_file.write(root, os.path.relpath(root, base_path))
-
-                # Write files
-                for file in files:
-                    zip_file.write(
-                        os.path.join(root, file),
-                        os.path.relpath(
-                            os.path.join(root, file), os.path.join(dir_to_add, "..")
-                        ),
-                    )
-
-            # Clean tmp
-            tmp.cleanup()
+
+        # Check if existing zipfile
+        if not zip_path.is_file():
+            raise FileNotFoundError(f"Non existing {zip_path}")
+
+        # Convert to list if needed
+        if not isinstance(dirs_to_add, list):
+            dirs_to_add = [dirs_to_add]
+
+        # Add all folders to the existing zip
+        # Forced to use ZipFile because make_archive only works with one folder and not existing zipfile
+        with open_zipfile(zip_path, "a") as zip_file:
+            progress_bar = tqdm(dirs_to_add)
+            for dir_to_add_path in progress_bar:
+                # Just to be sure, use str instead of Paths
+                if isinstance(dir_to_add_path, Path):
+                    dir_to_add = str(dir_to_add_path)
+                elif path.is_cloud_path(dir_to_add_path):
+                    dir_to_add = dir_to_add_path.fspath
+                else:
+                    dir_to_add = dir_to_add_path
+
+                progress_bar.set_description(
+                    f"Adding {os.path.basename(dir_to_add)} to {os.path.basename(zip_path)}"
+                )
+                if os.path.isfile(dir_to_add):
+                    dir_to_add = extract_file(dir_to_add, tmp_dir)
+
+                for root, _, files in os.walk(dir_to_add):
+                    base_path = os.path.join(dir_to_add, "..")
+
+                    # Write dir (in namelist at least)
+                    zip_file.write(root, os.path.relpath(root, base_path))
+
+                    # Write files
+                    for file in files:
+                        zip_file.write(
+                            os.path.join(root, file),
+                            os.path.relpath(
+                                os.path.join(root, file), os.path.join(dir_to_add, "..")
+                            ),
+                        )
 
     return zip_path
 
diff --git a/sertit/ci.py b/sertit/ci.py
index 8b000ba..a0280e4 100644
--- a/sertit/ci.py
+++ b/sertit/ci.py
@@ -20,6 +20,7 @@ import filecmp
 import logging
 import pprint
+import tempfile
 from doctest import Example
 from typing import Any, Union
 
@@ -30,7 +31,7 @@ from shapely import force_2d, normalize
 from shapely.testing import assert_geometries_equal
 
-from sertit import AnyPath, files, s3, unistra
+from sertit import AnyPath, files, path, s3, unistra
 from sertit.logs import SU_NAME, deprecation_warning
 from sertit.types import AnyPathStrType, AnyXrDataStructure
 
@@ -381,27 +382,33 @@ def assert_dir_equal(path_1: AnyPathStrType, path_2: AnyPathStrType) -> None:
     assert path_1.is_dir(), f"{path_1} is not a directory!"
     assert path_2.is_dir(), f"{path_2} is not a directory!"
 
-    dcmp = filecmp.dircmp(path_1, path_2)
-    try:
-        assert (
-            dcmp.left_only == []
-        ), f"More files in {path_1}!\n{pprint.pformat(list(dcmp.left_only))}"
-        assert (
-            dcmp.right_only == []
-        ), f"More files in {path_2}!\n{pprint.pformat(list(dcmp.right_only))}"
-    except FileNotFoundError:
-        files_1 = [AnyPath(p).name for p in AnyPath(path_1).iterdir()]
-        files_2 = [AnyPath(p).name for p in AnyPath(path_2).iterdir()]
-
-        for f1 in files_1:
-            assert (
-                f1 in files_2
-            ), f"File missing!\n{f1} not in {pprint.pformat(files_2)}"
+    with tempfile.TemporaryDirectory() as tmpdir:
+        if path.is_cloud_path(path_1):
+            path_1 = s3.download(path_1, tmpdir)
+        if path.is_cloud_path(path_2):
+            path_2 = s3.download(path_2, tmpdir)
 
-        for f2 in files_2:
+        dcmp = filecmp.dircmp(path_1, path_2)
+        try:
+            assert (
+                dcmp.left_only == []
+            ), f"More files in {path_1}!\n{pprint.pformat(list(dcmp.left_only))}"
             assert (
-                f2 in files_1
-            ), f"File missing!\n{f2} not in {pprint.pformat(files_1)}"
+                dcmp.right_only == []
+            ), f"More files in {path_2}!\n{pprint.pformat(list(dcmp.right_only))}"
+        except FileNotFoundError:
+            files_1 = [p.name for p in path_1.iterdir()]
+            files_2 = [p.name for p in path_2.iterdir()]
+
+            for f1 in files_1:
+                assert (
+                    f1 in files_2
+                ), f"File missing!\n{f1} not in {pprint.pformat(files_2)}"
+
+            for f2 in files_2:
+                assert (
+                    f2 in files_1
+                ), f"File missing!\n{f2} not in {pprint.pformat(files_1)}"
 
 
 def assert_geom_equal(
diff --git a/sertit/s3.py b/sertit/s3.py
index aba8660..55cae8f 100644
--- a/sertit/s3.py
+++ b/sertit/s3.py
@@ -288,10 +288,21 @@ def download(src, dst):
         import shutil
 
         dst = AnyPath(dst)
-        downloaded_path = dst / src.name if dst.is_dir() else dst
+        if dst.is_dir() and src.name != dst.name:
+            downloaded_path = dst / src.name
+        else:
+            downloaded_path = dst
 
-        with src.open("rb") as f0, downloaded_path.open("wb") as f1:
-            shutil.copyfileobj(f0, f1)
+        if src.is_file():
+            with src.open("rb") as f0, downloaded_path.open("wb") as f1:
+                shutil.copyfileobj(f0, f1)
+        else:
+            for f in src.glob("**"):
+                dst_file = downloaded_path / f.name
+                if f.is_file():
+                    dst_file.parent.mkdir(parents=True, exist_ok=True)
+                    with f.open("rb") as f0, dst_file.open("wb") as f1:
+                        shutil.copyfileobj(f0, f1)
 
     # cloudpathlib
     elif isinstance(src, CloudPath):
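
Usage sketch (illustration only, not part of the patch): a minimal, local-only example of how the archive/add_to_zip round-trip is expected to behave after this change. The folder names and file contents below are made up; the keyword names folder_path and archive_path are taken from the function body in this diff, not from its public docs. A zip stored in the cloud would now raise NotImplementedError in add_to_zip and must be fetched locally first (e.g. with s3.download).

    import tempfile
    from pathlib import Path

    from sertit import archives

    with tempfile.TemporaryDirectory() as tmp:
        tmp = Path(tmp)

        # Build a small folder to archive (hypothetical content)
        data_dir = tmp / "data"
        data_dir.mkdir()
        (data_dir / "a.txt").write_text("hello")

        # Zip it, then append a second folder to the existing local zip
        zip_path = archives.archive(folder_path=data_dir, archive_path=tmp / "data.zip")

        extra_dir = tmp / "extra"
        extra_dir.mkdir()
        (extra_dir / "b.txt").write_text("world")
        zip_path = archives.add_to_zip(zip_path, extra_dir)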