Skip to content

Commit

Permalink
Make UPath work with all archive functions and dcmp
Browse files: browse the repository at this point in the history
  • Loading branch information
remi-braun committed Dec 13, 2024
1 parent 3637145 commit 8b95aac
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 74 deletions.
8 changes: 7 additions & 1 deletion CI/SCRIPTS/test_archives.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sertit import archives, ci, files, path, s3, vectors


@s3_env
def test_archive(tmp_path):
"""Test extracting functions"""
# Archives
Expand All @@ -31,6 +32,11 @@ def test_archive(tmp_path):

# Extract
extracted_dirs = archives.extract_files(arch, tmp_path, overwrite=True)

# Test
for ex_dir in extracted_dirs:
ci.assert_dir_equal(core_dir, ex_dir)

archives.extract_files([zip2_file], tmp_path, overwrite=False) # Already existing

# Test
Expand All @@ -54,7 +60,7 @@ def test_archive(tmp_path):
# Add to zip
zip_out = zip2_file if path.is_cloud_path(zip2_file) else archive_base + ".zip"
core_copy = files.copy(core_dir, os.path.join(tmp_path, "core2"))
zip_out = archives.add_to_zip(zip_out, core_copy)
zip_out = archives.add_to_zip(s3.download(zip_out, tmp_path), core_copy)

# Extract
unzip_out = os.path.join(tmp_path, "out")
Expand Down
108 changes: 58 additions & 50 deletions sertit/archives.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,10 +285,14 @@ def archive(
archive_path = AnyPath(archive_path)
folder_path = AnyPath(folder_path)

# with zipfile.ZipFile(archive_path, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
# for f in folder_path.glob("**"):
# zipf.write(f, f.relative_to(folder_path.name))

tmp_dir = None
if path.is_cloud_path(folder_path):
tmp_dir = tempfile.TemporaryDirectory()
folder_path = folder_path.download_to(tmp_dir.name)
folder_path = s3.download(folder_path, tmp_dir.name)

# Shutil make_archive needs a path without extension
archive_base = os.path.splitext(archive_path)[0]
Expand All @@ -304,7 +308,12 @@ def archive(
if tmp_dir is not None:
tmp_dir.cleanup()

return AnyPath(archive_fn)
try:
arch = AnyPath(archive_fn, folder_path.storage_options)
except Exception:
arch = AnyPath(archive_fn)

return arch


def add_to_zip(
Expand All @@ -329,55 +338,54 @@ def add_to_zip(
"""
zip_path = AnyPath(zip_path)

# If the zip is on the cloud, cache it (zipfile doesn't like cloud paths)
if path.is_cloud_path(zip_path):
zip_path = AnyPath(zip_path.fspath)

# Check if existing zipfile
if not zip_path.is_file():
raise FileNotFoundError(f"Non existing {zip_path}")

# Convert to list if needed
if not isinstance(dirs_to_add, list):
dirs_to_add = [dirs_to_add]

# Add all folders to the existing zip
# Forced to use ZipFile because make_archive only works with one folder and not existing zipfile
with open_zipfile(zip_path, "a") as zip_file:
progress_bar = tqdm(dirs_to_add)
for dir_to_add_path in progress_bar:
# Just to be sure, use str instead of Paths
if isinstance(dir_to_add_path, Path):
dir_to_add = str(dir_to_add_path)
elif path.is_cloud_path(dir_to_add_path):
dir_to_add = dir_to_add_path.fspath
else:
dir_to_add = dir_to_add_path

progress_bar.set_description(
f"Adding {os.path.basename(dir_to_add)} to {os.path.basename(zip_path)}"
with tempfile.TemporaryDirectory() as tmp_dir:
# Updating a zip stored on the cloud is not supported for now (zipfile doesn't like cloud paths)
if path.is_cloud_path(zip_path):
raise NotImplementedError(
"Impossible (for now) to update a zip stored in the cloud!"
)
tmp = tempfile.TemporaryDirectory()
if os.path.isfile(dir_to_add):
dir_to_add = extract_file(dir_to_add, tmp.name)

for root, _, files in os.walk(dir_to_add):
base_path = os.path.join(dir_to_add, "..")

# Write dir (in namelist at least)
zip_file.write(root, os.path.relpath(root, base_path))

# Write files
for file in files:
zip_file.write(
os.path.join(root, file),
os.path.relpath(
os.path.join(root, file), os.path.join(dir_to_add, "..")
),
)

# Clean tmp
tmp.cleanup()

# Check if existing zipfile
if not zip_path.is_file():
raise FileNotFoundError(f"Non existing {zip_path}")

# Convert to list if needed
if not isinstance(dirs_to_add, list):
dirs_to_add = [dirs_to_add]

# Add all folders to the existing zip
# Forced to use ZipFile because make_archive only works with one folder and not existing zipfile
with open_zipfile(zip_path, "a") as zip_file:
progress_bar = tqdm(dirs_to_add)
for dir_to_add_path in progress_bar:
# Just to be sure, use str instead of Paths
if isinstance(dir_to_add_path, Path):
dir_to_add = str(dir_to_add_path)
elif path.is_cloud_path(dir_to_add_path):
dir_to_add = dir_to_add_path.fspath
else:
dir_to_add = dir_to_add_path

progress_bar.set_description(
f"Adding {os.path.basename(dir_to_add)} to {os.path.basename(zip_path)}"
)
if os.path.isfile(dir_to_add):
dir_to_add = extract_file(dir_to_add, tmp_dir)

for root, _, files in os.walk(dir_to_add):
base_path = os.path.join(dir_to_add, "..")

# Write dir (in namelist at least)
zip_file.write(root, os.path.relpath(root, base_path))

# Write files
for file in files:
zip_file.write(
os.path.join(root, file),
os.path.relpath(
os.path.join(root, file), os.path.join(dir_to_add, "..")
),
)

return zip_path

Expand Down
47 changes: 27 additions & 20 deletions sertit/ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import filecmp
import logging
import pprint
import tempfile
from doctest import Example
from typing import Any, Union

Expand All @@ -30,7 +31,7 @@
from shapely import force_2d, normalize
from shapely.testing import assert_geometries_equal

from sertit import AnyPath, files, s3, unistra
from sertit import AnyPath, files, path, s3, unistra
from sertit.logs import SU_NAME, deprecation_warning
from sertit.types import AnyPathStrType, AnyXrDataStructure

Expand Down Expand Up @@ -381,27 +382,33 @@ def assert_dir_equal(path_1: AnyPathStrType, path_2: AnyPathStrType) -> None:
assert path_1.is_dir(), f"{path_1} is not a directory!"
assert path_2.is_dir(), f"{path_2} is not a directory!"

dcmp = filecmp.dircmp(path_1, path_2)
try:
assert (
dcmp.left_only == []
), f"More files in {path_1}!\n{pprint.pformat(list(dcmp.left_only))}"
assert (
dcmp.right_only == []
), f"More files in {path_2}!\n{pprint.pformat(list(dcmp.right_only))}"
except FileNotFoundError:
files_1 = [AnyPath(p).name for p in AnyPath(path_1).iterdir()]
files_2 = [AnyPath(p).name for p in AnyPath(path_2).iterdir()]

for f1 in files_1:
assert (
f1 in files_2
), f"File missing!\n{f1} not in {pprint.pformat(files_2)}"
with tempfile.TemporaryDirectory() as tmpdir:
if path.is_cloud_path(path_1):
path_1 = s3.download(path_1, tmpdir)
if path.is_cloud_path(path_2):
path_2 = s3.download(path_2, tmpdir)

for f2 in files_2:
dcmp = filecmp.dircmp(path_1, path_2)
try:
assert (
dcmp.left_only == []
), f"More files in {path_1}!\n{pprint.pformat(list(dcmp.left_only))}"
assert (
f2 in files_1
), f"File missing!\n{f2} not in {pprint.pformat(files_1)}"
dcmp.right_only == []
), f"More files in {path_2}!\n{pprint.pformat(list(dcmp.right_only))}"
except FileNotFoundError:
files_1 = [p.name for p in path_1.iterdir()]
files_2 = [p.name for p in path_2.iterdir()]

for f1 in files_1:
assert (
f1 in files_2
), f"File missing!\n{f1} not in {pprint.pformat(files_2)}"

for f2 in files_2:
assert (
f2 in files_1
), f"File missing!\n{f2} not in {pprint.pformat(files_1)}"


def assert_geom_equal(
Expand Down
17 changes: 14 additions & 3 deletions sertit/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,21 @@ def download(src, dst):
import shutil

dst = AnyPath(dst)
downloaded_path = dst / src.name if dst.is_dir() else dst
if dst.is_dir() and src.name != dst.name:
downloaded_path = dst / src.name
else:
downloaded_path = dst

with src.open("rb") as f0, downloaded_path.open("wb") as f1:
shutil.copyfileobj(f0, f1)
if src.is_file():
with src.open("rb") as f0, downloaded_path.open("wb") as f1:
shutil.copyfileobj(f0, f1)
else:
for f in src.glob("**"):
dst_file = downloaded_path / f.name
if f.is_file():
dst_file.parent.mkdir(parents=True, exist_ok=True)
with f.open("rb") as f0, dst_file.open("wb") as f1:
shutil.copyfileobj(f0, f1)

# cloudpathlib
elif isinstance(src, CloudPath):
Expand Down

0 comments on commit 8b95aac

Please sign in to comment.