Skip to content

Commit

Permalink
✨ dynamic-sidecar logs changes to input ports (#5999)
Browse files Browse the repository at this point in the history
Co-authored-by: Andrei Neagu <neagu@itis.swiss>
  • Loading branch information
GitHK and Andrei Neagu authored Jun 27, 2024
1 parent b5d82e0 commit 87d3354
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 12 deletions.
55 changes: 53 additions & 2 deletions packages/service-library/src/servicelib/file_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import asyncio
import hashlib
import shutil
from contextlib import contextmanager
from logging import Logger
from pathlib import Path
from typing import Final, Protocol
from typing import Final, Iterator, Protocol

# https://docs.python.org/3/library/shutil.html#shutil.rmtree
# https://docs.python.org/3/library/os.html#os.remove
Expand Down Expand Up @@ -60,10 +62,59 @@ async def create_sha256_checksum(

async def _eval_hash_async(
async_stream: AsyncStream,
hasher: "hashlib._Hash", # noqa: SLF001
hasher: "hashlib._Hash",
chunk_size: ByteSize,
) -> str:
while chunk := await async_stream.read(chunk_size):
hasher.update(chunk)
digest = hasher.hexdigest()
return f"{digest}"


def _get_file_properties(path: Path) -> tuple[float, int]:
stats = path.stat()
return stats.st_mtime, stats.st_size


def _get_directory_snapshot(path: Path) -> dict[str, tuple[float, int]]:
return {
f"{p.relative_to(path)}": _get_file_properties(p)
for p in path.rglob("*")
if p.is_file()
}


@contextmanager
def log_directory_changes(path: Path, logger: Logger, log_level: int) -> Iterator[None]:
before: dict[str, tuple[float, int]] = _get_directory_snapshot(path)
yield
after: dict[str, tuple[float, int]] = _get_directory_snapshot(path)

after_keys: set[str] = set(after.keys())
before_keys: set[str] = set(before.keys())
common_keys = before_keys & after_keys

added_elements = after_keys - before_keys
removed_elements = before_keys - after_keys
content_changed_elements = {x for x in common_keys if before[x] != after[x]}

if added_elements or removed_elements or content_changed_elements:
logger.log(log_level, "File changes in path: '%s'", f"{path}")
if added_elements:
logger.log(
log_level,
"Files added:\n%s",
"\n".join([f"+ {x}" for x in sorted(added_elements)]),
)
if removed_elements:
logger.log(
log_level,
"Files removed:\n%s",
"\n".join([f"- {x}" for x in sorted(removed_elements)]),
)
if content_changed_elements:
logger.log(
log_level,
"File content changed:\n%s",
"\n".join([f"* {x}" for x in sorted(content_changed_elements)]),
)
62 changes: 61 additions & 1 deletion packages/service-library/tests/test_file_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# pylint: disable=redefined-outer-name
# pylint: disable=unused-argument

import logging
from pathlib import Path

import pytest
from faker import Faker
from servicelib.file_utils import remove_directory
from servicelib.file_utils import log_directory_changes, remove_directory

_logger = logging.getLogger(__name__)


@pytest.fixture
Expand Down Expand Up @@ -80,3 +83,60 @@ async def test_remove_not_existing_directory_rasing_error(
await remove_directory(
path=missing_path, only_children=only_children, ignore_errors=False
)


async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: Path):
# directory cretion triggers no changes
caplog.clear()
with log_directory_changes(some_dir, _logger, logging.ERROR):
(some_dir / "a-dir").mkdir(parents=True, exist_ok=True)
assert "File changes in path" not in caplog.text
assert "Files added:" not in caplog.text
assert "Files removed:" not in caplog.text
assert "File content changed" not in caplog.text

# files were added
caplog.clear()
with log_directory_changes(some_dir, _logger, logging.ERROR):
(some_dir / "hoho").touch()
assert "File changes in path" in caplog.text
assert "Files added:" in caplog.text
assert "Files removed:" not in caplog.text
assert "File content changed" not in caplog.text

# files were removed
caplog.clear()
with log_directory_changes(some_dir, _logger, logging.ERROR):
await remove_directory(path=some_dir)
assert "File changes in path" in caplog.text
assert "Files removed:" in caplog.text
assert "Files added:" not in caplog.text
assert "File content changed" not in caplog.text

# nothing changed
caplog.clear()
with log_directory_changes(some_dir, _logger, logging.ERROR):
pass
assert caplog.text == ""

# files added and removed
caplog.clear()
some_dir.mkdir(parents=True, exist_ok=True)
(some_dir / "som_other_file").touch()
with log_directory_changes(some_dir, _logger, logging.ERROR):
(some_dir / "som_other_file").unlink()
(some_dir / "som_other_file_2").touch()
assert "File changes in path" in caplog.text
assert "Files added:" in caplog.text
assert "Files removed:" in caplog.text
assert "File content changed" not in caplog.text

# file content changed
caplog.clear()
(some_dir / "file_to_change").touch()
with log_directory_changes(some_dir, _logger, logging.ERROR):
(some_dir / "file_to_change").write_text("ab")
assert "File changes in path" in caplog.text
assert "Files added:" not in caplog.text
assert "Files removed:" not in caplog.text
assert "File content changed" in caplog.text
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from models_library.rabbitmq_messages import ProgressType, SimcorePlatformStatus
from pydantic import PositiveInt
from servicelib.fastapi.long_running_tasks.server import TaskProgress
from servicelib.file_utils import log_directory_changes
from servicelib.logging_utils import log_context
from servicelib.progress_bar import ProgressBarData
from servicelib.utils import logged_gather
Expand Down Expand Up @@ -476,15 +477,18 @@ async def task_ports_inputs_pull(
),
description="pulling inputs",
) as root_progress:
transferred_bytes = await nodeports.download_target_ports(
nodeports.PortTypeName.INPUTS,
mounted_volumes.disk_inputs_path,
port_keys=port_keys,
io_log_redirect_cb=functools.partial(
post_sidecar_log_message, app, log_level=logging.INFO
),
progress_bar=root_progress,
)
with log_directory_changes(
mounted_volumes.disk_inputs_path, _logger, logging.INFO
):
transferred_bytes = await nodeports.download_target_ports(
nodeports.PortTypeName.INPUTS,
mounted_volumes.disk_inputs_path,
port_keys=port_keys,
io_log_redirect_cb=functools.partial(
post_sidecar_log_message, app, log_level=logging.INFO
),
progress_bar=root_progress,
)
await post_sidecar_log_message(
app, "Finished pulling inputs", log_level=logging.INFO
)
Expand Down

0 comments on commit 87d3354

Please sign in to comment.