diff --git a/.flake8 b/.flake8 index fe13e52..619d0e3 100644 --- a/.flake8 +++ b/.flake8 @@ -6,6 +6,3 @@ extend-ignore = W503 disable-noqa = True application-import-names = nbmetaclean, tests import-order-style = google -per-file-ignores = - # imported but unused - __init__.py: F401 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..477534a --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,17 @@ +name: Lint +on: + push: + branches: + - dev + - main +jobs: + tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@main + - uses: actions/setup-python@main + with: + python-version: "3.11" + architecture: x64 + - run: pip install ruff + - run: ruff check . diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..e4220a4 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,35 @@ +name: Tests +on: + push: + branches: + - dev + - main +jobs: + tests: + runs-on: ubuntu-latest + strategy: + matrix: + python: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - name: Checkout + uses: actions/checkout@main + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@main + with: + python-version: ${{ matrix.python }} + architecture: x64 + + - name: Install + run: | + pip install uv + uv pip install --system .[test] "coverage[toml]" + + - name: Tests + run: pytest --cov + + - name: CodeCov + if: ${{ matrix.python == '3.11' }} + uses: codecov/codecov-action@main + with: + token: ${{ secrets.CODECOV_TOKEN }} + slug: ayasyrev/nbmetaclean diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8fdcef6..7455314 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,10 +1,13 @@ repos: -- repo: https://github.com/ayasyrev/nbmetaclean - rev: 0.0.8 +- repo: local + hooks: - - id: nbclean - name: nbclean + # local version for testing + - id: nbclean + name: nbclean local entry: nbclean + language: system + files: \.ipynb - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 @@ -34,7 +37,8 @@ repos: - id: requirements-txt-fixer - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.1 + rev: v0.5.7 + hooks: # Run the linter. - id: ruff @@ -47,5 +51,4 @@ repos: - id: python-check-mock-methods - id: python-use-type-annotations - id: python-check-blanket-noqa - - id: python-use-type-annotations - id: text-unicode-replacement-char diff --git a/noxfile.py b/noxfile.py index 65c30b5..5819a1e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,9 +1,8 @@ import nox -@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12"]) +@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12"], venv_backend="uv") def tests(session: nox.Session) -> None: args = session.posargs or ["--cov"] - session.install("uv") - session.run("uv", "pip", "install", ".[test]") + session.install("-e .[test]") session.run("pytest", *args) diff --git a/noxfile_conda.py b/noxfile_conda.py index 92b7e4e..305308c 100644 --- a/noxfile_conda.py +++ b/noxfile_conda.py @@ -5,5 +5,5 @@ def conda_tests(session: nox.Session) -> None: args = session.posargs or ["--cov"] session.conda_install("uv") - session.run("uv", "pip", "install", ".[test]") + session.install("uv", "pip", "install", "-e", ".[test]") session.run("pytest", *args) diff --git a/noxfile_conda_lint.py b/noxfile_conda_lint.py index b811702..88ab393 100644 --- a/noxfile_conda_lint.py +++ b/noxfile_conda_lint.py @@ -1,6 +1,6 @@ import nox -locations = "src/nbmetaclean", "tests", "noxfile.py" +locations = "." @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12"], venv_backend="mamba") diff --git a/noxfile_lint.py b/noxfile_lint.py index 2176455..d2429a7 100644 --- a/noxfile_lint.py +++ b/noxfile_lint.py @@ -3,7 +3,7 @@ locations = "." -@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12"]) +@nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12"], venv_backend="uv") def lint(session: nox.Session) -> None: args = session.posargs or locations session.install("ruff") diff --git a/pyproject.toml b/pyproject.toml index a40cc44..b4abb8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.coverage.paths] -source = ["src", "*/site-packages"] +source = ["src"] [tool.coverage.run] branch = true @@ -7,3 +7,16 @@ source = ["nbmetaclean"] [tool.coverage.report] show_missing = true + +[tool.ruff] +extend-include = ["*.ipynb"] +indent-width = 4 + +[tool.ruff.lint] +explicit-preview-rules = true + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" diff --git a/requirements_dev.txt b/requirements_dev.txt index 8778d84..d400fe5 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,7 +1,9 @@ black black[jupyter] +coverage[toml] flake8 isort mypy +nox pre-commit ruff diff --git a/requirements_test_extra.txt b/requirements_test_extra.txt deleted file mode 100644 index 16ac163..0000000 --- a/requirements_test_extra.txt +++ /dev/null @@ -1,2 +0,0 @@ -coverage[toml] -nox diff --git a/setup.py b/setup.py index f7abcb7..8c11aa0 100644 --- a/setup.py +++ b/setup.py @@ -1,24 +1,30 @@ from setuptools import setup + REQUIREMENTS_FILENAME = "requirements.txt" REQUIREMENTS_TEST_FILENAME = "requirements_test.txt" +REQUIREMENTS_DEV_FILENAME = "requirements_dev.txt" + + +def load_requirements(filename: str) -> list[str]: + """Load requirements from file""" + try: + with open(filename, encoding="utf-8") as fh: + return fh.read().splitlines() + except FileNotFoundError: + return [] -# Requirements -try: - with open(REQUIREMENTS_FILENAME, encoding="utf-8") as fh: - REQUIRED = fh.read().split("\n") -except FileNotFoundError: - REQUIRED = [] +REQUIRED = load_requirements(REQUIREMENTS_FILENAME) +TEST_REQUIRED = load_requirements(REQUIREMENTS_TEST_FILENAME) +DEV_REQUIRED = load_requirements(REQUIREMENTS_DEV_FILENAME) -try: - with open(REQUIREMENTS_TEST_FILENAME, encoding="utf-8") as fh: - TEST_REQUIRED = fh.read().split("\n") -except FileNotFoundError: - TEST_REQUIRED = [] # What packages are optional? -EXTRAS = {"test": TEST_REQUIRED} +EXTRAS = { + "test": TEST_REQUIRED, + "dev": DEV_REQUIRED + TEST_REQUIRED, +} setup( diff --git a/src/nbmetaclean/app.py b/src/nbmetaclean/app.py index 68e969e..fba439d 100644 --- a/src/nbmetaclean/app.py +++ b/src/nbmetaclean/app.py @@ -21,6 +21,11 @@ action="store_true", help="Silent mode.", ) +parser.add_argument( + "--not_ec", + action="store_false", + help="Do not clear execution_count.", +) parser.add_argument( "--not-pt", action="store_true", @@ -61,6 +66,18 @@ action="store_true", help="Clean hidden notebooks.", ) +parser.add_argument( + "-D", + "--dry_run", + action="store_true", + help="perform a trial run, don't write results", +) +parser.add_argument( + "-V", + "--verbose", + action="store_true", + help="Verbose mode. Print extra information.", +) def process_mask(mask: Union[list[str], None]) -> Union[tuple[TupleStr, ...], None]: @@ -69,41 +86,63 @@ def process_mask(mask: Union[list[str], None]) -> Union[tuple[TupleStr, ...], No return tuple(tuple(item.split(".")) for item in mask) +def print_result( + cleaned: list[Path], + errors: list[tuple[Path, Exception]], + clean_config: CleanConfig, + path: list[Path], + num_nbs: int, +) -> None: + if clean_config.verbose: + print( + f"Path: {', '.join(path)}, preserve timestamp: {clean_config.preserve_timestamp}" + ) + print(f"checked: {num_nbs} notebooks") + if cleaned: + if len(cleaned) == 1: + print(f"cleaned: {cleaned[0]}") + else: + print(f"cleaned: {len(cleaned)} notebooks") + for nb in cleaned: + print("- ", nb) + if errors: + print(f"with errors: {len(errors)}") + for nb, exc in errors: + print(f"{nb}: {exc}") + + def app() -> None: """Clean metadata and execution_count from Jupyter notebook.""" cfg = parser.parse_args() - path_list = cfg.path if isinstance(cfg.path, list) else [cfg.path] - nb_files: list[Path] = [] - if not cfg.silent: - print(f"Path: {', '.join(cfg.path)}, preserve timestamp: {not cfg.not_pt}") - for path in path_list: - try: - nb_files.extend(get_nb_names(path, hidden=cfg.clean_hidden_nbs)) - except FileNotFoundError: - print(f"{path} not exists!") - if not cfg.silent: - print(f"notebooks to check: {len(nb_files)} ") clean_config = CleanConfig( clear_nb_metadata=not cfg.dont_clear_nb_metadata, clear_cell_metadata=cfg.clear_cell_metadata, - clear_execution_count=True, + clear_execution_count=cfg.not_ec, clear_outputs=cfg.clear_outputs, preserve_timestamp=not cfg.not_pt, silent=cfg.silent, nb_metadata_preserve_mask=process_mask(cfg.nb_metadata_preserve_mask), cell_metadata_preserve_mask=process_mask(cfg.cell_metadata_preserve_mask), mask_merge=not cfg.dont_merge_masks, + dry_run=cfg.dry_run, + verbose=cfg.verbose if not cfg.silent else False, ) + path_list = cfg.path if isinstance(cfg.path, list) else [cfg.path] + nb_files: list[Path] = [] + for path in path_list: + path = Path(path) + if path.exists(): + nb_files.extend(get_nb_names(path, hidden=cfg.clean_hidden_nbs)) + else: + print(f"{path} not exists!") + cleaned, errors = clean_nb_file( nb_files, clean_config, ) + if not cfg.silent: - print(f"cleaned nbs: {len(cleaned)}") - if errors: - print(f"with errors: {len(errors)}") - for nb, exc in errors: - print(f"{nb}: {exc}") + print_result(cleaned, errors, clean_config, path_list, len(nb_files)) if __name__ == "__main__": diff --git a/src/nbmetaclean/clean.py b/src/nbmetaclean/clean.py index cf69e55..f4efa26 100644 --- a/src/nbmetaclean/clean.py +++ b/src/nbmetaclean/clean.py @@ -1,14 +1,13 @@ from __future__ import annotations import copy -import os from dataclasses import dataclass from pathlib import Path from typing import Optional, Tuple, Union from nbmetaclean.helpers import read_nb, write_nb -from .typing import Cell, CodeCell, Metadata, Nb, Output +from .types import Cell, CodeCell, Metadata, Nb, Output TupleStr = Tuple[str, ...] @@ -33,8 +32,10 @@ class CleanConfig: Preserve mask for notebook metadata. Defaults to None. cell_metadata_preserve_mask (Optional[tuple[str, ...]], optional): Preserve mask for cell metadata. Defaults to None. - mask_merge (bool, optional): Merge masks. Add new mask to default. + mask_merge (bool): Merge masks. Add new mask to default. If False - use new mask. Defaults to True. + dry_run (bool): perform a trial run, don't write results. Defaults to False. + verbose (bool): Verbose mode. Print extra information. Defaults to False. """ clear_nb_metadata: bool = True @@ -46,6 +47,8 @@ class CleanConfig: nb_metadata_preserve_mask: Optional[tuple[TupleStr, ...]] = None cell_metadata_preserve_mask: Optional[tuple[TupleStr, ...]] = None mask_merge: bool = True + dry_run: bool = False + verbose: bool = False def filter_meta_mask( @@ -175,24 +178,17 @@ def clean_nb_file( Args: path (Union[str, PosixPath]): Notebook filename or list of names. - clear_nb_metadata (bool): Clear notebook metadata. Defaults to True. - clear_cell_metadata (bool): Clear cell metadata. Defaults to False. - clear_outputs (bool): Clear outputs. Defaults to False. - preserve_timestamp (bool): Preserve timestamp. Defaults to True. - clear_execution_count (bool, optional): Clean execution count. Defaults to True. - silent (bool): Silent mode. Defaults to False. + cfg (CleanConfig, optional): Config for job, if None, used default settings. Default is None. Returns: tuple[List[Path], List[TuplePath]]: List of cleaned notebooks, list of notebooks with errors. """ - if cfg is None: - cfg = CleanConfig() + cfg = cfg or CleanConfig() if not isinstance(path, list): path = [path] cleaned: list[Path] = [] errors: list[tuple[Path, Exception]] = [] - to_clean = len(path) - for num, filename in enumerate(path): + for filename in path: try: nb = read_nb(filename) except Exception as ex: @@ -204,11 +200,13 @@ def clean_nb_file( ) if result: cleaned.append(filename) + if cfg.dry_run: + continue if cfg.preserve_timestamp: stat = filename.stat() - write_nb(nb, filename) - if cfg.preserve_timestamp: - os.utime(filename, (stat.st_atime, stat.st_mtime)) - if not cfg.silent: - print(f"done {num + 1} of {to_clean}: {filename}") + timestamp = (stat.st_atime, stat.st_mtime) + else: + timestamp = None + write_nb(nb, filename, timestamp) + return cleaned, errors diff --git a/src/nbmetaclean/helpers.py b/src/nbmetaclean/helpers.py index b3f1c3a..ff0d181 100644 --- a/src/nbmetaclean/helpers.py +++ b/src/nbmetaclean/helpers.py @@ -1,10 +1,11 @@ from __future__ import annotations import json +import os from pathlib import Path from typing import Optional -from .typing import Nb, PathOrStr +from .types import Nb, PathOrStr def read_nb(path: PathOrStr) -> Nb: @@ -22,12 +23,14 @@ def read_nb(path: PathOrStr) -> Nb: def write_nb( nb: Nb, path: PathOrStr, + timestamp: Optional[tuple[float, float]] = None, ) -> Path: - """Write notebook to file + """Write notebook to file, optionally set timestamp. Args: nb (Notebook): Notebook to write path (Union[str, PosixPath]): filename to write + timestamp (Optional[tuple[float, float]]): timestamp to set, (st_atime, st_mtime) defaults to None Returns: Path: Filename of written notebook. """ @@ -45,6 +48,8 @@ def write_nb( ) + "\n", ) + if timestamp is not None: + os.utime(filename, timestamp) return filename diff --git a/src/nbmetaclean/typing.py b/src/nbmetaclean/types.py similarity index 100% rename from src/nbmetaclean/typing.py rename to src/nbmetaclean/types.py diff --git a/src/nbmetaclean/version.py b/src/nbmetaclean/version.py index 0e12f90..00ec2dc 100644 --- a/src/nbmetaclean/version.py +++ b/src/nbmetaclean/version.py @@ -1 +1 @@ -__version__ = "0.0.8" # pragma: no cover +__version__ = "0.0.9" diff --git a/tests/test_clean.py b/tests/test_clean.py index 813f3e1..465df66 100644 --- a/tests/test_clean.py +++ b/tests/test_clean.py @@ -255,7 +255,24 @@ def test_clean_nb_file(tmp_path: Path, capsys: CaptureFixture[str]): nb_clean = read_nb(path / "test_nb_2_clean.ipynb") # prepare temp test notebook - test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name) + nb_source = read_nb(path / nb_name) + test_nb_path = write_nb(nb_source, tmp_path / nb_name) + + # clean meta, leave execution_count + # first lets dry run + cleaned, errors = clean_nb_file( + test_nb_path, + cfg=CleanConfig( + clear_execution_count=False, + dry_run=True, + ), + ) + assert len(cleaned) == 1 + assert len(errors) == 0 + nb = read_nb(cleaned[0]) + assert nb["metadata"] == nb_source["metadata"] + assert nb["cells"][1]["execution_count"] == 1 + assert nb["cells"][1]["outputs"][0]["execution_count"] == 1 # clean meta, leave execution_count cleaned, errors = clean_nb_file( @@ -272,10 +289,6 @@ def test_clean_nb_file(tmp_path: Path, capsys: CaptureFixture[str]): # clean meta, execution_count # path as list cleaned, errors = clean_nb_file([test_nb_path], CleanConfig()) - captured = capsys.readouterr() - out = captured.out - assert out.startswith("done") - assert "test_clean_nb_file0/.test_nb_2_meta.ipynb" in out assert len(cleaned) == 1 nb = read_nb(cleaned[0]) assert nb == nb_clean @@ -283,17 +296,7 @@ def test_clean_nb_file(tmp_path: Path, capsys: CaptureFixture[str]): # try clean cleaned cleaned, errors = clean_nb_file(test_nb_path, CleanConfig()) assert len(cleaned) == 0 - captured = capsys.readouterr() - out = captured.out - assert not out.strip() - - # silent - test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name) - cleaned, errors = clean_nb_file(test_nb_path, CleanConfig(silent=True)) - assert len(cleaned) == 1 assert len(errors) == 0 - captured = capsys.readouterr() - assert not captured.out.strip() def test_clean_nb_file_errors(capsys: CaptureFixture[str], tmp_path: Path): diff --git a/tests/test_read_write.py b/tests/test_read_write.py index 0a7e301..b2b94b5 100644 --- a/tests/test_read_write.py +++ b/tests/test_read_write.py @@ -41,3 +41,10 @@ def test_write_nb(tmp_path: Path): # write with name w/o suffix result = write_nb(nb, tmp_path / "test_nb_1") assert result == tmp_path / "test_nb_1.ipynb" + + # write with stat + stat = file.stat() + timestamp = (stat.st_atime, stat.st_mtime) + result = write_nb(nb, tmp_path / "test_nb_1", timestamp=timestamp) + res_stat = result.stat() + assert timestamp == (res_stat.st_atime, res_stat.st_mtime)