Skip to content

Commit

Permalink
Merge pull request #5 from ayasyrev:dev
Browse files Browse the repository at this point in the history
0.0.4
  • Loading branch information
ayasyrev authored Oct 25, 2023
2 parents 6bfe0e8 + 49f8288 commit fb9869d
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 24 deletions.
24 changes: 17 additions & 7 deletions src/nbmetaclean/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,23 @@
import nbformat
from argparsecfg.app import App
from argparsecfg.core import field_argument
from rich import print as rprint

from .core import get_nb_names
from .clean import clean_nb_file
from .core import get_nb_names


@dataclass
class AppCfg:
path: str = field_argument("path", default=".", nargs="*")
as_version: int = field_argument(
default=nbformat.NO_CONVERT,
help="Save as version, default - no convert"
default=nbformat.NO_CONVERT, help="Save as version, default - no convert"
)
silent: bool = field_argument("-s", default=False, action="store_true")
not_pt: bool = field_argument(
default=False,
help="Do not preserve timestamp, default - preserve timestamp",
action="store_true")


app = App(
Expand All @@ -32,14 +36,20 @@ def clean(
"""Clean metadata and execution_count from Jupyter notebook."""
path_list = cfg.path if isinstance(cfg.path, list) else [cfg.path]
nb_files: list[Path] = []
print(cfg)
for path in path_list:
try:
nb_files.extend(get_nb_names(path))
except FileNotFoundError:
print(f"{path} not exists!")
print(f"find notebooks: {len(nb_files)} ")
cleaned = clean_nb_file(nb_files, as_version=cfg.as_version, silent=cfg.silent)
print(f"cleaned nbs: {len(cleaned)}")
rprint(f"{path} not exists!")
rprint(f"find notebooks: {len(nb_files)} ")
cleaned = clean_nb_file(
nb_files,
as_version=cfg.as_version,
silent=cfg.silent,
preserve_timestamp=not cfg.not_pt,
)
rprint(f"cleaned nbs: {len(cleaned)}")


if __name__ == "__main__":
Expand Down
17 changes: 14 additions & 3 deletions src/nbmetaclean/clean.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations
import copy
import os

from pathlib import Path
from typing import Optional, Union
Expand All @@ -8,6 +9,7 @@

from nbformat.notebooknode import NotebookNode
from rich.progress import track
from rich import print as rprint

from .core import read_nb, write_nb, PathOrStr

Expand Down Expand Up @@ -131,23 +133,28 @@ def clean_nb_file(
clear_cell_metadata: bool = True,
clear_execution_count: bool = True,
clear_outputs: bool = False,
preserve_timestamp: bool = True,
as_version: nbformat.Sentinel = nbformat.NO_CONVERT,
silent: bool = False,
) -> list[Path]:
"""Clean metadata and execution count from notebook.
Args:
path (Union[str, PosixPath]): Notebook filename or list of names.
clear_nb_metadata (bool): Clear notebook metadata. Defaults to True.
clear_cell_metadata (bool): Clear cell metadata. Defaults to True.
clear_outputs (bool): Clear outputs. Defaults to False.
preserve_timestamp (bool): Preserve timestamp. Defaults to True.
as_version (int, optional): Nbformat version. Defaults to 4.
clear_execution_count (bool, optional): Clean execution count. Defaults to True.
silent (bool, optional): Silent mode. Defaults to False.
silent (bool): Silent mode. Defaults to False.
Returns:
List[Path]: List of cleaned notebooks
"""
if not isinstance(path, list):
path = [path]
cleaned: list[PathOrStr] = []
cleaned: list[Path] = []
for filename in track(path, transient=True):
nb = read_nb(filename)
nb, result = clean_nb(
Expand All @@ -159,7 +166,11 @@ def clean_nb_file(
)
if result:
cleaned.append(filename)
if preserve_timestamp:
stat = filename.stat()
write_nb(nb, filename, as_version)
if preserve_timestamp:
os.utime(filename, (stat.st_atime, stat.st_mtime))
if not silent:
print(f"done: {filename}")
rprint(f"done: {filename}")
return cleaned
6 changes: 3 additions & 3 deletions src/nbmetaclean/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def write_nb(


def get_nb_names(
path: Optional[PathOrStr] = None,
recursive: bool = True,
filter_hidden: bool = True,
path: Optional[PathOrStr] = None,
recursive: bool = True,
filter_hidden: bool = True,
) -> list[Path]:
"""Return list of notebooks from `path`. If no `path` return notebooks from current folder.
Expand Down
2 changes: 1 addition & 1 deletion src/nbmetaclean/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.1" # pragma: no cover
__version__ = "0.0.4" # pragma: no cover
50 changes: 40 additions & 10 deletions tests/test_clean.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import os
from pathlib import Path
from nbformat.notebooknode import NotebookNode

Expand Down Expand Up @@ -164,34 +165,63 @@ def test_clean_nb_file(tmp_path: Path, capsys: CaptureFixture[str]):
test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name)

# clean meta, leave execution_count
result = clean_nb_file(test_nb_path, clear_execution_count=False)
assert len(result) == 1
nb = read_nb(result[0])
cleaned = clean_nb_file(test_nb_path, clear_execution_count=False)
assert len(cleaned) == 1
nb = read_nb(cleaned[0])
assert nb.metadata == nb_clean.metadata
assert nb.cells[1].execution_count == 1
assert nb.cells[1].outputs[0].execution_count == 1

# clean meta, execution_count
# path as list
result = clean_nb_file([test_nb_path])
cleaned = clean_nb_file([test_nb_path])
captured = capsys.readouterr()
out = captured.out
assert out.startswith("done:")
assert "test_clean_nb_file0/test_nb_2.ipynb" in out
assert len(result) == 1
nb = read_nb(result[0])
assert len(cleaned) == 1
nb = read_nb(cleaned[0])
assert nb == nb_clean

# try clean cleaned
result = clean_nb_file(test_nb_path)
assert len(result) == 0
cleaned = clean_nb_file(test_nb_path)
assert len(cleaned) == 0
captured = capsys.readouterr()
out = captured.out
assert not out.strip()

# silent
test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name)
result = clean_nb_file(test_nb_path, silent=True)
# assert len(result) == 1
cleaned = clean_nb_file(test_nb_path, silent=True)
assert len(cleaned) == 1
captured = capsys.readouterr()
assert not captured.out.strip()


def test_clean_nb_file_timestamp(tmp_path: Path, capsys: CaptureFixture[str]):
    """Check how `clean_nb_file` treats the notebook's modification time.

    With the default `preserve_timestamp=True` the cleaned file must keep the
    mtime it had before cleaning; with `preserve_timestamp=False` the mtime
    must differ from the one that was set on the file.
    """
    path = Path("tests/test_nbs")
    nb_name = "test_nb_2.ipynb"
    nb_stat = (path / nb_name).stat()

    # prepare temp test notebook and give it the timestamps of the source nb,
    # so the expected mtime is known and is not "now"
    test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name)
    os.utime(test_nb_path, (nb_stat.st_atime, nb_stat.st_mtime))
    test_nb_stat = test_nb_path.stat()
    assert test_nb_stat.st_atime == nb_stat.st_atime
    assert test_nb_stat.st_mtime == nb_stat.st_mtime

    # default: timestamp is preserved
    cleaned = clean_nb_file(test_nb_path)
    assert len(cleaned) == 1
    cleaned_stat = cleaned[0].stat()
    assert cleaned_stat.st_mtime == test_nb_stat.st_mtime

    # dont preserve timestamp: rewrite the nb, reset its timestamps, clean again
    test_nb_path = write_nb(read_nb(path / nb_name), tmp_path / nb_name)
    os.utime(test_nb_path, (nb_stat.st_atime, nb_stat.st_mtime))
    cleaned = clean_nb_file(test_nb_path, preserve_timestamp=False)
    assert len(cleaned) == 1
    cleaned_stat = cleaned[0].stat()
    assert cleaned_stat.st_mtime != nb_stat.st_mtime
50 changes: 50 additions & 0 deletions tests/test_read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,53 @@ def test_get_nb_names():
except FileNotFoundError as ex:
assert True
assert str(ex) == "wrong_name not exists!"


def test_get_nb_names_recursive_hidden(tmp_path: Path):
    """Check `get_nb_names` counts for the `recursive` / `filter_hidden` flags.

    Builds a small tree of empty files step by step (plain and hidden
    notebooks, a non-notebook file, a sub-directory, a hidden directory)
    and checks the number of returned names after each step.
    """
    suffix = ".ipynb"

    def make_nb(folder: Path, stem: str) -> None:
        # create an empty notebook file `<stem>.ipynb` inside `folder`
        (folder / stem).with_suffix(suffix).touch()

    # add one nb
    make_nb(tmp_path, "tst")
    assert len(get_nb_names(tmp_path)) == 1

    # add hidden nb
    make_nb(tmp_path, ".tst")
    assert len(get_nb_names(tmp_path)) == 1
    assert len(get_nb_names(tmp_path, filter_hidden=False)) == 2

    # add simple file (no notebook suffix)
    (tmp_path / "simple").touch()
    assert len(get_nb_names(tmp_path)) == 1

    # add dir with one nb, hidden nb
    new_dir = tmp_path / "new_dir"
    new_dir.mkdir()
    for stem in ("tst", ".tst"):
        make_nb(new_dir, stem)
    assert len(get_nb_names(tmp_path)) == 2
    assert len(get_nb_names(tmp_path, filter_hidden=False)) == 4

    assert len(get_nb_names(tmp_path, recursive=False)) == 1

    # add hidden dir
    hid_dir = tmp_path / ".hid_dir"
    hid_dir.mkdir()
    for stem in ("tst", ".tst"):
        make_nb(hid_dir, stem)
    assert len(get_nb_names(tmp_path, filter_hidden=False)) == 6
    assert len(get_nb_names(tmp_path)) == 2

0 comments on commit fb9869d

Please sign in to comment.