diff --git a/.github/workflows/run_notebooks.yml b/.github/workflows/run_notebooks.yml index 7f19f070..995b8dae 100644 --- a/.github/workflows/run_notebooks.yml +++ b/.github/workflows/run_notebooks.yml @@ -13,7 +13,7 @@ jobs: "docs/tutorials/notebooks/ehrapy_introduction.ipynb", "docs/tutorials/notebooks/mimic_2_introduction.ipynb", "docs/tutorials/notebooks/mimic_2_survival_analysis.ipynb", - "docs/tutorials/notebooks/mimic_2_fate.ipynb", + # "docs/tutorials/notebooks/mimic_2_fate.ipynb", # https://github.com/theislab/cellrank/issues/1235 "docs/tutorials/notebooks/mimic_2_causal_inference.ipynb", # "docs/tutorials/notebooks/mimic_3_demo.ipynb", # "docs/tutorials/notebooks/medcat.ipynb", @@ -34,5 +34,8 @@ jobs: - name: Install ehrapy and additional dependencies run: uv pip install --system . cellrank nbconvert ipykernel + - name: Install scvelo from Github + run: uv pip install --system git+https://github.com/theislab/scvelo.git + - name: Run ${{ matrix.notebook }} Notebook run: jupyter nbconvert --to notebook --execute ${{ matrix.notebook }} diff --git a/.readthedocs.yml b/.readthedocs.yml index 5135ef47..bac3abbe 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -3,16 +3,13 @@ build: os: ubuntu-22.04 tools: python: "3.11" - jobs: - pre_build: - - python -c "import ehrapy" - - pip freeze - post_create_environment: - - pip install uv - post_install: - # VIRTUAL_ENV needs to be set manually for now. - # See https://github.com/readthedocs/readthedocs.org/pull/11152/ - - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH pip install .[docs] + commands: + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + - uv venv + - uv pip install .[docs] + - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs $READTHEDOCS_OUTPUT/html sphinx: configuration: docs/conf.py fail_on_warning: false diff --git a/docs/_ext/edit_on_github.py b/docs/_ext/edit_on_github.py index 85ed75de..746d2c41 100644 --- a/docs/_ext/edit_on_github.py +++ b/docs/_ext/edit_on_github.py @@ -20,7 +20,7 @@ def get_github_repo(app: Sphinx, path: str) -> str: def _html_page_context( - app: Sphinx, _pagename: str, templatename: str, context: dict[str, Any], doctree: Optional[Any] + app: Sphinx, _pagename: str, templatename: str, context: dict[str, Any], doctree: Any | None ) -> None: # doctree is None - otherwise viewcode fails if templatename != "page.html" or doctree is None: diff --git a/ehrapy/_utils_doc.py b/ehrapy/_utils_doc.py index b0cf9587..3d6b07f7 100644 --- a/ehrapy/_utils_doc.py +++ b/ehrapy/_utils_doc.py @@ -1,9 +1,9 @@ import inspect +from collections.abc import Callable from textwrap import dedent -from typing import Callable, Optional, Union -def getdoc(c_or_f: Union[Callable, type]) -> Optional[str]: # pragma: no cover +def getdoc(c_or_f: Callable | type) -> str | None: # pragma: no cover if getattr(c_or_f, "__doc__", None) is None: return None doc = inspect.getdoc(c_or_f) diff --git a/ehrapy/anndata/anndata_ext.py b/ehrapy/anndata/anndata_ext.py index a82721d3..fb420202 100644 --- a/ehrapy/anndata/anndata_ext.py +++ b/ehrapy/anndata/anndata_ext.py @@ -404,7 +404,7 @@ def _detect_binary_columns(df: pd.DataFrame, numerical_columns: list[str]) -> li for column in numerical_columns: # checking for float and int as well as NaNs (this is safe since checked columns are numericals only) # only columns that contain at least one 0 and one 1 are counted as binary (or 0.0/1.0) - if df[column].isin([0.0, 1.0, np.NaN, 0, 1]).all() and df[column].nunique() == 2: + if df[column].isin([0.0, 1.0, np.nan, 0, 1]).all() and df[column].nunique() == 2: binary_columns.append(column) return binary_columns @@ -423,7 +423,7 @@ def _cast_obs_columns(obs: pd.DataFrame) -> pd.DataFrame: # type cast each non-numerical column to either bool (if possible) or category else obs[object_columns] = obs[object_columns].apply( lambda obs_name: obs_name.astype("category") - if not set(pd.unique(obs_name)).issubset({False, True, np.NaN}) + if not set(pd.unique(obs_name)).issubset({False, True, np.nan}) else obs_name.astype("bool"), axis=0, ) diff --git a/ehrapy/data/_datasets.py b/ehrapy/data/_datasets.py index 5719373a..7957d17c 100644 --- a/ehrapy/data/_datasets.py +++ b/ehrapy/data/_datasets.py @@ -743,7 +743,7 @@ def synthea_1k_sample( df = anndata_to_df(adata) df.drop( - columns=[col for col in df.columns if any(isinstance(x, (list, dict)) for x in df[col].dropna())], inplace=True + columns=[col for col in df.columns if any(isinstance(x, list | dict) for x in df[col].dropna())], inplace=True ) df.drop(columns=df.columns[df.isna().all()], inplace=True) adata = df_to_anndata(df, index_column="id") diff --git a/ehrapy/plot/_scanpy_pl_api.py b/ehrapy/plot/_scanpy_pl_api.py index a1d032a2..4cf278d6 100644 --- a/ehrapy/plot/_scanpy_pl_api.py +++ b/ehrapy/plot/_scanpy_pl_api.py @@ -1,10 +1,10 @@ from __future__ import annotations -from collections.abc import Collection, Iterable, Mapping, Sequence +from collections.abc import Callable, Collection, Iterable, Mapping, Sequence from enum import Enum from functools import partial from types import MappingProxyType -from typing import TYPE_CHECKING, Any, Callable, Literal, Union +from typing import TYPE_CHECKING, Any, Literal import scanpy as sc from scanpy.plotting import DotPlot, MatrixPlot, StackedViolin @@ -36,12 +36,12 @@ from scanpy.plotting._utils import _AxesSubplot _Basis = Literal["pca", "tsne", "umap", "diffmap", "draw_graph_fr"] -_VarNames = Union[str, Sequence[str]] -ColorLike = Union[str, tuple[float, ...]] +_VarNames = str | Sequence[str] +ColorLike = str | tuple[float, ...] _IGraphLayout = Literal["fa", "fr", "rt", "rt_circular", "drl", "eq_tree", ...] # type: ignore _FontWeight = Literal["light", "normal", "medium", "semibold", "bold", "heavy", "black"] _FontSize = Literal["xx-small", "x-small", "small", "medium", "large", "x-large", "xx-large"] -VBound = Union[str, float, Callable[[Sequence[float]], float]] +VBound = str | float | Callable[[Sequence[float]], float] @_doc_params(scatter_temp=doc_scatter_basic, show_save_ax=doc_show_save_ax) diff --git a/ehrapy/preprocessing/_encoding.py b/ehrapy/preprocessing/_encoding.py index 1f761e71..3c3426aa 100644 --- a/ehrapy/preprocessing/_encoding.py +++ b/ehrapy/preprocessing/_encoding.py @@ -73,7 +73,7 @@ def encode( if isinstance(encodings, str) and not autodetect: raise ValueError("Passing a string for parameter encodings is only possible when using autodetect=True!") - elif autodetect and not isinstance(encodings, (str, type(None))): + elif autodetect and not isinstance(encodings, str | type(None)): raise ValueError( f"Setting encode mode with autodetect=True only works by passing a string (encode mode name) or None not {type(encodings)}!" ) @@ -630,7 +630,7 @@ def _update_obs(adata: AnnData, categorical_names: list[str]) -> pd.DataFrame: updated_obs[var_name] = adata.X[::, idx : idx + 1].flatten() # note: this will count binary columns (0 and 1 only) as well # needed for writing to .h5ad files - if set(pd.unique(updated_obs[var_name])).issubset({False, True, np.NaN}): + if set(pd.unique(updated_obs[var_name])).issubset({False, True, np.nan}): updated_obs[var_name] = updated_obs[var_name].astype("bool") # get all non bool object columns and cast them to category dtype object_columns = list(updated_obs.select_dtypes(include="object").columns) diff --git a/ehrapy/preprocessing/_imputation.py b/ehrapy/preprocessing/_imputation.py index 60facfeb..03796c2b 100644 --- a/ehrapy/preprocessing/_imputation.py +++ b/ehrapy/preprocessing/_imputation.py @@ -63,7 +63,7 @@ def explicit_impute( _warn_imputation_threshold(adata, var_names=replacement.keys(), threshold=warning_threshold) # type: ignore # 1: Replace all missing values with the specified value - if isinstance(replacement, (int, str)): + if isinstance(replacement, int | str): _replace_explicit(adata.X, replacement, impute_empty_strings) # 2: Replace all missing values in a subset of columns with a specified value per column or a default value, when the column is not explicitly named diff --git a/ehrapy/preprocessing/_normalization.py b/ehrapy/preprocessing/_normalization.py index 4541cef3..de6cf646 100644 --- a/ehrapy/preprocessing/_normalization.py +++ b/ehrapy/preprocessing/_normalization.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING import numpy as np import sklearn.preprocessing as sklearn_pp @@ -20,7 +20,7 @@ ) if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Callable, Sequence import pandas as pd from anndata import AnnData diff --git a/ehrapy/preprocessing/_scanpy_pp_api.py b/ehrapy/preprocessing/_scanpy_pp_api.py index 5317530e..e0e50221 100644 --- a/ehrapy/preprocessing/_scanpy_pp_api.py +++ b/ehrapy/preprocessing/_scanpy_pp_api.py @@ -1,7 +1,8 @@ from __future__ import annotations +from collections.abc import Callable from types import MappingProxyType -from typing import TYPE_CHECKING, Any, Callable, Literal, Union +from typing import TYPE_CHECKING, Any, Literal, TypeAlias import numpy as np import scanpy as sc @@ -15,7 +16,7 @@ from ehrapy.preprocessing._types import KnownTransformer -AnyRandom = Union[int, np.random.RandomState, None] +AnyRandom: TypeAlias = int | np.random.RandomState | None def pca( @@ -193,7 +194,7 @@ def combat( "sqeuclidean", "yule", ] -_Metric = Union[_MetricSparseCapable, _MetricScipySpatial] +_Metric = _MetricSparseCapable | _MetricScipySpatial def neighbors( diff --git a/ehrapy/tools/_method_options.py b/ehrapy/tools/_method_options.py index 315f47f7..5bcad67c 100644 --- a/ehrapy/tools/_method_options.py +++ b/ehrapy/tools/_method_options.py @@ -1,11 +1,11 @@ -from typing import Literal, Optional +from typing import Literal _InitPos = Literal["paga", "spectral", "random"] _LAYOUTS = ("fr", "drl", "kk", "grid_fr", "lgl", "rt", "rt_circular", "fa") _Layout = Literal[_LAYOUTS] # type: ignore -_rank_features_groups_method = Optional[Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"]] +_rank_features_groups_method = Literal["logreg", "t-test", "wilcoxon", "t-test_overestim_var"] | None _correction_method = Literal["benjamini-hochberg", "bonferroni"] _rank_features_groups_cat_method = Literal[ "chi-square", "g-test", "freeman-tukey", "mod-log-likelihood", "neyman", "cressie-read" diff --git a/ehrapy/tools/_scanpy_tl_api.py b/ehrapy/tools/_scanpy_tl_api.py index 4e1af6b0..6b82448e 100644 --- a/ehrapy/tools/_scanpy_tl_api.py +++ b/ehrapy/tools/_scanpy_tl_api.py @@ -1,29 +1,34 @@ -from collections.abc import Iterable, Sequence -from typing import Any, Literal, Optional, Union +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal, TypeAlias import numpy as np import scanpy as sc -from anndata import AnnData -from leidenalg.VertexPartition import MutableVertexPartition -from scipy.sparse import spmatrix -from ehrapy.tools import _method_options +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + + from anndata import AnnData + from leidenalg.VertexPartition import MutableVertexPartition + from scipy.sparse import spmatrix + + from ehrapy.tools import _method_options -AnyRandom = Union[int, np.random.RandomState, None] +AnyRandom: TypeAlias = int | np.random.RandomState | None def tsne( adata: AnnData, - n_pcs: Optional[int] = None, - use_rep: Optional[str] = None, - perplexity: Union[float, int] = 30, - early_exaggeration: Union[float, int] = 12, - learning_rate: Union[float, int] = 1000, + n_pcs: int | None = None, + use_rep: str | None = None, + perplexity: float | int = 30, + early_exaggeration: float | int = 12, + learning_rate: float | int = 1000, random_state: AnyRandom = 0, - n_jobs: Optional[int] = None, + n_jobs: int | None = None, copy: bool = False, metric: str = "euclidean", -) -> Optional[AnnData]: # pragma: no cover +) -> AnnData | None: # pragma: no cover """Calculates t-SNE [Maaten08]_ [Amir13]_ [Pedregosa11]_. t-distributed stochastic neighborhood embedding (tSNE) [Maaten08]_ has been @@ -83,18 +88,18 @@ def umap( min_dist: float = 0.5, spread: float = 1.0, n_components: int = 2, - maxiter: Optional[int] = None, + maxiter: int | None = None, alpha: float = 1.0, gamma: float = 1.0, negative_sample_rate: int = 5, - init_pos: Union[_method_options._InitPos, np.ndarray, None] = "spectral", + init_pos: _method_options._InitPos | np.ndarray | None = "spectral", random_state: AnyRandom = 0, - a: Optional[float] = None, - b: Optional[float] = None, + a: float | None = None, + b: float | None = None, copy: bool = False, method: Literal["umap", "rapids"] = "umap", - neighbors_key: Optional[str] = None, -) -> Optional[AnnData]: # pragma: no cover + neighbors_key: str | None = None, +) -> AnnData | None: # pragma: no cover """Embed the neighborhood graph using UMAP [McInnes18]_. UMAP (Uniform Manifold Approximation and Projection) is a manifold learning @@ -186,17 +191,17 @@ def umap( def draw_graph( adata: AnnData, layout: _method_options._Layout = "fa", - init_pos: Union[str, bool, None] = None, - root: Optional[int] = None, + init_pos: str | bool | None = None, + root: int | None = None, random_state: AnyRandom = 0, - n_jobs: Optional[int] = None, - adjacency: Optional[spmatrix] = None, - key_added_ext: Optional[str] = None, - neighbors_key: Optional[str] = None, - obsp: Optional[str] = None, + n_jobs: int | None = None, + adjacency: spmatrix | None = None, + key_added_ext: str | None = None, + neighbors_key: str | None = None, + obsp: str | None = None, copy: bool = False, **kwds, -) -> Optional[AnnData]: # pragma: no cover +) -> AnnData | None: # pragma: no cover """Force-directed graph drawing [Islam11]_ [Jacomy14]_ [Chippada18]_. .. _fa2: https://github.com/bhargavchippada/forceatlas2 @@ -264,10 +269,10 @@ def draw_graph( def diffmap( adata: AnnData, n_comps: int = 15, - neighbors_key: Optional[str] = None, + neighbors_key: str | None = None, random_state: AnyRandom = 0, copy: bool = False, -) -> Optional[AnnData]: # pragma: no cover +) -> AnnData | None: # pragma: no cover """Diffusion Maps [Coifman05]_ [Haghverdi15]_ [Wolf18]_. Diffusion maps [Coifman05]_ has been proposed for visualizing single-cell @@ -309,9 +314,9 @@ def diffmap( def embedding_density( adata: AnnData, basis: str = "umap", # was positional before 1.4.5 - groupby: Optional[str] = None, - key_added: Optional[str] = None, - components: Union[str, Sequence[str]] = None, + groupby: str | None = None, + key_added: str | None = None, + components: str | Sequence[str] = None, ) -> None: # pragma: no cover """Calculate the density of observation in an embedding (per condition). @@ -353,19 +358,19 @@ def embedding_density( def leiden( adata: AnnData, resolution: float = 1, - restrict_to: Optional[tuple[str, Sequence[str]]] = None, + restrict_to: tuple[str, Sequence[str]] | None = None, random_state: AnyRandom = 0, key_added: str = "leiden", - adjacency: Optional[spmatrix] = None, + adjacency: spmatrix | None = None, directed: bool = True, use_weights: bool = True, n_iterations: int = -1, - partition_type: Optional[type[MutableVertexPartition]] = None, - neighbors_key: Optional[str] = None, - obsp: Optional[str] = None, + partition_type: type[MutableVertexPartition] | None = None, + neighbors_key: str | None = None, + obsp: str | None = None, copy: bool = False, **partition_kwargs, -) -> Optional[AnnData]: # pragma: no cover +) -> AnnData | None: # pragma: no cover """Cluster observations into subgroups [Traag18]_. Cluster observations using the Leiden algorithm [Traag18]_, @@ -429,15 +434,15 @@ def leiden( def dendrogram( adata: AnnData, groupby: str, - n_pcs: Optional[int] = None, - use_rep: Optional[str] = None, - var_names: Optional[Sequence[str]] = None, + n_pcs: int | None = None, + use_rep: str | None = None, + var_names: Sequence[str] | None = None, cor_method: str = "pearson", linkage_method: str = "complete", optimal_ordering: bool = False, - key_added: Optional[str] = None, + key_added: str | None = None, inplace: bool = True, -) -> Optional[dict[str, Any]]: # pragma: no cover +) -> dict[str, Any] | None: # pragma: no cover """Computes a hierarchical clustering for the given `groupby` categories. By default, the PCA representation is used unless `.X` has less than 50 variables. @@ -505,9 +510,9 @@ def dpt( n_branchings: int = 0, min_group_size: float = 0.01, allow_kendall_tau_shift: bool = True, - neighbors_key: Optional[str] = None, + neighbors_key: str | None = None, copy: bool = False, -) -> Optional[AnnData]: # pragma: no cover +) -> AnnData | None: # pragma: no cover """Infer progression of observations through geodesic distance along the graph [Haghverdi16]_ [Wolf19]_. Reconstruct the progression of a biological process from snapshot @@ -562,12 +567,12 @@ def dpt( def paga( adata: AnnData, - groups: Optional[str] = None, + groups: str | None = None, use_rna_velocity: bool = False, model: Literal["v1.2", "v1.0"] = "v1.2", - neighbors_key: Optional[str] = None, + neighbors_key: str | None = None, copy: bool = False, -) -> Optional[AnnData]: # pragma: no cover +) -> AnnData | None: # pragma: no cover """Mapping out the coarse-grained connectivity structures of complex manifolds [Wolf19]_. By quantifying the connectivity of partitions (groups, clusters), @@ -626,13 +631,13 @@ def paga( def ingest( adata: AnnData, adata_ref: AnnData, - obs: Optional[Union[str, Iterable[str]]] = None, - embedding_method: Union[str, Iterable[str]] = ("umap", "pca"), + obs: str | Iterable[str] | None = None, + embedding_method: str | Iterable[str] = ("umap", "pca"), labeling_method: str = "knn", - neighbors_key: Optional[str] = None, + neighbors_key: str | None = None, inplace: bool = True, **kwargs, -) -> Optional[AnnData]: # pragma: no cover +) -> AnnData | None: # pragma: no cover """Map labels and embeddings from reference data to new data. Integrates embeddings and annotations of an `adata` with a reference dataset diff --git a/ehrapy/tools/causal/_dowhy.py b/ehrapy/tools/causal/_dowhy.py index 55916179..f972804b 100644 --- a/ehrapy/tools/causal/_dowhy.py +++ b/ehrapy/tools/causal/_dowhy.py @@ -244,7 +244,7 @@ def causal_inference( pval = "Not applicable" # Format effect, can be list when refuter is "add_unobserved_common_cause" - if isinstance(refute.new_effect, (list, tuple)): + if isinstance(refute.new_effect, list | tuple): new_effect = ", ".join([str(np.round(x, 2)) for x in refute.new_effect]) else: new_effect = f"{refute.new_effect:.3f}" diff --git a/ehrapy/tools/cohort_tracking/_cohort_tracker.py b/ehrapy/tools/cohort_tracking/_cohort_tracker.py index 4d92fb84..fc331550 100644 --- a/ehrapy/tools/cohort_tracking/_cohort_tracker.py +++ b/ehrapy/tools/cohort_tracking/_cohort_tracker.py @@ -390,7 +390,7 @@ def create_legend_with_subtitles(patches_list, subtitles_list, tot_legend_kwargs # there can be empty lists which distort the logic of matching patches to subtitles patches_list = [patch for patch in patches_list if patch] - for patches, subtitle in zip(patches_list, subtitles_list): + for patches, subtitle in zip(patches_list, subtitles_list, strict=False): handles.append(Line2D([], [], linestyle="none", marker="", alpha=0)) # Placeholder for title labels.append(subtitle) @@ -494,7 +494,7 @@ def plot_flowchart( tot_bbox_kwargs = {"boxstyle": "round,pad=0.3", "fc": "lightblue", "alpha": 0.5} if bbox_kwargs is not None: tot_bbox_kwargs.update(bbox_kwargs) - for _, (y, label) in enumerate(zip(y_positions, node_labels)): + for _, (y, label) in enumerate(zip(y_positions, node_labels, strict=False)): axes.annotate( label, xy=(0, y), diff --git a/ehrapy/tools/feature_ranking/_rank_features_groups.py b/ehrapy/tools/feature_ranking/_rank_features_groups.py index d78bfd1c..6fb3932c 100644 --- a/ehrapy/tools/feature_ranking/_rank_features_groups.py +++ b/ehrapy/tools/feature_ranking/_rank_features_groups.py @@ -107,7 +107,7 @@ def _save_rank_features_result( fields = (names, scores, pvals, pvals_adj, logfoldchanges, pts) field_names = ("names", "scores", "pvals", "pvals_adj", "logfoldchanges", "pts") - for values, key in zip(fields, field_names): + for values, key in zip(fields, field_names, strict=False): if values is None or not len(values): continue @@ -139,7 +139,7 @@ def _get_groups_order(groups_subset, group_names, reference): """ if groups_subset == "all": groups_order = group_names - elif isinstance(groups_subset, (str, int)): + elif isinstance(groups_subset, str | int): raise ValueError("Specify a sequence of groups") else: groups_order = list(groups_subset) diff --git a/pyproject.toml b/pyproject.toml index 517a940a..7d910479 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "ehrapy" version = "0.9.0" description = "Electronic Health Record Analysis with Python." readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10,<3.13" license = {file = "LICENSE"} authors = [ {name = "Lukas Heumos"}, @@ -38,7 +38,6 @@ classifiers = [ "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -66,7 +65,6 @@ dependencies = [ "fknni", "python-dateutil", "filelock", - "numpy<2.0.0" # for compatiblity with lifelines ] [project.optional-dependencies] @@ -101,8 +99,7 @@ docs = [ "nbsphinx-link", "ipykernel", "ipython", - "medcat", - "ehrapy[dask]", + "ehrapy[dask,medcat]", ] test = [ "ehrapy[dask]", @@ -135,7 +132,10 @@ markers = [ filterwarnings = [ "ignore::DeprecationWarning", "ignore::anndata.OldFormatWarning:", - "ignore:X converted to numpy array with dtype object:UserWarning" + "ignore:X converted to numpy array with dtype object:UserWarning", + "ignore:`flavor='seurat_v3'` expects raw count data, but non-integers were found:UserWarning", + "ignore:All-NaN slice encountered:RuntimeWarning", + "ignore:Observation names are not unique. To make them unique, call `.obs_names_make_unique`.:UserWarning" ] minversion = 6.0 norecursedirs = [ '.*', 'build', 'dist', '*.egg', 'data', '__pycache__']