From 7f687fbdb6f25f3a9e3a890a8d925864d8fc55d5 Mon Sep 17 00:00:00 2001 From: Taleb Zeghmi <4167032+talebzeghmi@users.noreply.github.com> Date: Mon, 26 Jun 2023 09:28:29 -0700 Subject: [PATCH] Rename import to zdatasets avoid HuggingFace conflict (#37) --- .github/workflows/test.yml | 6 +++--- .gitignore | 10 ++++----- .pre-commit-config.yaml | 2 +- CONTRIBUTING.md | 4 ++-- MANIFEST.in | 2 +- README.md | 10 ++++----- datasets/__init__.py | 18 ---------------- datasets/plugins/__init__.py | 10 --------- docs/conf.py | 12 +++++------ docs/index.rst | 8 +++---- pyproject.toml | 20 +++++++++--------- setup.cfg | 6 +++--- zdatasets/__init__.py | 18 ++++++++++++++++ {datasets => zdatasets}/_typing.py | 0 {datasets => zdatasets}/context.py | 0 {datasets => zdatasets}/dataset_plugin.py | 6 +++--- {datasets => zdatasets}/datasets_decorator.py | 8 +++---- {datasets => zdatasets}/exceptions.py | 0 {datasets => zdatasets}/metaflow.py | 12 +++++------ {datasets => zdatasets}/mode.py | 0 zdatasets/plugins/__init__.py | 10 +++++++++ .../plugins/batch/__init__.py | 0 .../plugins/batch/batch_base_plugin.py | 10 ++++----- .../plugins/batch/batch_dataset.py | 12 +++++------ .../plugins/batch/flow_dataset.py | 10 ++++----- .../plugins/batch/hive_dataset.py | 14 ++++++------ .../plugins/executors/__init__.py | 0 .../plugins/executors/metaflow_executor.py | 4 ++-- .../plugins/register_plugins.py | 8 +++---- {datasets => zdatasets}/program_executor.py | 4 ++-- {datasets => zdatasets}/tests/__init__.py | 0 {datasets => zdatasets}/tests/conftest.py | 6 +++--- .../date=2020-07-23/region=king/data.parquet | Bin .../date=2020-07-23/region=la/data.parquet | Bin .../tests/test_batch_dataset.py | 14 ++++++------ .../tests/test_dataset_plugin.py | 16 +++++++------- .../tests/test_datasets_decorator.py | 4 ++-- .../tests/test_flow_dataset.py | 4 ++-- .../tests/test_hive_dataset.py | 10 ++++----- .../tests/test_metaflow.py | 20 +++++++++--------- .../tests/test_tutorials.py | 2 +- .../tests/utils/__init__.py | 0 .../tests/utils/test_case_utils.py | 2 +- .../tests/utils/test_partitions.py | 4 ++-- .../tests/utils/test_secret_fetcher.py | 10 ++++----- .../tutorials/0_hello_dataset_flow.py | 8 +++---- .../tutorials/1_input_output_flow.py | 4 ++-- .../tutorials/2_spark_dask_flow.py | 4 ++-- .../tutorials/3_foreach_dataset_flow.py | 4 ++-- .../tutorials/4_hello_plugin_flow.py | 10 ++++----- .../tutorials/5_consistent_flow.py | 12 +++++------ .../tutorials/6_hive_dataset_flow.py | 8 +++---- .../tutorials/README.ipynb | 0 .../tutorials/online_plugin.py | 4 ++-- {datasets => zdatasets}/utils/__init__.py | 0 {datasets => zdatasets}/utils/aws.py | 0 {datasets => zdatasets}/utils/case_utils.py | 0 {datasets => zdatasets}/utils/partitions.py | 2 +- .../utils/secret_fetcher.py | 0 59 files changed, 186 insertions(+), 186 deletions(-) delete mode 100644 datasets/__init__.py delete mode 100644 datasets/plugins/__init__.py create mode 100644 zdatasets/__init__.py rename {datasets => zdatasets}/_typing.py (100%) rename {datasets => zdatasets}/context.py (100%) rename {datasets => zdatasets}/dataset_plugin.py (98%) rename {datasets => zdatasets}/datasets_decorator.py (88%) rename {datasets => zdatasets}/exceptions.py (100%) rename {datasets => zdatasets}/metaflow.py (93%) rename {datasets => zdatasets}/mode.py (100%) create mode 100644 zdatasets/plugins/__init__.py rename {datasets => zdatasets}/plugins/batch/__init__.py (100%) rename {datasets => zdatasets}/plugins/batch/batch_base_plugin.py (95%) rename {datasets => zdatasets}/plugins/batch/batch_dataset.py (97%) rename {datasets => zdatasets}/plugins/batch/flow_dataset.py (89%) rename {datasets => zdatasets}/plugins/batch/hive_dataset.py (96%) rename {datasets => zdatasets}/plugins/executors/__init__.py (100%) rename {datasets => zdatasets}/plugins/executors/metaflow_executor.py (92%) rename {datasets => zdatasets}/plugins/register_plugins.py (71%) rename {datasets => zdatasets}/program_executor.py (93%) rename {datasets => zdatasets}/tests/__init__.py (100%) rename {datasets => zdatasets}/tests/conftest.py (92%) rename {datasets => zdatasets}/tests/data/train/date=2020-07-23/region=king/data.parquet (100%) rename {datasets => zdatasets}/tests/data/train/date=2020-07-23/region=la/data.parquet (100%) rename {datasets => zdatasets}/tests/test_batch_dataset.py (95%) rename {datasets => zdatasets}/tests/test_dataset_plugin.py (96%) rename {datasets => zdatasets}/tests/test_datasets_decorator.py (95%) rename {datasets => zdatasets}/tests/test_flow_dataset.py (86%) rename {datasets => zdatasets}/tests/test_hive_dataset.py (97%) rename {datasets => zdatasets}/tests/test_metaflow.py (89%) rename {datasets => zdatasets}/tests/test_tutorials.py (96%) rename {datasets => zdatasets}/tests/utils/__init__.py (100%) rename {datasets => zdatasets}/tests/utils/test_case_utils.py (96%) rename {datasets => zdatasets}/tests/utils/test_partitions.py (94%) rename {datasets => zdatasets}/tests/utils/test_secret_fetcher.py (95%) rename {datasets => zdatasets}/tutorials/0_hello_dataset_flow.py (86%) rename {datasets => zdatasets}/tutorials/1_input_output_flow.py (91%) rename {datasets => zdatasets}/tutorials/2_spark_dask_flow.py (91%) rename {datasets => zdatasets}/tutorials/3_foreach_dataset_flow.py (93%) rename {datasets => zdatasets}/tutorials/4_hello_plugin_flow.py (73%) rename {datasets => zdatasets}/tutorials/5_consistent_flow.py (79%) rename {datasets => zdatasets}/tutorials/6_hive_dataset_flow.py (85%) rename {datasets => zdatasets}/tutorials/README.ipynb (100%) rename {datasets => zdatasets}/tutorials/online_plugin.py (94%) rename {datasets => zdatasets}/utils/__init__.py (100%) rename {datasets => zdatasets}/utils/aws.py (100%) rename {datasets => zdatasets}/utils/case_utils.py (100%) rename {datasets => zdatasets}/utils/partitions.py (98%) rename {datasets => zdatasets}/utils/secret_fetcher.py (100%) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c14fa27..f2a4b2e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -55,12 +55,12 @@ jobs: - name: Code Quality Check run: | - poetry run black datasets --check - poetry run flake8 datasets + poetry run black zdatasets --check + poetry run flake8 zdatasets - name: Execute Python tests run: | - poetry run pytest datasets + poetry run pytest zdatasets pip install coverage==6.1.2 coveragepy-lcov diff --git a/.gitignore b/.gitignore index 8457ed3..358ec26 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ eggs/ parts/ /share/ /etc/ -datasets.egg-info/ +zdatasets.egg-info/ .extends-cache GENERATED_VERSION versioner-* @@ -91,7 +91,7 @@ man pip-selfcheck.json metastore_db/* -datasets/tutorials/data/* -datasets/tests/data/datastore/* -datasets/tests/data/ds1/* -datasets/tests/data/pandas.csv +zdatasets/tutorials/data/* +zdatasets/tests/data/datastore/* +zdatasets/tests/data/ds1/* +zdatasets/tests/data/pandas.csv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b4a292c..1a7993b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,4 +25,4 @@ repos: entry: pytest pass_filenames: false stages: [commit] - args: [datasets] + args: [zdatasets] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 27b67a6..62711a4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,11 +4,11 @@ ## Running Tests - poetry run pytest datasets + poetry run pytest zdatasets ### Run Single Test - poetry run pytest datasets -k search_item (e.g., name of test) + poetry run pytest zdatasets -k search_item (e.g., name of test) ### Pre-commit diff --git a/MANIFEST.in b/MANIFEST.in index 43a8203..f9a78f7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ # TODO: https://python-poetry.org/docs/pyproject#include-and-exclude include VERSION include README.md -recursive-include datasets/docs/* +recursive-include zdatasets/docs/* diff --git a/README.md b/README.md index 3473da9..4860639 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/zillow/datasets/main?urlpath=lab/tree/datasets/tutorials) -Welcome to @datasets +# Welcome to zdatasets ================================================== TODO @@ -12,13 +12,13 @@ TODO import pandas as pd from metaflow import FlowSpec, step -from datasets import Dataset, Mode -from datasets.metaflow import DatasetParameter -from datasets.plugins import BatchOptions +from zdatasets import Dataset, Mode +from zdatasets.metaflow import DatasetParameter +from zdatasets.plugins import BatchOptions # Can also invoke from CLI: -# > python datasets/tutorials/0_hello_dataset_flow.py run \ +# > python zdatasets/tutorials/0_hello_dataset_flow.py run \ # --hello_dataset '{"name": "HelloDataset", "mode": "READ_WRITE", \ # "options": {"type": "BatchOptions", "partition_by": "region"}}' class HelloDatasetFlow(FlowSpec): diff --git a/datasets/__init__.py b/datasets/__init__.py deleted file mode 100644 index 9807898..0000000 --- a/datasets/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# isort: skip_file -# flake8: noqa: F401 -from datasets.mode import Mode -from datasets.dataset_plugin import DatasetPlugin - - -from datasets.context import Context -from datasets.datasets_decorator import dataset - - -from datasets import plugins -from datasets.plugins.batch.hive_dataset import HiveDataset - -from datasets._typing import ColumnNames, DataFrameType - -from datasets.utils import SecretFetcher - -Dataset = DatasetPlugin.factory diff --git a/datasets/plugins/__init__.py b/datasets/plugins/__init__.py deleted file mode 100644 index ddc597c..0000000 --- a/datasets/plugins/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# isort: skip_file -# flake8: noqa: F401 -from datasets.plugins.executors.metaflow_executor import MetaflowExecutor -from datasets.plugins.batch.batch_dataset import BatchDataset, BatchOptions -from datasets.plugins.batch.flow_dataset import FlowDataset, FlowOptions -from datasets.plugins.batch.hive_dataset import HiveDataset, HiveOptions -from datasets.plugins.register_plugins import register - - -register() diff --git a/docs/conf.py b/docs/conf.py index dd08570..be4ad78 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -197,7 +197,7 @@ # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = "datasets-daldoc" +htmlhelp_basename = "zdatasets-daldoc" # -- Options for LaTeX output --------------------------------------------- @@ -215,7 +215,7 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). -latex_documents = [("index", "datasets.tex", "datasets Documentation", "aiplat@zillow.com", "manual")] +latex_documents = [("index", "zdatasets.tex", "zdatasets Documentation", "aiplat@zillow.com", "manual")] # The name of an image file (relative to this directory) to place at the top of # the title page. @@ -242,7 +242,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [("index", "datasets", "datasets Documentation", ["aiplat@zillow.com"], 1)] +man_pages = [("index", "zdatasets", "zdatasets Documentation", ["aiplat@zillow.com"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -256,10 +256,10 @@ texinfo_documents = [ ( "index", - "datasets", - "datasets Documentation", + "zdatasets", + "zdatasets Documentation", "", - "datasets", + "zdatasets", "One line description of project.", "Miscellaneous", ) diff --git a/docs/index.rst b/docs/index.rst index 7317aca..e7e48b3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,8 +1,8 @@ -.. datasets documentation master file +.. zdatasets documentation master file You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to datasets +Welcome to zdatasets ================================================== TODO @@ -11,8 +11,8 @@ TODO import pandas as pd from metaflow import FlowSpec, step - from datasets.datasets_decorator import datasets - from datasets.mode import Mode + from zdatasets.datasets_decorator import datasets + from zdatasets.mode import Mode class HelloDatasetFlow(FlowSpec): diff --git a/pyproject.toml b/pyproject.toml index 8c067a3..627507e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zdatasets" -version = "0.2.5" +version = "1.2.5" description = "Dataset SDK for consistent read/write [batch, online, streaming] data." classifiers = [ "Development Status :: 2 - Pre-Alpha", @@ -13,17 +13,17 @@ authors = ["Taleb Zeghmi"] readme = "README.md" [[tool.poetry.packages]] -include = "datasets" +include = "zdatasets" # https://packaging.python.org/guides/creating-and-discovering-plugins/ [tool.poetry.plugins] -[tool.poetry.plugins."datasets.plugins"] -batch_dataset = "datasets.plugins:BatchDataset" -flow_dataset = "datasets.plugins:FlowDataset" -hive_dataset = "datasets.plugins:HiveDataset" +[tool.poetry.plugins."zdatasets.plugins"] +batch_dataset = "zdatasets.plugins:BatchDataset" +flow_dataset = "zdatasets.plugins:FlowDataset" +hive_dataset = "zdatasets.plugins:HiveDataset" -[tool.poetry.plugins."datasets.executors"] -metaflow_executor = "datasets.plugins:MetaflowExecutor" +[tool.poetry.plugins."zdatasets.executors"] +metaflow_executor = "zdatasets.plugins:MetaflowExecutor" [tool.poetry.dependencies] python = ">=3.8.0,<4" @@ -57,8 +57,8 @@ spark = ["pyspark"] kubernetes = ["kubernetes"] [tool.isort] -known_first_party = 'datasets' -known_third_party = ["datasets", "numpy", "orbital_core", "pandas"] +known_first_party = 'zdatasets' +known_third_party = ["zdatasets", "numpy", "orbital_core", "pandas"] multi_line_output = 3 lines_after_imports = 2 force_grid_wrap = 0 diff --git a/setup.cfg b/setup.cfg index 26244ca..f81fafc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ # coverage.py [coverage:run] branch = true -source = datasets +source = zdatasets omit = **/tests/* [coverage:report] @@ -27,8 +27,8 @@ exclude = .git,__pycache__,docs/*,doc/*,build,dist [tool:pytest] log_cli=true ;addopts = -vv -m "not spark" --cov-report=term -rP -;addopts = --cov=datasets --cov-report=term --cov-report=html --cov-report xml -addopts = --cov=datasets --cov-config=setup.cfg --cov-report=term --cov-report=html --cov-report xml +;addopts = --cov=zdatasets --cov-report=term --cov-report=html --cov-report xml +addopts = --cov=zdatasets --cov-config=setup.cfg --cov-report=term --cov-report=html --cov-report xml markers = spark: marks tests as slow (deselect with '-m "not slow"') \ No newline at end of file diff --git a/zdatasets/__init__.py b/zdatasets/__init__.py new file mode 100644 index 0000000..e35896d --- /dev/null +++ b/zdatasets/__init__.py @@ -0,0 +1,18 @@ +# isort: skip_file +# flake8: noqa: F401 +from zdatasets.mode import Mode +from zdatasets.dataset_plugin import DatasetPlugin + + +from zdatasets.context import Context +from zdatasets.datasets_decorator import dataset + + +from zdatasets import plugins +from zdatasets.plugins.batch.hive_dataset import HiveDataset + +from zdatasets._typing import ColumnNames, DataFrameType + +from zdatasets.utils import SecretFetcher + +Dataset = DatasetPlugin.factory diff --git a/datasets/_typing.py b/zdatasets/_typing.py similarity index 100% rename from datasets/_typing.py rename to zdatasets/_typing.py diff --git a/datasets/context.py b/zdatasets/context.py similarity index 100% rename from datasets/context.py rename to zdatasets/context.py diff --git a/datasets/dataset_plugin.py b/zdatasets/dataset_plugin.py similarity index 98% rename from datasets/dataset_plugin.py rename to zdatasets/dataset_plugin.py index b1eda9f..c7a486a 100644 --- a/datasets/dataset_plugin.py +++ b/zdatasets/dataset_plugin.py @@ -6,9 +6,9 @@ from dataclasses import dataclass from typing import Callable, Dict, Iterable, Optional, Tuple, Type, Union -from datasets._typing import ColumnNames, DataFrameType -from datasets.context import Context -from datasets.utils.case_utils import is_upper_pascal_case +from zdatasets._typing import ColumnNames, DataFrameType +from zdatasets.context import Context +from zdatasets.utils.case_utils import is_upper_pascal_case from .mode import Mode from .program_executor import ProgramExecutor diff --git a/datasets/datasets_decorator.py b/zdatasets/datasets_decorator.py similarity index 88% rename from datasets/datasets_decorator.py rename to zdatasets/datasets_decorator.py index ae62c70..e3ce441 100755 --- a/datasets/datasets_decorator.py +++ b/zdatasets/datasets_decorator.py @@ -2,10 +2,10 @@ import keyword from typing import Callable, Dict, Optional, Union -from datasets._typing import ColumnNames -from datasets.dataset_plugin import Context, DatasetPlugin, StorageOptions -from datasets.mode import Mode -from datasets.utils.case_utils import pascal_to_snake_case +from zdatasets._typing import ColumnNames +from zdatasets.dataset_plugin import Context, DatasetPlugin, StorageOptions +from zdatasets.mode import Mode +from zdatasets.utils.case_utils import pascal_to_snake_case def dataset( diff --git a/datasets/exceptions.py b/zdatasets/exceptions.py similarity index 100% rename from datasets/exceptions.py rename to zdatasets/exceptions.py diff --git a/datasets/metaflow.py b/zdatasets/metaflow.py similarity index 93% rename from datasets/metaflow.py rename to zdatasets/metaflow.py index 98f9eb0..60ec9e3 100644 --- a/datasets/metaflow.py +++ b/zdatasets/metaflow.py @@ -6,12 +6,12 @@ from metaflow._vendor.click import ParamType from metaflow.parameters import Parameter -from datasets import DataFrameType -from datasets._typing import ColumnNames -from datasets.context import Context -from datasets.dataset_plugin import DatasetPlugin, StorageOptions -from datasets.mode import Mode -from datasets.utils.secret_fetcher import SecretFetcher +from zdatasets import DataFrameType +from zdatasets._typing import ColumnNames +from zdatasets.context import Context +from zdatasets.dataset_plugin import DatasetPlugin, StorageOptions +from zdatasets.mode import Mode +from zdatasets.utils.secret_fetcher import SecretFetcher class _DatasetTypeClass(ParamType): diff --git a/datasets/mode.py b/zdatasets/mode.py similarity index 100% rename from datasets/mode.py rename to zdatasets/mode.py diff --git a/zdatasets/plugins/__init__.py b/zdatasets/plugins/__init__.py new file mode 100644 index 0000000..8560c48 --- /dev/null +++ b/zdatasets/plugins/__init__.py @@ -0,0 +1,10 @@ +# isort: skip_file +# flake8: noqa: F401 +from zdatasets.plugins.executors.metaflow_executor import MetaflowExecutor +from zdatasets.plugins.batch.batch_dataset import BatchDataset, BatchOptions +from zdatasets.plugins.batch.flow_dataset import FlowDataset, FlowOptions +from zdatasets.plugins.batch.hive_dataset import HiveDataset, HiveOptions +from zdatasets.plugins.register_plugins import register + + +register() diff --git a/datasets/plugins/batch/__init__.py b/zdatasets/plugins/batch/__init__.py similarity index 100% rename from datasets/plugins/batch/__init__.py rename to zdatasets/plugins/batch/__init__.py diff --git a/datasets/plugins/batch/batch_base_plugin.py b/zdatasets/plugins/batch/batch_base_plugin.py similarity index 95% rename from datasets/plugins/batch/batch_base_plugin.py rename to zdatasets/plugins/batch/batch_base_plugin.py index d5dceea..f9aa26a 100644 --- a/datasets/plugins/batch/batch_base_plugin.py +++ b/zdatasets/plugins/batch/batch_base_plugin.py @@ -12,11 +12,11 @@ Union, ) -from datasets._typing import ColumnNames, DataFrameType -from datasets.dataset_plugin import DatasetPlugin, StorageOptions -from datasets.exceptions import InvalidOperationException -from datasets.mode import Mode -from datasets.utils.case_utils import pascal_to_snake_case +from zdatasets._typing import ColumnNames, DataFrameType +from zdatasets.dataset_plugin import DatasetPlugin, StorageOptions +from zdatasets.exceptions import InvalidOperationException +from zdatasets.mode import Mode +from zdatasets.utils.case_utils import pascal_to_snake_case _logger = logging.getLogger(__name__) diff --git a/datasets/plugins/batch/batch_dataset.py b/zdatasets/plugins/batch/batch_dataset.py similarity index 97% rename from datasets/plugins/batch/batch_dataset.py rename to zdatasets/plugins/batch/batch_dataset.py index 32b563b..e1e8019 100644 --- a/datasets/plugins/batch/batch_dataset.py +++ b/zdatasets/plugins/batch/batch_dataset.py @@ -3,12 +3,12 @@ import pandas as pd -from datasets import Mode -from datasets._typing import ColumnNames, DataFrameType -from datasets.context import Context -from datasets.dataset_plugin import DatasetPlugin -from datasets.exceptions import InvalidOperationException -from datasets.plugins.batch.batch_base_plugin import ( +from zdatasets import Mode +from zdatasets._typing import ColumnNames, DataFrameType +from zdatasets.context import Context +from zdatasets.dataset_plugin import DatasetPlugin +from zdatasets.exceptions import InvalidOperationException +from zdatasets.plugins.batch.batch_base_plugin import ( BatchBasePlugin, BatchOptions, ) diff --git a/datasets/plugins/batch/flow_dataset.py b/zdatasets/plugins/batch/flow_dataset.py similarity index 89% rename from datasets/plugins/batch/flow_dataset.py rename to zdatasets/plugins/batch/flow_dataset.py index cddd4e8..b67110b 100644 --- a/datasets/plugins/batch/flow_dataset.py +++ b/zdatasets/plugins/batch/flow_dataset.py @@ -1,11 +1,11 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Optional, Tuple, Union -from datasets._typing import ColumnNames -from datasets.context import Context -from datasets.dataset_plugin import DatasetPlugin, StorageOptions -from datasets.mode import Mode -from datasets.plugins import BatchDataset +from zdatasets._typing import ColumnNames +from zdatasets.context import Context +from zdatasets.dataset_plugin import DatasetPlugin, StorageOptions +from zdatasets.mode import Mode +from zdatasets.plugins import BatchDataset if TYPE_CHECKING: diff --git a/datasets/plugins/batch/hive_dataset.py b/zdatasets/plugins/batch/hive_dataset.py similarity index 96% rename from datasets/plugins/batch/hive_dataset.py rename to zdatasets/plugins/batch/hive_dataset.py index 06cfc57..c7241ac 100644 --- a/datasets/plugins/batch/hive_dataset.py +++ b/zdatasets/plugins/batch/hive_dataset.py @@ -7,16 +7,16 @@ import pandas as pd -from datasets._typing import ColumnNames -from datasets.context import Context -from datasets.dataset_plugin import DatasetPlugin -from datasets.exceptions import InvalidOperationException -from datasets.mode import Mode -from datasets.plugins.batch.batch_base_plugin import ( +from zdatasets._typing import ColumnNames +from zdatasets.context import Context +from zdatasets.dataset_plugin import DatasetPlugin +from zdatasets.exceptions import InvalidOperationException +from zdatasets.mode import Mode +from zdatasets.plugins.batch.batch_base_plugin import ( BatchBasePlugin, BatchOptions, ) -from datasets.utils.case_utils import ( +from zdatasets.utils.case_utils import ( is_upper_pascal_case, snake_case_to_pascal, ) diff --git a/datasets/plugins/executors/__init__.py b/zdatasets/plugins/executors/__init__.py similarity index 100% rename from datasets/plugins/executors/__init__.py rename to zdatasets/plugins/executors/__init__.py diff --git a/datasets/plugins/executors/metaflow_executor.py b/zdatasets/plugins/executors/metaflow_executor.py similarity index 92% rename from datasets/plugins/executors/metaflow_executor.py rename to zdatasets/plugins/executors/metaflow_executor.py index ee48703..5e7072b 100644 --- a/datasets/plugins/executors/metaflow_executor.py +++ b/zdatasets/plugins/executors/metaflow_executor.py @@ -2,8 +2,8 @@ from dateutil import parser -from datasets.context import Context -from datasets.program_executor import ProgramExecutor +from zdatasets.context import Context +from zdatasets.program_executor import ProgramExecutor class MetaflowExecutor(ProgramExecutor): diff --git a/datasets/plugins/register_plugins.py b/zdatasets/plugins/register_plugins.py similarity index 71% rename from datasets/plugins/register_plugins.py rename to zdatasets/plugins/register_plugins.py index a4571d4..3c24792 100644 --- a/datasets/plugins/register_plugins.py +++ b/zdatasets/plugins/register_plugins.py @@ -1,18 +1,18 @@ -from datasets.dataset_plugin import DatasetPlugin -from datasets.plugins import MetaflowExecutor +from zdatasets.dataset_plugin import DatasetPlugin +from zdatasets.plugins import MetaflowExecutor def register(): from importlib_metadata import entry_points # Register plugins - for entry in entry_points(group="datasets.plugins"): + for entry in entry_points(group="zdatasets.plugins"): entry.load() # Register default executor first DatasetPlugin.register_executor(executor=MetaflowExecutor()) - for entry in entry_points(group="datasets.executors"): + for entry in entry_points(group="zdatasets.executors"): executor = entry.load() if not isinstance(executor, type(MetaflowExecutor)): DatasetPlugin.register_executor(executor=executor) diff --git a/datasets/program_executor.py b/zdatasets/program_executor.py similarity index 93% rename from datasets/program_executor.py rename to zdatasets/program_executor.py index 7c00833..40898db 100644 --- a/datasets/program_executor.py +++ b/zdatasets/program_executor.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod -from datasets.context import Context +from zdatasets.context import Context class ProgramExecutor(ABC): @@ -36,6 +36,6 @@ def context(self) -> Context: def run_time(self) -> int: """ UTC Epoch time when the program started, used as the run_time column in Batch & Hive - datasets. + zdatasets. """ pass diff --git a/datasets/tests/__init__.py b/zdatasets/tests/__init__.py similarity index 100% rename from datasets/tests/__init__.py rename to zdatasets/tests/__init__.py diff --git a/datasets/tests/conftest.py b/zdatasets/tests/conftest.py similarity index 92% rename from datasets/tests/conftest.py rename to zdatasets/tests/conftest.py index 3a4de0c..16dde96 100644 --- a/datasets/tests/conftest.py +++ b/zdatasets/tests/conftest.py @@ -7,9 +7,9 @@ import pytest from pyspark.sql import SparkSession -from datasets import Context -from datasets.dataset_plugin import DatasetPlugin -from datasets.program_executor import ProgramExecutor +from zdatasets import Context +from zdatasets.dataset_plugin import DatasetPlugin +from zdatasets.program_executor import ProgramExecutor test_dir = Path(os.path.realpath(__file__)).parent diff --git a/datasets/tests/data/train/date=2020-07-23/region=king/data.parquet b/zdatasets/tests/data/train/date=2020-07-23/region=king/data.parquet similarity index 100% rename from datasets/tests/data/train/date=2020-07-23/region=king/data.parquet rename to zdatasets/tests/data/train/date=2020-07-23/region=king/data.parquet diff --git a/datasets/tests/data/train/date=2020-07-23/region=la/data.parquet b/zdatasets/tests/data/train/date=2020-07-23/region=la/data.parquet similarity index 100% rename from datasets/tests/data/train/date=2020-07-23/region=la/data.parquet rename to zdatasets/tests/data/train/date=2020-07-23/region=la/data.parquet diff --git a/datasets/tests/test_batch_dataset.py b/zdatasets/tests/test_batch_dataset.py similarity index 95% rename from datasets/tests/test_batch_dataset.py rename to zdatasets/tests/test_batch_dataset.py index ced3402..67d1d28 100644 --- a/datasets/tests/test_batch_dataset.py +++ b/zdatasets/tests/test_batch_dataset.py @@ -8,11 +8,11 @@ from pyspark import pandas as ps from pyspark.sql import DataFrame as SparkDataFrame -from datasets import Dataset, Mode -from datasets.exceptions import InvalidOperationException -from datasets.plugins.batch.batch_base_plugin import BatchOptions -from datasets.plugins.batch.batch_dataset import BatchDataset -from datasets.tests.conftest import TestExecutor +from zdatasets import Dataset, Mode +from zdatasets.exceptions import InvalidOperationException +from zdatasets.plugins.batch.batch_base_plugin import BatchOptions +from zdatasets.plugins.batch.batch_dataset import BatchDataset +from zdatasets.tests.conftest import TestExecutor csv_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data/pandas.csv") @@ -160,7 +160,7 @@ def test_read(): def test_get_dataset_path(dataset: BatchDataset, df: pd.DataFrame): dataset.write(df.copy()) path = dataset._get_dataset_path() - assert path.endswith("datasets/tests/data/datastore/my_program/ds_1") + assert path.endswith("zdatasets/tests/data/datastore/my_program/ds_1") os.path.exists(f"{path}/col1=A/col3=A1") shutil.rmtree(path) @@ -265,7 +265,7 @@ def test_register_dataset_path_plugin(dataset: BatchDataset, df: pd.DataFrame): assert dataset.name == "MyTable" path = dataset._get_dataset_path() - assert path.endswith("datasets/tests/data/datastore/my_program/my_table") + assert path.endswith("zdatasets/tests/data/datastore/my_program/my_table") def test_dataset_path_func(passed_dataset: BatchDataset) -> str: return "fee_foo" diff --git a/datasets/tests/test_dataset_plugin.py b/zdatasets/tests/test_dataset_plugin.py similarity index 96% rename from datasets/tests/test_dataset_plugin.py rename to zdatasets/tests/test_dataset_plugin.py index 8da7722..e48254d 100644 --- a/datasets/tests/test_dataset_plugin.py +++ b/zdatasets/tests/test_dataset_plugin.py @@ -4,14 +4,14 @@ import pandas as pd import pytest -from datasets import Context, DataFrameType, Dataset, Mode -from datasets._typing import ColumnNames -from datasets.dataset_plugin import DatasetPlugin, StorageOptions -from datasets.metaflow import _DatasetTypeClass -from datasets.plugins import HiveDataset -from datasets.plugins.batch.hive_dataset import HiveOptions -from datasets.tests.conftest import TestExecutor -from datasets.utils.secret_fetcher import SecretFetcher +from zdatasets import Context, DataFrameType, Dataset, Mode +from zdatasets._typing import ColumnNames +from zdatasets.dataset_plugin import DatasetPlugin, StorageOptions +from zdatasets.metaflow import _DatasetTypeClass +from zdatasets.plugins import HiveDataset +from zdatasets.plugins.batch.hive_dataset import HiveOptions +from zdatasets.tests.conftest import TestExecutor +from zdatasets.utils.secret_fetcher import SecretFetcher # We'll need to inherit dict too to make this class json serializable diff --git a/datasets/tests/test_datasets_decorator.py b/zdatasets/tests/test_datasets_decorator.py similarity index 95% rename from datasets/tests/test_datasets_decorator.py rename to zdatasets/tests/test_datasets_decorator.py index 85ff9d5..49e651a 100644 --- a/datasets/tests/test_datasets_decorator.py +++ b/zdatasets/tests/test_datasets_decorator.py @@ -1,7 +1,7 @@ import pytest -from datasets import dataset -from datasets.plugins import HiveDataset +from zdatasets import dataset +from zdatasets.plugins import HiveDataset def test_step_decorator(): diff --git a/datasets/tests/test_flow_dataset.py b/zdatasets/tests/test_flow_dataset.py similarity index 86% rename from datasets/tests/test_flow_dataset.py rename to zdatasets/tests/test_flow_dataset.py index 171af97..2a0237e 100644 --- a/datasets/tests/test_flow_dataset.py +++ b/zdatasets/tests/test_flow_dataset.py @@ -1,7 +1,7 @@ from metaflow import Flow, namespace -from datasets.plugins.batch.flow_dataset import _get_run_id -from datasets.tests.test_tutorials import run_flow +from zdatasets.plugins.batch.flow_dataset import _get_run_id +from zdatasets.tests.test_tutorials import run_flow def test_get_run_id(): diff --git a/datasets/tests/test_hive_dataset.py b/zdatasets/tests/test_hive_dataset.py similarity index 97% rename from datasets/tests/test_hive_dataset.py rename to zdatasets/tests/test_hive_dataset.py index f8b0751..10d0fe4 100644 --- a/datasets/tests/test_hive_dataset.py +++ b/zdatasets/tests/test_hive_dataset.py @@ -7,14 +7,14 @@ from pyspark import pandas as ps from pyspark.sql import DataFrame as SparkDataFrame, SparkSession -from datasets import Dataset, Mode -from datasets.exceptions import InvalidOperationException -from datasets.plugins import HiveDataset -from datasets.plugins.batch.hive_dataset import ( +from zdatasets import Dataset, Mode +from zdatasets.exceptions import InvalidOperationException +from zdatasets.plugins import HiveDataset +from zdatasets.plugins.batch.hive_dataset import ( HiveOptions, _retry_with_backoff, ) -from datasets.tests.conftest import TestExecutor +from zdatasets.tests.conftest import TestExecutor @pytest.fixture diff --git a/datasets/tests/test_metaflow.py b/zdatasets/tests/test_metaflow.py similarity index 89% rename from datasets/tests/test_metaflow.py rename to zdatasets/tests/test_metaflow.py index 8070f3a..138e9d4 100644 --- a/datasets/tests/test_metaflow.py +++ b/zdatasets/tests/test_metaflow.py @@ -1,22 +1,22 @@ import json -from datasets import Dataset -from datasets.context import Context -from datasets.dataset_plugin import StorageOptions -from datasets.metaflow import ( +from zdatasets import Dataset +from zdatasets.context import Context +from zdatasets.dataset_plugin import StorageOptions +from zdatasets.metaflow import ( _DatasetParams, _DatasetParamsDecoder, _DatasetTypeClass, ) -from datasets.mode import Mode -from datasets.plugins.batch.batch_base_plugin import BatchOptions -from datasets.plugins.batch.batch_dataset import BatchDataset -from datasets.plugins.batch.hive_dataset import HiveOptions -from datasets.tests.test_dataset_plugin import ( +from zdatasets.mode import Mode +from zdatasets.plugins.batch.batch_base_plugin import BatchOptions +from zdatasets.plugins.batch.batch_dataset import BatchDataset +from zdatasets.plugins.batch.hive_dataset import HiveOptions +from zdatasets.tests.test_dataset_plugin import ( SecretDatasetPluginTest, SecretDatasetTestOptions, ) -from datasets.utils.secret_fetcher import SecretFetcher +from zdatasets.utils.secret_fetcher import SecretFetcher def test_dataset_dumps_load(): diff --git a/datasets/tests/test_tutorials.py b/zdatasets/tests/test_tutorials.py similarity index 96% rename from datasets/tests/test_tutorials.py rename to zdatasets/tests/test_tutorials.py index 3c3002d..8eeb9e4 100644 --- a/datasets/tests/test_tutorials.py +++ b/zdatasets/tests/test_tutorials.py @@ -53,7 +53,7 @@ def test_hive_flow(): def run_flow(flow_py, args: Optional[list] = None, context: Optional[str] = None) -> str: - os.environ["METAFLOW_COVERAGE_SOURCE"] = "tutorial,datasets" + os.environ["METAFLOW_COVERAGE_SOURCE"] = "tutorial,zdatasets" os.environ["METAFLOW_COVERAGE_OMIT"] = "metaflow" os.environ["METAFLOW_USER"] = "compile_only_user" if context: diff --git a/datasets/tests/utils/__init__.py b/zdatasets/tests/utils/__init__.py similarity index 100% rename from datasets/tests/utils/__init__.py rename to zdatasets/tests/utils/__init__.py diff --git a/datasets/tests/utils/test_case_utils.py b/zdatasets/tests/utils/test_case_utils.py similarity index 96% rename from datasets/tests/utils/test_case_utils.py rename to zdatasets/tests/utils/test_case_utils.py index 9e7afe4..9060734 100644 --- a/datasets/tests/utils/test_case_utils.py +++ b/zdatasets/tests/utils/test_case_utils.py @@ -1,4 +1,4 @@ -from datasets.utils.case_utils import ( +from zdatasets.utils.case_utils import ( is_snake_case, is_upper_pascal_case, pascal_to_snake_case, diff --git a/datasets/tests/utils/test_partitions.py b/zdatasets/tests/utils/test_partitions.py similarity index 94% rename from datasets/tests/utils/test_partitions.py rename to zdatasets/tests/utils/test_partitions.py index 9079af7..7c89574 100644 --- a/datasets/tests/utils/test_partitions.py +++ b/zdatasets/tests/utils/test_partitions.py @@ -4,8 +4,8 @@ import pytest from moto import mock_s3, mock_sts -from datasets.utils.aws import get_aws_client -from datasets.utils.partitions import Partition, get_path_partitions +from zdatasets.utils.aws import get_aws_client +from zdatasets.utils.partitions import Partition, get_path_partitions @pytest.fixture diff --git a/datasets/tests/utils/test_secret_fetcher.py b/zdatasets/tests/utils/test_secret_fetcher.py similarity index 95% rename from datasets/tests/utils/test_secret_fetcher.py rename to zdatasets/tests/utils/test_secret_fetcher.py index 7e0afd9..9a912e3 100644 --- a/datasets/tests/utils/test_secret_fetcher.py +++ b/zdatasets/tests/utils/test_secret_fetcher.py @@ -7,7 +7,7 @@ import pytest from moto import mock_secretsmanager -from datasets.utils.secret_fetcher import ( +from zdatasets.utils.secret_fetcher import ( SecretFetcher, get_current_namespace, try_import_kubernetes, @@ -71,7 +71,7 @@ def test_fetch_env_secret_not_json_decodable(): @mock_secretsmanager def test_fetch_aws_secret(): - from datasets.utils.secret_fetcher import logger, secret_cache + from zdatasets.utils.secret_fetcher import logger, secret_cache conn = boto3.client("secretsmanager", region_name="us-west-2") conn.create_secret(Name="json-decodable-dict", SecretString='{"key": "value"}') @@ -113,10 +113,10 @@ def test_fetch_aws_secret(): SecretFetcher(aws_secret_arn="empty").value -@mock.patch("datasets.utils.secret_fetcher.get_current_namespace") -@mock.patch("datasets.utils.secret_fetcher.try_import_kubernetes") +@mock.patch("zdatasets.utils.secret_fetcher.get_current_namespace") +@mock.patch("zdatasets.utils.secret_fetcher.try_import_kubernetes") def test_fetch_kubernetes_secret(kubernetes, namespace): - from datasets.utils.secret_fetcher import logger, secret_cache + from zdatasets.utils.secret_fetcher import logger, secret_cache example_kubernetes_secret = { "key": base64.b64encode(b"value"), diff --git a/datasets/tutorials/0_hello_dataset_flow.py b/zdatasets/tutorials/0_hello_dataset_flow.py similarity index 86% rename from datasets/tutorials/0_hello_dataset_flow.py rename to zdatasets/tutorials/0_hello_dataset_flow.py index e99cbc2..56566e5 100755 --- a/datasets/tutorials/0_hello_dataset_flow.py +++ b/zdatasets/tutorials/0_hello_dataset_flow.py @@ -1,13 +1,13 @@ import pandas as pd from metaflow import FlowSpec, step -from datasets import Dataset, Mode -from datasets.metaflow import DatasetParameter -from datasets.plugins import BatchOptions +from zdatasets import Dataset, Mode +from zdatasets.metaflow import DatasetParameter +from zdatasets.plugins import BatchOptions # Can also invoke from CLI: -# > python datasets/tutorials/0_hello_dataset_flow.py run \ +# > python zdatasets/tutorials/0_hello_dataset_flow.py run \ # --hello_dataset '{"name": "HelloDataset", "mode": "READ_WRITE", \ # "options": {"type": "BatchOptions", "partition_by": "region"}}' class HelloDatasetFlow(FlowSpec): diff --git a/datasets/tutorials/1_input_output_flow.py b/zdatasets/tutorials/1_input_output_flow.py similarity index 91% rename from datasets/tutorials/1_input_output_flow.py rename to zdatasets/tutorials/1_input_output_flow.py index c2d4623..66030b9 100755 --- a/datasets/tutorials/1_input_output_flow.py +++ b/zdatasets/tutorials/1_input_output_flow.py @@ -1,8 +1,8 @@ import pandas as pd from metaflow import Flow, FlowSpec, step -from datasets import Mode, dataset -from datasets.plugins import BatchOptions, FlowOptions +from zdatasets import Mode, dataset +from zdatasets.plugins import BatchOptions, FlowOptions class InputOutputDatasetFlow(FlowSpec): diff --git a/datasets/tutorials/2_spark_dask_flow.py b/zdatasets/tutorials/2_spark_dask_flow.py similarity index 91% rename from datasets/tutorials/2_spark_dask_flow.py rename to zdatasets/tutorials/2_spark_dask_flow.py index 6f024fb..2857952 100755 --- a/datasets/tutorials/2_spark_dask_flow.py +++ b/zdatasets/tutorials/2_spark_dask_flow.py @@ -1,8 +1,8 @@ from dask.dataframe import DataFrame from metaflow import FlowSpec, step -from datasets import dataset -from datasets.plugins import FlowOptions +from zdatasets import dataset +from zdatasets.plugins import FlowOptions class SparkDaskFlow(FlowSpec): diff --git a/datasets/tutorials/3_foreach_dataset_flow.py b/zdatasets/tutorials/3_foreach_dataset_flow.py similarity index 93% rename from datasets/tutorials/3_foreach_dataset_flow.py rename to zdatasets/tutorials/3_foreach_dataset_flow.py index f7ba1be..f494e6e 100755 --- a/datasets/tutorials/3_foreach_dataset_flow.py +++ b/zdatasets/tutorials/3_foreach_dataset_flow.py @@ -3,8 +3,8 @@ import pandas as pd # type: ignore from metaflow import FlowSpec, step -from datasets import Mode, dataset -from datasets.plugins import BatchDataset, BatchOptions +from zdatasets import Mode, dataset +from zdatasets.plugins import BatchDataset, BatchOptions flow_dir = os.path.dirname(os.path.realpath(__file__)) diff --git a/datasets/tutorials/4_hello_plugin_flow.py b/zdatasets/tutorials/4_hello_plugin_flow.py similarity index 73% rename from datasets/tutorials/4_hello_plugin_flow.py rename to zdatasets/tutorials/4_hello_plugin_flow.py index 9f6fc4f..77e8133 100755 --- a/datasets/tutorials/4_hello_plugin_flow.py +++ b/zdatasets/tutorials/4_hello_plugin_flow.py @@ -1,11 +1,11 @@ import pandas as pd from metaflow import FlowSpec, step -from datasets import dataset -from datasets.context import Context -from datasets.dataset_plugin import DatasetPlugin -from datasets.plugins import MetaflowExecutor -from datasets.tutorials.online_plugin import DefaultOnlineDatasetPlugin +from zdatasets import dataset +from zdatasets.context import Context +from zdatasets.dataset_plugin import DatasetPlugin +from zdatasets.plugins import MetaflowExecutor +from zdatasets.tutorials.online_plugin import DefaultOnlineDatasetPlugin # An online executor context! diff --git a/datasets/tutorials/5_consistent_flow.py b/zdatasets/tutorials/5_consistent_flow.py similarity index 79% rename from datasets/tutorials/5_consistent_flow.py rename to zdatasets/tutorials/5_consistent_flow.py index fdbb053..05643b3 100755 --- a/datasets/tutorials/5_consistent_flow.py +++ b/zdatasets/tutorials/5_consistent_flow.py @@ -3,12 +3,12 @@ import pandas as pd from metaflow import FlowSpec, step -from datasets import Dataset, Mode -from datasets.context import Context -from datasets.dataset_plugin import DatasetPlugin -from datasets.metaflow import DatasetParameter -from datasets.plugins import BatchOptions, MetaflowExecutor -from datasets.tutorials.online_plugin import OnlineOptions +from zdatasets import Dataset, Mode +from zdatasets.context import Context +from zdatasets.dataset_plugin import DatasetPlugin +from zdatasets.metaflow import DatasetParameter +from zdatasets.plugins import BatchOptions, MetaflowExecutor +from zdatasets.tutorials.online_plugin import OnlineOptions class PortableExecutor(MetaflowExecutor): diff --git a/datasets/tutorials/6_hive_dataset_flow.py b/zdatasets/tutorials/6_hive_dataset_flow.py similarity index 85% rename from datasets/tutorials/6_hive_dataset_flow.py rename to zdatasets/tutorials/6_hive_dataset_flow.py index bcb64c7..8438ac5 100755 --- a/datasets/tutorials/6_hive_dataset_flow.py +++ b/zdatasets/tutorials/6_hive_dataset_flow.py @@ -1,13 +1,13 @@ import pandas as pd from metaflow import FlowSpec, step -from datasets import Dataset, Mode -from datasets.metaflow import DatasetParameter -from datasets.plugins import HiveDataset, HiveOptions +from zdatasets import Dataset, Mode +from zdatasets.metaflow import DatasetParameter +from zdatasets.plugins import HiveDataset, HiveOptions # Can also invoke from CLI: -# > python datasets/tutorials/6_hive_dataset_flow.py.py run \ +# > python zdatasets/tutorials/6_hive_dataset_flow.py.py run \ # --zpids_dataset '{"name": "ZpidsDataset", hive_table="zpids_dataset", \ # "partition_by": "region", "mode": "READ_WRITE"}' diff --git a/datasets/tutorials/README.ipynb b/zdatasets/tutorials/README.ipynb similarity index 100% rename from datasets/tutorials/README.ipynb rename to zdatasets/tutorials/README.ipynb diff --git a/datasets/tutorials/online_plugin.py b/zdatasets/tutorials/online_plugin.py similarity index 94% rename from datasets/tutorials/online_plugin.py rename to zdatasets/tutorials/online_plugin.py index a7bae31..b3e0648 100644 --- a/datasets/tutorials/online_plugin.py +++ b/zdatasets/tutorials/online_plugin.py @@ -4,8 +4,8 @@ import pandas as pd -from datasets.context import Context -from datasets.dataset_plugin import DatasetPlugin, StorageOptions +from zdatasets.context import Context +from zdatasets.dataset_plugin import DatasetPlugin, StorageOptions @dataclass diff --git a/datasets/utils/__init__.py b/zdatasets/utils/__init__.py similarity index 100% rename from datasets/utils/__init__.py rename to zdatasets/utils/__init__.py diff --git a/datasets/utils/aws.py b/zdatasets/utils/aws.py similarity index 100% rename from datasets/utils/aws.py rename to zdatasets/utils/aws.py diff --git a/datasets/utils/case_utils.py b/zdatasets/utils/case_utils.py similarity index 100% rename from datasets/utils/case_utils.py rename to zdatasets/utils/case_utils.py diff --git a/datasets/utils/partitions.py b/zdatasets/utils/partitions.py similarity index 98% rename from datasets/utils/partitions.py rename to zdatasets/utils/partitions.py index fabc22f..08c6d3b 100644 --- a/datasets/utils/partitions.py +++ b/zdatasets/utils/partitions.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import List, NamedTuple, Union -from datasets.utils.aws import ( +from zdatasets.utils.aws import ( get_aws_client, get_paginated_list_objects_iterator, get_s3_bucket_key, diff --git a/datasets/utils/secret_fetcher.py b/zdatasets/utils/secret_fetcher.py similarity index 100% rename from datasets/utils/secret_fetcher.py rename to zdatasets/utils/secret_fetcher.py