Skip to content

Commit

Permalink
Rename import to zdatasets to avoid HuggingFace conflict (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
talebzeghmi authored Jun 26, 2023
1 parent cf23227 commit 7f687fb
Show file tree
Hide file tree
Showing 59 changed files with 186 additions and 186 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ jobs:
- name: Code Quality Check
run: |
poetry run black datasets --check
poetry run flake8 datasets
poetry run black zdatasets --check
poetry run flake8 zdatasets
- name: Execute Python tests
run: |
poetry run pytest datasets
poetry run pytest zdatasets
pip install coverage==6.1.2
coveragepy-lcov
Expand Down
10 changes: 5 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ eggs/
parts/
/share/
/etc/
datasets.egg-info/
zdatasets.egg-info/
.extends-cache
GENERATED_VERSION
versioner-*
Expand Down Expand Up @@ -91,7 +91,7 @@ man

pip-selfcheck.json
metastore_db/*
datasets/tutorials/data/*
datasets/tests/data/datastore/*
datasets/tests/data/ds1/*
datasets/tests/data/pandas.csv
zdatasets/tutorials/data/*
zdatasets/tests/data/datastore/*
zdatasets/tests/data/ds1/*
zdatasets/tests/data/pandas.csv
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ repos:
entry: pytest
pass_filenames: false
stages: [commit]
args: [datasets]
args: [zdatasets]
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

## Running Tests

poetry run pytest datasets
poetry run pytest zdatasets

### Run Single Test

poetry run pytest datasets -k search_item (e.g., name of test)
poetry run pytest zdatasets -k search_item (e.g., name of test)

### Pre-commit
Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# TODO: https://python-poetry.org/docs/pyproject#include-and-exclude
include VERSION
include README.md
recursive-include datasets/docs/*
recursive-include zdatasets/docs/*
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/zillow/datasets/main?urlpath=lab/tree/zdatasets/tutorials)


Welcome to @datasets
Welcome to zdatasets
==================================================

TODO
Expand All @@ -12,13 +12,13 @@ TODO
import pandas as pd
from metaflow import FlowSpec, step

from datasets import Dataset, Mode
from datasets.metaflow import DatasetParameter
from datasets.plugins import BatchOptions
from zdatasets import Dataset, Mode
from zdatasets.metaflow import DatasetParameter
from zdatasets.plugins import BatchOptions


# Can also invoke from CLI:
# > python datasets/tutorials/0_hello_dataset_flow.py run \
# > python zdatasets/tutorials/0_hello_dataset_flow.py run \
# --hello_dataset '{"name": "HelloDataset", "mode": "READ_WRITE", \
# "options": {"type": "BatchOptions", "partition_by": "region"}}'
class HelloDatasetFlow(FlowSpec):
Expand Down
18 changes: 0 additions & 18 deletions datasets/__init__.py

This file was deleted.

10 changes: 0 additions & 10 deletions datasets/plugins/__init__.py

This file was deleted.

12 changes: 6 additions & 6 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@
# html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = "datasets-daldoc"
htmlhelp_basename = "zdatasets-daldoc"

# -- Options for LaTeX output ---------------------------------------------

Expand All @@ -215,7 +215,7 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [("index", "datasets.tex", "datasets Documentation", "aiplat@zillow.com", "manual")]
latex_documents = [("index", "zdatasets.tex", "zdatasets Documentation", "aiplat@zillow.com", "manual")]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
Expand All @@ -242,7 +242,7 @@

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [("index", "datasets", "datasets Documentation", ["aiplat@zillow.com"], 1)]
man_pages = [("index", "zdatasets", "zdatasets Documentation", ["aiplat@zillow.com"], 1)]

# If true, show URL addresses after external links.
# man_show_urls = False
Expand All @@ -256,10 +256,10 @@
texinfo_documents = [
(
"index",
"datasets",
"datasets Documentation",
"zdatasets",
"zdatasets Documentation",
"",
"datasets",
"zdatasets",
"One line description of project.",
"Miscellaneous",
)
Expand Down
8 changes: 4 additions & 4 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
.. datasets documentation master file
.. zdatasets documentation master file
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to datasets
Welcome to zdatasets
==================================================

TODO
Expand All @@ -11,8 +11,8 @@ TODO
import pandas as pd
from metaflow import FlowSpec, step
from datasets.datasets_decorator import datasets
from datasets.mode import Mode
from zdatasets.datasets_decorator import dataset
from zdatasets.mode import Mode
class HelloDatasetFlow(FlowSpec):
Expand Down
20 changes: 10 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "zdatasets"
version = "0.2.5"
version = "1.2.5"
description = "Dataset SDK for consistent read/write [batch, online, streaming] data."
classifiers = [
"Development Status :: 2 - Pre-Alpha",
Expand All @@ -13,17 +13,17 @@ authors = ["Taleb Zeghmi"]
readme = "README.md"

[[tool.poetry.packages]]
include = "datasets"
include = "zdatasets"

# https://packaging.python.org/guides/creating-and-discovering-plugins/
[tool.poetry.plugins]
[tool.poetry.plugins."datasets.plugins"]
batch_dataset = "datasets.plugins:BatchDataset"
flow_dataset = "datasets.plugins:FlowDataset"
hive_dataset = "datasets.plugins:HiveDataset"
[tool.poetry.plugins."zdatasets.plugins"]
batch_dataset = "zdatasets.plugins:BatchDataset"
flow_dataset = "zdatasets.plugins:FlowDataset"
hive_dataset = "zdatasets.plugins:HiveDataset"

[tool.poetry.plugins."datasets.executors"]
metaflow_executor = "datasets.plugins:MetaflowExecutor"
[tool.poetry.plugins."zdatasets.executors"]
metaflow_executor = "zdatasets.plugins:MetaflowExecutor"

[tool.poetry.dependencies]
python = ">=3.8.0,<4"
Expand Down Expand Up @@ -57,8 +57,8 @@ spark = ["pyspark"]
kubernetes = ["kubernetes"]

[tool.isort]
known_first_party = 'datasets'
known_third_party = ["datasets", "numpy", "orbital_core", "pandas"]
known_first_party = 'zdatasets'
known_third_party = ["zdatasets", "numpy", "orbital_core", "pandas"]
multi_line_output = 3
lines_after_imports = 2
force_grid_wrap = 0
Expand Down
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# coverage.py
[coverage:run]
branch = true
source = datasets
source = zdatasets
omit = **/tests/*

[coverage:report]
Expand All @@ -27,8 +27,8 @@ exclude = .git,__pycache__,docs/*,doc/*,build,dist
[tool:pytest]
log_cli=true
;addopts = -vv -m "not spark" --cov-report=term -rP
;addopts = --cov=datasets --cov-report=term --cov-report=html --cov-report xml
addopts = --cov=datasets --cov-config=setup.cfg --cov-report=term --cov-report=html --cov-report xml
;addopts = --cov=zdatasets --cov-report=term --cov-report=html --cov-report xml
addopts = --cov=zdatasets --cov-config=setup.cfg --cov-report=term --cov-report=html --cov-report xml

markers =
spark: marks tests that require Spark (deselect with '-m "not spark"')
18 changes: 18 additions & 0 deletions zdatasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# isort: skip_file
# flake8: noqa: F401
from zdatasets.mode import Mode
from zdatasets.dataset_plugin import DatasetPlugin


from zdatasets.context import Context
from zdatasets.datasets_decorator import dataset


from zdatasets import plugins
from zdatasets.plugins.batch.hive_dataset import HiveDataset

from zdatasets._typing import ColumnNames, DataFrameType

from zdatasets.utils import SecretFetcher

Dataset = DatasetPlugin.factory
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions datasets/dataset_plugin.py → zdatasets/dataset_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from dataclasses import dataclass
from typing import Callable, Dict, Iterable, Optional, Tuple, Type, Union

from datasets._typing import ColumnNames, DataFrameType
from datasets.context import Context
from datasets.utils.case_utils import is_upper_pascal_case
from zdatasets._typing import ColumnNames, DataFrameType
from zdatasets.context import Context
from zdatasets.utils.case_utils import is_upper_pascal_case

from .mode import Mode
from .program_executor import ProgramExecutor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import keyword
from typing import Callable, Dict, Optional, Union

from datasets._typing import ColumnNames
from datasets.dataset_plugin import Context, DatasetPlugin, StorageOptions
from datasets.mode import Mode
from datasets.utils.case_utils import pascal_to_snake_case
from zdatasets._typing import ColumnNames
from zdatasets.dataset_plugin import Context, DatasetPlugin, StorageOptions
from zdatasets.mode import Mode
from zdatasets.utils.case_utils import pascal_to_snake_case


def dataset(
Expand Down
File renamed without changes.
12 changes: 6 additions & 6 deletions datasets/metaflow.py → zdatasets/metaflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
from metaflow._vendor.click import ParamType
from metaflow.parameters import Parameter

from datasets import DataFrameType
from datasets._typing import ColumnNames
from datasets.context import Context
from datasets.dataset_plugin import DatasetPlugin, StorageOptions
from datasets.mode import Mode
from datasets.utils.secret_fetcher import SecretFetcher
from zdatasets import DataFrameType
from zdatasets._typing import ColumnNames
from zdatasets.context import Context
from zdatasets.dataset_plugin import DatasetPlugin, StorageOptions
from zdatasets.mode import Mode
from zdatasets.utils.secret_fetcher import SecretFetcher


class _DatasetTypeClass(ParamType):
Expand Down
File renamed without changes.
10 changes: 10 additions & 0 deletions zdatasets/plugins/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# isort: skip_file
# flake8: noqa: F401
from zdatasets.plugins.executors.metaflow_executor import MetaflowExecutor
from zdatasets.plugins.batch.batch_dataset import BatchDataset, BatchOptions
from zdatasets.plugins.batch.flow_dataset import FlowDataset, FlowOptions
from zdatasets.plugins.batch.hive_dataset import HiveDataset, HiveOptions
from zdatasets.plugins.register_plugins import register


register()
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
Union,
)

from datasets._typing import ColumnNames, DataFrameType
from datasets.dataset_plugin import DatasetPlugin, StorageOptions
from datasets.exceptions import InvalidOperationException
from datasets.mode import Mode
from datasets.utils.case_utils import pascal_to_snake_case
from zdatasets._typing import ColumnNames, DataFrameType
from zdatasets.dataset_plugin import DatasetPlugin, StorageOptions
from zdatasets.exceptions import InvalidOperationException
from zdatasets.mode import Mode
from zdatasets.utils.case_utils import pascal_to_snake_case


_logger = logging.getLogger(__name__)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

import pandas as pd

from datasets import Mode
from datasets._typing import ColumnNames, DataFrameType
from datasets.context import Context
from datasets.dataset_plugin import DatasetPlugin
from datasets.exceptions import InvalidOperationException
from datasets.plugins.batch.batch_base_plugin import (
from zdatasets import Mode
from zdatasets._typing import ColumnNames, DataFrameType
from zdatasets.context import Context
from zdatasets.dataset_plugin import DatasetPlugin
from zdatasets.exceptions import InvalidOperationException
from zdatasets.plugins.batch.batch_base_plugin import (
BatchBasePlugin,
BatchOptions,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional, Tuple, Union

from datasets._typing import ColumnNames
from datasets.context import Context
from datasets.dataset_plugin import DatasetPlugin, StorageOptions
from datasets.mode import Mode
from datasets.plugins import BatchDataset
from zdatasets._typing import ColumnNames
from zdatasets.context import Context
from zdatasets.dataset_plugin import DatasetPlugin, StorageOptions
from zdatasets.mode import Mode
from zdatasets.plugins import BatchDataset


if TYPE_CHECKING:
Expand Down
Loading

0 comments on commit 7f687fb

Please sign in to comment.