Update the library for Data Package (v2) (#13)
* Added `sources.version`

* Added sources to DCAT mapper

* Added `contributor.given/familyName`

* Replaced `contributor.role` with `contributor.roles`

* Added `contributor.role` mapper

* Support `resource.url` (v0)

* Support `field.format` (v0)

* Renamed `IDict` to `IData`

* Rebased on `model_validator` for compat

* Updated Table Dialect

* Added `schema.fieldsMatch`

* Moved `field` model to its own folder

* Simplified foreignKey

* Better separate models/types

* Added `schema.uniqueKeys`

* Added `list` field type

* Added `constraints.jsonSchema`

* Support `groupChar` for integers

* Added exclusive constraints

* Support `primaryKey` from v1

* Improved compat code

* Support `foreignKeys` from v1

* Removed profile rules

* Removed metadata profile

* Added changelog

* Simplified dialect defaults

* Updated profiles

* Removed version from compat

* Rebased on `$schema` property

* Removed profile model

* Fixed plugin tests

* Updated actions

* Fixed tests

* Fixed typo

* Updated changelog

* Fixed CI tests

* Fixed CI tests
roll authored Apr 12, 2024
1 parent 8f1a9e9 commit 858a83e
Show file tree
Hide file tree
Showing 59 changed files with 8,028 additions and 670 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/general.yaml
@@ -33,7 +33,7 @@ jobs:
       - name: Prepare variables
         run: cp .env.example .env
       - name: Test software
-        run: hatch run ci:test +py=${{ matrix.py || matrix.python-version }}
+        run: hatch run +py=${{ matrix.py || matrix.python-version }} ci:test
       - name: Report coverage
         uses: codecov/codecov-action@v2

@@ -55,7 +55,7 @@ jobs:
         run: cp .env.example .env
       - name: Test software
         # https://stackoverflow.com/questions/9678408/cant-install-psycopg2-with-pip-in-virtualenv-on-mac-os-x-10-7
-        run: LDFLAGS=`echo $(pg_config --ldflags)` hatch run ci:test
+        run: LDFLAGS=`echo $(pg_config --ldflags)` hatch run +py=3.10 ci:test

   # Test (Windows)

@@ -74,7 +74,7 @@ jobs:
       - name: Prepare variables
         run: cp .env.example .env
       - name: Test software
-        run: hatch run ci:test
+        run: hatch run +py=3.10 ci:test

   # Deploy

2 changes: 1 addition & 1 deletion data/package-custom-profile.json
@@ -1,5 +1,5 @@
 {
-  "profile": "https://raw.githubusercontent.com/frictionlessdata/frictionless-py/main/data/profiles/required.json",
+  "$schema": "https://raw.githubusercontent.com/frictionlessdata/frictionless-py/main/data/profiles/required.json",
   "name": "name",
   "resources": [
     {
11 changes: 11 additions & 0 deletions docs/changelog.md
@@ -0,0 +1,11 @@
+# Changelog
+
+This document covers main `dplib-py` releases:
+
+## v0.7
+
+- Updated to Data Package (v2)
+
+## v0.6
+
+- Initial public release
2 changes: 1 addition & 1 deletion dplib/actions/dialect/check.py
@@ -8,7 +8,7 @@
 from ..metadata.check import check_metadata


-def check_dialect(dialect: Union[str, types.IDict, Dialect]) -> List[MetadataError]:
+def check_dialect(dialect: Union[str, types.IData, Dialect]) -> List[MetadataError]:
     """Check the validity of a Table Dialect descriptor

     This validates the descriptor against the JSON Schema profiles to ensure
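Note: a minimal usage sketch of `check_dialect` (not part of the diff; assumes the library is importable as `dplib`):

    from dplib.actions.dialect.check import check_dialect

    # An invalid descriptor yields MetadataError objects rather than raising
    errors = check_dialect({"delimiter": 1})
    print(errors[0].full_message)  # e.g. [/delimiter] 1 is not of type 'string'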
30 changes: 17 additions & 13 deletions dplib/actions/metadata/check.py
@@ -2,28 +2,32 @@

 from typing import List, Union

-from ... import types
+from ... import settings, types
 from ...errors.metadata import MetadataError
 from ...helpers.data import read_data
-from ...helpers.path import is_remote_path
-from ...helpers.profile import check_metadata_against_jsonschema, read_profile
-from ...models import Profile
+from ...helpers.profile import check_profile


 def check_metadata(
-    metadata: Union[str, types.IDict], *, type: str
+    metadata: Union[str, types.IData], *, type: types.IMetadataType
 ) -> List[MetadataError]:
     if isinstance(metadata, str):
         metadata = read_data(metadata)

-    # Base profile
-    profile = Profile.from_dict(read_profile(metadata_type=type))
-    errors = check_metadata_against_jsonschema(metadata, profile.jsonSchema)
+    # Get default profile
+    if type == "dialect":
+        default_profile = settings.PROFILE_DEFAULT_DIALECT
+    elif type == "package":
+        default_profile = settings.PROFILE_DEFAULT_PACKAGE
+    elif type == "resource":
+        default_profile = settings.PROFILE_DEFAULT_RESOURCE
+    elif type == "schema":
+        default_profile = settings.PROFILE_DEFAULT_SCHEMA
+    else:
+        raise ValueError(f"Invalid metadata type: {type}")

-    # Custom profile
-    custom_profile = metadata.get("profile")
-    if custom_profile and is_remote_path(custom_profile):
-        custom_profile = Profile.from_path(custom_profile)
-        errors += check_metadata_against_jsonschema(metadata, custom_profile.jsonSchema)
+    # Validate metadata
+    profile = metadata.get("$schema", default_profile)
+    errors = check_profile(metadata=metadata, profile=profile)

     return errors
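Note: a minimal sketch of the new profile resolution (not part of the diff; the descriptor is hypothetical). A descriptor may pin its own profile via `$schema`; otherwise the default profile for its metadata type applies:

    from dplib.actions.metadata.check import check_metadata

    descriptor = {"fields": [{"name": "id", "type": "integer"}]}
    errors = check_metadata(descriptor, type="schema")
    print([error.full_message for error in errors])  # [] when the descriptor is valid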
5 changes: 2 additions & 3 deletions dplib/actions/package/__spec__/test_check.py
@@ -23,9 +23,8 @@ def test_check_package_invalid_dereferencing():
     errors = check_package("data/package-invalid-dereferencing.json")
     assert len(errors) == 1
     error = errors[0]
-    assert (
-        error.full_message == "[/resources/0/dialect/delimiter] 1 is not of type 'string'"
-    )
+    # TODO: extend error path so it shows the full path from the package root
+    assert error.full_message == "[/delimiter] 1 is not of type 'string'"


 @pytest.mark.vcr
16 changes: 9 additions & 7 deletions dplib/actions/package/check.py
@@ -10,7 +10,7 @@
 from ..metadata.check import check_metadata


-def check_package(package: Union[str, types.IDict, Package]) -> List[MetadataError]:
+def check_package(package: Union[str, types.IData, Package]) -> List[MetadataError]:
     """Check the validity of a Data Package descriptor

     This validates the descriptor against the JSON Schema profiles to ensure
@@ -30,13 +30,15 @@ def check_package(package: Union[str, types.IData, Package]) -> List[MetadataError]:
         basepath = package.basepath
         package = package.to_dict()

-    # Dereference resources[].dialect/schema
+    # Validate (including nested descriptors)
+    errors = check_metadata(package, type="package")
     resources = package.get("resources", [])
     if isinstance(resources, list):
         for resource in resources:  # type: ignore
-            for name in ["dialect", "schema"]:
-                value = resource.get(name)  # type: ignore
-                if value and isinstance(value, str):
-                    resource[name] = read_data(value, basepath=basepath)
+            for type in ["dialect", "schema"]:
+                value = resource.get(type)  # type: ignore
+                if isinstance(value, str):
+                    metadata = read_data(value, basepath=basepath)
+                    errors.extend(check_metadata(metadata, type=type))  # type: ignore

-    return check_metadata(package, type="package")
+    return errors
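Note: a minimal usage sketch of the new nested validation (not part of the diff; the path is hypothetical). Dereferenced `dialect`/`schema` descriptors are now validated against their own profiles instead of being inlined into the package descriptor:

    from dplib.actions.package.check import check_package

    errors = check_package("data/package.json")
    for error in errors:
        print(error.full_message)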
3 changes: 2 additions & 1 deletion dplib/actions/resource/__spec__/test_check.py
@@ -21,7 +21,8 @@ def test_check_resource_invalid_dereferencing():
     errors = check_resource("data/resource-invalid-dereferencing.json")
     assert len(errors) == 1
     error = errors[0]
-    assert error.full_message == "[/dialect/delimiter] 1 is not of type 'string'"
+    # TODO: extend error path so it shows the full path from the resource root
+    assert error.full_message == "[/delimiter] 1 is not of type 'string'"


 def test_check_resource_from_model():
16 changes: 9 additions & 7 deletions dplib/actions/resource/check.py
@@ -10,7 +10,7 @@
 from ..metadata.check import check_metadata


-def check_resource(resource: Union[str, types.IDict, Resource]) -> List[MetadataError]:
+def check_resource(resource: Union[str, types.IData, Resource]) -> List[MetadataError]:
     """Check the validity of a Data Resource descriptor

     This validates the descriptor against the JSON Schema profiles to ensure
@@ -30,10 +30,12 @@ def check_resource(resource: Union[str, types.IData, Resource]) -> List[MetadataError]:
         basepath = resource.basepath
         resource = resource.to_dict()

-    # Dereference dialect/schema
-    for name in ["dialect", "schema"]:
-        value = resource.get(name)
-        if value and isinstance(value, str):
-            resource[name] = read_data(value, basepath=basepath)
+    # Validate (including nested descriptors)
+    errors = check_metadata(resource, type="resource")
+    for type in ["dialect", "schema"]:
+        value = resource.get(type)
+        if isinstance(value, str):
+            metadata = read_data(value, basepath=basepath)
+            errors.extend(check_metadata(metadata, type=type))  # type: ignore

-    return check_metadata(resource, type="resource")
+    return errors
2 changes: 1 addition & 1 deletion dplib/actions/schema/check.py
@@ -8,7 +8,7 @@
 from ..metadata.check import check_metadata


-def check_schema(schema: Union[str, types.IDict, Schema]) -> List[MetadataError]:
+def check_schema(schema: Union[str, types.IData, Schema]) -> List[MetadataError]:
     """Check the validity of a Table Schema descriptor

     This validates the descriptor against the JSON Schema profiles to ensure
10 changes: 5 additions & 5 deletions dplib/helpers/data.py
@@ -12,22 +12,22 @@

 def read_data(
     path: str, *, format: Optional[str] = None, basepath: Optional[str] = None
-) -> types.IDict:
+) -> types.IData:
     if not format:
         format = infer_format(path, raise_missing=True)
     text = read_file(path, basepath=basepath)
     data = load_data(text, format=format)
     return data


-def write_data(path: str, data: types.IDict, *, format: Optional[str] = None):
+def write_data(path: str, data: types.IData, *, format: Optional[str] = None):
     if not format:
         format = infer_format(path, raise_missing=True)
     text = dump_data(data, format=format)
     write_file(path, text)


-def load_data(text: str, *, format: str) -> types.IDict:
+def load_data(text: str, *, format: str) -> types.IData:
     try:
         if format == "json":
             return json.loads(text)
@@ -39,7 +39,7 @@ def load_data(text: str, *, format: str) -> types.IData:
         raise Error(f"Cannot load data from text with format: {format}")


-def dump_data(data: types.IDict, *, format: str) -> str:
+def dump_data(data: types.IData, *, format: str) -> str:
     try:
         if format == "json":
             return json.dumps(data, indent=2)
@@ -51,7 +51,7 @@ def dump_data(data: types.IData, *, format: str) -> str:
         raise Error(f"Cannot dump data to text with format: {format}")


-def clean_data(data: types.IDict):
+def clean_data(data: types.IData):
     for key, value in list(data.items()):
         if isinstance(value, dict):
             clean_data(value)  # type: ignore
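Note: a minimal round-trip sketch for the renamed `IData`-typed helpers (not part of the diff):

    from dplib.helpers.data import dump_data, load_data

    data = {"name": "example"}
    text = dump_data(data, format="json")
    assert load_data(text, format="json") == data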
69 changes: 42 additions & 27 deletions dplib/helpers/profile.py
@@ -6,44 +6,59 @@

 from jsonschema.validators import validator_for  # type: ignore

-from .. import types
+from .. import settings, types
 from ..error import Error
 from ..errors.metadata import MetadataError
 from .data import load_data
 from .file import read_file

-
-def select_profile(*, metadata_type: types.IMetadataType) -> str:
-    if metadata_type == "package":
-        return "data-package"
-    elif metadata_type == "resource":
-        return "data-resource"
-    elif metadata_type == "dialect":
-        return "table-dialect"
-    elif metadata_type == "schema":
-        return "table-schema"
-    raise Error(f'Invalid metadata type "{metadata_type}"')
+# TODO: implement additional user-side profile caching
+
+
+def check_profile(*, metadata: types.IData, profile: str) -> List[MetadataError]:
+    # Prepare validator
+    jsonSchema = read_profile(profile=profile)
+    Validator = validator_for(jsonSchema)  # type: ignore
+    validator = Validator(jsonSchema)  # type: ignore
+
+    # Validate metadata
+    errors: List[MetadataError] = []
+    for validation_error in validator.iter_errors(metadata):  # type: ignore
+        errors.append(MetadataError(validation_error))  # type: ignore
+
+    return errors


 @lru_cache
-def read_profile(*, metadata_type: types.IMetadataType) -> types.IDict:
-    format = "json"
-    name = select_profile(metadata_type=metadata_type)
-    path = os.path.join(os.path.dirname(__file__), "..", "profiles", f"{name}.{format}")
+def read_profile(*, profile: str) -> types.IData:
+    parts = parse_profile(profile)
+
+    # Replace with builtin copy
+    if parts:
+        version, filename = parts
+        profile = os.path.join(settings.PROFILE_BASEDIR, version, filename)
+
+    # Read jsonSchema
     try:
-        text = read_file(path)
-        data = load_data(text, format=format)
+        text = read_file(profile)
+        data = load_data(text, format="json")
     except Exception:
-        raise Error(f'Cannot read profile "{name}" at "{path}"')
+        raise Error(f'Cannot read profile: "{profile}"')

     return data


-def check_metadata_against_jsonschema(
-    metadata: types.IDict, jsonSchema: types.IDict
-) -> List[MetadataError]:
-    Validator = validator_for(jsonSchema)  # type: ignore
-    validator = Validator(jsonSchema)  # type: ignore
-    errors: List[MetadataError] = []
-    for validation_error in validator.iter_errors(metadata):  # type: ignore
-        errors.append(MetadataError(validation_error))  # type: ignore
-    return errors
+def parse_profile(profile: str):
+    parts = profile.rsplit("/", 2)
+
+    # Ensure builtin copy exists
+    if len(parts) != 3:
+        return None
+    if parts[0] != settings.PROFILE_BASEURL:
+        return None
+    if parts[1] not in os.listdir(settings.PROFILE_BASEDIR):
+        return None
+    if parts[2] not in os.listdir(os.path.join(settings.PROFILE_BASEDIR, parts[1])):
+        return None
+
+    return parts[1], parts[2]
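Note: a minimal sketch of how a registry profile URL resolves to a bundled copy (not part of the diff; the URL assumes `settings.PROFILE_BASEURL` points at the datapackage.org profile registry):

    from dplib.helpers.profile import parse_profile, read_profile

    # A known profile URL splits into (version, filename) and is then read
    # from the local PROFILE_BASEDIR; any other path or URL is read directly
    print(parse_profile("https://datapackage.org/profiles/2.0/datapackage.json"))
    # ('2.0', 'datapackage.json') when a bundled copy exists
    json_schema = read_profile(profile="https://datapackage.org/profiles/2.0/datapackage.json")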
7 changes: 3 additions & 4 deletions dplib/model.py
@@ -2,7 +2,6 @@

 import pprint
 import warnings
-from functools import cached_property
 from typing import Optional

 from pydantic import BaseModel
@@ -22,8 +21,8 @@ def __str__(self) -> str:
     def __repr__(self) -> str:
         return pprint.pformat(self.to_dict(), sort_dicts=False)

-    @cached_property
-    def custom(self) -> types.IDict:
+    @property
+    def custom(self) -> types.IData:
         assert self.model_extra is not None
         return self.model_extra

@@ -63,7 +62,7 @@ def to_dict(self):
         return data

     @classmethod
-    def from_dict(cls, data: types.IDict, *, basepath: Optional[str] = None) -> Self:
+    def from_dict(cls, data: types.IData, *, basepath: Optional[str] = None) -> Self:
         if basepath and cls.model_fields.get("basepath"):
             data["basepath"] = basepath
         return cls(**data)
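Note: a minimal sketch of the `custom` accessor, which exposes any extra, non-standard descriptor properties kept by the model (not part of the diff; `customProp` is a hypothetical key):

    from dplib.models import Dialect

    dialect = Dialect.from_dict({"delimiter": ";", "customProp": 1})
    print(dialect.custom)  # {'customProp': 1}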
4 changes: 2 additions & 2 deletions dplib/models/__init__.py
@@ -1,8 +1,8 @@
 from .contributor import Contributor
 from .dialect import Dialect
+from .field import Constraints, Field
 from .license import License
 from .package import Package
-from .profile import Profile
 from .resource import Resource
-from .schema import Constraints, Field, ForeignKey, ForeignKeyReference, Schema
+from .schema import ForeignKey, ForeignKeyReference, Schema
 from .source import Source
25 changes: 23 additions & 2 deletions dplib/models/contributor.py
@@ -1,11 +1,32 @@
-from typing import Optional
+from typing import List, Optional

+import pydantic
+
+from .. import types
 from ..model import Model


 class Contributor(Model):
     title: Optional[str] = None
+    givenName: Optional[str] = None
+    familyName: Optional[str] = None
     path: Optional[str] = None
     email: Optional[str] = None
-    role: Optional[str] = None
+    roles: List[str] = []
     organization: Optional[str] = None
+
+    # Compat
+
+    @pydantic.model_validator(mode="before")
+    @classmethod
+    def compat(cls, data: types.IData):
+        if not isinstance(data, dict):  # type: ignore
+            return data
+
+        # contributor.role
+        if not data.get("roles"):
+            role = data.pop("role", None)
+            if role:
+                data["roles"] = [role]
+
+        return data
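Note: a minimal sketch of the v1-to-v2 compat mapping (not part of the diff; the contributor values are hypothetical):

    from dplib.models import Contributor

    # A v1 descriptor's singular `role` is upgraded to the v2 `roles` list
    contributor = Contributor.from_dict({"title": "Jane Doe", "role": "author"})
    print(contributor.roles)  # ['author']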