Skip to content

Commit

Permalink
upath: implement poc for flavour base vendoring (#200)
Browse files Browse the repository at this point in the history
* upath: implement poc for flavour base vendoring
* update sources generator
* upath: update flavour implementation
* upath.implementations: adjust for codechanges
* upath: fix resolving issue
* upath: provide default flavour
* upath.implementations: cleanup
* upath.core: fix prefix issue with glob on windows
* upath._flavour: for file/local get drive on windows
* upath._flavour: move _deprecated to upath._compat
* upath._flavour: use local_file attribute in splitdrive
* upath._flavour: use os.path for local_file in isabs
* readme: fix toml entrypoint spelling
* upath: fallback classmethod for UPath._parse_path and UPath._format_parsed_parts
* flavours: fix reproducibility in flavour generate script
* upath._flavour: refactor flavour settings
* tests: test stat
* upath: move flavour specializations to subclasses
* tests: adjust resolve test for http paths
* upath: ensure support for __fspath__ args
* upath.implementations.local: suppress warning
* test: adjust fspath test for windows
* upath.local: WindowsUPath.path should return the posix version
* upath.local: fix WindowsUPath.path
  • Loading branch information
ap-- authored Mar 3, 2024
1 parent 032f437 commit 3646cd2
Show file tree
Hide file tree
Showing 15 changed files with 1,856 additions and 318 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
default_language_version:
python: python3
exclude: ^upath/tests/pathlib/test_pathlib.*\.py|^upath/tests/pathlib/_test_support\.py
exclude: ^upath/tests/pathlib/test_pathlib.*\.py|^upath/tests/pathlib/_test_support\.py|^upath/_flavour_sources\.py
repos:
- repo: https://github.com/psf/black
rev: 24.1.1
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ If you distribute your implementation in your own Python package, you can inform

```
# pyproject.toml
[project.entry-points."unversal_pathlib.implementations"]
[project.entry-points."universal_pathlib.implementations"]
myproto = "my_module.submodule:MyPath"
```

Expand Down
350 changes: 350 additions & 0 deletions dev/generate_flavours.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,350 @@
"""Generates the _flavour_sources.py file"""

from __future__ import annotations

import inspect
import re
import sys
import warnings
from io import StringIO
from typing import Any
from unittest.mock import Mock

from fsspec.registry import available_protocols
from fsspec.registry import get_filesystem_class
from fsspec.spec import AbstractFileSystem
from fsspec.utils import get_package_version_without_import

HEADER = '''\
""" upath._flavour_sources
<experimental!>
Warning
-------
Do not modify this file manually!
It is generated by `dev/generate_flavours.py`
To be able to parse the different filesystem uri schemes, we need
the string parsing functionality each of the filesystem implementations.
In an attempt to support parsing uris without having to import the
specific filesystems, we extract the necessary subset of the
AbstractFileSystem classes and generate a new "flavour" class for
each of the known filesystems. This will allow us to provide a
`PurePath` equivalent `PureUPath` for each protocol in the future
without a direct dependency on the underlying filesystem package.
"""
'''

IMPORTS = """\
from __future__ import annotations
import logging
import re
from typing import Any
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import urlsplit
from fsspec.implementations.local import make_path_posix
from fsspec.utils import infer_storage_options
from fsspec.utils import stringify_path
"""

INIT_CODE = '''\
__all__ = [
"AbstractFileSystemFlavour",
"FileSystemFlavourBase",
"flavour_registry",
]
logger = logging.getLogger(__name__)
flavour_registry: dict[str, type[FileSystemFlavourBase]] = {}
class FileSystemFlavourBase:
"""base class for the fsspec flavours"""
def __init_subclass__(cls: Any, **kwargs):
if isinstance(cls.protocol, str):
protocols = (cls.protocol,)
else:
protocols = tuple(cls.protocol)
for protocol in protocols:
if protocol in flavour_registry:
raise ValueError(f"protocol {protocol!r} already registered")
flavour_registry[protocol] = cls
'''

BASE_CLASS_NAME_SUFFIX = "Flavour"
BASE_CLASS_NAME = f"{AbstractFileSystem.__name__}{BASE_CLASS_NAME_SUFFIX}"

SKIP_PROTOCOLS = [
"dir",
"blockcache",
"cached",
"simplecache",
"filecache",
]

FIX_PROTOCOLS = {
"MemFS": ("memfs",),
"AsyncLocalFileSystem": (),
}

FIX_METHODS = {
"GCSFileSystem": ["_strip_protocol", "_get_kwargs_from_urls", "_split_path"],
}


def _fix_azure_blob_file_system(x: str) -> str:
return re.sub(
r"host = ops.get\(\"host\", None\)",
'host: str | None = ops.get("host", None)',
x,
)


def _fix_memfs_file_system(x: str) -> str:
return re.sub(
"_MemFS",
"MemoryFileSystemFlavour",
x,
)


def _fix_xrootd_file_system(x: str) -> str:
x = re.sub(
r"client.URL",
"urlsplit",
x,
)
return re.sub(
"url.hostid",
"url.netloc",
x,
)


FIX_SOURCE = {
"AzureBlobFileSystem": _fix_azure_blob_file_system,
"MemFS": _fix_memfs_file_system,
"XRootDFileSystem": _fix_xrootd_file_system,
}


def before_imports() -> None:
"""allow to patch the generated state before importing anything"""
# patch libarchive
sys.modules["libarchive"] = Mock()
sys.modules["libarchive.ffi"] = Mock()
# patch xrootd
sys.modules["XRootD"] = Mock()
sys.modules["XRootD.client"] = Mock()
sys.modules["XRootD.client.flags"] = Mock()
sys.modules["XRootD.client.responses"] = Mock()


def get_protos(cls: type, remove: str, add: str) -> tuple[str, ...]:
try:
return FIX_PROTOCOLS[cls.__name__]
except KeyError:
pass
if isinstance(cls.protocol, str):
p = [cls.protocol, add]
else:
p = [*cls.protocol, add]
return tuple([x for x in p if x != remove])


def get_fsspec_filesystems_and_protocol_errors() -> (
tuple[dict[type[AbstractFileSystem], tuple[str, ...]], dict[str, str]]
):
before_imports()

classes: dict[type[AbstractFileSystem], tuple[str]] = {}
errors: dict[str, str] = {}

for protocol in available_protocols():
if protocol in SKIP_PROTOCOLS:
continue
try:
cls = get_filesystem_class(protocol)
except ImportError as err:
errors[protocol] = str(err)
else:
protos = get_protos(cls, remove="abstract", add=protocol)
cprotos = classes.get(cls, [])
classes[cls] = tuple(dict.fromkeys([*cprotos, *protos]))
return classes, errors


def _get_plain_method(cls, name):
for c in cls.__mro__:
try:
return c.__dict__[name]
except KeyError:
pass
else:
raise AttributeError(f"{cls.__name__}.{name} not found")


def get_subclass_methods(cls: type) -> list[str]: # noqa: C901
try:
return FIX_METHODS[cls.__name__]
except KeyError:
pass
errors = []

# storage options
so = None
base_get_kwargs_from_urls = _get_plain_method(
AbstractFileSystem, "_get_kwargs_from_urls"
)
try:
cls_get_kwargs_from_urls = _get_plain_method(cls, "_get_kwargs_from_urls")
except AttributeError:
errors.append("missing `_get_kwargs_from_urls()`")
else:
so = cls_get_kwargs_from_urls is base_get_kwargs_from_urls
if not isinstance(cls_get_kwargs_from_urls, staticmethod):
warnings.warn(
f"{cls.__name__}: {cls_get_kwargs_from_urls!r} not a staticmethod",
RuntimeWarning,
stacklevel=2,
)

# strip protocol
sp = None
base_strip_protocol = _get_plain_method(AbstractFileSystem, "_strip_protocol")
try:
cls_strip_protocol = _get_plain_method(cls, "_strip_protocol")
except AttributeError:
errors.append("missing `_strip_protocol()`")
else:
if isinstance(cls_strip_protocol, staticmethod):
warnings.warn(
f"{cls.__name__}: {cls_strip_protocol.__name__!r} is not a classmethod",
UserWarning,
stacklevel=2,
)
sp = False
elif isinstance(cls_strip_protocol, classmethod):
sp = cls_strip_protocol.__func__ is base_strip_protocol.__func__
else:
errors.append(
f"{cls.__name__}: {cls_strip_protocol.__name__!r} not a classmethod"
)

# _parent
pt = None
base_parent = _get_plain_method(AbstractFileSystem, "_parent")
try:
cls_parent = _get_plain_method(cls, "_parent")
except AttributeError:
errors.append("missing `_parent()`")
else:
pt = cls_parent is base_parent

if errors or sp is None or so is None:
raise AttributeError(" AND ".join(errors))

methods = []
if not sp:
methods.append("_strip_protocol")
if not so:
methods.append("_get_kwargs_from_urls")
if not pt:
methods.append("_parent")
return methods


def generate_class_source_code(
cls: type,
methods: list[str],
overrides: dict[str, Any],
attributes: list[str],
cls_suffix: str,
base_cls: str | None,
) -> str:
s = ["\n"]
if base_cls:
s += [f"class {cls.__name__}{cls_suffix}({base_cls}):"]
else:
s += [f"class {cls.__name__}{cls_suffix}:"]
mod_ver = get_package_version_without_import(cls.__module__.partition(".")[0])
s.append(f" __orig_class__ = '{cls.__module__}.{cls.__name__}'")
s.append(f" __orig_version__ = {mod_ver!r}")
for attr, value in overrides.items():
s.append(f" {attr} = {value!r}")
for attr in attributes:
s.append(f" {attr} = {getattr(cls, attr)!r}")
s.append("")
for method in methods:
s.append(inspect.getsource(getattr(cls, method)))
try:
fix_func = FIX_SOURCE[cls.__name__]
except KeyError:
return "\n".join(s)
else:
return "\n".join(fix_func(line) for line in s)


def create_source() -> str:
buf = StringIO()
buf.write(HEADER)

classes, errors = get_fsspec_filesystems_and_protocol_errors()

srcs = [
generate_class_source_code(
AbstractFileSystem,
["_strip_protocol", "_get_kwargs_from_urls", "_parent"],
{},
["protocol", "root_marker"],
cls_suffix=BASE_CLASS_NAME_SUFFIX,
base_cls="FileSystemFlavourBase",
)
]

for cls in sorted(classes, key=lambda cls: cls.__name__):
try:
sub_cls_methods = get_subclass_methods(cls)
except AttributeError as err:
protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
for proto in protos:
errors[proto] = str(err)
continue
sub_cls = generate_class_source_code(
cls,
sub_cls_methods,
{"protocol": classes[cls]},
["root_marker", "sep"],
cls_suffix=BASE_CLASS_NAME_SUFFIX,
base_cls=BASE_CLASS_NAME,
)
srcs.append(sub_cls)

if SKIP_PROTOCOLS:
buf.write("#\n# skipping protocols:\n")
for protocol in sorted(SKIP_PROTOCOLS):
buf.write(f"# - {protocol}\n")

if errors:
buf.write("# protocol import errors:\n")
for protocol, error_msg in sorted(errors.items()):
buf.write(f"# - {protocol} ({error_msg})\n")
buf.write("#\n")

buf.write(IMPORTS)
buf.write(INIT_CODE)
for cls_src in srcs:
buf.write(cls_src)

return buf.getvalue().removesuffix("\n")


if __name__ == "__main__":
print(create_source())
Loading

0 comments on commit 3646cd2

Please sign in to comment.