Skip to content

Commit

Permalink
Backport py312 memory-filesystem and empty authority handling (#162)
Browse files Browse the repository at this point in the history
* upath: always add netloc to ensure fsspec compatibility

* upath.implementations.memory: ignore authority parts in memory URIs

* tests: stricter normalization tests and 2-slash memory uri normalization

* upath.core: improve __eq__ check

* upath.implementations.memory: implement memory uri normalization

* tests: mark netloc only memory normalization tests as currently broken
  • Loading branch information
ap-- authored Jan 26, 2024
1 parent 9f8cc86 commit a7cc6e6
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 8 deletions.
18 changes: 17 additions & 1 deletion upath/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def _format_parsed_parts(
else:
scheme, netloc = url.scheme, url.netloc
scheme = (scheme + ":") if scheme else ""
netloc = "//" + netloc if netloc else ""
netloc = "//" + netloc # always add netloc
formatted = scheme + netloc + path
return formatted

Expand Down Expand Up @@ -685,6 +685,22 @@ def _from_parsed_parts(
obj._url = url
return obj

def __eq__(self, other):
    """Compare two paths by parts, protocol and storage options.

    Returns ``NotImplemented`` when ``other`` is not an instance of this
    object's class. When one side has more parts than the other and its
    last part is the empty string, that trailing empty part is ignored
    for the comparison.
    """
    if not isinstance(other, self.__class__):
        return NotImplemented

    mine, theirs = self.parts, other.parts
    # Only the strictly longer side may shed a trailing "" part; being
    # strictly longer already guarantees it is non-empty.
    if len(mine) > len(theirs) and mine[-1] == "":
        mine = mine[:-1]
    elif len(theirs) > len(mine) and theirs[-1] == "":
        theirs = theirs[:-1]

    # Checked in this order so later attributes are only read when the
    # earlier comparisons succeeded.
    if not mine == theirs:
        return False
    if not self.protocol == other.protocol:
        return False
    return self.storage_options == other.storage_options

def __str__(self) -> str:
"""Return the string representation of the path, suitable for
passing to system calls."""
Expand Down
32 changes: 32 additions & 0 deletions upath/implementations/memory.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from __future__ import annotations

from typing import Any
from urllib.parse import SplitResult

import upath.core
from upath.core import PT


class _MemoryAccessor(upath.core._FSSpecAccessor):
Expand All @@ -27,3 +31,31 @@ def iterdir(self):
name = name.rstrip("/")
name = self._sub_path(name)
yield self._make_child_relpath(name)

@classmethod
def _from_parts(cls, args, url=None, **kwargs):
    """Fold a URL authority (netloc) into the leading path argument.

    When ``url`` carries a non-empty ``netloc``, that netloc is turned into
    the first path segment: it is prefixed to ``args[0]`` (after removing
    one leading ``/`` from it), or becomes the sole argument when ``args``
    is empty. The netloc is then cleared on ``url`` before delegating to
    the parent implementation.

    Args:
        args: mutable list of path arguments; it is modified in place.
        url: parsed URL exposing ``netloc`` and ``_replace``, or ``None``.
        **kwargs: forwarded unchanged to the parent ``_from_parts``.

    Returns:
        Whatever the parent class's ``_from_parts`` returns.
    """
    if url and url.netloc:
        if args:
            head = args[0]
            if head.startswith("/"):
                head = head[1:]
            args[0:1] = [f"/{url.netloc}/{head}"]
        else:
            # Wrap in a list: assigning a bare string to a slice would
            # splice in its individual characters as separate arguments.
            args[:] = [f"/{url.netloc}"]
        url = url._replace(netloc="")
    return super()._from_parts(args, url=url, **kwargs)

@classmethod
def _format_parsed_parts(
    cls: type[PT],
    drv: str,
    root: str,
    parts: list[str],
    url: SplitResult | None = None,
    **kwargs: Any,
) -> str:
    """Format the path via the parent class, collapsing a triple slash.

    The parent's formatted string is returned unchanged unless it begins
    with ``memory:///``; in that case the leading prefix is rewritten to
    ``memory://`` and the remainder is kept as is.
    """
    triple_slash_prefix = "memory:///"
    formatted = super()._format_parsed_parts(drv, root, parts, url=url, **kwargs)
    if not formatted.startswith(triple_slash_prefix):
        return formatted
    # Only the leading prefix is rewritten; later occurrences are untouched.
    return "memory://" + formatted[len(triple_slash_prefix):]
23 changes: 23 additions & 0 deletions upath/tests/implementations/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,26 @@ def path(self, local_testdir):

def test_is_MemoryPath(self):
    """``self.path`` must be an instance of ``MemoryPath``."""
    path_under_test = self.path
    assert isinstance(path_under_test, MemoryPath)


@pytest.mark.parametrize(
    "path, expected",
    [
        # one slash after the scheme
        ("memory:/", "memory://"),
        ("memory:/a", "memory://a"),
        ("memory:/a/b", "memory://a/b"),
        # two slashes after the scheme
        ("memory://", "memory://"),
        pytest.param(
            "memory://a",
            "memory://a",
            marks=pytest.mark.xfail(
                reason="currently broken due to urllib parsing",
            ),
        ),
        ("memory://a/b", "memory://a/b"),
        # three slashes after the scheme
        ("memory:///", "memory://"),
        ("memory:///a", "memory://a"),
        ("memory:///a/b", "memory://a/b"),
    ],
)
def test_string_representation(path, expected):
    """``str(UPath(path))`` must equal the expected two-slash form."""
    assert str(UPath(path)) == expected
23 changes: 16 additions & 7 deletions upath/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,19 +339,28 @@ def test_uri_parsing():
("http://example.com/a//..//.", "http://example.com/a//"),
("http://example.com/a//..//b", "http://example.com/a//b"),
# Normalization with and without an authority component
("memory:/a/b/..", "memory:/a/"),
("memory:/a/b/../..", "memory:/"),
("memory:/a/b/../../..", "memory:/"),
("memory:/a/b/..", "memory://a/"),
("memory:/a/b/.", "memory://a/b/"),
("memory:/a/b/../..", "memory://"),
("memory:/a/b/../../..", "memory://"),
("memory://a/b/.", "memory://a/b/"),
("memory://a/b/..", "memory://a/"),
("memory://a/b/../..", "memory://a/"),
("memory://a/b/../../..", "memory://a/"),
("memory://a/b/../..", "memory://"),
("memory://a/b/../../..", "memory://"),
("memory:///a/b/.", "memory://a/b/"),
("memory:///a/b/..", "memory://a/"),
("memory:///a/b/../..", "memory://"),
("memory:///a/b/../../..", "memory://"),
),
)


@pytest.mark.parametrize(*NORMALIZATIONS)
def test_normalize(unnormalized, normalized):
    """Resolving ``unnormalized`` must yield the ``normalized`` path.

    The check is made twice: once through ``UPath`` equality and once
    through the string representations of both paths.
    """
    expected = UPath(normalized)
    # Normalise only, do not attempt to follow redirects for http:// paths here
    result = UPath.resolve(UPath(unnormalized))
    assert expected == result
    assert str(expected) == str(result)

0 comments on commit a7cc6e6

Please sign in to comment.