From 041aca1d3c11b14daeb8c6c26bfc9b870e71151f Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 3 Mar 2024 23:34:44 +0100 Subject: [PATCH] Fix open kwarg passthrough (#204) * upath.core: handle kwargs in UPath.open * upath: tests for open * upath.local: support fsspec options in open --- .flake8 | 2 + upath/core.py | 69 ++++++++++++++++++++++-- upath/implementations/local.py | 43 +++++++++++++++ upath/tests/cases.py | 20 ++++++- upath/tests/implementations/test_data.py | 20 +++++++ 5 files changed, 149 insertions(+), 5 deletions(-) diff --git a/.flake8 b/.flake8 index 4347457b..b4f70a67 100644 --- a/.flake8 +++ b/.flake8 @@ -9,6 +9,8 @@ ignore= # unindexed parameters in the str.format, see: # https://pypi.org/project/flake8-string-format/ P1 + # def statements on the same line with overload + E704 max_line_length = 88 max-complexity = 15 select = B,C,E,F,W,T4,B902,T,P diff --git a/upath/core.py b/upath/core.py index 6be5343e..a9058cc0 100644 --- a/upath/core.py +++ b/upath/core.py @@ -6,14 +6,19 @@ from copy import copy from pathlib import Path from types import MappingProxyType +from typing import IO from typing import TYPE_CHECKING from typing import Any +from typing import BinaryIO +from typing import Literal from typing import Mapping +from typing import TextIO from typing import TypeVar +from typing import overload from urllib.parse import urlsplit -from fsspec import AbstractFileSystem -from fsspec import get_filesystem_class +from fsspec.registry import get_filesystem_class +from fsspec.spec import AbstractFileSystem from upath._compat import FSSpecAccessorShim from upath._compat import PathlibPathShim @@ -741,8 +746,64 @@ def is_socket(self): def samefile(self, other_path): raise NotImplementedError - def open(self, mode="r", buffering=-1, encoding=None, errors=None, newline=None): - return self.fs.open(self.path, mode) # fixme + @overload + def open( + self, + mode: Literal["r", "w", "a"] = ..., + buffering: int = ..., + encoding: str = ..., + errors: str = ..., + newline: str = ..., + **fsspec_kwargs: Any, + ) -> TextIO: ... + + @overload + def open( + self, + mode: Literal["rb", "wb", "ab"] = ..., + buffering: int = ..., + encoding: str = ..., + errors: str = ..., + newline: str = ..., + **fsspec_kwargs: Any, + ) -> BinaryIO: ... + + def open( + self, + mode: str = "r", + *args: Any, + **fsspec_kwargs: Any, + ) -> IO[Any]: + """ + Open the file pointed by this path and return a file object, as + the built-in open() function does. + + Parameters + ---------- + mode: + Opening mode. Default is 'r'. + buffering: + Default is the block size of the underlying fsspec filesystem. + encoding: + Encoding is only used in text mode. Default is None. + errors: + Error handling for encoding. Only used in text mode. Default is None. + newline: + Newline handling. Only used in text mode. Default is None. + **fsspec_kwargs: + Additional options for the fsspec filesystem. + """ + # match the signature of pathlib.Path.open() + for key, value in zip(["buffering", "encoding", "errors", "newline"], args): + if key in fsspec_kwargs: + raise TypeError( + f"{type(self).__name__}.open() got multiple values for '{key}'" + ) + fsspec_kwargs[key] = value + # translate pathlib buffering to fs block_size + if "buffering" in fsspec_kwargs: + fsspec_kwargs.setdefault("block_size", fsspec_kwargs.pop("buffering")) + return self.fs.open(self.path, mode=mode, **fsspec_kwargs) def iterdir(self): for name in self.fs.listdir(self.path): diff --git a/upath/implementations/local.py b/upath/implementations/local.py index e0fba453..b2ee1e54 100644 --- a/upath/implementations/local.py +++ b/upath/implementations/local.py @@ -6,6 +6,7 @@ from pathlib import Path from pathlib import PosixPath from pathlib import WindowsPath +from typing import IO from typing import Any from typing import Collection from typing import MutableMapping @@ -110,6 +111,27 @@ class PosixUPath(PosixPath, LocalPath): # assign all PosixPath methods/attrs to prevent multi inheritance issues _set_class_attributes(locals(), src=PosixPath) + def open( + self, + mode="r", + buffering=-1, + encoding=None, + errors=None, + newline=None, + **fsspec_kwargs, + ) -> IO[Any]: + if fsspec_kwargs: + return super(LocalPath, self).open( + mode=mode, + buffering=buffering, + encoding=encoding, + errors=errors, + newline=newline, + **fsspec_kwargs, + ) + else: + return PosixPath.open(self, mode, buffering, encoding, errors, newline) + if sys.version_info < (3, 12): def __new__( @@ -153,6 +175,27 @@ class WindowsUPath(WindowsPath, LocalPath): # assign all WindowsPath methods/attrs to prevent multi inheritance issues _set_class_attributes(locals(), src=WindowsPath) + def open( + self, + mode="r", + buffering=-1, + encoding=None, + errors=None, + newline=None, + **fsspec_kwargs, + ) -> IO[Any]: + if fsspec_kwargs: + return super(LocalPath, self).open( + mode=mode, + buffering=buffering, + encoding=encoding, + errors=errors, + newline=newline, + **fsspec_kwargs, + ) + else: + return WindowsPath.open(self, mode, buffering, encoding, errors, newline) + if sys.version_info < (3, 12): def __new__( diff --git a/upath/tests/cases.py b/upath/tests/cases.py index 5037cce0..ae04ee6b 100644 --- a/upath/tests/cases.py +++ b/upath/tests/cases.py @@ -236,7 +236,25 @@ def test_makedirs_exist_ok_false(self): new_dir._accessor.makedirs(new_dir, exist_ok=False) def test_open(self): - pass + p = self.path.joinpath("file1.txt") + with p.open(mode="r") as f: + assert f.read() == "hello world" + with p.open(mode="rb") as f: + assert f.read() == b"hello world" + + def test_open_buffering(self): + p = self.path.joinpath("file1.txt") + p.open(buffering=-1) + + def test_open_block_size(self): + p = self.path.joinpath("file1.txt") + with p.open(mode="r", block_size=8192) as f: + assert f.read() == "hello world" + + def test_open_errors(self): + p = self.path.joinpath("file1.txt") + with p.open(mode="r", encoding="ascii", errors="strict") as f: + assert f.read() == "hello world" def test_owner(self): with pytest.raises(NotImplementedError): diff --git a/upath/tests/implementations/test_data.py b/upath/tests/implementations/test_data.py index 6342cc46..90e1f8a6 100644 --- a/upath/tests/implementations/test_data.py +++ b/upath/tests/implementations/test_data.py @@ -92,6 +92,26 @@ def test_mkdir_parents_true_exists_ok_true(self): def test_mkdir_parents_true_exists_ok_false(self): pass + def test_open(self): + p = UPath("data:text/plain;base64,aGVsbG8gd29ybGQ=") + with p.open(mode="r") as f: + assert f.read() == "hello world" + with p.open(mode="rb") as f: + assert f.read() == b"hello world" + + def test_open_buffering(self): + self.path.open(buffering=-1) + + def test_open_block_size(self): + p = UPath("data:text/plain;base64,aGVsbG8gd29ybGQ=") + with p.open(mode="r", block_size=8192) as f: + assert f.read() == "hello world" + + def test_open_errors(self): + p = UPath("data:text/plain;base64,aGVsbG8gd29ybGQ=") + with p.open(mode="r", encoding="ascii", errors="strict") as f: + assert f.read() == "hello world" + def test_read_bytes(self, pathlib_base): assert len(self.path.read_bytes()) == 69