From 012475b480dd516bbd49818a85fb95c413a1f18d Mon Sep 17 00:00:00 2001 From: Cyril Matthey-Doret Date: Fri, 20 Oct 2023 10:59:02 +0000 Subject: [PATCH] refactor(io): simplify resource interface (#93) * refactor(io): do not enforce buffering * doc(io): better docstrings * refactor(io): Resource.open always returns io.RawIOBase * test(io): rm unneeded import, type checks * test(io): rm useless check * refactor(io): rm mode from LocalResource.open args * fix(io): skip empty elements in IterStream * refactor(io): simplify readinto logic --- gimie/io.py | 98 ++++++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 47 deletions(-) diff --git a/gimie/io.py b/gimie/io.py index d41c732f..6f852300 100644 --- a/gimie/io.py +++ b/gimie/io.py @@ -4,39 +4,38 @@ import os from pathlib import Path import requests -from typing import Optional, Union +from typing import Iterator, Optional, Union class Resource: - """Abstract class for buffered read-only access to local or remote resources via - a file-like interface.""" + """Abstract class for read-only access to local or remote resources via + a file-like interface. + + Parameters + ---------- + name: + The name of the resource, typically the filename. + """ name: str - def open(self) -> io.BufferedReader: + def open(self) -> io.RawIOBase: raise NotImplementedError class LocalResource(Resource): - """Providing buffered read-only access to local data. - - Parameters - ---------- - name: the name of the resource, typically the filename. - url: the URL where the resource. can be downladed from. - headers: optional headers to pass to the request. + """Providing read-only access to local data via a file-like interface. 
Examples -------- - >>> from gimie.io import LocalResource >>> resource = LocalResource("README.md") """ def __init__(self, path: Union[str, os.PathLike]): self.path: Path = Path(path) - def open(self, mode="r") -> io.BufferedReader: - return io.BufferedReader(io.FileIO(self.path, mode)) + def open(self) -> io.RawIOBase: + return io.FileIO(self.path, mode="r") @property def name(self) -> str: @@ -44,19 +43,20 @@ def name(self) -> str: class RemoteResource(Resource): - """Provides buffered read-only access to remote data. + """Provides read-only access to remote data via a file-like interface. Parameters ---------- - name: the name of the resource, typically the filename. - url: the URL where the resource. can be downladed from. - headers: optional headers to pass to the request. + url: + The URL where the resource can be downloaded from. + headers: + Optional headers to pass to the request. Examples -------- - >>> from gimie.io import RemoteResource >>> url = "https://raw.githubusercontent.com/SDSC-ORD/gimie/main/README.md" - >>> resource = RemoteResource("README.md", url) + >>> content = RemoteResource("README.md", url).open().read() + >>> assert isinstance(content, bytes) """ def __init__(self, name: str, url: str, headers: Optional[dict] = None): @@ -64,42 +64,46 @@ def __init__(self, name: str, url: str, headers: Optional[dict] = None): self.url = url self.headers = headers or {} - def open(self) -> io.BufferedReader: + def open(self) -> io.RawIOBase: resp = requests.get( self.url, headers=self.headers, stream=True ).iter_content(chunk_size=128) - return iterable_to_stream(resp) + return IterStream(resp) -def iterable_to_stream( - iterable, buffer_size=io.DEFAULT_BUFFER_SIZE -) -> io.BufferedReader: - """ - Converts an iterable yielding bytestrings to a read-only input stream. - Lets you use an iterable (e.g. a generator) that yields bytestrings as a read-only - input stream. 
+class IterStream(io.RawIOBase): + """Wraps an iterator under a file-like interface. + Empty elements in the iterator are ignored. - The stream implements Python 3's newer I/O API (available in Python 2's io module). - For efficiency, the stream is buffered. + Parameters + ---------- + iterator: + An iterator yielding bytes. - credits: https://stackoverflow.com/a/20260030/8440675 + Examples + -------- + >>> stream = IterStream(iter([b"Hello ", b"", b"World"])) + >>> stream.read() + b'Hello World' """ - class IterStream(io.RawIOBase): - def __init__(self): - self.leftover = "" - - def readable(self): - return True - - def readinto(self, b): - try: - l = len(b) # We're supposed to return at most this much - chunk = self.leftover or next(iterable) + def __init__(self, iterator: Iterator[bytes]): + self.leftover = b"" + self.iterator = iterator + + def readable(self): + return True + + def readinto(self, b): + try: + l = len(b) # We're supposed to return at most this much + while True: + chunk = self.leftover or next(self.iterator) + # skip empty elements + if not chunk: + continue output, self.leftover = chunk[:l], chunk[l:] b[: len(output)] = output return len(output) - except StopIteration: - return 0 # indicate EOF - - return io.BufferedReader(IterStream(), buffer_size=buffer_size) + except StopIteration: + return 0 # indicate EOF