Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(io): simplify resource interface #93

Merged
merged 8 commits into from
Oct 20, 2023
99 changes: 49 additions & 50 deletions gimie/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,102 +4,101 @@
import os
from pathlib import Path
import requests
from typing import Optional, Union
from typing import Iterator, Optional, Union


class Resource:
"""Abstract class for buffered read-only access to local or remote resources via
a file-like interface."""
"""Abstract class for read-only access to local or remote resources via
a file-like interface.

Parameters
----------
name:
The name of the resource, typically the filename.
"""

name: str

def open(self) -> io.BufferedReader:
def open(self) -> io.RawIOBase:
raise NotImplementedError


class LocalResource(Resource):
"""Providing buffered read-only access to local data.

Parameters
----------
name: the name of the resource, typically the filename.
url: the URL where the resource can be downloaded from.
headers: optional headers to pass to the request.
"""Providing read-only access to local data via a file-like interface.

Examples
--------
>>> from gimie.io import LocalResource
>>> resource = LocalResource("README.md")
"""

def __init__(self, path: Union[str, os.PathLike]):
self.path: Path = Path(path)

def open(self, mode="r") -> io.BufferedReader:
return io.BufferedReader(io.FileIO(self.path, mode))
def open(self, mode="r") -> io.RawIOBase:
cmdoret marked this conversation as resolved.
Show resolved Hide resolved
return io.FileIO(self.path, mode)

@property
def name(self) -> str:
return self.path.name


class RemoteResource(Resource):
"""Provides buffered read-only access to remote data.
"""Provides read-only access to remote data via a file-like interface.

Parameters
----------
name: the name of the resource, typically the filename.
url: the URL where the resource can be downloaded from.
headers: optional headers to pass to the request.
url:
The URL where the resource can be downloaded from.
headers:
Optional headers to pass to the request.

Examples
--------
>>> from gimie.io import RemoteResource
>>> url = "https://raw.githubusercontent.com/SDSC-ORD/gimie/main/README.md"
>>> resource = RemoteResource("README.md", url)
>>> content = RemoteResource("README.md", url).open().read()
>>> assert isinstance(content, bytes)
"""

def __init__(self, name: str, url: str, headers: Optional[dict] = None):
self.name = name
self.url = url
self.headers = headers or {}

def open(self) -> io.BufferedReader:
def open(self) -> io.RawIOBase:
resp = requests.get(
self.url, headers=self.headers, stream=True
).iter_content(chunk_size=128)
return iterable_to_stream(resp)
return IterStream(resp)


def iterable_to_stream(
iterable, buffer_size=io.DEFAULT_BUFFER_SIZE
) -> io.BufferedReader:
"""
Converts an iterable yielding bytestrings to a read-only input stream.
Lets you use an iterable (e.g. a generator) that yields bytestrings as a read-only
input stream.
class IterStream(io.RawIOBase):
"""Wraps an iterator under a file-like interface.

The stream implements Python 3's newer I/O API (available in Python 2's io module).
For efficiency, the stream is buffered.
Parameters
----------
iterator:
An iterator yielding bytes.

credits: https://stackoverflow.com/a/20260030/8440675
Examples
--------
>>> stream = IterStream(iter([b"Hello ", b"World"]))
>>> stream.read()
b'Hello World'
"""

class IterStream(io.RawIOBase):
def __init__(self):
self.leftover = ""

def readable(self):
return True

def readinto(self, b):
try:
l = len(b) # We're supposed to return at most this much
chunk = self.leftover or next(iterable)
output, self.leftover = chunk[:l], chunk[l:]
b[: len(output)] = output
return len(output)
except StopIteration:
return 0 # indicate EOF

return io.BufferedReader(IterStream(), buffer_size=buffer_size)
def __init__(self, iterator: Iterator[bytes]):
self.leftover = b""
self.iterator = iterator

def readable(self):
return True

def readinto(self, b):
try:
l = len(b) # We're supposed to return at most this much
chunk = self.leftover or next(self.iterator)
cmdoret marked this conversation as resolved.
Show resolved Hide resolved
output, self.leftover = chunk[:l], chunk[l:]
b[: len(output)] = output
return len(output)
except StopIteration:
return 0 # indicate EOF