Skip to content

Commit

Permalink
fix 816 (#817)
Browse files Browse the repository at this point in the history
  • Loading branch information
thejcannon authored Mar 27, 2024
1 parent 58bb1bc commit b3d8621
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 3 deletions.
3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,8 @@ GCS Advanced Usage

Additional keyword arguments can be propagated to the GCS open method (`docs <https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#google_cloud_storage_blob_Blob_open>`__), which is used by ``smart_open`` under the hood, using the ``blob_open_kwargs`` transport parameter.

Additional keyword arguments can be propagated to the GCS ``get_blob`` method (`docs <https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.bucket.Bucket#google_cloud_storage_bucket_Bucket_get_blob>`__) when opening in read mode, using the ``get_blob_kwargs`` transport parameter.

Additional blob properties (`docs <https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#properties>`__) can be set before an upload, as long as they are not read-only, using the ``blob_properties`` transport parameter.

.. code-block:: python
Expand Down Expand Up @@ -507,4 +509,3 @@ issues or pull requests there. Suggestions, pull requests and improvements welco

``smart_open`` is open source software released under the `MIT license <https://github.com/piskvorky/smart_open/blob/master/LICENSE>`_.
Copyright (c) 2015-now `Radim Řehůřek <https://radimrehurek.com>`_.

10 changes: 9 additions & 1 deletion smart_open/gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def open(
buffer_size=None,
min_part_size=_DEFAULT_MIN_PART_SIZE,
client=None, # type: google.cloud.storage.Client
get_blob_kwargs=None,
blob_properties=None,
blob_open_kwargs=None,
):
Expand All @@ -78,6 +79,9 @@ def open(
The minimum part size for multipart uploads. For writing only.
client: google.cloud.storage.Client, optional
The GCS client to use when working with google-cloud-storage.
get_blob_kwargs: dict, optional
Additional keyword arguments to propagate to the bucket.get_blob
method of the google-cloud-storage library. For reading only.
blob_properties: dict, optional
Set properties on blob before writing. For writing only.
blob_open_kwargs: dict, optional
Expand All @@ -95,6 +99,7 @@ def open(
_blob = Reader(bucket=bucket_id,
key=blob_id,
client=client,
get_blob_kwargs=get_blob_kwargs,
blob_open_kwargs=blob_open_kwargs)

elif mode in (constants.WRITE_BINARY, 'w', 'wt'):
Expand All @@ -116,8 +121,11 @@ def Reader(bucket,
buffer_size=None,
line_terminator=None,
client=None,
get_blob_kwargs=None,
blob_open_kwargs=None):

if get_blob_kwargs is None:
get_blob_kwargs = {}
if blob_open_kwargs is None:
blob_open_kwargs = {}
if client is None:
Expand All @@ -128,7 +136,7 @@ def Reader(bucket,
warn_deprecated('line_terminator')

bkt = client.bucket(bucket)
blob = bkt.get_blob(key)
blob = bkt.get_blob(key, **get_blob_kwargs)

if blob is None:
raise google.cloud.exceptions.NotFound(f'blob {key} not found in {bucket}')
Expand Down
13 changes: 12 additions & 1 deletion smart_open/tests/test_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def __init__(self, client, name=None):
#
self.client.register_bucket(self)

self.get_blob = mock.Mock(side_effect=self._get_blob)

def blob(self, blob_id, **kwargs):
    """Return the blob registered under ``blob_id``, or a new ``FakeBlob``.

    The fallback ``FakeBlob(blob_id, self, **kwargs)`` is constructed on
    every call, even when a registered blob is found and returned.
    """
    fallback = FakeBlob(blob_id, self, **kwargs)
    return self.blobs.get(blob_id, fallback)

Expand All @@ -57,7 +59,7 @@ def delete(self):
def exists(self):
    """Return the value of this object's ``_exists`` attribute."""
    return self._exists

def get_blob(self, blob_id):
def _get_blob(self, blob_id, **kwargs):
try:
return self.blobs[blob_id]
except KeyError as e:
Expand Down Expand Up @@ -300,6 +302,15 @@ def test_property_passthrough(self):
for k, v in blob_properties.items():
self.assertEqual(getattr(b, k), v)

def test_get_blob_kwargs_passthrough(self):
    """Check that ``Reader`` hands ``get_blob_kwargs`` on to ``get_blob``."""
    extra_kwargs = {'generation': '1111111111111111'}

    # Constructing the Reader is expected to fail with NotFound here; the
    # point of the test is the arguments the lookup was made with.
    with self.assertRaises(google.cloud.exceptions.NotFound):
        smart_open.gcs.Reader(BUCKET_NAME, BLOB_NAME, get_blob_kwargs=extra_kwargs)

    bucket = self.client.bucket(BUCKET_NAME)
    bucket.get_blob.assert_called_once_with(BLOB_NAME, **extra_kwargs)

def test_default_open_kwargs(self):
smart_open.gcs.Writer(BUCKET_NAME, BLOB_NAME)

Expand Down

0 comments on commit b3d8621

Please sign in to comment.