diff --git a/CHANGELOG.md b/CHANGELOG.md index 673cb836..9f38d44f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Unreleased - Refactor s3 submodule to minimize resource usage (PR [#569](https://github.com/RaRe-Technologies/smart_open/pull/569), [@mpenkov](https://github.com/mpenkov)) +- Change download_as_string to download_as_bytes in gcs submodule (PR [#571](https://github.com/RaRe-Technologies/smart_open/pull/571), [@alexandreyc](https://github.com/alexandreyc)) # 4.0.1, 27 Nov 2020 diff --git a/README.rst b/README.rst index e1960ac6..ecbbeadb 100644 --- a/README.rst +++ b/README.rst @@ -329,12 +329,12 @@ GCS Credentials --------------- ``smart_open`` uses the ``google-cloud-storage`` library to talk to GCS. ``google-cloud-storage`` uses the ``google-cloud`` package under the hood to handle authentication. -There are several `options `__ to provide +There are several `options `__ to provide credentials. By default, ``smart_open`` will defer to ``google-cloud-storage`` and let it take care of the credentials. To override this behavior, pass a ``google.cloud.storage.Client`` object as a transport parameter to the ``open`` function. -You can `customize the credentials `__ +You can `customize the credentials `__ when constructing the client. ``smart_open`` will then use the client when talking to GCS. To follow allow with the example below, `refer to Google's guide `__ to setting up GCS authentication with a service account. 
diff --git a/smart_open/gcs.py b/smart_open/gcs.py index bf027482..8cf2edde 100644 --- a/smart_open/gcs.py +++ b/smart_open/gcs.py @@ -188,10 +188,10 @@ def _download_blob_chunk(self, size): # binary = b'' elif size == -1: - binary = self._blob.download_as_string(start=start) + binary = self._blob.download_as_bytes(start=start) else: end = position + size - binary = self._blob.download_as_string(start=start, end=end) + binary = self._blob.download_as_bytes(start=start, end=end) return binary diff --git a/smart_open/tests/test_gcs.py b/smart_open/tests/test_gcs.py index 18665483..71ad16a2 100644 --- a/smart_open/tests/test_gcs.py +++ b/smart_open/tests/test_gcs.py @@ -157,9 +157,9 @@ def delete(self): self._bucket.delete_blob(self) self._exists = False - def download_as_string(self, start=0, end=None): - # mimics Google's API by returning bytes, despite the method name - # https://google-cloud-python.readthedocs.io/en/0.32.0/storage/blobs.html#google.cloud.storage.blob.Blob.download_as_string + def download_as_bytes(self, start=0, end=None): + # mimics Google's API by returning bytes + # https://googleapis.dev/python/storage/latest/blobs.html#google.cloud.storage.blob.Blob.download_as_bytes if end is None: end = self.__contents.tell() self.__contents.seek(start) @@ -170,7 +170,7 @@ def exists(self, client=None): def upload_from_string(self, data): # mimics Google's API by accepting bytes or str, despite the method name - # https://google-cloud-python.readthedocs.io/en/0.32.0/storage/blobs.html#google.cloud.storage.blob.Blob.upload_from_string + # https://googleapis.dev/python/storage/latest/blobs.html#google.cloud.storage.blob.Blob.upload_from_string if isinstance(data, str): data = bytes(data, 'utf8') self.__contents = io.BytesIO(data) @@ -214,10 +214,10 @@ def test_upload_download(self): blob = FakeBlob('fake-blob', self.bucket) contents = b'test' blob.upload_from_string(contents) - self.assertEqual(blob.download_as_string(), b'test') - 
self.assertEqual(blob.download_as_string(start=2), b'st') - self.assertEqual(blob.download_as_string(end=2), b'te') - self.assertEqual(blob.download_as_string(start=2, end=3), b's') + self.assertEqual(blob.download_as_bytes(), b'test') + self.assertEqual(blob.download_as_bytes(start=2), b'st') + self.assertEqual(blob.download_as_bytes(end=2), b'te') + self.assertEqual(blob.download_as_bytes(start=2, end=3), b's') def test_size(self): blob = FakeBlob('fake-blob', self.bucket) @@ -372,7 +372,7 @@ def test_unfinished_put_does_not_write_to_blob(self): response = self.session.put(self.upload_url, data, headers=headers) self.assertIn(response.status_code, smart_open.gcs._UPLOAD_INCOMPLETE_STATUS_CODES) self.session._blob_with_url(self.upload_url, self.client) - blob_contents = self.blob.download_as_string() + blob_contents = self.blob.download_as_bytes() self.assertEqual(blob_contents, b'') def test_finished_put_writes_to_blob(self): @@ -384,7 +384,7 @@ def test_finished_put_writes_to_blob(self): response = self.session.put(self.upload_url, data, headers=headers) self.assertEqual(response.status_code, 200) self.session._blob_with_url(self.upload_url, self.client) - blob_contents = self.blob.download_as_string() + blob_contents = self.blob.download_as_bytes() data.seek(0) self.assertEqual(blob_contents, data.read())