Skip to content

Commit ddce7e5

Browse files
feat: send entire object checksum in the final api call of resumable upload (#1654)
feat: send entire object checksum in the final api call of resumable upload fixes b/461994245 --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 4e91c54 commit ddce7e5

File tree

3 files changed

+41
-27
lines changed

3 files changed

+41
-27
lines changed

google/cloud/storage/_media/_upload.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,13 @@ def _prepare_request(self):
688688
_CONTENT_TYPE_HEADER: self._content_type,
689689
_helpers.CONTENT_RANGE_HEADER: content_range,
690690
}
691+
if (start_byte + len(payload) == self._total_bytes) and (
692+
self._checksum_object is not None
693+
):
694+
local_checksum = _helpers.prepare_checksum_digest(
695+
self._checksum_object.digest()
696+
)
697+
headers["x-goog-hash"] = f"{self._checksum_type}={local_checksum}"
691698
return _PUT, self.resumable_url, payload, headers
692699

693700
def _update_checksum(self, start_byte, payload):

tests/resumable_media/system/requests/test_upload.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import google.cloud.storage._media.requests as resumable_requests
2828
from google.cloud.storage._media import _helpers
2929
from .. import utils
30-
from google.cloud.storage._media import _upload
3130
from google.cloud.storage.exceptions import InvalidResponse
3231
from google.cloud.storage.exceptions import DataCorruption
3332

@@ -372,29 +371,6 @@ def test_resumable_upload_with_headers(
372371
_resumable_upload_helper(authorized_transport, img_stream, cleanup, headers=headers)
373372

374373

375-
@pytest.mark.parametrize("checksum", ["md5", "crc32c"])
376-
def test_resumable_upload_with_bad_checksum(
377-
authorized_transport, img_stream, bucket, cleanup, checksum
378-
):
379-
fake_checksum_object = _helpers._get_checksum_object(checksum)
380-
fake_checksum_object.update(b"bad data")
381-
fake_prepared_checksum_digest = _helpers.prepare_checksum_digest(
382-
fake_checksum_object.digest()
383-
)
384-
with mock.patch.object(
385-
_helpers, "prepare_checksum_digest", return_value=fake_prepared_checksum_digest
386-
):
387-
with pytest.raises(DataCorruption) as exc_info:
388-
_resumable_upload_helper(
389-
authorized_transport, img_stream, cleanup, checksum=checksum
390-
)
391-
expected_checksums = {"md5": "1bsd83IYNug8hd+V1ING3Q==", "crc32c": "YQGPxA=="}
392-
expected_message = _upload._UPLOAD_CHECKSUM_MISMATCH_MESSAGE.format(
393-
checksum.upper(), fake_prepared_checksum_digest, expected_checksums[checksum]
394-
)
395-
assert exc_info.value.args[0] == expected_message
396-
397-
398374
def test_resumable_upload_bad_chunk_size(authorized_transport, img_stream):
399375
blob_name = os.path.basename(img_stream.name)
400376
# Create the actual upload object.

tests/unit/test_blob.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3049,15 +3049,22 @@ def test__initiate_resumable_upload_with_client_custom_headers(self):
30493049
self._initiate_resumable_helper(client=client)
30503050

30513051
def _make_resumable_transport(
3052-
self, headers1, headers2, headers3, total_bytes, data_corruption=False
3052+
self,
3053+
headers1,
3054+
headers2,
3055+
headers3,
3056+
total_bytes,
3057+
data_corruption=False,
3058+
md5_checksum_value=None,
3059+
crc32c_checksum_value=None,
30533060
):
30543061
fake_transport = mock.Mock(spec=["request"])
30553062

30563063
fake_response1 = self._mock_requests_response(http.client.OK, headers1)
30573064
fake_response2 = self._mock_requests_response(
30583065
http.client.PERMANENT_REDIRECT, headers2
30593066
)
3060-
json_body = f'{{"size": "{total_bytes:d}"}}'
3067+
json_body = json.dumps({"size": str(total_bytes), "md5Hash": md5_checksum_value, "crc32c": crc32c_checksum_value})
30613068
if data_corruption:
30623069
fake_response3 = DataCorruption(None)
30633070
else:
@@ -3151,6 +3158,9 @@ def _do_resumable_upload_call2(
31513158
if_metageneration_match=None,
31523159
if_metageneration_not_match=None,
31533160
timeout=None,
3161+
checksum=None,
3162+
crc32c_checksum_value=None,
3163+
md5_checksum_value=None,
31543164
):
31553165
# Third mock transport.request() does sends last chunk.
31563166
content_range = f"bytes {blob.chunk_size:d}-{total_bytes - 1:d}/{total_bytes:d}"
@@ -3161,6 +3171,11 @@ def _do_resumable_upload_call2(
31613171
"content-type": content_type,
31623172
"content-range": content_range,
31633173
}
3174+
if checksum == "crc32c":
3175+
expected_headers["x-goog-hash"] = f"crc32c={crc32c_checksum_value}"
3176+
elif checksum == "md5":
3177+
expected_headers["x-goog-hash"] = f"md5={md5_checksum_value}"
3178+
31643179
payload = data[blob.chunk_size :]
31653180
return mock.call(
31663181
"PUT",
@@ -3181,12 +3196,17 @@ def _do_resumable_helper(
31813196
timeout=None,
31823197
data_corruption=False,
31833198
retry=None,
3199+
checksum=None, # None is also a valid value, when user decides to disable checksum validation.
31843200
):
31853201
CHUNK_SIZE = 256 * 1024
31863202
USER_AGENT = "testing 1.2.3"
31873203
content_type = "text/html"
31883204
# Data to be uploaded.
31893205
data = b"<html>" + (b"A" * CHUNK_SIZE) + b"</html>"
3206+
3207+
# Data calcuated offline and entered here. (Unit test best practice).
3208+
crc32c_checksum_value = "mQ30hg=="
3209+
md5_checksum_value = "wajHeg1f2Q2u9afI6fjPOw=="
31903210
total_bytes = len(data)
31913211
if use_size:
31923212
size = total_bytes
@@ -3213,6 +3233,8 @@ def _do_resumable_helper(
32133233
headers3,
32143234
total_bytes,
32153235
data_corruption=data_corruption,
3236+
md5_checksum_value=md5_checksum_value,
3237+
crc32c_checksum_value=crc32c_checksum_value,
32163238
)
32173239

32183240
# Create some mock arguments and call the method under test.
@@ -3247,7 +3269,7 @@ def _do_resumable_helper(
32473269
if_generation_not_match,
32483270
if_metageneration_match,
32493271
if_metageneration_not_match,
3250-
checksum=None,
3272+
checksum=checksum,
32513273
retry=retry,
32523274
**timeout_kwarg,
32533275
)
@@ -3296,6 +3318,9 @@ def _do_resumable_helper(
32963318
if_metageneration_match=if_metageneration_match,
32973319
if_metageneration_not_match=if_metageneration_not_match,
32983320
timeout=expected_timeout,
3321+
checksum=checksum,
3322+
crc32c_checksum_value=crc32c_checksum_value,
3323+
md5_checksum_value=md5_checksum_value,
32993324
)
33003325
self.assertEqual(transport.request.mock_calls, [call0, call1, call2])
33013326

@@ -3308,6 +3333,12 @@ def test__do_resumable_upload_no_size(self):
33083333
def test__do_resumable_upload_with_size(self):
33093334
self._do_resumable_helper(use_size=True)
33103335

3336+
def test__do_resumable_upload_with_size_with_crc32c_checksum(self):
3337+
self._do_resumable_helper(use_size=True, checksum="crc32c")
3338+
3339+
def test__do_resumable_upload_with_size_with_md5_checksum(self):
3340+
self._do_resumable_helper(use_size=True, checksum="md5")
3341+
33113342
def test__do_resumable_upload_with_retry(self):
33123343
self._do_resumable_helper(retry=DEFAULT_RETRY)
33133344

0 commit comments

Comments
 (0)