diff --git a/google/cloud/storage/_media/_upload.py b/google/cloud/storage/_media/_upload.py
index e656d3e5c..765716882 100644
--- a/google/cloud/storage/_media/_upload.py
+++ b/google/cloud/storage/_media/_upload.py
@@ -1387,6 +1387,33 @@ def _process_upload_response(self, response):
 
         .. _sans-I/O: https://sans-io.readthedocs.io/
         """
+        # Data corruption errors shouldn't be considered as invalid responses,
+        # so we handle them before the call to `_helpers.require_status_code`.
+        # A 400 response may carry an XML error document naming a checksum
+        # failure; any other 400 (including one whose body is not such a
+        # document) falls through to the generic status check below.
+        if response.status_code == http.client.BAD_REQUEST:
+            try:
+                root = ElementTree.fromstring(response.text)
+            except (ElementTree.ParseError, TypeError):
+                # Missing or non-XML body: not a checksum error document.
+                root = None
+            error_code = root.findtext("Code") if root is not None else None
+            if error_code in ("InvalidDigest", "BadDigest", "CrcMismatch"):
+                raise DataCorruption(
+                    response,
+                    (
+                        "Checksum mismatch: checksum calculated by client and"
+                        " server did not match. Error code: {error_code},"
+                        " Error message: {error_message},"
+                        " Error details: {error_details}"
+                    ).format(
+                        error_code=error_code,
+                        error_message=root.findtext("Message"),
+                        error_details=root.findtext("Details"),
+                    ),
+                )
+
         _helpers.require_status_code(
             response,
             (http.client.OK,),
diff --git a/google/cloud/storage/_media/requests/upload.py b/google/cloud/storage/_media/requests/upload.py
index 75d4c53da..cb9653fb3 100644
--- a/google/cloud/storage/_media/requests/upload.py
+++ b/google/cloud/storage/_media/requests/upload.py
@@ -21,6 +21,7 @@
 
 from google.cloud.storage._media import _upload
 from google.cloud.storage._media.requests import _request_helpers
+from google.cloud.storage._media import _helpers
 
 
 class SimpleUpload(_request_helpers.RequestsMixin, _upload.SimpleUpload):
@@ -757,6 +758,14 @@ def upload(
             ~requests.Response: The HTTP response returned by ``transport``.
         """
         method, url, payload, headers = self._prepare_upload_request()
+        if self._checksum_object is not None:
+            checksum_digest_in_base64 = _helpers.prepare_checksum_digest(
+                self._checksum_object.digest()
+            )
+            if self._checksum_type == "crc32c":
+                headers["X-Goog-Hash"] = f"crc32c={checksum_digest_in_base64}"
+            elif self._checksum_type == "md5":
+                headers["X-Goog-Hash"] = f"md5={checksum_digest_in_base64}"
 
         # Wrap the request business logic in a function to be retried.
         def retriable_request():
diff --git a/tests/resumable_media/unit/requests/test_upload.py b/tests/resumable_media/unit/requests/test_upload.py
index 6868cc7b8..07b4cebd8 100644
--- a/tests/resumable_media/unit/requests/test_upload.py
+++ b/tests/resumable_media/unit/requests/test_upload.py
@@ -42,6 +42,11 @@
 UPLOAD_ID = "VXBsb2FkIElEIGZvciBlbHZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA"
 PARTS = {1: "39a59594290b0f9a30662a56d695b71d", 2: "00000000290b0f9a30662a56d695b71d"}
 FILE_DATA = b"testdata" * 128
+_HASH_HEADER = "x-goog-hash"
+CRC32C_HASH_OF_FIRST_PART = "8hVqVQ=="
+MD5_HASH_OF_FIRST_PART = "gfVZ4+0LdooJwGAkxLrCcg=="
+DEFAULT_CONNECT_TIMEOUT = 61
+DEFAULT_READ_TIMEOUT = 60
 
 
 @pytest.fixture(scope="session")
@@ -402,6 +407,66 @@ def test_mpu_part(filename):
     assert part.etag == PARTS[1]
 
 
+def test_mpu_part_with_md5_enabled(filename):
+    part = upload_mod.XMLMPUPart(
+        EXAMPLE_XML_UPLOAD_URL,
+        UPLOAD_ID,
+        filename,
+        start=0,
+        end=128,
+        part_number=1,
+        checksum="md5",
+    )
+
+    transport = mock.Mock(spec=["request"])
+    transport.request.return_value = _make_response(
+        headers={"etag": PARTS[1], _HASH_HEADER: f"md5={MD5_HASH_OF_FIRST_PART}"}
+    )
+
+    part.upload(transport)
+
+    transport.request.assert_called_once_with(
+        "PUT",
+        f"{part.upload_url}?partNumber={part.part_number}&uploadId={UPLOAD_ID}",
+        data=FILE_DATA[part.start : part.end],
+        headers={"X-Goog-Hash": f"md5={MD5_HASH_OF_FIRST_PART}"},
+        timeout=(DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT),
+    )
+
+    assert part.finished
+    assert part.etag == PARTS[1]
+
+
+def test_mpu_part_with_crc32c_enabled(filename):
+    part = upload_mod.XMLMPUPart(
+        EXAMPLE_XML_UPLOAD_URL,
+        UPLOAD_ID,
+        filename,
+        start=0,
+        end=128,
+        part_number=1,
+        checksum="crc32c",
+    )
+
+    transport = mock.Mock(spec=["request"])
+    transport.request.return_value = _make_response(
+        headers={"etag": PARTS[1], _HASH_HEADER: f"crc32c={CRC32C_HASH_OF_FIRST_PART}"}
+    )
+
+    part.upload(transport)
+
+    transport.request.assert_called_once_with(
+        "PUT",
+        f"{part.upload_url}?partNumber={part.part_number}&uploadId={UPLOAD_ID}",
+        data=FILE_DATA[part.start : part.end],
+        headers={"X-Goog-Hash": f"crc32c={CRC32C_HASH_OF_FIRST_PART}"},
+        timeout=(DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT),
+    )
+
+    assert part.finished
+    assert part.etag == PARTS[1]
+
+
 def _make_response(status_code=http.client.OK, headers=None, text=None):
     headers = headers or {}
     return mock.Mock(
diff --git a/tests/resumable_media/unit/test__upload.py b/tests/resumable_media/unit/test__upload.py
index faabc0f56..15bac1618 100644
--- a/tests/resumable_media/unit/test__upload.py
+++ b/tests/resumable_media/unit/test__upload.py
@@ -47,6 +47,21 @@
 UPLOAD_ID = "VXBsb2FkIElEIGZvciBlbHZpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA"
 PARTS = {1: "39a59594290b0f9a30662a56d695b71d", 2: "00000000290b0f9a30662a56d695b71d"}
 FILE_DATA = b"testdata" * 128
+CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE = """<?xml version='1.0' encoding='UTF-8'?>
+<Error>
+  <Code>{ERROR_CODE}</Code>
+  <Message>The MD5 you specified in Content-MD5 or x-goog-hash was invalid.</Message>
+  <Details>Invalid MD5 value: dfdfdfd==</Details>
+</Error>"""
+INVALID_MD5_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
+    ERROR_CODE="InvalidDigest"
+)
+INVALID_CRC32C_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
+    ERROR_CODE="BadDigest"
+)
+INCORRECT_LENGTH_CRC32C_XML_RESPONSE = CHECKSUM_MISMATCH_ERROR_MSG_XML_TEMPLATE.format(
+    ERROR_CODE="CrcMismatch"
+)
 
 
 @pytest.fixture(scope="session")
@@ -1471,7 +1486,15 @@ def test_xml_mpu_part_invalid_response(filename):
         part._process_upload_response(response)
 
 
-def test_xml_mpu_part_checksum_failure(filename):
+@pytest.mark.parametrize(
+    "error_scenarios",
+    [
+        INVALID_MD5_XML_RESPONSE,
+        INVALID_CRC32C_XML_RESPONSE,
+        INCORRECT_LENGTH_CRC32C_XML_RESPONSE,
+    ],
+)
+def test_xml_mpu_part_checksum_failure(filename, error_scenarios):
     PART_NUMBER = 1
     START = 0
     END = 256
@@ -1490,7 +1513,9 @@ def test_xml_mpu_part_checksum_failure(filename):
     _fix_up_virtual(part)
     part._prepare_upload_request()
     response = _make_xml_response(
-        headers={"etag": ETAG, "x-goog-hash": "md5=Ojk9c3dhfxgoKVVHYwFbHQ=="}
+        status_code=http.client.BAD_REQUEST,
+        headers={"etag": ETAG, "x-goog-hash": "md5=Ojk9c3dhfxgoKVVHYwFbHQ=="},
+        text=error_scenarios,
     )  # Example md5 checksum but not the correct one
     with pytest.raises(DataCorruption):
         part._process_upload_response(response)
@@ -1555,7 +1580,7 @@ def _make_xml_response(status_code=http.client.OK, headers=None, text=None):
         headers=headers,
         status_code=status_code,
         text=text,
-        spec=["headers", "status_code"],
+        spec=["headers", "status_code", "text"],
     )
 
 