diff --git a/storage/google/cloud/storage/blob.py b/storage/google/cloud/storage/blob.py
index f2e8c96edb37..be964b179d8e 100644
--- a/storage/google/cloud/storage/blob.py
+++ b/storage/google/cloud/storage/blob.py
@@ -655,6 +655,52 @@ def compose(self, sources, client=None):
             _target_object=self)
         self._set_properties(api_response)
 
+    def rewrite(self, source, token=None, client=None):
+        """Rewrite source blob into this one.
+
+        :type source: :class:`Blob`
+        :param source: blob whose contents will be rewritten into this blob.
+
+        :type token: str
+        :param token: Optional. Token returned from an earlier, not-completed
+                      call to rewrite the same source blob. If passed,
+                      result will include updated status, total bytes written.
+
+        :type client: :class:`~google.cloud.storage.client.Client` or
+                      ``NoneType``
+        :param client: Optional. The client to use. If not passed, falls back
+                       to the ``client`` stored on the blob's bucket.
+
+        :rtype: tuple
+        :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
+                  is a rewrite token (``None`` if the rewrite is complete),
+                  ``bytes_rewritten`` is the number of bytes rewritten so far,
+                  and ``total_bytes`` is the total number of bytes to be
+                  rewritten.
+        """
+        client = self._require_client(client)
+        headers = _get_encryption_headers(self._encryption_key)
+        headers.update(_get_encryption_headers(
+            source._encryption_key, source=True))
+
+        if token:
+            query_params = {'rewriteToken': token}
+        else:
+            query_params = {}
+
+        api_response = client.connection.api_request(
+            method='POST', path=source.path + '/rewriteTo' + self.path,
+            query_params=query_params, data=self._properties, headers=headers,
+            _target_object=self)
+        self._set_properties(api_response['resource'])
+        rewritten = int(api_response['totalBytesRewritten'])
+        size = int(api_response['objectSize'])
+
+        if api_response['done']:
+            return None, rewritten, size
+
+        return api_response['rewriteToken'], rewritten, size
+
     cache_control = _scalar_property('cacheControl')
     """HTTP 'Cache-Control' header for this object.
 
@@ -938,12 +984,16 @@ def __init__(self, bucket_name, object_name):
         self._relative_path = ''
 
 
-def _get_encryption_headers(key):
+def _get_encryption_headers(key, source=False):
     """Builds customer encryption key headers
 
     :type key: bytes
     :param key: 32 byte key to build request key and hash.
 
+    :type source: bool
+    :param source: If true, return headers for the "source" blob; otherwise,
+                   return headers for the "destination" blob.
+
     :rtype: dict
     :returns: dict of HTTP headers being sent in request.
     """
@@ -955,8 +1005,13 @@ def _get_encryption_headers(key):
     key_hash = base64.b64encode(key_hash).rstrip()
     key = base64.b64encode(key).rstrip()
 
+    if source:
+        prefix = 'X-Goog-Copy-Source-Encryption-'
+    else:
+        prefix = 'X-Goog-Encryption-'
+
     return {
-        'X-Goog-Encryption-Algorithm': 'AES256',
-        'X-Goog-Encryption-Key': _bytes_to_unicode(key),
-        'X-Goog-Encryption-Key-Sha256': _bytes_to_unicode(key_hash),
+        prefix + 'Algorithm': 'AES256',
+        prefix + 'Key': _bytes_to_unicode(key),
+        prefix + 'Key-Sha256': _bytes_to_unicode(key_hash),
     }
diff --git a/storage/unit_tests/test_blob.py b/storage/unit_tests/test_blob.py
index 9c5d3527dd45..0658e8677c37 100644
--- a/storage/unit_tests/test_blob.py
+++ b/storage/unit_tests/test_blob.py
@@ -1202,6 +1202,159 @@ def test_compose_w_additional_property_changes(self):
         self.assertEqual(kw[0]['path'], '/b/name/o/%s/compose' % DESTINATION)
         self.assertEqual(kw[0]['data'], SENT)
 
+    def test_rewrite_other_bucket_other_name_no_encryption_partial(self):
+        from six.moves.http_client import OK
+        SOURCE_BLOB = 'source'
+        DEST_BLOB = 'dest'
+        DEST_BUCKET = 'other-bucket'
+        TOKEN = 'TOKEN'
+        RESPONSE = {
+            'totalBytesRewritten': 33,
+            'objectSize': 42,
+            'done': False,
+            'rewriteToken': TOKEN,
+            'resource': {'etag': 'DEADBEEF'},
+        }
+        response = ({'status': OK}, RESPONSE)
+        connection = _Connection(response)
+        client = _Client(connection)
+        source_bucket = _Bucket(client=client)
+        source_blob = self._makeOne(SOURCE_BLOB, bucket=source_bucket)
+        dest_bucket = _Bucket(client=client, name=DEST_BUCKET)
+        dest_blob = self._makeOne(DEST_BLOB, bucket=dest_bucket)
+
+        token, rewritten, size = dest_blob.rewrite(source_blob)
+
+        self.assertEqual(token, TOKEN)
+        self.assertEqual(rewritten, 33)
+        self.assertEqual(size, 42)
+
+        kw = connection._requested
+        self.assertEqual(len(kw), 1)
+        self.assertEqual(kw[0]['method'], 'POST')
+        PATH = '/b/name/o/%s/rewriteTo/b/%s/o/%s' % (
+            SOURCE_BLOB, DEST_BUCKET, DEST_BLOB)
+        self.assertEqual(kw[0]['path'], PATH)
+        self.assertEqual(kw[0]['query_params'], {})
+        SENT = {}
+        self.assertEqual(kw[0]['data'], SENT)
+
+        headers = {
+            key.title(): str(value) for key, value in kw[0]['headers'].items()}
+        self.assertNotIn('X-Goog-Copy-Source-Encryption-Algorithm', headers)
+        self.assertNotIn('X-Goog-Copy-Source-Encryption-Key', headers)
+        self.assertNotIn('X-Goog-Copy-Source-Encryption-Key-Sha256', headers)
+        self.assertNotIn('X-Goog-Encryption-Algorithm', headers)
+        self.assertNotIn('X-Goog-Encryption-Key', headers)
+        self.assertNotIn('X-Goog-Encryption-Key-Sha256', headers)
+
+    def test_rewrite_same_name_no_old_key_new_key_done(self):
+        import base64
+        import hashlib
+        from six.moves.http_client import OK
+        KEY = b'01234567890123456789012345678901'  # 32 bytes
+        KEY_B64 = base64.b64encode(KEY).rstrip().decode('ascii')
+        KEY_HASH = hashlib.sha256(KEY).digest()
+        KEY_HASH_B64 = base64.b64encode(KEY_HASH).rstrip().decode('ascii')
+        BLOB_NAME = 'blob'
+        RESPONSE = {
+            'totalBytesRewritten': 42,
+            'objectSize': 42,
+            'done': True,
+            'resource': {'etag': 'DEADBEEF'},
+        }
+        response = ({'status': OK}, RESPONSE)
+        connection = _Connection(response)
+        client = _Client(connection)
+        bucket = _Bucket(client=client)
+        plain = self._makeOne(BLOB_NAME, bucket=bucket)
+        encrypted = self._makeOne(BLOB_NAME, bucket=bucket, encryption_key=KEY)
+
+        token, rewritten, size = encrypted.rewrite(plain)
+
+        self.assertIsNone(token)
+        self.assertEqual(rewritten, 42)
+        self.assertEqual(size, 42)
+
+        kw = connection._requested
+        self.assertEqual(len(kw), 1)
+        self.assertEqual(kw[0]['method'], 'POST')
+        PATH = '/b/name/o/%s/rewriteTo/b/name/o/%s' % (BLOB_NAME, BLOB_NAME)
+        self.assertEqual(kw[0]['path'], PATH)
+        self.assertEqual(kw[0]['query_params'], {})
+        SENT = {}
+        self.assertEqual(kw[0]['data'], SENT)
+
+        headers = {
+            key.title(): str(value) for key, value in kw[0]['headers'].items()}
+        self.assertNotIn('X-Goog-Copy-Source-Encryption-Algorithm', headers)
+        self.assertNotIn('X-Goog-Copy-Source-Encryption-Key', headers)
+        self.assertNotIn('X-Goog-Copy-Source-Encryption-Key-Sha256', headers)
+        self.assertEqual(headers['X-Goog-Encryption-Algorithm'], 'AES256')
+        self.assertEqual(headers['X-Goog-Encryption-Key'], KEY_B64)
+        self.assertEqual(headers['X-Goog-Encryption-Key-Sha256'], KEY_HASH_B64)
+
+    def test_rewrite_same_name_w_old_key_new_key_w_token(self):
+        import base64
+        import hashlib
+        from six.moves.http_client import OK
+        SOURCE_KEY = b'01234567890123456789012345678901'  # 32 bytes
+        SOURCE_KEY_B64 = base64.b64encode(SOURCE_KEY).rstrip().decode('ascii')
+        SOURCE_KEY_HASH = hashlib.sha256(SOURCE_KEY).digest()
+        SOURCE_KEY_HASH_B64 = base64.b64encode(
+            SOURCE_KEY_HASH).rstrip().decode('ascii')
+        DEST_KEY = b'90123456789012345678901234567890'  # 32 bytes
+        DEST_KEY_B64 = base64.b64encode(DEST_KEY).rstrip().decode('ascii')
+        DEST_KEY_HASH = hashlib.sha256(DEST_KEY).digest()
+        DEST_KEY_HASH_B64 = base64.b64encode(
+            DEST_KEY_HASH).rstrip().decode('ascii')
+        BLOB_NAME = 'blob'
+        TOKEN = 'TOKEN'
+        RESPONSE = {
+            'totalBytesRewritten': 42,
+            'objectSize': 42,
+            'done': True,
+            'resource': {'etag': 'DEADBEEF'},
+        }
+        response = ({'status': OK}, RESPONSE)
+        connection = _Connection(response)
+        client = _Client(connection)
+        bucket = _Bucket(client=client)
+        source = self._makeOne(
+            BLOB_NAME, bucket=bucket, encryption_key=SOURCE_KEY)
+        dest = self._makeOne(BLOB_NAME, bucket=bucket, encryption_key=DEST_KEY)
+
+        token, rewritten, size = dest.rewrite(source, token=TOKEN)
+
+        self.assertIsNone(token)
+        self.assertEqual(rewritten, 42)
+        self.assertEqual(size, 42)
+
+        kw = connection._requested
+        self.assertEqual(len(kw), 1)
+        self.assertEqual(kw[0]['method'], 'POST')
+        PATH = '/b/name/o/%s/rewriteTo/b/name/o/%s' % (BLOB_NAME, BLOB_NAME)
+        self.assertEqual(kw[0]['path'], PATH)
+        self.assertEqual(kw[0]['query_params'], {'rewriteToken': TOKEN})
+        SENT = {}
+        self.assertEqual(kw[0]['data'], SENT)
+
+        headers = {
+            key.title(): str(value) for key, value in kw[0]['headers'].items()}
+        self.assertEqual(
+            headers['X-Goog-Copy-Source-Encryption-Algorithm'], 'AES256')
+        self.assertEqual(
+            headers['X-Goog-Copy-Source-Encryption-Key'], SOURCE_KEY_B64)
+        self.assertEqual(
+            headers['X-Goog-Copy-Source-Encryption-Key-Sha256'],
+            SOURCE_KEY_HASH_B64)
+        self.assertEqual(
+            headers['X-Goog-Encryption-Algorithm'], 'AES256')
+        self.assertEqual(
+            headers['X-Goog-Encryption-Key'], DEST_KEY_B64)
+        self.assertEqual(
+            headers['X-Goog-Encryption-Key-Sha256'], DEST_KEY_HASH_B64)
+
     def test_cache_control_getter(self):
         BLOB_NAME = 'blob-name'
         bucket = _Bucket()
@@ -1555,10 +1708,8 @@ def request(self, uri, method, headers, body, **kw):
 
 
 class _Bucket(object):
-    path = '/b/name'
-    name = 'name'
 
-    def __init__(self, client=None):
+    def __init__(self, client=None, name='name'):
         if client is None:
             connection = _Connection()
             client = _Client(connection)
@@ -1566,6 +1717,8 @@ def __init__(self, client=None):
         self._blobs = {}
         self._copied = []
         self._deleted = []
+        self.name = name
+        self.path = '/b/' + name
 
     def delete_blob(self, blob_name, client=None):
         del self._blobs[blob_name]
diff --git a/system_tests/storage.py b/system_tests/storage.py
index 4e7460d32442..664cab102d0b 100644
--- a/system_tests/storage.py
+++ b/system_tests/storage.py
@@ -436,3 +436,52 @@ def test_compose_replace_existing_blob(self):
 
         composed = original.download_as_string()
         self.assertEqual(composed, BEFORE + TO_APPEND)
+
+
+class TestStorageRewrite(TestStorageFiles):
+
+    FILENAMES = (
+        'file01.txt',
+    )
+
+    def test_rewrite_create_new_blob_add_encryption_key(self):
+        file_data = self.FILES['simple']
+
+        source = self.bucket.blob('source')
+        source.upload_from_filename(file_data['path'])
+        self.case_blobs_to_delete.append(source)
+        source_data = source.download_as_string()
+
+        KEY = os.urandom(32)
+        dest = self.bucket.blob('dest', encryption_key=KEY)
+        token, rewritten, total = dest.rewrite(source)
+        self.case_blobs_to_delete.append(dest)
+
+        self.assertIsNone(token)
+        self.assertEqual(rewritten, len(source_data))
+        self.assertEqual(total, len(source_data))
+
+        self.assertEqual(source.download_as_string(),
+                         dest.download_as_string())
+
+    def test_rewrite_rotate_encryption_key(self):
+        BLOB_NAME = 'rotating-keys'
+        file_data = self.FILES['simple']
+
+        SOURCE_KEY = os.urandom(32)
+        source = self.bucket.blob(BLOB_NAME, encryption_key=SOURCE_KEY)
+        source.upload_from_filename(file_data['path'])
+        self.case_blobs_to_delete.append(source)
+        source_data = source.download_as_string()
+
+        DEST_KEY = os.urandom(32)
+        dest = self.bucket.blob(BLOB_NAME, encryption_key=DEST_KEY)
+        token, rewritten, total = dest.rewrite(source)
+        # Not adding 'dest' to 'self.case_blobs_to_delete': it is the
+        # same object as 'source'.
+
+        self.assertIsNone(token)
+        self.assertEqual(rewritten, len(source_data))
+        self.assertEqual(total, len(source_data))
+
+        self.assertEqual(dest.download_as_string(), source_data)