Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'Blob.rewrite' API wrapper method. #2510

Merged
merged 1 commit into from
Oct 11, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 59 additions & 4 deletions storage/google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,52 @@ def compose(self, sources, client=None):
_target_object=self)
self._set_properties(api_response)

def rewrite(self, source, token=None, client=None):
    """Rewrite source blob into this one.

    A single call may not copy the whole object. When the returned
    token is not ``None``, call this method again with that token to
    continue the same rewrite.

    :type source: :class:`Blob`
    :param source: blob whose contents will be rewritten into this blob.

    :type token: str
    :param token: Optional. Token returned from an earlier, not-completed
                  call to rewrite the same source blob. If passed,
                  result will include updated status, total bytes written.

    :type client: :class:`~google.cloud.storage.client.Client` or
                  ``NoneType``
    :param client: Optional. The client to use. If not passed, falls back
                   to the ``client`` stored on the blob's bucket.

    :rtype: tuple
    :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
              is a rewrite token (``None`` if the rewrite is complete),
              ``bytes_rewritten`` is the number of bytes rewritten so far,
              and ``total_bytes`` is the total number of bytes to be
              rewritten.
    """
    client = self._require_client(client)

    # Headers for this (destination) blob's key come first; the source
    # blob's key is then added under the "copy source" header names.
    headers = _get_encryption_headers(self._encryption_key)
    headers.update(_get_encryption_headers(
        source._encryption_key, source=True))

    if token:
        query_params = {'rewriteToken': token}
    else:
        query_params = {}

    api_response = client.connection.api_request(
        method='POST', path=source.path + '/rewriteTo' + self.path,
        query_params=query_params, data=self._properties, headers=headers,
        _target_object=self)
    rewritten = int(api_response['totalBytesRewritten'])
    size = int(api_response['objectSize'])

    # Per the Cloud Storage JSON API, the 'resource' key is present in
    # the response only once the rewrite has completed, so it must not
    # be read for a partial rewrite. A completed rewrite has no token.
    if api_response['done']:
        self._set_properties(api_response['resource'])
        return None, rewritten, size

    return api_response['rewriteToken'], rewritten, size

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.


cache_control = _scalar_property('cacheControl')
"""HTTP 'Cache-Control' header for this object.

Expand Down Expand Up @@ -938,12 +984,16 @@ def __init__(self, bucket_name, object_name):
self._relative_path = ''


def _get_encryption_headers(key):
def _get_encryption_headers(key, source=False):
"""Builds customer encryption key headers

:type key: bytes
:param key: 32 byte key to build request key and hash.

:type source: bool
:param source: If true, return headers for the "source" blob; otherwise,
return headers for the "destination" blob.

:rtype: dict
:returns: dict of HTTP headers being sent in request.
"""
Expand All @@ -955,8 +1005,13 @@ def _get_encryption_headers(key):
key_hash = base64.b64encode(key_hash).rstrip()
key = base64.b64encode(key).rstrip()

if source:
prefix = 'X-Goog-Copy-Source-Encryption-'
else:
prefix = 'X-Goog-Encryption-'

return {
'X-Goog-Encryption-Algorithm': 'AES256',
'X-Goog-Encryption-Key': _bytes_to_unicode(key),
'X-Goog-Encryption-Key-Sha256': _bytes_to_unicode(key_hash),
prefix + 'Algorithm': 'AES256',
prefix + 'Key': _bytes_to_unicode(key),
prefix + 'Key-Sha256': _bytes_to_unicode(key_hash),
}
159 changes: 156 additions & 3 deletions storage/unit_tests/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,6 +1202,159 @@ def test_compose_w_additional_property_changes(self):
self.assertEqual(kw[0]['path'], '/b/name/o/%s/compose' % DESTINATION)
self.assertEqual(kw[0]['data'], SENT)

def test_rewrite_other_bucket_other_name_no_encryption_partial(self):
    """Check an unfinished rewrite into a different bucket and blob name.

    Neither blob is given an encryption key, so none of the encryption
    headers may be sent, and the token from the canned response must be
    handed back to the caller.
    """
    from six.moves.http_client import OK
    source_name = 'source'
    dest_name = 'dest'
    dest_bucket_name = 'other-bucket'
    expected_token = 'TOKEN'
    payload = {
        'totalBytesRewritten': 33,
        'objectSize': 42,
        'done': False,
        'rewriteToken': expected_token,
        'resource': {'etag': 'DEADBEEF'},
    }
    connection = _Connection(({'status': OK}, payload))
    client = _Client(connection)
    source_blob = self._makeOne(source_name, bucket=_Bucket(client=client))
    dest_blob = self._makeOne(
        dest_name, bucket=_Bucket(client=client, name=dest_bucket_name))

    got_token, got_rewritten, got_size = dest_blob.rewrite(source_blob)

    self.assertEqual(got_token, expected_token)
    self.assertEqual(got_rewritten, 33)
    self.assertEqual(got_size, 42)

    # Exactly one request must have been issued to the fake connection.
    requests = connection._requested
    self.assertEqual(len(requests), 1)
    request = requests[0]
    self.assertEqual(request['method'], 'POST')
    expected_path = '/b/name/o/%s/rewriteTo/b/%s/o/%s' % (
        source_name, dest_bucket_name, dest_name)
    self.assertEqual(request['path'], expected_path)
    self.assertEqual(request['query_params'], {})
    self.assertEqual(request['data'], {})

    # Normalize header-name casing before checking which are absent.
    sent_headers = dict(
        (name.title(), str(value))
        for name, value in request['headers'].items())
    must_be_absent = (
        'X-Goog-Copy-Source-Encryption-Algorithm',
        'X-Goog-Copy-Source-Encryption-Key',
        'X-Goog-Copy-Source-Encryption-Key-Sha256',
        'X-Goog-Encryption-Algorithm',
        'X-Goog-Encryption-Key',
        'X-Goog-Encryption-Key-Sha256',
    )
    for header_name in must_be_absent:
        self.assertNotIn(header_name, sent_headers)

def test_rewrite_same_name_no_old_key_new_key_done(self):
    """Check a completed rewrite that adds a key to an unencrypted blob.

    Source and destination share a bucket and name; only the destination
    has an encryption key, so only the destination headers are expected.
    """
    import base64
    import hashlib
    from six.moves.http_client import OK
    key = b'01234567890123456789012345678901'  # 32 bytes
    key_b64 = base64.b64encode(key).rstrip().decode('ascii')
    key_digest = hashlib.sha256(key).digest()
    key_digest_b64 = base64.b64encode(key_digest).rstrip().decode('ascii')
    blob_name = 'blob'
    payload = {
        'totalBytesRewritten': 42,
        'objectSize': 42,
        'done': True,
        'resource': {'etag': 'DEADBEEF'},
    }
    connection = _Connection(({'status': OK}, payload))
    bucket = _Bucket(client=_Client(connection))
    plain = self._makeOne(blob_name, bucket=bucket)
    encrypted = self._makeOne(blob_name, bucket=bucket, encryption_key=key)

    got_token, got_rewritten, got_size = encrypted.rewrite(plain)

    self.assertIsNone(got_token)
    self.assertEqual(got_rewritten, 42)
    self.assertEqual(got_size, 42)

    # Exactly one request must have been issued to the fake connection.
    requests = connection._requested
    self.assertEqual(len(requests), 1)
    request = requests[0]
    self.assertEqual(request['method'], 'POST')
    expected_path = '/b/name/o/%s/rewriteTo/b/name/o/%s' % (
        blob_name, blob_name)
    self.assertEqual(request['path'], expected_path)
    self.assertEqual(request['query_params'], {})
    self.assertEqual(request['data'], {})

    # Normalize header-name casing before inspecting the headers.
    sent_headers = dict(
        (name.title(), str(value))
        for name, value in request['headers'].items())
    must_be_absent = (
        'X-Goog-Copy-Source-Encryption-Algorithm',
        'X-Goog-Copy-Source-Encryption-Key',
        'X-Goog-Copy-Source-Encryption-Key-Sha256',
    )
    for header_name in must_be_absent:
        self.assertNotIn(header_name, sent_headers)
    self.assertEqual(sent_headers['X-Goog-Encryption-Algorithm'], 'AES256')
    self.assertEqual(sent_headers['X-Goog-Encryption-Key'], key_b64)
    self.assertEqual(
        sent_headers['X-Goog-Encryption-Key-Sha256'], key_digest_b64)

def test_rewrite_same_name_no_key_new_key_w_token(self):
    """Check a token-resumed rewrite between two differently keyed blobs.

    Both blobs carry their own encryption key, so the request must hold
    the "copy source" headers for the source key and the plain encryption
    headers for the destination key, plus the rewrite token as a query
    parameter.
    """
    import base64
    import hashlib
    from six.moves.http_client import OK

    def _b64_text(raw):
        # Base64-encode bytes and return the result as text.
        return base64.b64encode(raw).rstrip().decode('ascii')

    source_key = b'01234567890123456789012345678901'  # 32 bytes
    source_key_b64 = _b64_text(source_key)
    source_digest_b64 = _b64_text(hashlib.sha256(source_key).digest())
    dest_key = b'90123456789012345678901234567890'  # 32 bytes
    dest_key_b64 = _b64_text(dest_key)
    dest_digest_b64 = _b64_text(hashlib.sha256(dest_key).digest())
    blob_name = 'blob'
    resume_token = 'TOKEN'
    payload = {
        'totalBytesRewritten': 42,
        'objectSize': 42,
        'done': True,
        'resource': {'etag': 'DEADBEEF'},
    }
    connection = _Connection(({'status': OK}, payload))
    bucket = _Bucket(client=_Client(connection))
    source = self._makeOne(
        blob_name, bucket=bucket, encryption_key=source_key)
    dest = self._makeOne(blob_name, bucket=bucket, encryption_key=dest_key)

    got_token, got_rewritten, got_size = dest.rewrite(
        source, token=resume_token)

    self.assertIsNone(got_token)
    self.assertEqual(got_rewritten, 42)
    self.assertEqual(got_size, 42)

    # Exactly one request must have been issued to the fake connection.
    requests = connection._requested
    self.assertEqual(len(requests), 1)
    request = requests[0]
    self.assertEqual(request['method'], 'POST')
    expected_path = '/b/name/o/%s/rewriteTo/b/name/o/%s' % (
        blob_name, blob_name)
    self.assertEqual(request['path'], expected_path)
    self.assertEqual(
        request['query_params'], {'rewriteToken': resume_token})
    self.assertEqual(request['data'], {})

    # Normalize header-name casing before inspecting the headers.
    sent_headers = dict(
        (name.title(), str(value))
        for name, value in request['headers'].items())
    self.assertEqual(
        sent_headers['X-Goog-Copy-Source-Encryption-Algorithm'], 'AES256')
    self.assertEqual(
        sent_headers['X-Goog-Copy-Source-Encryption-Key'], source_key_b64)
    self.assertEqual(
        sent_headers['X-Goog-Copy-Source-Encryption-Key-Sha256'],
        source_digest_b64)
    self.assertEqual(sent_headers['X-Goog-Encryption-Algorithm'], 'AES256')
    self.assertEqual(sent_headers['X-Goog-Encryption-Key'], dest_key_b64)
    self.assertEqual(
        sent_headers['X-Goog-Encryption-Key-Sha256'], dest_digest_b64)

def test_cache_control_getter(self):
BLOB_NAME = 'blob-name'
bucket = _Bucket()
Expand Down Expand Up @@ -1555,17 +1708,17 @@ def request(self, uri, method, headers, body, **kw):


class _Bucket(object):
path = '/b/name'
name = 'name'

def __init__(self, client=None):
def __init__(self, client=None, name='name'):
    """Set up a stand-in bucket for the blob unit tests.

    :type client: object or ``NoneType``
    :param client: Optional. Client to attach; when omitted, a ``_Client``
                   wrapping a fresh ``_Connection`` is created.

    :type name: str
    :param name: Bucket name; also used to build ``path`` as
                 ``'/b/' + name``.
    """
    if client is None:
        client = _Client(_Connection())
    self.client = client
    self.name = name
    self.path = '/b/' + name
    # Bookkeeping containers, all starting out empty.
    self._blobs = {}
    self._copied = []
    self._deleted = []

def delete_blob(self, blob_name, client=None):
del self._blobs[blob_name]
Expand Down
49 changes: 49 additions & 0 deletions system_tests/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,3 +436,52 @@ def test_compose_replace_existing_blob(self):

composed = original.download_as_string()
self.assertEqual(composed, BEFORE + TO_APPEND)


class TestStorageRewrite(TestStorageFiles):
    """System tests for ``Blob.rewrite``, run through ``self.bucket``."""

    FILENAMES = (
        'file01.txt',
    )

    def test_rewrite_create_new_blob_add_encryption_key(self):
        """Rewrite an unencrypted blob into a new blob that has a key."""
        file_data = self.FILES['simple']

        source = self.bucket.blob('source')
        source.upload_from_filename(file_data['path'])
        self.case_blobs_to_delete.append(source)
        source_data = source.download_as_string()

        KEY = os.urandom(32)
        dest = self.bucket.blob('dest', encryption_key=KEY)
        token, rewritten, total = dest.rewrite(source)
        self.case_blobs_to_delete.append(dest)

        # The test expects the rewrite to finish in one call (no token).
        self.assertIsNone(token)
        self.assertEqual(rewritten, len(source_data))
        self.assertEqual(total, len(source_data))

        self.assertEqual(source.download_as_string(),
                         dest.download_as_string())

    def test_rewrite_rotate_encryption_key(self):
        """Rewrite a blob onto itself, swapping its encryption key."""
        BLOB_NAME = 'rotating-keys'
        file_data = self.FILES['simple']

        SOURCE_KEY = os.urandom(32)
        source = self.bucket.blob(BLOB_NAME, encryption_key=SOURCE_KEY)
        source.upload_from_filename(file_data['path'])
        self.case_blobs_to_delete.append(source)
        source_data = source.download_as_string()

        DEST_KEY = os.urandom(32)
        dest = self.bucket.blob(BLOB_NAME, encryption_key=DEST_KEY)
        token, rewritten, total = dest.rewrite(source)
        # Not adding 'dest' to 'self.case_blobs_to_delete':  it is the
        # same object as 'source'.

        # The test expects the rewrite to finish in one call (no token).
        self.assertIsNone(token)
        self.assertEqual(rewritten, len(source_data))
        self.assertEqual(total, len(source_data))

        # Reading back with the new key must yield the original bytes.
        self.assertEqual(dest.download_as_string(), source_data)