Skip to content

Commit

Permalink
Merge pull request googleapis#2510 from tseaver/1960-storage-blob_rew…
Browse files Browse the repository at this point in the history
…rite

Add 'Blob.rewrite' API wrapper method.
  • Loading branch information
tseaver authored Oct 11, 2016
2 parents 2b09d6f + 5e28600 commit b797f7c
Show file tree
Hide file tree
Showing 3 changed files with 264 additions and 7 deletions.
63 changes: 59 additions & 4 deletions storage/google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,52 @@ def compose(self, sources, client=None):
_target_object=self)
self._set_properties(api_response)

def rewrite(self, source, token=None, client=None):
    """Issue one ``rewriteTo`` request copying ``source`` into this blob.

    A single call may not finish the rewrite; when it does not, the
    returned token can be passed back in via ``token`` on a later call.

    :type source: :class:`Blob`
    :param source: blob whose contents will be rewritten into this blob.

    :type token: str
    :param token: Optional. Token returned from an earlier, not-completed
                  call to rewrite the same source blob. When truthy it is
                  sent as the ``rewriteToken`` query parameter.

    :type client: :class:`~google.cloud.storage.client.Client` or
                  ``NoneType``
    :param client: Optional. The client to use. If not passed, falls back
                   to the ``client`` stored on the blob's bucket.

    :rtype: tuple
    :returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
              is a rewrite token (``None`` if the response reports the
              rewrite as done), ``bytes_rewritten`` is the number of
              bytes rewritten so far, and ``total_bytes`` is the total
              number of bytes to be rewritten.
    """
    client = self._require_client(client)

    # Destination-key headers first, then the "copy source" variants for
    # the source blob's key are layered on top of the same dict.
    headers = _get_encryption_headers(self._encryption_key)
    source_headers = _get_encryption_headers(
        source._encryption_key, source=True)
    headers.update(source_headers)

    query_params = {'rewriteToken': token} if token else {}

    api_response = client.connection.api_request(
        method='POST',
        path=source.path + '/rewriteTo' + self.path,
        query_params=query_params,
        data=self._properties,
        headers=headers,
        _target_object=self,
    )

    # Adopt the server-side resource as this blob's properties.
    self._set_properties(api_response['resource'])
    rewritten = int(api_response['totalBytesRewritten'])
    size = int(api_response['objectSize'])

    # 'rewriteToken' is only looked up when the response is not done.
    next_token = (
        None if api_response['done'] else api_response['rewriteToken'])
    return next_token, rewritten, size

cache_control = _scalar_property('cacheControl')
"""HTTP 'Cache-Control' header for this object.
Expand Down Expand Up @@ -938,12 +984,16 @@ def __init__(self, bucket_name, object_name):
self._relative_path = ''


def _get_encryption_headers(key):
def _get_encryption_headers(key, source=False):
"""Builds customer encryption key headers
:type key: bytes
:param key: 32 byte key to build request key and hash.
:type source: bool
:param source: If true, return headers for the "source" blob; otherwise,
return headers for the "destination" blob.
:rtype: dict
:returns: dict of HTTP headers being sent in request.
"""
Expand All @@ -955,8 +1005,13 @@ def _get_encryption_headers(key):
key_hash = base64.b64encode(key_hash).rstrip()
key = base64.b64encode(key).rstrip()

if source:
prefix = 'X-Goog-Copy-Source-Encryption-'
else:
prefix = 'X-Goog-Encryption-'

return {
'X-Goog-Encryption-Algorithm': 'AES256',
'X-Goog-Encryption-Key': _bytes_to_unicode(key),
'X-Goog-Encryption-Key-Sha256': _bytes_to_unicode(key_hash),
prefix + 'Algorithm': 'AES256',
prefix + 'Key': _bytes_to_unicode(key),
prefix + 'Key-Sha256': _bytes_to_unicode(key_hash),
}
159 changes: 156 additions & 3 deletions storage/unit_tests/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,6 +1202,159 @@ def test_compose_w_additional_property_changes(self):
self.assertEqual(kw[0]['path'], '/b/name/o/%s/compose' % DESTINATION)
self.assertEqual(kw[0]['data'], SENT)

def test_rewrite_other_bucket_other_name_no_encryption_partial(self):
    """Rewrite to a different bucket and name, no keys, response not done.

    The canned response has ``done`` false, so the rewrite token must be
    returned to the caller; neither blob has an encryption key, so no
    encryption headers of either flavor may appear in the request.
    """
    from six.moves.http_client import OK

    SOURCE_BLOB = 'source'
    DEST_BLOB = 'dest'
    DEST_BUCKET = 'other-bucket'
    TOKEN = 'TOKEN'
    RESPONSE = {
        'totalBytesRewritten': 33,
        'objectSize': 42,
        'done': False,
        'rewriteToken': TOKEN,
        'resource': {'etag': 'DEADBEEF'},
    }
    connection = _Connection(({'status': OK}, RESPONSE))
    client = _Client(connection)
    source_blob = self._makeOne(
        SOURCE_BLOB, bucket=_Bucket(client=client))
    dest_blob = self._makeOne(
        DEST_BLOB, bucket=_Bucket(client=client, name=DEST_BUCKET))

    next_token, bytes_done, bytes_total = dest_blob.rewrite(source_blob)

    self.assertEqual(next_token, TOKEN)
    self.assertEqual(bytes_done, 33)
    self.assertEqual(bytes_total, 42)

    # Exactly one API request should have been recorded.
    requests = connection._requested
    self.assertEqual(len(requests), 1)
    request = requests[0]
    self.assertEqual(request['method'], 'POST')
    expected_path = '/b/name/o/%s/rewriteTo/b/%s/o/%s' % (
        SOURCE_BLOB, DEST_BUCKET, DEST_BLOB)
    self.assertEqual(request['path'], expected_path)
    self.assertEqual(request['query_params'], {})
    self.assertEqual(request['data'], {})

    # Normalize header names to title case before checking them.
    headers = {}
    for key, value in request['headers'].items():
        headers[key.title()] = str(value)

    for absent in (
            'X-Goog-Copy-Source-Encryption-Algorithm',
            'X-Goog-Copy-Source-Encryption-Key',
            'X-Goog-Copy-Source-Encryption-Key-Sha256',
            'X-Goog-Encryption-Algorithm',
            'X-Goog-Encryption-Key',
            'X-Goog-Encryption-Key-Sha256'):
        self.assertNotIn(absent, headers)

def test_rewrite_same_name_no_old_key_new_key_done(self):
    """Rewrite a keyless blob onto the same name with a key; response done.

    Only the destination blob carries an encryption key, so the request
    must include the ``X-Goog-Encryption-*`` headers and none of the
    ``X-Goog-Copy-Source-Encryption-*`` ones. The canned response is
    ``done``, so the returned token must be ``None``.
    """
    import base64
    import hashlib
    from six.moves.http_client import OK

    KEY = b'01234567890123456789012345678901'  # 32 bytes
    KEY_B64 = base64.b64encode(KEY).rstrip().decode('ascii')
    KEY_HASH_B64 = base64.b64encode(
        hashlib.sha256(KEY).digest()).rstrip().decode('ascii')
    BLOB_NAME = 'blob'
    RESPONSE = {
        'totalBytesRewritten': 42,
        'objectSize': 42,
        'done': True,
        'resource': {'etag': 'DEADBEEF'},
    }
    connection = _Connection(({'status': OK}, RESPONSE))
    client = _Client(connection)
    bucket = _Bucket(client=client)
    plain = self._makeOne(BLOB_NAME, bucket=bucket)
    encrypted = self._makeOne(BLOB_NAME, bucket=bucket, encryption_key=KEY)

    next_token, bytes_done, bytes_total = encrypted.rewrite(plain)

    self.assertIsNone(next_token)
    self.assertEqual(bytes_done, 42)
    self.assertEqual(bytes_total, 42)

    # Exactly one API request should have been recorded.
    requests = connection._requested
    self.assertEqual(len(requests), 1)
    request = requests[0]
    self.assertEqual(request['method'], 'POST')
    expected_path = '/b/name/o/%s/rewriteTo/b/name/o/%s' % (
        BLOB_NAME, BLOB_NAME)
    self.assertEqual(request['path'], expected_path)
    self.assertEqual(request['query_params'], {})
    self.assertEqual(request['data'], {})

    # Normalize header names to title case before checking them.
    headers = {}
    for key, value in request['headers'].items():
        headers[key.title()] = str(value)

    for absent in (
            'X-Goog-Copy-Source-Encryption-Algorithm',
            'X-Goog-Copy-Source-Encryption-Key',
            'X-Goog-Copy-Source-Encryption-Key-Sha256'):
        self.assertNotIn(absent, headers)
    self.assertEqual(headers['X-Goog-Encryption-Algorithm'], 'AES256')
    self.assertEqual(headers['X-Goog-Encryption-Key'], KEY_B64)
    self.assertEqual(headers['X-Goog-Encryption-Key-Sha256'], KEY_HASH_B64)

def test_rewrite_same_name_no_key_new_key_w_token(self):
    """Resume a same-name rewrite with a token, rotating between two keys.

    NOTE: despite "no_key" in the method name, the source blob here is
    built with its own encryption key, so both the source and the
    destination header sets are expected in the request.
    """
    import base64
    import hashlib
    from six.moves.http_client import OK

    SOURCE_KEY = b'01234567890123456789012345678901'  # 32 bytes
    SOURCE_KEY_B64 = base64.b64encode(SOURCE_KEY).rstrip().decode('ascii')
    SOURCE_KEY_HASH_B64 = base64.b64encode(
        hashlib.sha256(SOURCE_KEY).digest()).rstrip().decode('ascii')
    DEST_KEY = b'90123456789012345678901234567890'  # 32 bytes
    DEST_KEY_B64 = base64.b64encode(DEST_KEY).rstrip().decode('ascii')
    DEST_KEY_HASH_B64 = base64.b64encode(
        hashlib.sha256(DEST_KEY).digest()).rstrip().decode('ascii')
    BLOB_NAME = 'blob'
    TOKEN = 'TOKEN'
    RESPONSE = {
        'totalBytesRewritten': 42,
        'objectSize': 42,
        'done': True,
        'resource': {'etag': 'DEADBEEF'},
    }
    connection = _Connection(({'status': OK}, RESPONSE))
    client = _Client(connection)
    bucket = _Bucket(client=client)
    source = self._makeOne(
        BLOB_NAME, bucket=bucket, encryption_key=SOURCE_KEY)
    dest = self._makeOne(BLOB_NAME, bucket=bucket, encryption_key=DEST_KEY)

    next_token, bytes_done, bytes_total = dest.rewrite(source, token=TOKEN)

    self.assertIsNone(next_token)
    self.assertEqual(bytes_done, 42)
    self.assertEqual(bytes_total, 42)

    # Exactly one API request should have been recorded.
    requests = connection._requested
    self.assertEqual(len(requests), 1)
    request = requests[0]
    self.assertEqual(request['method'], 'POST')
    expected_path = '/b/name/o/%s/rewriteTo/b/name/o/%s' % (
        BLOB_NAME, BLOB_NAME)
    self.assertEqual(request['path'], expected_path)
    # The token passed by the caller travels as a query parameter.
    self.assertEqual(request['query_params'], {'rewriteToken': TOKEN})
    self.assertEqual(request['data'], {})

    # Normalize header names to title case before checking them.
    headers = {}
    for key, value in request['headers'].items():
        headers[key.title()] = str(value)

    self.assertEqual(
        headers['X-Goog-Copy-Source-Encryption-Algorithm'], 'AES256')
    self.assertEqual(
        headers['X-Goog-Copy-Source-Encryption-Key'], SOURCE_KEY_B64)
    self.assertEqual(
        headers['X-Goog-Copy-Source-Encryption-Key-Sha256'],
        SOURCE_KEY_HASH_B64)
    self.assertEqual(headers['X-Goog-Encryption-Algorithm'], 'AES256')
    self.assertEqual(headers['X-Goog-Encryption-Key'], DEST_KEY_B64)
    self.assertEqual(
        headers['X-Goog-Encryption-Key-Sha256'], DEST_KEY_HASH_B64)

def test_cache_control_getter(self):
BLOB_NAME = 'blob-name'
bucket = _Bucket()
Expand Down Expand Up @@ -1555,17 +1708,17 @@ def request(self, uri, method, headers, body, **kw):


class _Bucket(object):
path = '/b/name'
name = 'name'

def __init__(self, client=None):
def __init__(self, client=None, name='name'):
if client is None:
connection = _Connection()
client = _Client(connection)
self.client = client
self._blobs = {}
self._copied = []
self._deleted = []
self.name = name
self.path = '/b/' + name

def delete_blob(self, blob_name, client=None):
del self._blobs[blob_name]
Expand Down
49 changes: 49 additions & 0 deletions system_tests/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,3 +436,52 @@ def test_compose_replace_existing_blob(self):

composed = original.download_as_string()
self.assertEqual(composed, BEFORE + TO_APPEND)


class TestStorageRewrite(TestStorageFiles):
    """System tests exercising ``Blob.rewrite`` with encryption keys."""

    FILENAMES = ('file01.txt',)

    def test_rewrite_create_new_blob_add_encryption_key(self):
        """Rewrite an unencrypted blob into a new blob that has a key."""
        local_file = self.FILES['simple']

        source = self.bucket.blob('source')
        source.upload_from_filename(local_file['path'])
        self.case_blobs_to_delete.append(source)
        source_data = source.download_as_string()
        expected_size = len(source_data)

        KEY = os.urandom(32)
        dest = self.bucket.blob('dest', encryption_key=KEY)
        token, rewritten, total = dest.rewrite(source)
        self.case_blobs_to_delete.append(dest)

        self.assertEqual(token, None)
        self.assertEqual(rewritten, expected_size)
        self.assertEqual(total, expected_size)

        # Both blobs are downloaded again and must match byte-for-byte.
        source_payload = source.download_as_string()
        dest_payload = dest.download_as_string()
        self.assertEqual(source_payload, dest_payload)

    def test_rewrite_rotate_encryption_key(self):
        """Rewrite a blob onto its own name using a different key."""
        BLOB_NAME = 'rotating-keys'
        local_file = self.FILES['simple']

        SOURCE_KEY = os.urandom(32)
        source = self.bucket.blob(BLOB_NAME, encryption_key=SOURCE_KEY)
        source.upload_from_filename(local_file['path'])
        self.case_blobs_to_delete.append(source)
        source_data = source.download_as_string()
        expected_size = len(source_data)

        DEST_KEY = os.urandom(32)
        dest = self.bucket.blob(BLOB_NAME, encryption_key=DEST_KEY)
        token, rewritten, total = dest.rewrite(source)
        # 'dest' is deliberately not added to 'self.case_blobs_to_delete':
        # it names the same object as 'source', which is already queued.

        self.assertEqual(token, None)
        self.assertEqual(rewritten, expected_size)
        self.assertEqual(total, expected_size)

        rotated_payload = dest.download_as_string()
        self.assertEqual(rotated_payload, source_data)

0 comments on commit b797f7c

Please sign in to comment.