Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v2] S3 high level checksums #8933

Merged
merged 22 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4291c82
Progress towards Motorcade high-level S3 commands support.
aemous Sep 18, 2024
39ba573
Revert experimental change.
aemous Sep 18, 2024
6a339ef
Implement validation for checksum-algorithm and checksum-mode.
aemous Sep 19, 2024
dfc8efd
Remove debugging infrastructure.
aemous Sep 19, 2024
9b48f5c
Implement unit tests for checksum parameter validation and S3Transfer…
aemous Sep 19, 2024
212c82f
Create skeletons for functional tests for flexible checksums implemen…
aemous Sep 19, 2024
eb6b771
Implement functional tests for flexible checksums support for high-le…
aemous Sep 20, 2024
730a4f3
Cleanup documentation and comments.
aemous Sep 20, 2024
a321446
Remove newline.
aemous Sep 20, 2024
2457b67
Generate new change entry.
aemous Sep 20, 2024
3c71f94
Commit s3 high-level checksums support for copy.
aemous Sep 23, 2024
9879de8
Update test function and class names. Enable checksum-algorithm flag …
aemous Sep 23, 2024
4ebe30a
Swap format operation in favor of formatted string syntax.
aemous Sep 23, 2024
c23f797
Fix failing CI tests and bugfix.
aemous Sep 24, 2024
35ccf56
Delete merged file.
aemous Sep 24, 2024
b2bc1ae
Iterate on feedback
aemous Sep 30, 2024
a821f70
Update tests in response to feedback.
aemous Sep 30, 2024
1fed639
Update functional tests based on feedback.
aemous Sep 30, 2024
3a7dda6
Update tests based on feedback.
aemous Sep 30, 2024
08b65be
Made updates according to feedback.
aemous Oct 1, 2024
1c67e94
Updates
aemous Oct 2, 2024
320e75e
Update test to avoid race condition.
aemous Oct 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changes/next-release/feature-s3-46667.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "feature",
"category": "s3",
"description": "Adds ``--checksum-mode`` and ``--checksum-algorithm`` parameters to high-level ``s3`` commands."
}
37 changes: 36 additions & 1 deletion awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,17 @@
)
}

# Argument definition for ``--checksum-mode``; ``ENABLED`` is the only
# accepted choice.
CHECKSUM_MODE = dict(
    name='checksum-mode',
    choices=['ENABLED'],
    help_text=(
        'To retrieve the checksum, this mode must be enabled. If the object has a '
        'checksum, it will be verified.'
    ),
)

# Argument definition for ``--checksum-algorithm``; the value must be one of
# the listed algorithm names.
CHECKSUM_ALGORITHM = {
    'name': 'checksum-algorithm',
    'choices': ['CRC32', 'SHA256', 'SHA1', 'CRC32C'],
    'help_text': 'Indicates the algorithm used to create the checksum for the object.'
}

TRANSFER_ARGS = [DRYRUN, QUIET, INCLUDE, EXCLUDE, ACL,
FOLLOW_SYMLINKS, NO_FOLLOW_SYMLINKS, NO_GUESS_MIME_TYPE,
SSE, SSE_C, SSE_C_KEY, SSE_KMS_KEY_ID, SSE_C_COPY_SOURCE,
Expand All @@ -489,7 +500,7 @@
CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE,
EXPIRES, SOURCE_REGION, ONLY_SHOW_ERRORS, NO_PROGRESS,
PAGE_SIZE, IGNORE_GLACIER_WARNINGS, FORCE_GLACIER_TRANSFER,
REQUEST_PAYER]
REQUEST_PAYER, CHECKSUM_MODE, CHECKSUM_ALGORITHM]


class S3Command(BasicCommand):
Expand Down Expand Up @@ -1276,6 +1287,17 @@ def _validate_path_args(self):
if self._should_emit_validate_s3_paths_warning():
self._emit_validate_s3_paths_warning()

if params.get('checksum_algorithm'):
self._raise_if_paths_type_incorrect_for_param(
CHECKSUM_ALGORITHM['name'],
params['paths_type'],
['locals3', 's3s3'])
if params.get('checksum_mode'):
self._raise_if_paths_type_incorrect_for_param(
CHECKSUM_MODE['name'],
params['paths_type'],
['s3local'])

# If the user provided local path does not exist, hard fail because
# we know that we will not be able to upload the file.
if 'locals3' == params['paths_type'] and not params['is_stream']:
Expand Down Expand Up @@ -1359,6 +1381,19 @@ def _raise_if_mv_same_paths(self, src, dest):
f"{self.parameters['src']} - {self.parameters['dest']}"
)

def _raise_if_paths_type_incorrect_for_param(self, param, paths_type, allowed_paths):
if paths_type not in allowed_paths:
expected_usage_map = {
'locals3': '<LocalPath> <S3Uri>',
's3s3': '<S3Uri> <S3Uri>',
's3local': '<S3Uri> <LocalPath>',
's3': '<S3Uri>'
}
raise ParamValidationError(
f"Expected {param} parameter to be used with one of following path formats: "
f"{', '.join([expected_usage_map[path] for path in allowed_paths])}. Instead, received {expected_usage_map[paths_type]}."
)

def _normalize_s3_trailing_slash(self, paths):
for i, path in enumerate(paths):
if path.startswith('s3://'):
Expand Down
13 changes: 13 additions & 0 deletions awscli/customizations/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,12 +470,14 @@ def map_put_object_params(cls, request_params, cli_params):
cls._set_sse_request_params(request_params, cli_params)
cls._set_sse_c_request_params(request_params, cli_params)
cls._set_request_payer_param(request_params, cli_params)
cls._set_checksum_algorithm_param(request_params, cli_params)

@classmethod
def map_get_object_params(cls, request_params, cli_params):
    """Populate ``request_params`` for GetObject from the parsed CLI params."""
    # Each setter copies its own CLI option into ``request_params``; they
    # are applied in the listed order.
    setters = (
        cls._set_sse_c_request_params,
        cls._set_request_payer_param,
        cls._set_checksum_mode_param,
    )
    for apply_setter in setters:
        apply_setter(request_params, cli_params)

@classmethod
def map_get_object_tagging_params(cls, request_params, cli_params):
Expand All @@ -498,6 +500,7 @@ def map_copy_object_params(cls, request_params, cli_params):
cls._set_sse_c_and_copy_source_request_params(
request_params, cli_params)
cls._set_request_payer_param(request_params, cli_params)
cls._set_checksum_algorithm_param(request_params, cli_params)

@classmethod
def map_head_object_params(cls, request_params, cli_params):
Expand Down Expand Up @@ -540,6 +543,16 @@ def _set_request_payer_param(cls, request_params, cli_params):
if cli_params.get('request_payer'):
request_params['RequestPayer'] = cli_params['request_payer']

@classmethod
def _set_checksum_mode_param(cls, request_params, cli_params):
if cli_params.get('checksum_mode'):
request_params['ChecksumMode'] = cli_params['checksum_mode']

@classmethod
def _set_checksum_algorithm_param(cls, request_params, cli_params):
if cli_params.get('checksum_algorithm'):
request_params['ChecksumAlgorithm'] = cli_params['checksum_algorithm']

@classmethod
def _set_general_object_params(cls, request_params, cli_params):
# Parameters set in this method should be applicable to the following
Expand Down
80 changes: 80 additions & 0 deletions tests/functional/s3/test_cp_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,86 @@ def test_cp_with_error_and_warning_permissions(self):
self.assertIn('upload failed', stderr)
self.assertIn('warning: File has an invalid timestamp.', stderr)

def test_upload_with_checksum_algorithm_crc32(self):
    """Uploading with ``--checksum-algorithm CRC32`` sets it on PutObject."""
    full_path = self.files.create_file('foo.txt', 'contents')
    cmdline = f'{self.prefix} {full_path} s3://bucket/key.txt --checksum-algorithm CRC32'
    self.run_cmd(cmdline, expected_rc=0)
    # The first recorded call is expected to be the upload itself.
    self.assertEqual(self.operations_called[0][0].name, 'PutObject')
    self.assertEqual(self.operations_called[0][1]['ChecksumAlgorithm'], 'CRC32')

def test_upload_with_checksum_algorithm_crc32c(self):
    """Uploading with ``--checksum-algorithm CRC32C`` sets it on PutObject."""
    local_file = self.files.create_file('foo.txt', 'contents')
    command = f'{self.prefix} {local_file} s3://bucket/key.txt --checksum-algorithm CRC32C'
    self.run_cmd(command, expected_rc=0)
    upload_call = self.operations_called[0]
    self.assertEqual(upload_call[0].name, 'PutObject')
    self.assertEqual(upload_call[1]['ChecksumAlgorithm'], 'CRC32C')

def test_multipart_upload_with_checksum_algorithm_crc32(self):
    """A multipart upload forwards CRC32 to every multipart operation.

    The stubbed responses cover CreateMultipartUpload, two UploadPart calls
    and CompleteMultipartUpload; the per-part checksums returned by
    UploadPart must be echoed back in the completion request.
    """
    full_path = self.files.create_file('foo.txt', 'a' * 10 * (1024 ** 2))
    self.parsed_responses = [
        {'UploadId': 'foo'},
        {'ETag': 'foo-e1', 'ChecksumCRC32': 'foo-1'},
        {'ETag': 'foo-e2', 'ChecksumCRC32': 'foo-2'},
        {}
    ]
    cmdline = f'{self.prefix} {full_path} s3://bucket/key2.txt --checksum-algorithm CRC32'
    self.run_cmd(cmdline, expected_rc=0)
    self.assertEqual(len(self.operations_called), 4, self.operations_called)
    self.assertEqual(self.operations_called[0][0].name, 'CreateMultipartUpload')
    self.assertEqual(self.operations_called[0][1]['ChecksumAlgorithm'], 'CRC32')
    # Both part uploads (not only the first) must carry the algorithm.
    for upload_part_call in self.operations_called[1:3]:
        self.assertEqual(upload_part_call[0].name, 'UploadPart')
        self.assertEqual(upload_part_call[1]['ChecksumAlgorithm'], 'CRC32')
    self.assertEqual(self.operations_called[3][0].name, 'CompleteMultipartUpload')
    completed_parts = self.operations_called[3][1]['MultipartUpload']['Parts']
    self.assertIn(
        {'ETag': 'foo-e1', 'ChecksumCRC32': 'foo-1', 'PartNumber': 1},
        completed_parts)
    self.assertIn(
        {'ETag': 'foo-e2', 'ChecksumCRC32': 'foo-2', 'PartNumber': 2},
        completed_parts)

def test_copy_with_checksum_algorithm_crc32(self):
    """An S3-to-S3 copy with ``--checksum-algorithm CRC32`` sets it on CopyObject."""
    # Stubbed CopyObject response carrying a CRC32 checksum.
    copy_object_response = {
        'ETag': 'foo-1',
        'ChecksumCRC32': 'Tq0H4g=='
    }
    self.parsed_responses = [self.head_object_response(), copy_object_response]
    command = f'{self.prefix} s3://bucket1/key.txt s3://bucket2/key.txt --checksum-algorithm CRC32'
    self.run_cmd(command, expected_rc=0)
    copy_call = self.operations_called[1]
    self.assertEqual(copy_call[0].name, 'CopyObject')
    self.assertEqual(copy_call[1]['ChecksumAlgorithm'], 'CRC32')

def test_download_with_checksum_mode_crc32(self):
    """Downloading with ``--checksum-mode ENABLED`` sets ChecksumMode on GetObject."""
    # Stubbed GetObject response that reports a CRC32 checksum.
    get_object_response = {
        'ETag': 'foo-1',
        'ChecksumCRC32': 'Tq0H4g==',
        'Body': BytesIO(b'foo')
    }
    self.parsed_responses = [self.head_object_response(), get_object_response]
    command = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
    self.run_cmd(command, expected_rc=0)
    download_call = self.operations_called[1]
    self.assertEqual(download_call[0].name, 'GetObject')
    self.assertEqual(download_call[1]['ChecksumMode'], 'ENABLED')

def test_download_with_checksum_mode_crc32c(self):
    """``--checksum-mode ENABLED`` reaches GetObject when the object reports CRC32C."""
    # Stubbed GetObject response that reports a CRC32C checksum.
    get_object_response = {
        'ETag': 'foo-1',
        'ChecksumCRC32C': 'checksum',
        'Body': BytesIO(b'foo')
    }
    self.parsed_responses = [self.head_object_response(), get_object_response]
    command = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
    self.run_cmd(command, expected_rc=0)
    download_call = self.operations_called[1]
    self.assertEqual(download_call[0].name, 'GetObject')
    self.assertEqual(download_call[1]['ChecksumMode'], 'ENABLED')


class TestStreamingCPCommand(BaseAWSCommandParamsTest):
def test_streaming_upload(self):
Expand Down
23 changes: 23 additions & 0 deletions tests/functional/s3/test_mv_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,29 @@ def test_mv_does_not_delete_source_on_failed_put_tagging(self):
]
)

def test_upload_with_checksum_algorithm_crc32(self):
    """Uploading with ``--checksum-algorithm CRC32`` sets it on PutObject."""
    local_file = self.files.create_file('foo.txt', 'contents')
    command = f'{self.prefix} {local_file} s3://bucket/key.txt --checksum-algorithm CRC32'
    self.run_cmd(command, expected_rc=0)
    upload_call = self.operations_called[0]
    self.assertEqual(upload_call[0].name, 'PutObject')
    self.assertEqual(upload_call[1]['ChecksumAlgorithm'], 'CRC32')

def test_download_with_checksum_mode_crc32(self):
    """Downloading with ``--checksum-mode ENABLED`` sets ChecksumMode on GetObject."""
    # Stubbed GetObject response that reports a CRC32 checksum.
    get_object_response = {
        'ETag': 'foo-1',
        'ChecksumCRC32': 'checksum',
        'Body': BytesIO(b'foo')
    }
    # A delete response is queued after the download response.
    self.parsed_responses = [
        self.head_object_response(),
        get_object_response,
        self.delete_object_response()
    ]
    command = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
    self.run_cmd(command, expected_rc=0)
    download_call = self.operations_called[1]
    self.assertEqual(download_call[0].name, 'GetObject')
    self.assertEqual(download_call[1]['ChecksumMode'], 'ENABLED')


class TestMvWithCRTClient(BaseCRTTransferClientTest):
def test_upload_move_using_crt_client(self):
Expand Down
87 changes: 87 additions & 0 deletions tests/functional/s3/test_sync_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,93 @@ def test_with_copy_props(self):
]
)

def test_upload_with_checksum_algorithm_sha1(self):
    """Syncing a local directory up with ``--checksum-algorithm SHA1`` sets it on PutObject."""
    self.files.create_file('foo.txt', 'contents')
    cmdline = f'{self.prefix} {self.files.rootdir} s3://bucket/ --checksum-algorithm SHA1'
    self.run_cmd(cmdline, expected_rc=0)
    # The upload is the second recorded call (index 1).
    self.assertEqual(self.operations_called[1][0].name, 'PutObject')
    self.assertEqual(self.operations_called[1][1]['ChecksumAlgorithm'], 'SHA1')

def test_copy_with_checksum_algorithm_update_sha1(self):
    """A bucket-to-bucket sync with ``--checksum-algorithm SHA1`` sets it on CopyObject."""
    command = f'{self.prefix} s3://src-bucket/ s3://dest-bucket/ --checksum-algorithm SHA1'
    # Response for ListObjects on source bucket
    source_listing = {
        'Contents': [
            {
                'Key': 'mykey',
                'LastModified': '00:00:00Z',
                'Size': 100,
                'ChecksumAlgorithm': 'SHA1'
            }
        ],
        'CommonPrefixes': []
    }
    # Response for CopyObject
    copy_object_response = {'ChecksumSHA1': 'sha1-checksum'}
    self.parsed_responses = [
        source_listing,
        # Response for ListObjects on destination bucket
        self.list_objects_response([]),
        copy_object_response
    ]
    self.run_cmd(command, expected_rc=0)
    expected_copy_request = (
        'CopyObject', {
            'CopySource': {
                'Bucket': 'src-bucket',
                'Key': 'mykey'
            },
            'Bucket': 'dest-bucket',
            'Key': 'mykey',
            'ChecksumAlgorithm': 'SHA1'
        }
    )
    self.assert_operations_called(
        [
            self.list_objects_request('src-bucket'),
            self.list_objects_request('dest-bucket'),
            expected_copy_request
        ]
    )

def test_upload_with_checksum_algorithm_sha256(self):
    """Syncing a local directory up with ``--checksum-algorithm SHA256`` sets it on PutObject."""
    self.files.create_file('foo.txt', 'contents')
    command = f'{self.prefix} {self.files.rootdir} s3://bucket/ --checksum-algorithm SHA256'
    self.run_cmd(command, expected_rc=0)
    upload_call = self.operations_called[1]
    self.assertEqual(upload_call[0].name, 'PutObject')
    self.assertEqual(upload_call[1]['ChecksumAlgorithm'], 'SHA256')

def test_download_with_checksum_mode_sha1(self):
    """Syncing down with ``--checksum-mode ENABLED`` sets ChecksumMode on GetObject."""
    self.parsed_responses = [
        self.list_objects_response(['bucket']),
        # Mocked GetObject response with a checksum algorithm specified
        {
            'ETag': 'foo-1',
            'ChecksumSHA1': 'checksum',
            'Body': BytesIO(b'foo')
        }
    ]
    cmdline = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
    self.run_cmd(cmdline, expected_rc=0)
    # The listing call comes first, followed by the download.
    self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2')
    self.assertEqual(self.operations_called[1][0].name, 'GetObject')
    self.assertIn(('ChecksumMode', 'ENABLED'), self.operations_called[1][1].items())

def test_download_with_checksum_mode_sha256(self):
    """``--checksum-mode ENABLED`` reaches GetObject when the object reports SHA256."""
    # Stubbed GetObject response that reports a SHA256 checksum.
    get_object_response = {
        'ETag': 'foo-1',
        'ChecksumSHA256': 'checksum',
        'Body': BytesIO(b'foo')
    }
    self.parsed_responses = [
        self.list_objects_response(['bucket']),
        get_object_response
    ]
    command = f'{self.prefix} s3://bucket/foo {self.files.rootdir} --checksum-mode ENABLED'
    self.run_cmd(command, expected_rc=0)
    listing_call = self.operations_called[0]
    download_call = self.operations_called[1]
    self.assertEqual(listing_call[0].name, 'ListObjectsV2')
    self.assertEqual(download_call[0].name, 'GetObject')
    self.assertIn(('ChecksumMode', 'ENABLED'), download_call[1].items())


class TestSyncSourceRegion(BaseS3CLIRunnerTest):
def test_respects_source_region(self):
Expand Down
40 changes: 40 additions & 0 deletions tests/unit/customizations/s3/test_subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,46 @@ def test_validate_no_streaming_paths(self):
cmd_params.add_paths(paths)
self.assertFalse(cmd_params.parameters['is_stream'])

def test_validate_checksum_algorithm_download_error(self):
    """``checksum_algorithm`` is rejected for an S3-to-local ``cp``."""
    paths = ['s3://bucket/key', self.file_creator.rootdir]
    parameters = {'checksum_algorithm': 'CRC32'}
    cmd_params = CommandParameters('cp', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # The raised exception is available as ``cm.exception`` once the block
    # exits; ``cm.msg`` is only the optional ``msg=`` argument of
    # ``assertRaises`` and never holds the error text.
    self.assertIn(
        'Expected checksum-algorithm parameter to be used with one of following path formats',
        str(cm.exception)
    )

def test_validate_checksum_algorithm_sync_download_error(self):
    """``checksum_algorithm`` is rejected for an S3-to-local ``sync``."""
    paths = ['s3://bucket/key', self.file_creator.rootdir]
    parameters = {'checksum_algorithm': 'CRC32C'}
    cmd_params = CommandParameters('sync', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Check the message on the captured exception, after the block exits;
    # ``cm.msg`` does not contain the error text.
    self.assertIn(
        'Expected checksum-algorithm parameter to be used with one of following path formats',
        str(cm.exception)
    )

def test_validate_checksum_mode_upload_error(self):
    """``checksum_mode`` is rejected for a local-to-S3 ``cp``."""
    paths = [self.file_creator.rootdir, 's3://bucket/key']
    parameters = {'checksum_mode': 'ENABLED'}
    cmd_params = CommandParameters('cp', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Check the message on the captured exception, after the block exits;
    # ``cm.msg`` does not contain the error text.
    self.assertIn(
        'Expected checksum-mode parameter to be used with one of following path formats',
        str(cm.exception)
    )

def test_validate_checksum_mode_sync_upload_error(self):
    """``checksum_mode`` is rejected for a local-to-S3 ``sync``."""
    paths = [self.file_creator.rootdir, 's3://bucket/key']
    parameters = {'checksum_mode': 'ENABLED'}
    cmd_params = CommandParameters('sync', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Check the message on the captured exception, after the block exits;
    # ``cm.msg`` does not contain the error text.
    self.assertIn(
        'Expected checksum-mode parameter to be used with one of following path formats',
        str(cm.exception)
    )

def test_validate_checksum_mode_move_error(self):
    """``checksum_mode`` is rejected for an S3-to-S3 ``mv``."""
    paths = ['s3://bucket/key', 's3://bucket2/key']
    parameters = {'checksum_mode': 'ENABLED'}
    cmd_params = CommandParameters('mv', parameters, '')
    with self.assertRaises(ParamValidationError) as cm:
        cmd_params.add_paths(paths)
    # Check the message on the captured exception, after the block exits;
    # ``cm.msg`` does not contain the error text.
    self.assertIn(
        'Expected checksum-mode parameter to be used with one of following path formats',
        str(cm.exception)
    )

def test_validate_streaming_paths_error(self):
parameters = {'src': '-', 'dest': 's3://bucket'}
cmd_params = CommandParameters('sync', parameters, '')
Expand Down
Loading
Loading