Validate S3 metadata only contains ascii chars #861

Merged (2 commits) on Apr 4, 2016
1 change: 1 addition & 0 deletions botocore/exceptions.py
@@ -11,6 +11,7 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import unicode_literals
from botocore.vendored.requests.exceptions import ConnectionError


36 changes: 36 additions & 0 deletions botocore/handlers.py
@@ -450,6 +450,38 @@ def document_base64_encoding(param):
return append.append_documentation


def validate_ascii_metadata(params, **kwargs):
"""Verify S3 Metadata only contains ascii characters.

From: http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html

"Amazon S3 stores user-defined metadata in lowercase. Each name, value pair
must conform to US-ASCII when using REST and UTF-8 when using SOAP or
browser-based uploads via POST."

"""
metadata = params.get('Metadata')
if not metadata or not isinstance(metadata, dict):
Review comment (Contributor): Yeah, that is an interesting problem, in that we cannot use the validator to check that the parameter passed by the user is correct prior to the handler running. It would be interesting to expose a new event down the line, past the validation, or a helper method to run a quick validation, if we run into more of these scenarios in the future.

Review comment (Member, Author): I looked through several before-parameter-build event handlers, and there are several that are susceptible to this problem. I'm thinking it's probably worth having this event in the near future.

# We have to at least type check the metadata as a dict type
# because this handler is called before param validation.
# We'll go ahead and return because the param validator will
# give a descriptive error message for us.
# We might need a post-param validation event.
return
for key, value in metadata.items():
try:
key.encode('ascii')
value.encode('ascii')
except UnicodeEncodeError as e:
error_msg = (
'Non ascii characters found in S3 metadata '
'for key "%s", value: "%s". \nS3 metadata can only '
'contain ASCII characters. ' % (key, value)
)
raise ParamValidationError(
report=error_msg)


def fix_route53_ids(params, model, **kwargs):
"""
Check for and split apart Route53 resource IDs, setting
@@ -753,6 +785,10 @@ def _replace_content(self, section):
handle_copy_source_param),
('before-parameter-build.s3.UploadPartCopy',
handle_copy_source_param),
('before-parameter-build.s3.CopyObject', validate_ascii_metadata),
('before-parameter-build.s3.PutObject', validate_ascii_metadata),
('before-parameter-build.s3.CreateMultipartUpload',
validate_ascii_metadata),
('docs.*.s3.CopyObject.complete-section', document_copy_source_form),
('docs.*.s3.UploadPartCopy.complete-section', document_copy_source_form),

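Following up on the review thread above: these hooks are wired to before-parameter-build events, so they fire before botocore's regular parameter validation, which is why validate_ascii_metadata type-checks Metadata itself and silently returns when it is missing or not a dict. A minimal illustrative sketch of that behavior, calling the handler directly rather than through the event system (not part of this PR):

from botocore.exceptions import ParamValidationError
from botocore.handlers import validate_ascii_metadata

# Missing or wrongly typed Metadata is skipped here; the regular param
# validator reports it later in the request pipeline.
validate_ascii_metadata({'NotMetadata': ''})
validate_ascii_metadata({'Metadata': 'not-a-dict'})

# Non-ASCII keys or values are rejected immediately, client side.
try:
    validate_ascii_metadata({'Metadata': {'note': u'\u2713'}})
except ParamValidationError as e:
    print(e)
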
10 changes: 10 additions & 0 deletions tests/functional/test_s3.py
@@ -40,6 +40,16 @@ def tearDown(self):
self.session_send_patch.stop()


class TestOnlyAsciiCharsAllowed(BaseS3OperationTest):
def test_validates_non_ascii_chars_trigger_validation_error(self):
self.http_session_send_mock.return_value = mock.Mock(status_code=200,
headers={},
content=b'')
with self.assertRaises(ParamValidationError):
self.client.put_object(Bucket='foo', Key='bar',
Metadata={'goodkey': 'good',
'non-ascii': u'\u2713'})

class TestS3GetBucketLifecycle(BaseS3OperationTest):
def test_multiple_transitions_returns_one(self):
http_response = mock.Mock()
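For completeness, a hedged usage sketch of what the functional test above exercises, seen from application code; the bucket, key, and region below are placeholders, not values from the PR:

import botocore.session
from botocore.exceptions import ParamValidationError

session = botocore.session.get_session()
s3 = session.create_client('s3', region_name='us-east-1')

try:
    s3.put_object(Bucket='my-bucket', Key='my-key', Body=b'data',
                  Metadata={'goodkey': 'good', 'non-ascii': u'\u2713'})
except ParamValidationError as e:
    # Raised locally by the new handler; no HTTP request is sent.
    print(e)
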
21 changes: 21 additions & 0 deletions tests/unit/test_handlers.py
@@ -517,6 +517,27 @@ def test_valid_bucket_name_period(self):
def test_validation_is_noop_if_no_bucket_param_exists(self):
self.assertIsNone(handlers.validate_bucket_name(params={}))

def test_validate_non_ascii_metadata_values(self):
with self.assertRaises(ParamValidationError):
handlers.validate_ascii_metadata({'Metadata': {'foo': u'\u2713'}})

def test_validate_non_ascii_metadata_keys(self):
with self.assertRaises(ParamValidationError):
handlers.validate_ascii_metadata(
{'Metadata': {u'\u2713': 'bar'}})

def test_validate_non_triggered_when_no_md_specified(self):
original = {'NotMetadata': ''}
copied = original.copy()
handlers.validate_ascii_metadata(copied)
self.assertEqual(original, copied)

def test_validation_passes_when_all_ascii_chars(self):
original = {'Metadata': {'foo': 'bar'}}
copied = original.copy()
handlers.validate_ascii_metadata(original)
self.assertEqual(original, copied)

def test_set_encoding_type(self):
params = {}
context = {}