diff --git a/src/sentry/interfaces/http.py b/src/sentry/interfaces/http.py index bc5c96c39e3d16..d1e9bc3c0ee6d9 100644 --- a/src/sentry/interfaces/http.py +++ b/src/sentry/interfaces/http.py @@ -19,6 +19,7 @@ from sentry.interfaces.base import Interface, InterfaceValidationError, prune_empty_keys from sentry.interfaces.schemas import validate_and_default_interface +from sentry.utils import json from sentry.utils.safe import trim, trim_dict, trim_pairs from sentry.utils.http import heuristic_decode from sentry.utils.validators import validate_ip @@ -86,6 +87,10 @@ def fix_broken_encoding(value): return value +def jsonify(value): + return to_bytes(value) if isinstance(value, six.string_types) else json.dumps(value) + + class Http(Interface): """ The Request information is stored in the Http interface. Two arguments @@ -147,18 +152,22 @@ def to_python(cls, data): query_string = data.get('query_string') or query_bit if query_string: - # if querystring was a dict, convert it to a string - if isinstance(query_string, dict): - query_string = urlencode( - [(to_bytes(k), to_bytes(v)) for k, v in query_string.items()] - ) - else: + if isinstance(query_string, six.string_types): if query_string[0] == '?': - # remove '?' prefix query_string = query_string[1:] + query_string = parse_qsl(query_string, keep_blank_values=True) + elif isinstance(query_string, dict): + query_string = [(to_bytes(k), jsonify(v)) for k, v in six.iteritems(query_string)] + elif isinstance(query_string, list): + query_string = [ + tuple(tup) for tup in query_string + if isinstance(tup, (tuple, list)) and len(tup) == 2 + ] + else: + query_string = [] kwargs['query_string'] = trim(query_string, 4096) else: - kwargs['query_string'] = '' + kwargs['query_string'] = [] fragment = data.get('fragment') or fragment_bit @@ -190,7 +199,7 @@ def to_python(cls, data): inferred_content_type = data.get('inferred_content_type', content_type) if 'inferred_content_type' not in data and not isinstance(body, dict): - body, inferred_content_type = heuristic_decode(body, content_type) + inferred_content_type = heuristic_decode(body, content_type) if body: body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE) @@ -230,7 +239,7 @@ def to_json(self): def full_url(self): url = self.url if self.query_string: - url = url + '?' + self.query_string + url = url + '?' + urlencode(self.query_string) if self.fragment: url = url + '#' + self.fragment return url @@ -242,7 +251,7 @@ def to_email_html(self, event, **kwargs): 'url': self.full_url, 'short_url': self.url, 'method': self.method, - 'query_string': self.query_string, + 'query_string': urlencode(self.query_string), 'fragment': self.fragment, } ) diff --git a/src/sentry/interfaces/schemas.py b/src/sentry/interfaces/schemas.py index 98a180e8296b43..789f5bd6471b01 100644 --- a/src/sentry/interfaces/schemas.py +++ b/src/sentry/interfaces/schemas.py @@ -63,7 +63,16 @@ def apierror(message="Invalid data"): 'minLength': 1, }, 'method': {'type': 'string'}, - 'query_string': {'type': ['string', 'object']}, + 'query_string': { + 'anyOf': [ + {'type': ['string', 'object']}, + {'type': 'array', 'items': { + 'type': 'array', + 'maxItems': 2, + 'minItems': 2, + }}, + ], + }, 'inferred_content_type': {'type': 'string'}, 'cookies': { 'anyOf': [ diff --git a/src/sentry/static/sentry/app/components/events/interfaces/richHttpContent.jsx b/src/sentry/static/sentry/app/components/events/interfaces/richHttpContent.jsx index d4919e02352df7..df4d3967cfcc9b 100644 --- a/src/sentry/static/sentry/app/components/events/interfaces/richHttpContent.jsx +++ b/src/sentry/static/sentry/app/components/events/interfaces/richHttpContent.jsx @@ -1,6 +1,5 @@ import PropTypes from 'prop-types'; import React from 'react'; -import queryString from 'query-string'; import {objectIsEmpty} from 'app/utils'; import {objectToSortedTupleArray} from 'app/components/events/interfaces/utils'; @@ -33,12 +32,7 @@ class RichHttpContent extends React.Component { try { // Sentry API abbreviates long query string values, sometimes resulting in // an un-parsable querystring ... stay safe kids - return ( - - ); + return ; } catch (e) { return
{data}
; } @@ -48,7 +42,7 @@ class RichHttpContent extends React.Component { let data = this.props.data; return (
- {data.query && ( + {!objectIsEmpty(data.query) && ( {this.getQueryStringOrRaw(data.query)} diff --git a/src/sentry/utils/data_scrubber.py b/src/sentry/utils/data_scrubber.py index 6461b23c78d334..ddddd08c18f3e6 100644 --- a/src/sentry/utils/data_scrubber.py +++ b/src/sentry/utils/data_scrubber.py @@ -104,8 +104,8 @@ def apply(self, data): data['contexts'][key] = varmap(self.sanitize, value) def sanitize(self, key, value): - if value is None: - return + if value is None or value == '': + return value if isinstance(key, six.string_types): key = key.lower() diff --git a/src/sentry/utils/http.py b/src/sentry/utils/http.py index 7ac23514234bff..2116e7e533edf6 100644 --- a/src/sentry/utils/http.py +++ b/src/sentry/utils/http.py @@ -256,12 +256,13 @@ def heuristic_decode(data, possible_content_type=None): for decoding_type, decoder in decoders: try: - return (decoder(data), decoding_type) + decoder(data) except Exception: # Try another decoder continue + return decoding_type - return (data, inferred_content_type) + return inferred_content_type def percent_encode(val): diff --git a/src/sentry/utils/safe.py b/src/sentry/utils/safe.py index 072db67203ac86..cec7ca959cb868 100644 --- a/src/sentry/utils/safe.py +++ b/src/sentry/utils/safe.py @@ -90,6 +90,8 @@ def trim( _size += len(force_text(trim_v)) if _size >= max_size: break + if isinstance(value, tuple): + result = tuple(result) elif isinstance(value, six.string_types): result = truncatechars(value, max_size - _size) diff --git a/tests/sentry/interfaces/test_http.py b/tests/sentry/interfaces/test_http.py index 60013aae30f516..73f919c324740e 100644 --- a/tests/sentry/interfaces/test_http.py +++ b/tests/sentry/interfaces/test_http.py @@ -28,7 +28,7 @@ def test_basic(self): assert result.url == 'http://example.com' assert result.method is None assert result.fragment == '' - assert result.query_string == '' + assert result.query_string == [] assert result.data is None assert result.cookies == [] assert result.headers == [] @@ -49,7 +49,7 @@ def test_full(self): ) ) assert result.method == 'GET' - assert result.query_string == 'foo=bar' + assert result.query_string == [('foo', 'bar')] assert result.fragment == 'foobar' assert result.cookies == [('foo', 'bar')] assert result.headers == [('X-Foo-Bar', 'baz')] @@ -61,7 +61,14 @@ def test_query_string_as_dict(self): url='http://example.com', query_string={'foo': 'bar'}, )) - assert result.query_string == 'foo=bar' + assert result.query_string == [('foo', 'bar')] + + def test_query_string_as_pairlist(self): + result = Http.to_python(dict( + url='http://example.com', + query_string=[['foo', 'bar']], + )) + assert result.query_string == [('foo', 'bar')] def test_query_string_as_dict_unicode(self): result = Http.to_python( @@ -70,7 +77,7 @@ def test_query_string_as_dict_unicode(self): query_string={'foo': u'\N{SNOWMAN}'}, ) ) - assert result.query_string == 'foo=%E2%98%83' + assert result.query_string == [('foo', '\xe2\x98\x83')] def test_data_as_dict(self): result = Http.to_python(dict( @@ -79,7 +86,7 @@ def test_data_as_dict(self): )) assert result.data == {'foo': 'bar'} - def test_form_encoded_data(self): + def test_urlencoded_data(self): result = Http.to_python( dict( url='http://example.com', @@ -87,7 +94,43 @@ def test_form_encoded_data(self): data='foo=bar', ) ) - assert result.data == {'foo': ['bar']} + + assert result.data == 'foo=bar' + assert result.inferred_content_type == 'application/x-www-form-urlencoded' + + def test_infer_urlencoded_content_type(self): + result = Http.to_python( + dict( + url='http://example.com', + data='foo=bar', + ) + ) + + assert result.data == 'foo=bar' + assert result.inferred_content_type == 'application/x-www-form-urlencoded' + + def test_json_data(self): + result = Http.to_python( + dict( + url='http://example.com', + headers={'Content-Type': 'application/json'}, + data='{"foo":"bar"}', + ) + ) + + assert result.data == '{"foo":"bar"}' + assert result.inferred_content_type == 'application/json' + + def test_infer_json_content_type(self): + result = Http.to_python( + dict( + url='http://example.com', + data='{"foo":"bar"}', + ) + ) + + assert result.data == '{"foo":"bar"}' + assert result.inferred_content_type == 'application/json' def test_cookies_as_string(self): result = Http.to_python(dict( diff --git a/tests/sentry/utils/http/tests.py b/tests/sentry/utils/http/tests.py index 43ddc424413f83..c4a8d8e06e545d 100644 --- a/tests/sentry/utils/http/tests.py +++ b/tests/sentry/utils/http/tests.py @@ -351,30 +351,24 @@ class HeuristicDecodeTestCase(TestCase): url_body = 'key=value&key2=value2' def test_json(self): - data, content_type = heuristic_decode(self.json_body, 'application/json') - assert data == {'key': 'value', 'key2': 'value2'} + content_type = heuristic_decode(self.json_body, 'application/json') assert content_type == 'application/json' def test_url_encoded(self): - data, content_type = heuristic_decode(self.url_body, 'application/x-www-form-urlencoded') - assert data == {'key': ['value'], 'key2': ['value2']} + content_type = heuristic_decode(self.url_body, 'application/x-www-form-urlencoded') assert content_type == 'application/x-www-form-urlencoded' def test_possible_type_mismatch(self): - data, content_type = heuristic_decode(self.json_body, 'application/x-www-form-urlencoded') - assert data == {'key': 'value', 'key2': 'value2'} + content_type = heuristic_decode(self.json_body, 'application/x-www-form-urlencoded') assert content_type == 'application/json' - data, content_type = heuristic_decode(self.url_body, 'application/json') - assert data == {'key': ['value'], 'key2': ['value2']} + content_type = heuristic_decode(self.url_body, 'application/json') assert content_type == 'application/x-www-form-urlencoded' def test_no_possible_type(self): - data, content_type = heuristic_decode(self.json_body) - assert data == {'key': 'value', 'key2': 'value2'} + content_type = heuristic_decode(self.json_body) assert content_type == 'application/json' def test_unable_to_decode(self): - data, content_type = heuristic_decode('string body', 'text/plain') - assert data == 'string body' + content_type = heuristic_decode('string body', 'text/plain') assert content_type == 'text/plain' diff --git a/tests/sentry/utils/test_data_scrubber.py b/tests/sentry/utils/test_data_scrubber.py index bad4cccc230f7d..e035d92ba21076 100644 --- a/tests/sentry/utils/test_data_scrubber.py +++ b/tests/sentry/utils/test_data_scrubber.py @@ -164,6 +164,32 @@ def test_querystring_as_string(self): } ) + def test_querystring_as_pairlist(self): + data = { + 'request': { + 'query_string': [ + ['foo', 'bar'], + ['password', 'hello'], + ['the_secret', 'hello'], + ['a_password_here', 'hello'], + ['api_key', 'secret_key'], + ], + } + } + + proc = SensitiveDataFilter() + proc.apply(data) + + assert 'request' in data + http = data['request'] + assert http['query_string'] == [ + ['foo', 'bar'], + ['password', FILTER_MASK], + ['the_secret', FILTER_MASK], + ['a_password_here', FILTER_MASK], + ['api_key', FILTER_MASK], + ] + def test_querystring_as_string_with_partials(self): data = { 'request': { @@ -178,6 +204,28 @@ def test_querystring_as_string_with_partials(self): http = data['request'] assert http['query_string'] == 'foo=bar&password&baz=bar' + def test_querystring_as_pairlist_with_partials(self): + data = { + 'request': { + 'query_string': [ + ['foo', 'bar'], + ['password', ''], + ['baz', 'bar'], + ] + } + } + + proc = SensitiveDataFilter() + proc.apply(data) + + assert 'request' in data + http = data['request'] + assert http['query_string'] == [ + ['foo', 'bar'], + ['password', ''], + ['baz', 'bar'], + ] + def test_sanitize_additional_sensitive_fields(self): additional_sensitive_dict = {'fieldy_field': 'value', 'moar_other_field': 'another value'} data = {'extra': dict(list(VARS.items()) + list(additional_sensitive_dict.items()))} diff --git a/tests/sentry/web/api/tests.py b/tests/sentry/web/api/tests.py index 8898e0ba59dab2..2be3afe8fc7f62 100644 --- a/tests/sentry/web/api/tests.py +++ b/tests/sentry/web/api/tests.py @@ -464,12 +464,7 @@ def test_scrub_data_off(self, mock_insert_data_to_database): assert resp.status_code == 200, (resp.status_code, resp.content) call_data = mock_insert_data_to_database.call_args[0][0] - assert call_data['request']['data'] == { - 'password': ['lol'], - 'foo': ['1'], - 'bar': ['2'], - 'baz': ['3'] - } + assert call_data['request']['data'] == "password=lol&foo=1&bar=2&baz=3" @mock.patch('sentry.coreapi.ClientApiHelper.insert_data_to_database') def test_scrub_data_on(self, mock_insert_data_to_database): @@ -490,12 +485,7 @@ def test_scrub_data_on(self, mock_insert_data_to_database): assert resp.status_code == 200, (resp.status_code, resp.content) call_data = mock_insert_data_to_database.call_args[0][0] - assert call_data['request']['data'] == { - 'password': ['lol'], - 'foo': ['1'], - 'bar': ['2'], - 'baz': ['3'] - } + assert call_data['request']['data'] == "password=lol&foo=1&bar=2&baz=3" @mock.patch('sentry.coreapi.ClientApiHelper.insert_data_to_database') def test_scrub_data_defaults(self, mock_insert_data_to_database): @@ -516,12 +506,7 @@ def test_scrub_data_defaults(self, mock_insert_data_to_database): assert resp.status_code == 200, (resp.status_code, resp.content) call_data = mock_insert_data_to_database.call_args[0][0] - assert call_data['request']['data'] == { - 'password': ['[Filtered]'], - 'foo': ['1'], - 'bar': ['2'], - 'baz': ['3'] - } + assert call_data['request']['data'] == "password=[Filtered]&foo=1&bar=2&baz=3" @mock.patch('sentry.coreapi.ClientApiHelper.insert_data_to_database') def test_scrub_data_sensitive_fields(self, mock_insert_data_to_database): @@ -543,12 +528,7 @@ def test_scrub_data_sensitive_fields(self, mock_insert_data_to_database): assert resp.status_code == 200, (resp.status_code, resp.content) call_data = mock_insert_data_to_database.call_args[0][0] - assert call_data['request']['data'] == { - 'password': ['[Filtered]'], - 'foo': ['[Filtered]'], - 'bar': ['[Filtered]'], - 'baz': ['3'] - } + assert call_data['request']['data'] == "password=[Filtered]&foo=[Filtered]&bar=[Filtered]&baz=3" @mock.patch('sentry.coreapi.ClientApiHelper.insert_data_to_database') def test_scrub_data_org_override(self, mock_insert_data_to_database): @@ -571,12 +551,7 @@ def test_scrub_data_org_override(self, mock_insert_data_to_database): assert resp.status_code == 200, (resp.status_code, resp.content) call_data = mock_insert_data_to_database.call_args[0][0] - assert call_data['request']['data'] == { - 'password': ['[Filtered]'], - 'foo': ['1'], - 'bar': ['2'], - 'baz': ['3'] - } + assert call_data['request']['data'] == "password=[Filtered]&foo=1&bar=2&baz=3" @mock.patch('sentry.coreapi.ClientApiHelper.insert_data_to_database') def test_scrub_data_org_override_sensitive_fields(self, mock_insert_data_to_database): @@ -599,12 +574,7 @@ def test_scrub_data_org_override_sensitive_fields(self, mock_insert_data_to_data assert resp.status_code == 200, (resp.status_code, resp.content) call_data = mock_insert_data_to_database.call_args[0][0] - assert call_data['request']['data'] == { - 'password': ['[Filtered]'], - 'foo': ['[Filtered]'], - 'bar': ['[Filtered]'], - 'baz': ['[Filtered]'] - } + assert call_data['request']['data'] == "password=[Filtered]&foo=[Filtered]&bar=[Filtered]&baz=[Filtered]" @mock.patch('sentry.coreapi.ClientApiHelper.insert_data_to_database') def test_uses_client_as_sdk(self, mock_insert_data_to_database):