Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 33 additions & 17 deletions src/sentry/interfaces/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

from sentry.interfaces.base import Interface, InterfaceValidationError, prune_empty_keys
from sentry.interfaces.schemas import validate_and_default_interface
from sentry.utils import json
from sentry.utils.strings import to_unicode
from sentry.utils.safe import trim, trim_dict, trim_pairs
from sentry.utils.http import heuristic_decode
from sentry.utils.validators import validate_ip
Expand All @@ -29,12 +31,6 @@
http_method_re = re.compile(r'^[A-Z\-_]{3,32}$')


def to_bytes(value):
    """Coerce ``value`` to a byte string.

    Text values are encoded as UTF-8; any other value is passed to
    ``six.binary_type`` unchanged.
    """
    is_text = isinstance(value, six.text_type)
    return value.encode('utf-8') if is_text else six.binary_type(value)


def format_headers(value):
if not value:
return ()
Expand Down Expand Up @@ -86,6 +82,10 @@ def fix_broken_encoding(value):
return value


def jsonify(value):
    """Return ``value`` as text.

    String values are passed through ``to_unicode``; every other value is
    serialized with ``json.dumps``.
    """
    if not isinstance(value, six.string_types):
        return json.dumps(value)
    return to_unicode(value)


class Http(Interface):
"""
The Request information is stored in the Http interface. Two arguments
Expand Down Expand Up @@ -141,24 +141,40 @@ def to_python(cls, data):
kwargs['method'] = None

if data.get('url', None):
scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])
url = to_unicode(data['url'])
# The JavaScript SDK used to send an ellipsis character for
# truncated URLs. Canonical URLs do not contain non-ASCII characters in
# either the path, query string or fragment, so we replace it with
# three dots (which is the behavior of other SDKs). This effectively
# makes the string two characters longer, but it will be trimmed
# again down below.
if url.endswith("\u2026"):
url = url[:-1] + "..."
scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
else:
scheme = netloc = path = query_bit = fragment_bit = None

query_string = data.get('query_string') or query_bit
if query_string:
# if querystring was a dict, convert it to a string
if isinstance(query_string, dict):
query_string = urlencode(
[(to_bytes(k), to_bytes(v)) for k, v in query_string.items()]
)
else:
if isinstance(query_string, six.string_types):
if query_string[0] == '?':
# remove '?' prefix
query_string = query_string[1:]
query_string = [
(to_unicode(k), jsonify(v))
for k, v in parse_qsl(query_string, keep_blank_values=True)
]
elif isinstance(query_string, dict):
query_string = [(to_unicode(k), jsonify(v)) for k, v in six.iteritems(query_string)]
elif isinstance(query_string, list):
query_string = [
tuple(tup) for tup in query_string
if isinstance(tup, (tuple, list)) and len(tup) == 2
]
else:
query_string = []
kwargs['query_string'] = trim(query_string, 4096)
else:
kwargs['query_string'] = ''
kwargs['query_string'] = []

fragment = data.get('fragment') or fragment_bit

Expand Down Expand Up @@ -230,7 +246,7 @@ def to_json(self):
def full_url(self):
url = self.url
if self.query_string:
url = url + '?' + self.query_string
url = url + '?' + urlencode(self.query_string)
if self.fragment:
url = url + '#' + self.fragment
return url
Expand All @@ -242,7 +258,7 @@ def to_email_html(self, event, **kwargs):
'url': self.full_url,
'short_url': self.url,
'method': self.method,
'query_string': self.query_string,
'query_string': urlencode(self.query_string),
'fragment': self.fragment,
}
)
Expand Down
11 changes: 10 additions & 1 deletion src/sentry/interfaces/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,16 @@ def apierror(message="Invalid data"):
'minLength': 1,
},
'method': {'type': 'string'},
'query_string': {'type': ['string', 'object']},
'query_string': {
'anyOf': [
{'type': ['string', 'object']},
{'type': 'array', 'items': {
'type': 'array',
'maxItems': 2,
'minItems': 2,
}},
],
},
'inferred_content_type': {'type': 'string'},
'cookies': {
'anyOf': [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import PropTypes from 'prop-types';
import React from 'react';
import queryString from 'query-string';

import {objectIsEmpty} from 'app/utils';
import {objectToSortedTupleArray} from 'app/components/events/interfaces/utils';
Expand Down Expand Up @@ -33,12 +32,7 @@ class RichHttpContent extends React.Component {
try {
// Sentry API abbreviates long query string values, sometimes resulting in
// an un-parsable querystring ... stay safe kids
return (
<KeyValueList
data={objectToSortedTupleArray(queryString.parse(data))}
isContextData={true}
/>
);
return <KeyValueList data={data} isContextData={true} />;
} catch (e) {
return <pre>{data}</pre>;
}
Expand All @@ -48,7 +42,7 @@ class RichHttpContent extends React.Component {
let data = this.props.data;
return (
<div>
{data.query && (
{!objectIsEmpty(data.query) && (
<ClippedBox title={t('Query String')}>
<ErrorBoundary mini>{this.getQueryStringOrRaw(data.query)}</ErrorBoundary>
</ClippedBox>
Expand Down
4 changes: 2 additions & 2 deletions src/sentry/utils/data_scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def apply(self, data):
data['contexts'][key] = varmap(self.sanitize, value)

def sanitize(self, key, value):
if value is None:
return
if value is None or value == '':
return value

if isinstance(key, six.string_types):
key = key.lower()
Expand Down
2 changes: 2 additions & 0 deletions src/sentry/utils/safe.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def trim(
_size += len(force_text(trim_v))
if _size >= max_size:
break
if isinstance(value, tuple):
result = tuple(result)

elif isinstance(value, six.string_types):
result = truncatechars(value, max_size - _size)
Expand Down
57 changes: 50 additions & 7 deletions tests/sentry/interfaces/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_basic(self):
assert result.url == 'http://example.com'
assert result.method is None
assert result.fragment == ''
assert result.query_string == ''
assert result.query_string == []
assert result.data is None
assert result.cookies == []
assert result.headers == []
Expand All @@ -49,7 +49,7 @@ def test_full(self):
)
)
assert result.method == 'GET'
assert result.query_string == 'foo=bar'
assert result.query_string == [('foo', 'bar')]
assert result.fragment == 'foobar'
assert result.cookies == [('foo', 'bar')]
assert result.headers == [('X-Foo-Bar', 'baz')]
Expand All @@ -61,16 +61,23 @@ def test_query_string_as_dict(self):
url='http://example.com',
query_string={'foo': 'bar'},
))
assert result.query_string == 'foo=bar'
assert result.query_string == [('foo', 'bar')]

def test_query_string_as_dict_unicode(self):
def test_query_string_as_pairlist(self):
    # A query string supplied as a list of two-item lists should come
    # back as a list of (key, value) tuples.
    payload = {
        'url': 'http://example.com',
        'query_string': [['foo', 'bar']],
    }
    http = Http.to_python(payload)
    assert http.query_string == [('foo', 'bar')]

def test_query_string_as_bytes(self):
result = Http.to_python(
dict(
url='http://example.com',
query_string={'foo': u'\N{SNOWMAN}'},
query_string=b'foo=\x00',
)
)
assert result.query_string == 'foo=%E2%98%83'
assert result.query_string == [('foo', '\x00')]

def test_data_as_dict(self):
result = Http.to_python(dict(
Expand All @@ -79,15 +86,51 @@ def test_data_as_dict(self):
))
assert result.data == {'foo': 'bar'}

def test_form_encoded_data(self):
def test_urlencoded_data(self):
    # Body sent with an explicit form-urlencoded Content-Type header:
    # the data is expected as a dict of lists and the content type is
    # reported back unchanged.
    payload = {
        'url': 'http://example.com',
        'headers': {'Content-Type': 'application/x-www-form-urlencoded'},
        'data': 'foo=bar',
    }
    http = Http.to_python(payload)

    assert http.data == {'foo': ['bar']}
    assert http.inferred_content_type == 'application/x-www-form-urlencoded'

def test_infer_urlencoded_content_type(self):
    # No Content-Type header is supplied; the form-urlencoded type is
    # expected to be inferred from the body alone.
    payload = {
        'url': 'http://example.com',
        'data': 'foo=bar',
    }
    http = Http.to_python(payload)

    assert http.data == {'foo': ['bar']}
    assert http.inferred_content_type == 'application/x-www-form-urlencoded'

def test_json_data(self):
    # Body sent with an explicit JSON Content-Type header: the data is
    # expected as the decoded object.
    payload = {
        'url': 'http://example.com',
        'headers': {'Content-Type': 'application/json'},
        'data': '{"foo":"bar"}',
    }
    http = Http.to_python(payload)

    assert http.data == {'foo': 'bar'}
    assert http.inferred_content_type == 'application/json'

def test_infer_json_content_type(self):
    # No Content-Type header is supplied; the JSON type is expected to
    # be inferred from the body alone.
    payload = {
        'url': 'http://example.com',
        'data': '{"foo":"bar"}',
    }
    http = Http.to_python(payload)

    assert http.data == {'foo': 'bar'}
    assert http.inferred_content_type == 'application/json'

def test_cookies_as_string(self):
result = Http.to_python(dict(
Expand Down
48 changes: 48 additions & 0 deletions tests/sentry/utils/test_data_scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,32 @@ def test_querystring_as_string(self):
}
)

def test_querystring_as_pairlist(self):
    # Pair-list query strings: values stored under sensitive-looking keys
    # are expected to be replaced by FILTER_MASK, the rest left intact.
    pairs = [
        ['foo', 'bar'],
        ['password', 'hello'],
        ['the_secret', 'hello'],
        ['a_password_here', 'hello'],
        ['api_key', 'secret_key'],
    ]
    data = {'request': {'query_string': pairs}}

    SensitiveDataFilter().apply(data)

    assert 'request' in data
    expected = [
        ['foo', 'bar'],
        ['password', FILTER_MASK],
        ['the_secret', FILTER_MASK],
        ['a_password_here', FILTER_MASK],
        ['api_key', FILTER_MASK],
    ]
    assert data['request']['query_string'] == expected

def test_querystring_as_string_with_partials(self):
data = {
'request': {
Expand All @@ -178,6 +204,28 @@ def test_querystring_as_string_with_partials(self):
http = data['request']
assert http['query_string'] == 'foo=bar&password&baz=bar'

def test_querystring_as_pairlist_with_partials(self):
    # A sensitive key whose value is the empty string is expected to keep
    # that empty value rather than being masked.
    pairs = [
        ['foo', 'bar'],
        ['password', ''],
        ['baz', 'bar'],
    ]
    data = {'request': {'query_string': pairs}}

    SensitiveDataFilter().apply(data)

    assert 'request' in data
    expected = [
        ['foo', 'bar'],
        ['password', ''],
        ['baz', 'bar'],
    ]
    assert data['request']['query_string'] == expected

def test_sanitize_additional_sensitive_fields(self):
additional_sensitive_dict = {'fieldy_field': 'value', 'moar_other_field': 'another value'}
data = {'extra': dict(list(VARS.items()) + list(additional_sensitive_dict.items()))}
Expand Down