getsentry · untitaker · Jan 11, 2019 · Dec 19, 2018 · Jan 4, 2019 · Jan 7, 2019
@@ -19,6 +19,8 @@
 
 from sentry.interfaces.base import Interface, InterfaceValidationError, prune_empty_keys
 from sentry.interfaces.schemas import validate_and_default_interface
+from sentry.utils import json
+from sentry.utils.strings import to_unicode
 from sentry.utils.safe import trim, trim_dict, trim_pairs
 from sentry.utils.http import heuristic_decode
 from sentry.utils.validators import validate_ip
@@ -29,12 +31,6 @@
 http_method_re = re.compile(r'^[A-Z\-_]{3,32}$')
 
 
-def to_bytes(value):
-    if isinstance(value, six.text_type):
-        return value.encode('utf-8')
-    return six.binary_type(value)
-
-
 def format_headers(value):
     if not value:
         return ()
@@ -86,6 +82,10 @@ def fix_broken_encoding(value):
     return value
 
 
+def jsonify(value):
+    """
+    Return ``value`` as text: string types are passed through ``to_unicode``,
+    anything else is serialized with ``json.dumps``.
+    """
+    return to_unicode(value) if isinstance(value, six.string_types) else json.dumps(value)
+
+
 class Http(Interface):
     """
     The Request information is stored in the Http interface. Two arguments
@@ -141,24 +141,40 @@ def to_python(cls, data):
             kwargs['method'] = None
 
         if data.get('url', None):
-            scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])
+            url = to_unicode(data['url'])
+            # The JavaScript SDK used to send an ellipsis character for
+            # truncated URLs. Canonical URLs do not contain non-ASCII characters
+            # in either the path, query string or fragment, so we replace it
+            # with three dots (which is the behavior of other SDKs). This
+            # effectively makes the string two characters longer, but it will
+            # be trimmed again down below.
+            #
+            # The ellipsis must be written as a text literal: on Python 2
+            # without unicode_literals, "\u2026" in a plain string literal is
+            # six literal characters and would never match.
+            if url.endswith(u"\u2026"):
+                url = url[:-1] + "..."
+            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
         else:
             scheme = netloc = path = query_bit = fragment_bit = None
 
         query_string = data.get('query_string') or query_bit
         if query_string:
-            # if querystring was a dict, convert it to a string
-            if isinstance(query_string, dict):
-                query_string = urlencode(
-                    [(to_bytes(k), to_bytes(v)) for k, v in query_string.items()]
-                )
-            else:
+            if isinstance(query_string, six.string_types):
                 if query_string[0] == '?':
-                    # remove '?' prefix
                     query_string = query_string[1:]
+                query_string = [
+                    (to_unicode(k), jsonify(v))
+                    for k, v in parse_qsl(query_string, keep_blank_values=True)
+                ]
+            elif isinstance(query_string, dict):
+                query_string = [(to_unicode(k), jsonify(v)) for k, v in six.iteritems(query_string)]
+            elif isinstance(query_string, list):
+                query_string = [
+                    tuple(tup) for tup in query_string
+                    if isinstance(tup, (tuple, list)) and len(tup) == 2
+                ]
+            else:
+                query_string = []
             kwargs['query_string'] = trim(query_string, 4096)
         else:
-            kwargs['query_string'] = ''
+            kwargs['query_string'] = []
 
         fragment = data.get('fragment') or fragment_bit
 
@@ -230,7 +246,7 @@ def to_json(self):
     def full_url(self):
+        """
+        Return ``self.url`` with the URL-encoded query string and the
+        fragment appended when they are set.
+        """
         url = self.url
         if self.query_string:
-            url = url + '?' + self.query_string
+            # urlencode() on Python 2 calls str() on each item and raises
+            # UnicodeEncodeError for non-ASCII text, so text items are
+            # encoded to UTF-8 first. Non-text items are passed through.
+            encoded_pairs = [
+                tuple(
+                    part.encode('utf-8') if isinstance(part, six.text_type) else part
+                    for part in pair
+                )
+                for pair in self.query_string
+            ]
+            url = url + '?' + urlencode(encoded_pairs)
         if self.fragment:
             url = url + '#' + self.fragment
         return url
@@ -242,7 +258,7 @@ def to_email_html(self, event, **kwargs):
                 'url': self.full_url,
                 'short_url': self.url,
                 'method': self.method,
-                'query_string': self.query_string,
+                'query_string': urlencode(self.query_string),
                 'fragment': self.fragment,
             }
         )

@@ -63,7 +63,16 @@ def apierror(message="Invalid data"):
             'minLength': 1,
         },
         'method': {'type': 'string'},
-        'query_string': {'type': ['string', 'object']},
+        'query_string': {
+            'anyOf': [
+                {'type': ['string', 'object']},
+                {'type': 'array', 'items': {
+                    'type': 'array',
+                    'maxItems': 2,
+                    'minItems': 2,
+                }},
+            ],
+        },
         'inferred_content_type': {'type': 'string'},
         'cookies': {
             'anyOf': [

diff --git a/src/sentry/static/sentry/app/components/events/interfaces/richHttpContent.jsx b/src/sentry/static/sentry/app/components/events/interfaces/richHttpContent.jsx
@@ -1,6 +1,5 @@
 import PropTypes from 'prop-types';
 import React from 'react';
-import queryString from 'query-string';
 
 import {objectIsEmpty} from 'app/utils';
 import {objectToSortedTupleArray} from 'app/components/events/interfaces/utils';
@@ -33,12 +32,7 @@ class RichHttpContent extends React.Component {
     try {
       // Sentry API abbreviates long query string values, sometimes resulting in
       // an un-parsable querystring ... stay safe kids
-      return (
-        <KeyValueList
-          data={objectToSortedTupleArray(queryString.parse(data))}
-          isContextData={true}
-        />
-      );
+      return <KeyValueList data={data} isContextData={true} />;
     } catch (e) {
       return <pre>{data}</pre>;
     }
@@ -48,7 +42,7 @@ class RichHttpContent extends React.Component {
     let data = this.props.data;
     return (
       <div>
-        {data.query && (
+        {!objectIsEmpty(data.query) && (
           <ClippedBox title={t('Query String')}>
             <ErrorBoundary mini>{this.getQueryStringOrRaw(data.query)}</ErrorBoundary>
           </ClippedBox>

diff --git a/src/sentry/utils/data_scrubber.py b/src/sentry/utils/data_scrubber.py
@@ -104,8 +104,8 @@ def apply(self, data):
                 data['contexts'][key] = varmap(self.sanitize, value)
 
     def sanitize(self, key, value):
-        if value is None:
-            return
+        if value is None or value == '':
+            return value
 
         if isinstance(key, six.string_types):
             key = key.lower()

diff --git a/src/sentry/utils/safe.py b/src/sentry/utils/safe.py
@@ -90,6 +90,8 @@ def trim(
             _size += len(force_text(trim_v))
             if _size >= max_size:
                 break
+        if isinstance(value, tuple):
+            result = tuple(result)
 
     elif isinstance(value, six.string_types):
         result = truncatechars(value, max_size - _size)

diff --git a/tests/sentry/interfaces/test_http.py b/tests/sentry/interfaces/test_http.py
@@ -28,7 +28,7 @@ def test_basic(self):
         assert result.url == 'http://example.com'
         assert result.method is None
         assert result.fragment == ''
-        assert result.query_string == ''
+        assert result.query_string == []
         assert result.data is None
         assert result.cookies == []
         assert result.headers == []
@@ -49,7 +49,7 @@ def test_full(self):
             )
         )
         assert result.method == 'GET'
-        assert result.query_string == 'foo=bar'
+        assert result.query_string == [('foo', 'bar')]
         assert result.fragment == 'foobar'
         assert result.cookies == [('foo', 'bar')]
         assert result.headers == [('X-Foo-Bar', 'baz')]
@@ -61,16 +61,23 @@ def test_query_string_as_dict(self):
             url='http://example.com',
             query_string={'foo': 'bar'},
         ))
-        assert result.query_string == 'foo=bar'
+        assert result.query_string == [('foo', 'bar')]
 
-    def test_query_string_as_dict_unicode(self):
+    def test_query_string_as_pairlist(self):
+        # A list of two-item lists is accepted and comes back as a list of
+        # (key, value) tuples.
+        result = Http.to_python(dict(
+            url='http://example.com',
+            query_string=[['foo', 'bar']],
+        ))
+        assert result.query_string == [('foo', 'bar')]
+
+    def test_query_string_as_bytes(self):
+        # A byte-string query string is parsed into (key, value) pairs.
         result = Http.to_python(
             dict(
                 url='http://example.com',
-                query_string={'foo': u'\N{SNOWMAN}'},
+                query_string=b'foo=\x00',
             )
         )
-        assert result.query_string == 'foo=%E2%98%83'
+        assert result.query_string == [('foo', '\x00')]
 
     def test_data_as_dict(self):
         result = Http.to_python(dict(
@@ -79,15 +86,51 @@ def test_data_as_dict(self):
         ))
         assert result.data == {'foo': 'bar'}
 
-    def test_form_encoded_data(self):
+    def test_urlencoded_data(self):
+        # With an explicit form-urlencoded Content-Type header the body is
+        # parsed into a dict of lists and the content type is reported back.
         result = Http.to_python(
             dict(
                 url='http://example.com',
                 headers={'Content-Type': 'application/x-www-form-urlencoded'},
                 data='foo=bar',
             )
         )
+
         assert result.data == {'foo': ['bar']}
+        assert result.inferred_content_type == 'application/x-www-form-urlencoded'
+
+    def test_infer_urlencoded_content_type(self):
+        # Without any Content-Type header, a 'foo=bar' body is still parsed
+        # and reported as form-urlencoded.
+        result = Http.to_python(
+            dict(
+                url='http://example.com',
+                data='foo=bar',
+            )
+        )
+
+        assert result.data == {'foo': ['bar']}
+        assert result.inferred_content_type == 'application/x-www-form-urlencoded'
+
+    def test_json_data(self):
+        # With an explicit JSON Content-Type header the body is decoded into
+        # a dict and the content type is reported back.
+        result = Http.to_python(
+            dict(
+                url='http://example.com',
+                headers={'Content-Type': 'application/json'},
+                data='{"foo":"bar"}',
+            )
+        )
+
+        assert result.data == {'foo': 'bar'}
+        assert result.inferred_content_type == 'application/json'
+
+    def test_infer_json_content_type(self):
+        # Without any Content-Type header, a JSON object body is still
+        # decoded and reported as application/json.
+        result = Http.to_python(
+            dict(
+                url='http://example.com',
+                data='{"foo":"bar"}',
+            )
+        )
+
+        assert result.data == {'foo': 'bar'}
+        assert result.inferred_content_type == 'application/json'
 
     def test_cookies_as_string(self):
         result = Http.to_python(dict(

diff --git a/tests/sentry/utils/test_data_scrubber.py b/tests/sentry/utils/test_data_scrubber.py
@@ -164,6 +164,32 @@ def test_querystring_as_string(self):
             }
         )
 
+    def test_querystring_as_pairlist(self):
+        # In a pair-list query string, the values under the password /
+        # secret / api_key style keys below are replaced with FILTER_MASK,
+        # while ['foo', 'bar'] is left unchanged.
+        data = {
+            'request': {
+                'query_string': [
+                    ['foo', 'bar'],
+                    ['password', 'hello'],
+                    ['the_secret', 'hello'],
+                    ['a_password_here', 'hello'],
+                    ['api_key', 'secret_key'],
+                ],
+            }
+        }
+
+        proc = SensitiveDataFilter()
+        proc.apply(data)
+
+        assert 'request' in data
+        http = data['request']
+        assert http['query_string'] == [
+            ['foo', 'bar'],
+            ['password', FILTER_MASK],
+            ['the_secret', FILTER_MASK],
+            ['a_password_here', FILTER_MASK],
+            ['api_key', FILTER_MASK],
+        ]
+
     def test_querystring_as_string_with_partials(self):
         data = {
             'request': {
@@ -178,6 +204,28 @@ def test_querystring_as_string_with_partials(self):
         http = data['request']
         assert http['query_string'] == 'foo=bar&password&baz=bar'
 
+    def test_querystring_as_pairlist_with_partials(self):
+        # An empty value is kept as-is, even under the 'password' key; the
+        # whole pair list comes back unchanged.
+        data = {
+            'request': {
+                'query_string': [
+                    ['foo', 'bar'],
+                    ['password', ''],
+                    ['baz', 'bar'],
+                ]
+            }
+        }
+
+        proc = SensitiveDataFilter()
+        proc.apply(data)
+
+        assert 'request' in data
+        http = data['request']
+        assert http['query_string'] == [
+            ['foo', 'bar'],
+            ['password', ''],
+            ['baz', 'bar'],
+        ]
+
     def test_sanitize_additional_sensitive_fields(self):
         additional_sensitive_dict = {'fieldy_field': 'value', 'moar_other_field': 'another value'}
         data = {'extra': dict(list(VARS.items()) + list(additional_sensitive_dict.items()))}