From 91676ecbb9fa0584b4c7484e584bfe81de711903 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 20 Nov 2023 12:34:15 +0100 Subject: [PATCH] Handling asgi body in the right way. For real (#2513) Handling the request body in ASGI applications. By reading the body first it gets cached (by for example Django) which makes it possible to read the body multiple times. --- sentry_sdk/integrations/_wsgi_common.py | 22 +++- sentry_sdk/integrations/django/asgi.py | 6 - sentry_sdk/integrations/django/views.py | 6 +- tests/integrations/django/asgi/image.png | Bin 0 -> 308 bytes tests/integrations/django/asgi/test_asgi.py | 127 ++++++++++++++++++-- tests/integrations/django/myapp/views.py | 6 +- 6 files changed, 143 insertions(+), 24 deletions(-) create mode 100644 tests/integrations/django/asgi/image.png diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index 585abe25de..5a41654498 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import + import json from copy import deepcopy @@ -7,6 +9,12 @@ from sentry_sdk._types import TYPE_CHECKING +try: + from django.http.request import RawPostDataException +except ImportError: + RawPostDataException = None + + if TYPE_CHECKING: import sentry_sdk @@ -67,10 +75,22 @@ def extract_into_event(self, event): if not request_body_within_bounds(client, content_length): data = AnnotatedValue.removed_because_over_size_limit() else: + # First read the raw body data + # It is important to read this first because if it is Django + # it will cache the body and then we can read the cached version + # again in parsed_body() (or json() or wherever). + raw_data = None + try: + raw_data = self.raw_data() + except (RawPostDataException, ValueError): + # If DjangoRestFramework is used it already read the body for us + # so reading it here will fail. We can ignore this. + pass + parsed_body = self.parsed_body() if parsed_body is not None: data = parsed_body - elif self.raw_data(): + elif raw_data: data = AnnotatedValue.removed_because_raw_data() else: data = None diff --git a/sentry_sdk/integrations/django/asgi.py b/sentry_sdk/integrations/django/asgi.py index bd785a23c2..18f6a58811 100644 --- a/sentry_sdk/integrations/django/asgi.py +++ b/sentry_sdk/integrations/django/asgi.py @@ -94,12 +94,6 @@ def sentry_patched_create_request(self, *args, **kwargs): with hub.configure_scope() as scope: request, error_response = old_create_request(self, *args, **kwargs) - - # read the body once, to signal Django to cache the body stream - # so we can read the body in our event processor - # (otherwise Django closes the body stream and makes it impossible to read it again) - _ = request.body - scope.add_event_processor(_make_asgi_request_event_processor(request)) return request, error_response diff --git a/sentry_sdk/integrations/django/views.py b/sentry_sdk/integrations/django/views.py index c1034d0d85..d918afad66 100644 --- a/sentry_sdk/integrations/django/views.py +++ b/sentry_sdk/integrations/django/views.py @@ -47,13 +47,13 @@ def sentry_patched_make_view_atomic(self, *args, **kwargs): hub = Hub.current integration = hub.get_integration(DjangoIntegration) - if integration is not None and integration.middleware_spans: - if ( + is_async_view = ( iscoroutinefunction is not None and wrap_async_view is not None and iscoroutinefunction(callback) - ): + ) + if is_async_view: sentry_wrapped_callback = wrap_async_view(hub, callback) else: sentry_wrapped_callback = _wrap_sync_view(hub, callback) diff --git a/tests/integrations/django/asgi/image.png b/tests/integrations/django/asgi/image.png new file mode 100644 index 0000000000000000000000000000000000000000..8db277a9fc653b30dd5f1598b353653b55454d6e GIT binary patch literal 308 zcmV-40n7f0P)@bR~YD@IZ1@1DmneO@gCFE1BE zW>PbR&BqN^3|IIJXwvg%uNnG*R@b#;GTgfP0Bm|{RtQ2NND;^cBU4R=$QUmMkUP64 z7BQ6O_W?C!Fh~KN0X7jN0kRI{u3I-AGN@_DgH1Cs)nZt&WIIq(F$3ex>ks}*N{KKu z)y`l@%?tsX1~R3oW(LEI`Lzs', + "", + ), + ( + True, + "POST", + [ + (b"content-type", b"multipart/form-data; boundary=fd721ef49ea403a6"), + (b"content-length", BODY_FORM_CONTENT_LENGTH), + ], + "post_echo_async", + BODY_FORM, + {"password": "hello123", "photo": "", "username": "Jane"}, + ), + ( + False, + "POST", + [(b"content-type", b"text/plain")], + "post_echo_async", + b"", + None, + ), + ( + False, + "POST", + [(b"content-type", b"text/plain")], + "post_echo_async", + b"some raw text body", + "", + ), + ( + False, + "POST", + [(b"content-type", b"application/json")], + "post_echo_async", + b'{"username":"xyz","password":"xyz"}', + {"username": "xyz", "password": "[Filtered]"}, + ), + ( + False, + "POST", + [(b"content-type", b"application/xml")], + "post_echo_async", + b'', + "", + ), + ( + False, + "POST", + [ + (b"content-type", b"multipart/form-data; boundary=fd721ef49ea403a6"), + (b"content-length", BODY_FORM_CONTENT_LENGTH), + ], + "post_echo_async", + BODY_FORM, + {"password": "[Filtered]", "photo": "", "username": "Jane"}, + ), ], ) @pytest.mark.asyncio @@ -388,28 +479,42 @@ async def test_trace_from_headers_if_performance_disabled(sentry_init, capture_e django.VERSION < (3, 1), reason="async views have been introduced in Django 3.1" ) async def test_asgi_request_body( - sentry_init, capture_envelopes, application, body, expected_return_data + sentry_init, + capture_envelopes, + application, + send_default_pii, + method, + headers, + url_name, + body, + expected_data, ): - sentry_init(integrations=[DjangoIntegration()], send_default_pii=True) + sentry_init( + send_default_pii=send_default_pii, + integrations=[ + DjangoIntegration(), + ], + ) envelopes = capture_envelopes() comm = HttpCommunicator( application, - method="POST", - path=reverse("post_echo_async"), + method=method, + headers=headers, + path=reverse(url_name), body=body, - headers=[(b"content-type", b"application/json")], ) response = await comm.get_response() - assert response["status"] == 200 + + await comm.wait() assert response["body"] == body (envelope,) = envelopes event = envelope.get_event() - if expected_return_data is not None: - assert event["request"]["data"] == expected_return_data + if expected_data is not None: + assert event["request"]["data"] == expected_data else: assert "data" not in event["request"] diff --git a/tests/integrations/django/myapp/views.py b/tests/integrations/django/myapp/views.py index 6362adc121..08262b4e8a 100644 --- a/tests/integrations/django/myapp/views.py +++ b/tests/integrations/django/myapp/views.py @@ -237,10 +237,10 @@ def thread_ids_sync(*args, **kwargs): ) exec( - """@csrf_exempt -def post_echo_async(request): + """async def post_echo_async(request): sentry_sdk.capture_message("hi") - return HttpResponse(request.body)""" + return HttpResponse(request.body) +post_echo_async.csrf_exempt = True""" ) else: async_message = None