diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index e246f05363..2e73f60c9c 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -320,12 +320,7 @@ def _prepare_event( # Postprocess the event here so that annotated types do # generally not surface in before_send if event is not None: - event = serialize( - event, - smart_transaction_trimming=self.options["_experiments"].get( - "smart_transaction_trimming" - ), - ) + event = serialize(event) before_send = self.options["before_send"] if before_send is not None and event.get("type") != "transaction": diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 99f3ca4c1f..52e8b78548 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -33,8 +33,7 @@ { "max_spans": Optional[int], "record_sql_params": Optional[bool], - "smart_transaction_trimming": Optional[bool], - # TODO: Remvoe these 2 profiling related experiments + # TODO: Remove these 2 profiling related experiments "profiles_sample_rate": Optional[float], "profiler_mode": Optional[ProfilerMode], }, diff --git a/sentry_sdk/serializer.py b/sentry_sdk/serializer.py index 29495c3118..22eec490ae 100644 --- a/sentry_sdk/serializer.py +++ b/sentry_sdk/serializer.py @@ -8,13 +8,9 @@ capture_internal_exception, disable_capture_event, format_timestamp, - json_dumps, safe_repr, strip_string, ) - -import sentry_sdk.utils - from sentry_sdk._compat import ( text_type, PY2, @@ -23,12 +19,9 @@ iteritems, binary_sequence_types, ) - from sentry_sdk._types import TYPE_CHECKING if TYPE_CHECKING: - from datetime import timedelta - from types import TracebackType from typing import Any @@ -37,7 +30,6 @@ from typing import Dict from typing import List from typing import Optional - from typing import Tuple from typing import Type from typing import Union @@ -120,12 +112,11 @@ def __exit__( self._ids.pop(id(self._objs.pop()), None) -def serialize(event, smart_transaction_trimming=False, **kwargs): - # type: (Event, bool, **Any) -> Event +def serialize(event, **kwargs): + # type: (Event, **Any) -> Event memo = Memo() path = [] # type: List[Segment] meta_stack = [] # type: List[Dict[str, Any]] - span_description_bytes = [] # type: List[int] def _annotate(**meta): # type: (**Any) -> None @@ -365,113 +356,23 @@ def _serialize_node_impl( if not isinstance(obj, string_types): obj = safe_repr(obj) - # Allow span descriptions to be longer than other strings. - # - # For database auto-instrumented spans, the description contains - # potentially long SQL queries that are most useful when not truncated. - # Because arbitrarily large events may be discarded by the server as a - # protection mechanism, we dynamically limit the description length - # later in _truncate_span_descriptions. - if ( - smart_transaction_trimming - and len(path) == 3 - and path[0] == "spans" - and path[-1] == "description" - ): - span_description_bytes.append(len(obj)) + is_span_description = ( + len(path) == 3 and path[0] == "spans" and path[-1] == "description" + ) + if is_span_description: return obj - return _flatten_annotated(strip_string(obj)) - def _truncate_span_descriptions(serialized_event, event, excess_bytes): - # type: (Event, Event, int) -> None - """ - Modifies serialized_event in-place trying to remove excess_bytes from - span descriptions. The original event is used read-only to access the - span timestamps (represented as RFC3399-formatted strings in - serialized_event). - - It uses heuristics to prioritize preserving the description of spans - that might be the most interesting ones in terms of understanding and - optimizing performance. - """ - # When truncating a description, preserve a small prefix. - min_length = 10 - - def shortest_duration_longest_description_first(args): - # type: (Tuple[int, Span]) -> Tuple[timedelta, int] - i, serialized_span = args - span = event["spans"][i] - now = datetime.utcnow() - start = span.get("start_timestamp") or now - end = span.get("timestamp") or now - duration = end - start - description = serialized_span.get("description") or "" - return (duration, -len(description)) - - # Note: for simplicity we sort spans by exact duration and description - # length. If ever needed, we could have a more involved heuristic, e.g. - # replacing exact durations with "buckets" and/or looking at other span - # properties. - path.append("spans") - for i, span in sorted( - enumerate(serialized_event.get("spans") or []), - key=shortest_duration_longest_description_first, - ): - description = span.get("description") or "" - if len(description) <= min_length: - continue - excess_bytes -= len(description) - min_length - path.extend([i, "description"]) - # Note: the last time we call strip_string we could preserve a few - # more bytes up to a total length of MAX_EVENT_BYTES. Since that's - # not strictly required, we leave it out for now for simplicity. - span["description"] = _flatten_annotated( - strip_string(description, max_length=min_length) - ) - del path[-2:] - del meta_stack[len(path) + 1 :] - - if excess_bytes <= 0: - break - path.pop() - del meta_stack[len(path) + 1 :] + return _flatten_annotated(strip_string(obj)) + # + # Start of serialize() function + # disable_capture_event.set(True) try: - rv = _serialize_node(event, **kwargs) - if meta_stack and isinstance(rv, dict): - rv["_meta"] = meta_stack[0] - - sum_span_description_bytes = sum(span_description_bytes) - if smart_transaction_trimming and sum_span_description_bytes > 0: - span_count = len(event.get("spans") or []) - # This is an upper bound of how many bytes all descriptions would - # consume if the usual string truncation in _serialize_node_impl - # would have taken place, not accounting for the metadata attached - # as event["_meta"]. - descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH - - # If by not truncating descriptions we ended up with more bytes than - # per the usual string truncation, check if the event is too large - # and we need to truncate some descriptions. - # - # This is guarded with an if statement to avoid JSON-encoding the - # event unnecessarily. - if sum_span_description_bytes > descriptions_budget_bytes: - original_bytes = len(json_dumps(rv)) - excess_bytes = original_bytes - MAX_EVENT_BYTES - if excess_bytes > 0: - # Event is too large, will likely be discarded by the - # server. Trim it down before sending. - _truncate_span_descriptions(rv, event, excess_bytes) - - # Span descriptions truncated, set or reset _meta. - # - # We run the same code earlier because we want to account - # for _meta when calculating original_bytes, the number of - # bytes in the JSON-encoded event. - if meta_stack and isinstance(rv, dict): - rv["_meta"] = meta_stack[0] - return rv + serialized_event = _serialize_node(event, **kwargs) + if meta_stack and isinstance(serialized_event, dict): + serialized_event["_meta"] = meta_stack[0] + + return serialized_event finally: disable_capture_event.set(False) diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index e9d8c4e849..d45ea36a19 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -8,8 +8,8 @@ from sentry_sdk import capture_message, start_transaction, configure_scope from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration -from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH from sentry_sdk.serializer import MAX_EVENT_BYTES +from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH def test_orm_queries(sentry_init, capture_events): @@ -143,7 +143,6 @@ def test_long_sql_query_preserved(sentry_init, capture_events): sentry_init( traces_sample_rate=1, integrations=[SqlalchemyIntegration()], - _experiments={"smart_transaction_trimming": True}, ) events = capture_events() @@ -158,11 +157,10 @@ def test_long_sql_query_preserved(sentry_init, capture_events): assert description.endswith("SELECT 98 UNION SELECT 99") -def test_too_large_event_truncated(sentry_init, capture_events): +def test_large_event_not_truncated(sentry_init, capture_events): sentry_init( traces_sample_rate=1, integrations=[SqlalchemyIntegration()], - _experiments={"smart_transaction_trimming": True}, ) events = capture_events() @@ -178,36 +176,26 @@ def processor(event, hint): engine = create_engine("sqlite:///:memory:") with start_transaction(name="test"): with engine.connect() as con: - for _ in range(2000): + for _ in range(1500): con.execute(" UNION ".join("SELECT {}".format(i) for i in range(100))) (event,) = events - # Because of attached metadata in the "_meta" key, we may send out a little - # bit more than MAX_EVENT_BYTES. - max_bytes = 1.2 * MAX_EVENT_BYTES - assert len(json_dumps(event)) < max_bytes + assert len(json_dumps(event)) > MAX_EVENT_BYTES # Some spans are discarded. assert len(event["spans"]) == 1000 - for i, span in enumerate(event["spans"]): - description = span["description"] - - assert description.startswith("SELECT ") - if str(i) in event["_meta"]["spans"]: - # Description must have been truncated - assert len(description) == 10 - assert description.endswith("...") - else: - # Description was not truncated, check for original length - assert len(description) == 1583 - assert description.endswith("SELECT 98 UNION SELECT 99") - - # Smoke check the meta info for one of the spans. - assert next(iter(event["_meta"]["spans"].values())) == { - "description": {"": {"len": 1583, "rem": [["!limit", "x", 7, 10]]}} - } + # Span descriptions are not truncated. + description = event["spans"][0]["description"] + assert len(description) == 1583 + assert description.startswith("SELECT 0") + assert description.endswith("SELECT 98 UNION SELECT 99") + + description = event["spans"][999]["description"] + assert len(description) == 1583 + assert description.startswith("SELECT 0") + assert description.endswith("SELECT 98 UNION SELECT 99") # Smoke check that truncation of other fields has not changed. assert len(event["message"]) == MAX_STRING_LENGTH