diff --git a/src/sentry/utils/performance_issues/base.py b/src/sentry/utils/performance_issues/base.py index 7235ae4a19b522..8c468fb13bb342 100644 --- a/src/sentry/utils/performance_issues/base.py +++ b/src/sentry/utils/performance_issues/base.py @@ -167,19 +167,46 @@ def get_duration_between_spans(first_span: Span, second_span: Span): def get_url_from_span(span: Span) -> str: + """ + Parses the span data and pulls out the URL. Accounts for different SDKs and + different versions of SDKs formatting and parsing the URL contents + differently. + """ + data = span.get("data") or {} - url = data.get("url") or "" - if not url: - # If data is missing, fall back to description - description = span.get("description") or "" - parts = description.split(" ", 1) - if len(parts) == 2: - url = parts[1] - - if type(url) is dict: - url = url.get("pathname") or "" - - return url + + # The most modern version is to provide URL information in the span + # data + url_data = data.get("url") + + if type(url_data) is dict: + # Some transactions mysteriously provide the URL as a dict that looks + # like JavaScript's URL object + url = url_data.get("pathname") or "" + url += url_data.get("search") or "" + return url + + if type(url_data) is str: + # Usually the URL is a regular string, and so is the query. This + # is the standardized format for all SDKs, and is the preferred + # format going forward. Otherwise, if `http.query` is absent, `url` + # contains the query. + url = url_data + query_data = data.get("http.query") + if type(query_data) is str and len(query_data) > 0: + url += f"?{query_data}" + + return url + + # Attempt to parse the full URL from the span description, in case + # the previous approaches did not yield a good result + description = span.get("description") or "" + parts = description.split(" ", 1) + if len(parts) == 2: + url = parts[1] + return url + + return "" def fingerprint_spans(spans: List[Span], unique_only: bool = False): diff --git a/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py b/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py index 4da85efc12cb74..1e2bb73afa8fcd 100644 --- a/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py +++ b/src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py @@ -157,10 +157,12 @@ def is_span_eligible(cls, span: Span) -> bool: if description.strip()[:3].upper() != "GET": return False + url = get_url_from_span(span) + # GraphQL URLs have complicated queries in them. Until we parse those # queries to check for what's duplicated, we can't tell what is being # duplicated. Ignore them for now - if "graphql" in description: + if "graphql" in url: return False # Next.js infixes its data URLs with a build ID. (e.g., @@ -168,11 +170,9 @@ def is_span_eligible(cls, span: Span) -> bool: # explosion, since every deploy would change this ID and create new # fingerprints. Since we're not parameterizing URLs yet, we need to # exclude them - if "_next/data" in description: + if "_next/data" in url: return False - url = get_url_from_span(span) - # Next.js error pages cause an N+1 API Call that isn't useful to anyone if "__nextjs_original-stack-frame" in url: return False @@ -180,6 +180,9 @@ def is_span_eligible(cls, span: Span) -> bool: if not url: return False + # Once most users update their SDKs to use the latest standard, we + # won't have to do this, since the URLs will be sent in as `span.data` + # in a parsed format parsed_url = urlparse(str(url)) if parsed_url.netloc in cls.HOST_DENYLIST: diff --git a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py index 9f755cac0ecb39..288f64c4b3a3d2 100644 --- a/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py +++ b/tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py @@ -343,7 +343,7 @@ def test_allows_eligible_spans(span): { "span_id": "a", "op": "http.client", - "description": "GET http://service.io/resource", + "description": "GET /resource.js", "hash": "a", "data": {"url": "/resource.js"}, }, @@ -353,6 +353,16 @@ def test_allows_eligible_spans(span): "description": "GET http://service.io/resource?graphql=somequery", "hash": "a", }, + { + "span_id": "a", + "op": "http.client", + "description": "GET http://service.io/resource", # New JS SDK removes query string from description + "hash": "a", + "data": { + "http.query": "graphql=somequery", + "url": "http://service.io/resource", + }, + }, { "span_id": "a", "op": "http.client",