Skip to content

Commit

Permalink
ref(utils): SDK name tag normalizer (getsentry#59504)
Browse files Browse the repository at this point in the history
getsentry#59501 Normalizes SDK tags to reduce their cardinality. 
Related to getsentry#59075 and getsentry#59379.

- non-Sentry SDK tags are ignored (collapsed into `"other"`)
- official Sentry SDK tags are normalized and shortened: 
    -  `sentry.javascript.*` are mostly kept as-is
    -  `sentry.native.*` are collapsed to 3 levels
    -  all other `sentry.*` are collapsed to 2 levels

---------

Co-authored-by: Katie Byers <katie.byers@sentry.io>
Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored and pull[bot] committed Mar 16, 2024
1 parent 0ebcb04 commit 203b322
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 0 deletions.
94 changes: 94 additions & 0 deletions src/sentry/utils/tag_normalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import functools
import re

# Canonical SDK names that `normalize_sdk_tag` is allowed to return. A tag that
# is still not in this set after normalization is reported as "other".
_KNOWN_TAGS = {
    # Top-level SDKs (always collapsed to two levels).
    "sentry.cocoa",
    "sentry.dart",
    "sentry.dotnet",
    "sentry.elixir",
    "sentry.go",
    "sentry.java",
    # JavaScript SDKs (kept at their full depth).
    "sentry.javascript.angular",
    "sentry.javascript.browser",
    "sentry.javascript.capacitor",
    "sentry.javascript.cordova",
    "sentry.javascript.deno",
    "sentry.javascript.electron",
    "sentry.javascript.ember",
    "sentry.javascript.gatsby",
    "sentry.javascript.nextjs",
    "sentry.javascript.node",
    "sentry.javascript.react",
    "sentry.javascript.react.native",
    "sentry.javascript.remix",
    "sentry.javascript.serverless",
    "sentry.javascript.svelte",
    "sentry.javascript.sveltekit",
    "sentry.javascript.vue",
    "sentry.kubernetes",
    # Native SDKs (collapsed to three levels).
    "sentry.native.android",
    "sentry.native.dotnet",
    "sentry.native.unity",
    "sentry.native.unreal",
    # Remaining top-level SDKs.
    "sentry.objc",
    "sentry.perl",
    "sentry.php",
    "sentry.python",
    "sentry.ruby",
    "sentry.rust",
    "sentry.swift",
}


# Alternative SDK names mapped to the name they should be counted under.
# `normalize_sdk_tag` consults this mapping after replacing separators with
# dots and *before* collapsing the tag, so a value may be deeper than the final
# result (e.g. "sentry.php.laravel" is subsequently shortened to "sentry.php").
# Every value must start with "sentry." exactly; anything else is reported as
# "other" by `normalize_sdk_tag`.
_SYNONYMOUS_TAGS = {
    "sentry.cordova": "sentry.javascript.cordova",
    "sentry.electron": "sentry.javascript.electron",
    "sentry.javascript.angular.ivy": "sentry.javascript.angular",
    "sentry.javascript.node.experimental": "sentry.javascript.node",
    "sentry.javascript.react.expo": "sentry.javascript.react",
    "sentry.javascript.react.native.expo": "sentry.javascript.react.native",
    "sentry.laravel": "sentry.php.laravel",
    "sentry.react": "sentry.javascript.react",
    "sentry.symfony": "sentry.php.symfony",
    "sentry.unity": "sentry.native.unity",
}

# TODO: Should we be grouping by origin SDK instead? (For example, should we be
# combining all flutter events rather than all native events?)
# See https://github.com/getsentry/sentry/pull/59504#discussion_r1385483963


@functools.lru_cache(maxsize=300)
def normalize_sdk_tag(tag: str) -> str:
    """
    Reduce an SDK name tag to one of a small set of canonical values.

    The tag is processed in this order:
    - every run of non-word characters or underscores becomes a single dot
      (`sentry-python` -> `sentry.python`),
    - known synonyms are replaced (`sentry.react` -> `sentry.javascript.react`),
    - tags that do not start with `sentry.` become `other`,
    - `sentry.native.*` tags are cut to three levels, `sentry.javascript.*`
      tags keep their full depth, and every other tag is cut to two levels
      (`sentry.python.flask` and `sentry.python.django` -> `sentry.python`),
    - anything that is still not a known SDK name becomes `other`.

    Results are memoized for up to 300 distinct input tags.

    :param tag: raw SDK name as reported by the client.
    :returns: a member of `_KNOWN_TAGS`, or the string `other`.
    """
    # `\W` does not cover underscores, hence the explicit `_` in the class.
    canonical = re.sub(r"[\W_]+", ".", tag)
    canonical = _SYNONYMOUS_TAGS.get(canonical, canonical)

    if not canonical.startswith("sentry."):
        return "other"

    # The prefix check above guarantees at least two dot-separated segments.
    segments = canonical.split(".")
    family = segments[1]
    if family == "native":
        canonical = ".".join(segments[:3])
    elif family != "javascript":
        canonical = ".".join(segments[:2])

    return canonical if canonical in _KNOWN_TAGS else "other"
72 changes: 72 additions & 0 deletions tests/sentry/utils/test_tag_normalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import pytest

from sentry.utils.tag_normalization import normalize_sdk_tag


@pytest.mark.parametrize(
    ["tag", "expected"],
    [
        ("sentry-javascript-angular", "sentry.javascript.angular"),
        ("sentry_python", "sentry.python"),
    ],
)
def test_normalizes_to_dots(tag, expected):
    """Dashes and underscores in a tag are treated like dots."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected


@pytest.mark.parametrize(
    ["tag", "expected"],
    [
        ("sentry.javascript.angular", "sentry.javascript.angular"),
        ("sentry.javascript.react.native", "sentry.javascript.react.native"),
        ("sentry.python.django", "sentry.python"),
        ("sentry.native.android.flutter", "sentry.native.android"),
    ],
)
def test_shortens_non_js(tag, expected):
    """JavaScript tags keep their depth; native and other tags are truncated."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected


@pytest.mark.parametrize(
    ["tag", "expected"],
    [
        ("sentry.javascript.angular", "sentry.javascript.angular"),
        ("sentry.javascript.angular.ivy", "sentry.javascript.angular"),
        ("sentry.symfony", "sentry.php"),
        ("sentry.unity", "sentry.native.unity"),
        ("sentry.javascript.react.native.expo", "sentry.javascript.react.native"),
    ],
)
def test_uses_synonyms(tag, expected):
    """Synonymous tags resolve to the canonical SDK name they stand for."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected


@pytest.mark.parametrize(
    ["tag", "expected"],
    [
        ("foo.baz.bar", "other"),
        ("sentryfoo", "other"),
        ("raven", "other"),
    ],
)
def test_non_sentry_to_other(tag, expected):
    """Tags without the `sentry.` prefix are collapsed into `other`."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected


@pytest.mark.parametrize(
    ["tag", "expected"],
    [
        ("sentry.sparql", "other"),
        ("sentry.terraform.hcl", "other"),
        ("sentry-native", "other"),
    ],
)
def test_unknown_sentry_to_other(tag, expected):
    """`sentry.*` tags that are not known SDK names are collapsed into `other`."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected


def test_responses_cached():
    """Normalizing the same tag twice records one cache miss and one hit."""
    # Start from an empty cache so the counters below are deterministic.
    normalize_sdk_tag.cache_clear()

    # The first call misses the freshly cleared cache; the second one hits it.
    for _ in range(2):
        assert normalize_sdk_tag("sentry.javascript.react") == "sentry.javascript.react"

    stats = normalize_sdk_tag.cache_info()
    assert stats.hits == 1
    assert stats.misses == 1

0 comments on commit 203b322

Please sign in to comment.