forked from getsentry/sentry
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ref(utils): SDK name tag normalizer (getsentry#59504)
getsentry#59501

Normalizes SDK tags to reduce their cardinality. Related to getsentry#59075 and getsentry#59379.

- non-Sentry SDK tags are ignored (collapsed into `"other"`)
- official Sentry SDK tags are normalized and shortened:
  - `sentry.javascript.*` are mostly kept as-is
  - `sentry.native.*` are collapsed to 3 levels
  - all other `sentry.*` are collapsed to 2 levels

---------

Co-authored-by: Katie Byers <katie.byers@sentry.io>
Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
- Loading branch information
Showing
2 changed files
with
166 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import functools | ||
import re | ||
|
||
# Canonical SDK names that `normalize_sdk_tag` is allowed to return. Any tag
# that does not normalize to one of these values is reported as "other".
_KNOWN_TAGS = {
    "sentry.cocoa",
    "sentry.dart",
    "sentry.dotnet",
    "sentry.elixir",
    "sentry.go",
    "sentry.java",
    "sentry.javascript.angular",
    "sentry.javascript.browser",
    "sentry.javascript.capacitor",
    "sentry.javascript.cordova",
    "sentry.javascript.deno",
    "sentry.javascript.electron",
    "sentry.javascript.ember",
    "sentry.javascript.gatsby",
    "sentry.javascript.nextjs",
    "sentry.javascript.node",
    "sentry.javascript.react",
    "sentry.javascript.react.native",
    "sentry.javascript.remix",
    "sentry.javascript.serverless",
    "sentry.javascript.svelte",
    "sentry.javascript.sveltekit",
    "sentry.javascript.vue",
    "sentry.kubernetes",
    "sentry.native.android",
    "sentry.native.dotnet",
    "sentry.native.unity",
    "sentry.native.unreal",
    "sentry.objc",
    "sentry.perl",
    "sentry.php",
    "sentry.python",
    "sentry.ruby",
    "sentry.rust",
    "sentry.swift",
}

# Aliases (already in dot-separated form) mapped to the tag they should be
# treated as. The replacement still goes through the collapsing rules in
# `normalize_sdk_tag`, so a value does not have to be in `_KNOWN_TAGS` itself
# (e.g. `sentry.php.laravel` ends up as `sentry.php`).
_SYNONYMOUS_TAGS = {
    "sentry.cordova": "sentry.javascript.cordova",
    "sentry.electron": "sentry.javascript.electron",
    "sentry.javascript.angular.ivy": "sentry.javascript.angular",
    "sentry.javascript.node.experimental": "sentry.javascript.node",
    "sentry.javascript.react.expo": "sentry.javascript.react",
    "sentry.javascript.react.native.expo": "sentry.javascript.react.native",
    "sentry.laravel": "sentry.php.laravel",
    "sentry.react": "sentry.javascript.react",
    "sentry.symfony": "sentry.php.symfony",
    "sentry.unity": "sentry.native.unity",
}

# Any run of non-word characters and/or underscores counts as one separator.
_SEPARATOR_RE = re.compile(r"[\W_]+")

# TODO: Should we be grouping by origin SDK instead? (For example, should we be
# combining all flutter events rather than all native events?)
# See https://github.com/getsentry/sentry/pull/59504#discussion_r1385483963


@functools.lru_cache(maxsize=300)
def normalize_sdk_tag(tag: str) -> str:
    """
    Normalize tags coming from SDKs to a more manageable canonical form, by:
    - combining synonymous tags (`sentry.react` -> `sentry.javascript.react`),
    - ignoring framework differences (`sentry.python.flask` and
      `sentry.python.django` -> `sentry.python`),
    - collapsing all community/third-party SDKs into a single `other` category.

    Note: Some platforms may keep their framework-specific values, as needed
    for analytics.

    :param tag: The raw SDK name, e.g. ``"sentry-javascript-angular"``.
    :return: One of the values in ``_KNOWN_TAGS``, or ``"other"``.

    Results are memoized (up to 300 distinct inputs) via ``functools.lru_cache``.
    """
    # Replace separator runs with dots (normalize sentry-foo to sentry.foo).
    tag = _SEPARATOR_RE.sub(".", tag)

    # Collapse known synonymous tags.
    tag = _SYNONYMOUS_TAGS.get(tag, tag)

    # Ignore non-Sentry SDK tags.
    if not tag.startswith("sentry."):
        return "other"

    # The prefix check above guarantees at least two segments.
    segments = tag.split(".")
    family = segments[1]

    if family == "native":
        # Native SDKs keep three levels (sentry.native.<platform>).
        tag = ".".join(segments[:3])
    elif family != "javascript":
        # Everything except JavaScript / Native collapses to its top-level SDK.
        tag = ".".join(segments[:2])
    # JavaScript tags are left at full length and only validated below.

    return tag if tag in _KNOWN_TAGS else "other"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import pytest | ||
|
||
from sentry.utils.tag_normalization import normalize_sdk_tag | ||
|
||
|
||
@pytest.mark.parametrize(
    ("tag", "expected"),
    [
        ("sentry-javascript-angular", "sentry.javascript.angular"),
        ("sentry_python", "sentry.python"),
    ],
)
def test_normalizes_to_dots(tag, expected):
    """Dash- and underscore-separated tags come back dot-separated."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
|
||
|
||
@pytest.mark.parametrize(
    ("tag", "expected"),
    [
        # JavaScript tags keep their full length.
        ("sentry.javascript.angular", "sentry.javascript.angular"),
        ("sentry.javascript.react.native", "sentry.javascript.react.native"),
        # Other SDKs drop the framework suffix.
        ("sentry.python.django", "sentry.python"),
        # Native SDKs keep exactly three levels.
        ("sentry.native.android.flutter", "sentry.native.android"),
    ],
)
def test_shortens_non_js(tag, expected):
    """Only non-JavaScript tags are truncated; native tags keep three levels."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
|
||
|
||
@pytest.mark.parametrize(
    ("tag", "expected"),
    [
        ("sentry.javascript.angular", "sentry.javascript.angular"),
        ("sentry.javascript.angular.ivy", "sentry.javascript.angular"),
        ("sentry.symfony", "sentry.php"),
        ("sentry.unity", "sentry.native.unity"),
        ("sentry.javascript.react.native.expo", "sentry.javascript.react.native"),
    ],
)
def test_uses_synonyms(tag, expected):
    """Aliased tags resolve to the canonical tag they stand for."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
|
||
|
||
@pytest.mark.parametrize(
    ("tag", "expected"),
    [
        ("foo.baz.bar", "other"),
        ("sentryfoo", "other"),
        ("raven", "other"),
    ],
)
def test_non_sentry_to_other(tag, expected):
    """Tags without the `sentry.` prefix are bucketed as "other"."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
|
||
|
||
@pytest.mark.parametrize(
    ("tag", "expected"),
    [
        ("sentry.sparql", "other"),
        ("sentry.terraform.hcl", "other"),
        ("sentry-native", "other"),
    ],
)
def test_unknown_sentry_to_other(tag, expected):
    """`sentry.`-prefixed tags that are not recognized are bucketed as "other"."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
|
||
|
||
def test_responses_cached():
    """A repeated lookup is served from the cache instead of being recomputed."""
    # Start from an empty cache so the counters below are deterministic.
    normalize_sdk_tag.cache_clear()

    first = normalize_sdk_tag("sentry.javascript.react")
    assert first == "sentry.javascript.react"
    second = normalize_sdk_tag("sentry.javascript.react")
    assert second == "sentry.javascript.react"

    # First call misses, second call hits.
    stats = normalize_sdk_tag.cache_info()
    assert stats.hits == 1
    assert stats.misses == 1