Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
18 changes: 18 additions & 0 deletions src/sentry/grouping/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
"exception": "exception",
"stacktrace": "stacktrace",
"threads": "thread",
"thread_id": "thread-id",
"thread_name": "thread-name",
"hostname": "hostname",
"violation": "violation",
"uri": "URL",
Expand Down Expand Up @@ -271,6 +273,10 @@ class NSErrorGroupingComponent(
id: str = "ns_error"


class ThreadNameGroupingComponent(BaseGroupingComponent[str]):
    """Grouping component whose values are strings, registered under the id ``thread_name``."""

    id: str = "thread_name"


FrameGroupingComponentChild = (
ContextLineGroupingComponent
| FilenameGroupingComponent
Expand Down Expand Up @@ -456,16 +462,28 @@ class ThreadsGroupingComponent(BaseGroupingComponent[StacktraceGroupingComponent
id: str = "threads"
key: str = "thread_stacktrace"
frame_counts: Counter[str]
metadata: list[BaseGroupingComponent[str]]

def __init__(
    self,
    values: Sequence[StacktraceGroupingComponent] | None = None,
    hint: str | None = None,
    contributes: bool | None = None,
    frame_counts: Counter[str] | None = None,
    metadata: list[BaseGroupingComponent[str]] | None = None,
):
    """
    Initialise the component.

    `values`, `hint` and `contributes` are forwarded unchanged to the base
    class. `frame_counts` and `metadata` are stored on the instance; a falsy
    argument (``None`` or an empty container) is replaced by a fresh empty
    `Counter` / list respectively.
    """
    super().__init__(hint=hint, contributes=contributes, values=values)
    self.frame_counts = frame_counts if frame_counts else Counter()
    self.metadata = metadata if metadata else []

def iter_values(self) -> Generator[str | int]:
    """
    Yield the values used for hash calculation.

    The base class's values (the stacktrace components) come first, followed
    by the values of every metadata component whose `contributes` flag is
    truthy. Non-contributing metadata is skipped entirely.
    """
    yield from super().iter_values()

    contributing_metadata = (meta for meta in self.metadata if meta.contributes)
    for meta in contributing_metadata:
        yield from meta.iter_values()


class CSPGroupingComponent(
Expand Down
82 changes: 82 additions & 0 deletions src/sentry/grouping/fingerprinting/matchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
"app": "app",
"sdk": "sdk",
"release": "release",
# thread fields
"thread.id": "thread_id",
"thread.name": "thread_name",
"thread.state": "thread_state",
"thread.crashed": "thread_crashed",
"thread.current": "thread_current",
}


Expand Down Expand Up @@ -69,6 +75,8 @@ def match_type(self) -> str:
return "family"
if self.key == "release":
return "release"
if self.key.startswith("thread_"):
return "threads"
return "frames"

def matches(self, event_values: dict[str, Any]) -> bool:
Expand Down Expand Up @@ -121,6 +129,12 @@ def _positive_match(self, event_values: dict[str, Any]) -> bool:
if self.key in ["level", "value"]:
return glob_match(value, self.pattern, ignorecase=True)

if self.key in ["thread_crashed", "thread_current"]:
return value == bool_from_string(self.pattern)

if self.key in ["thread_id", "thread_name", "thread_state"]:
return glob_match(value, self.pattern, ignorecase=True)

return glob_match(value, self.pattern, ignorecase=False)

def _to_config_structure(self) -> list[str]:
Expand All @@ -145,3 +159,71 @@ def text(self) -> str:
self.key,
self.pattern,
)


class CallerMatcher:
    """
    Wraps a FingerprintMatcher so that it is evaluated against the caller of
    the frame currently being tested.

    Syntax: [ function:foo ] | matches when the caller is foo

    NOTE(review): this assumes `all_frames` is ordered oldest-to-newest
    (caller before callee), as in Sentry's stack trace interface, so the
    caller of the frame at `frame_idx` is the frame at `frame_idx - 1`.
    Confirm against what `EventDatastore.get_values("frames")` returns.
    """

    def __init__(self, inner: FingerprintMatcher):
        self.inner = inner

    @property
    def match_type(self) -> str:
        # Caller matchers only work with frame-based matching
        return "frames"

    def matches(
        self, event_values: dict[str, Any], frame_idx: int, all_frames: list[dict[str, Any]]
    ) -> bool:
        """
        Return whether the wrapped matcher accepts the caller of the frame at
        `frame_idx`.

        `event_values` (the current frame) is accepted for signature parity
        with the other matchers but is not inspected. Returns False when the
        frame has no caller (it is the first frame) or `frame_idx` is out of
        range.
        """
        if 0 < frame_idx < len(all_frames):
            return self.inner.matches(all_frames[frame_idx - 1])
        return False

    def _to_config_structure(self) -> list[str]:
        # Copy first so the list handed back by the inner matcher is never
        # mutated, then mark the key as a caller matcher: "[key]|".
        structure = list(self.inner._to_config_structure())
        structure[0] = f"[{structure[0]}]|"
        return structure

    @property
    def text(self) -> str:
        return f"[ {self.inner.text} ] |"


class CalleeMatcher:
    """
    Wraps a FingerprintMatcher so that it is evaluated against the callee of
    the frame currently being tested.

    Syntax: | [ function:bar ] matches when the callee is bar

    NOTE(review): this assumes `all_frames` is ordered oldest-to-newest
    (caller before callee), as in Sentry's stack trace interface, so the
    callee of the frame at `frame_idx` is the frame at `frame_idx + 1`.
    Confirm against what `EventDatastore.get_values("frames")` returns.
    """

    def __init__(self, inner: FingerprintMatcher):
        self.inner = inner

    @property
    def match_type(self) -> str:
        # Callee matchers only work with frame-based matching
        return "frames"

    def matches(
        self, event_values: dict[str, Any], frame_idx: int, all_frames: list[dict[str, Any]]
    ) -> bool:
        """
        Return whether the wrapped matcher accepts the callee of the frame at
        `frame_idx`.

        `event_values` (the current frame) is accepted for signature parity
        with the other matchers but is not inspected. Returns False when the
        frame has no callee (it is the last frame) or `frame_idx` is out of
        range.
        """
        if 0 <= frame_idx < len(all_frames) - 1:
            return self.inner.matches(all_frames[frame_idx + 1])
        return False

    def _to_config_structure(self) -> list[str]:
        # Copy first so the list handed back by the inner matcher is never
        # mutated, then mark the key as a callee matcher: "|[key]".
        structure = list(self.inner._to_config_structure())
        structure[0] = f"|[{structure[0]}]"
        return structure

    @property
    def text(self) -> str:
        return f"| [ {self.inner.text} ]"
66 changes: 59 additions & 7 deletions src/sentry/grouping/fingerprinting/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from parsimonious.nodes import Node, NodeVisitor, RegexNode

from sentry.grouping.fingerprinting.exceptions import InvalidFingerprintingConfig
from sentry.grouping.fingerprinting.matchers import FingerprintMatcher
from sentry.grouping.fingerprinting.matchers import CalleeMatcher, CallerMatcher, FingerprintMatcher
from sentry.grouping.fingerprinting.rules import (
FingerprintRule,
FingerprintRuleAttributes,
Expand All @@ -30,8 +30,10 @@

rule = _ matchers _ follow _ fingerprint

matchers = matcher+
matcher = _ negation? matcher_type sep argument
matchers = caller_matcher? frame_matcher+ callee_matcher?
frame_matcher = _ negation? matcher_type sep argument
caller_matcher = _ "[" _ frame_matcher _ "]" _ "|"
callee_matcher = _ "|" _ "[" _ frame_matcher _ "]"
matcher_type = key / quoted_key
argument = quoted / unquoted

Expand Down Expand Up @@ -90,13 +92,63 @@ def visit_rule(
self,
_: object,
children: tuple[
object, list[FingerprintMatcher], object, object, object, FingerprintWithAttributes
object,
list[FingerprintMatcher | CallerMatcher | CalleeMatcher],
object,
object,
object,
FingerprintWithAttributes,
],
) -> FingerprintRule:
_, matcher, _, _, _, (fingerprint, attributes) = children
return FingerprintRule(matcher, fingerprint, attributes)
_, matchers, _, _, _, (fingerprint, attributes) = children
return FingerprintRule(matchers, fingerprint, attributes)

def visit_matcher(
def visit_matchers(
    self,
    _: object,
    children: tuple[
        list[FingerprintMatcher | CallerMatcher | CalleeMatcher] | None,
        list[FingerprintMatcher],
        list[FingerprintMatcher | CallerMatcher | CalleeMatcher] | None,
    ],
) -> list[FingerprintMatcher | CallerMatcher | CalleeMatcher]:
    """
    Combine the optional caller matcher, the frame matchers and the optional
    callee matcher into one flat list, in that order.
    """
    caller_part, frame_matchers, callee_part = children

    def _unwrap(optional_part):
        # Parsimonious wraps optional matches (? quantifier) in an extra
        # list, so a nested list is unwrapped one level. A missing or empty
        # part contributes nothing.
        if not optional_part:
            return []
        first = optional_part[0]
        return first if isinstance(first, list) else optional_part

    return [*_unwrap(caller_part), *frame_matchers, *_unwrap(callee_part)]

def visit_caller_matcher(
    self,
    _: object,
    children: tuple[object, object, object, FingerprintMatcher, object, object, object, object],
) -> list[FingerprintMatcher | CallerMatcher | CalleeMatcher]:
    """Wrap the bracketed frame matcher of a `[ ... ] |` clause in a CallerMatcher."""
    # Children follow the grammar rule: _ "[" _ frame_matcher _ "]" _ "|"
    (_ws0, _open, _ws1, inner, _ws2, _close, _ws3, _pipe) = children
    return [CallerMatcher(inner)]

def visit_callee_matcher(
    self,
    _: object,
    children: tuple[object, object, object, object, object, FingerprintMatcher, object, object],
) -> list[FingerprintMatcher | CallerMatcher | CalleeMatcher]:
    """Wrap the bracketed frame matcher of a `| [ ... ]` clause in a CalleeMatcher."""
    # Children follow the grammar rule: _ "|" _ "[" _ frame_matcher _ "]"
    (_ws0, _pipe, _ws1, _open, _ws2, inner, _ws3, _close) = children
    return [CalleeMatcher(inner)]

def visit_frame_matcher(
self, _: object, children: tuple[object, list[str], str, object, str]
) -> FingerprintMatcher:
_, negation, key, _, pattern = children
Expand Down
75 changes: 70 additions & 5 deletions src/sentry/grouping/fingerprinting/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections.abc import Sequence
from typing import NamedTuple, NotRequired, Self, TypedDict

from sentry.grouping.fingerprinting.matchers import FingerprintMatcher
from sentry.grouping.fingerprinting.matchers import CalleeMatcher, CallerMatcher, FingerprintMatcher
from sentry.grouping.fingerprinting.utils import EventDatastore

logger = logging.getLogger("sentry.events.grouping")
Expand Down Expand Up @@ -48,7 +48,7 @@ class FingerprintRuleMatch(NamedTuple):
class FingerprintRule:
def __init__(
self,
matchers: Sequence[FingerprintMatcher],
matchers: Sequence[FingerprintMatcher | CallerMatcher | CalleeMatcher],
fingerprint: list[str],
attributes: FingerprintRuleAttributes,
is_builtin: bool = False,
Expand All @@ -61,19 +61,84 @@ def __init__(
def test_for_match_with_event(
self, event_datastore: EventDatastore
) -> None | FingerprintWithAttributes:
matchers_by_match_type: dict[str, list[FingerprintMatcher]] = {}
matchers_by_match_type: dict[
str, list[FingerprintMatcher | CallerMatcher | CalleeMatcher]
] = {}
has_sibling_matchers = False

for matcher in self.matchers:
matchers_by_match_type.setdefault(matcher.match_type, []).append(matcher)
if isinstance(matcher, (CallerMatcher, CalleeMatcher)):
has_sibling_matchers = True
# Only add frame-related matchers to the dict for sibling matching
matchers_by_match_type.setdefault("frames", []).append(matcher)
else:
matchers_by_match_type.setdefault(matcher.match_type, []).append(matcher)

# If we have sibling matchers, we need to match against frame sequences
if has_sibling_matchers:
return self._test_with_frame_context(event_datastore, matchers_by_match_type)

# Original logic for simple matchers (no CallerMatcher/CalleeMatcher here)
for match_type, matchers in matchers_by_match_type.items():
for event_values in event_datastore.get_values(match_type):
if all(matcher.matches(event_values) for matcher in matchers):
if all(matcher.matches(event_values) for matcher in matchers): # type: ignore[call-arg]
break
else:
return None

return FingerprintWithAttributes(self.fingerprint, self.attributes)

def _test_with_frame_context(
    self,
    event_datastore: EventDatastore,
    matchers_by_match_type: dict[str, list[FingerprintMatcher | CallerMatcher | CalleeMatcher]],
) -> None | FingerprintWithAttributes:
    """
    Evaluate the rule when it contains caller/callee matchers.

    Every non-frame match type must have at least one event value set that
    satisfies all of its matchers. The frame matchers must then all accept
    the same frame, with caller/callee matchers additionally receiving the
    frame's index and the full frame list. Returns the rule's fingerprint and
    attributes on success, otherwise None.
    """
    # Non-frame groups: same "some value set satisfies every matcher" check
    # used for rules without sibling matchers.
    for match_type, group in matchers_by_match_type.items():
        if match_type == "frames":
            continue
        plain_matchers = [m for m in group if isinstance(m, FingerprintMatcher)]
        satisfied = any(
            all(m.matches(event_values) for m in plain_matchers)
            for event_values in event_datastore.get_values(match_type)
        )
        if not satisfied:
            return None

    if "frames" not in matchers_by_match_type:
        return FingerprintWithAttributes(self.fingerprint, self.attributes)

    frame_matchers = matchers_by_match_type["frames"]
    all_frames = event_datastore.get_values("frames")

    def _accepts(matcher, frame, frame_idx):
        # Sibling matchers need positional context; regular ones only the frame.
        if isinstance(matcher, (CallerMatcher, CalleeMatcher)):
            return matcher.matches(frame, frame_idx, all_frames)
        return matcher.matches(frame)

    # The first frame accepted by every frame matcher decides the rule.
    for frame_idx, frame in enumerate(all_frames):
        if all(_accepts(matcher, frame, frame_idx) for matcher in frame_matchers):
            return FingerprintWithAttributes(self.fingerprint, self.attributes)

    # No matching frame sequence found
    return None

def _to_config_structure(self) -> FingerprintRuleJSON:
config_structure: FingerprintRuleJSON = {
"text": self.text,
Expand Down
24 changes: 24 additions & 0 deletions src/sentry/grouping/fingerprinting/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ class _ReleaseInfo(TypedDict):
release: str | None


class _ThreadInfo(TypedDict):
thread_id: str | None
thread_name: str | None
thread_state: str | None
thread_crashed: bool
thread_current: bool


class EventDatastore:
def __init__(self, event: Mapping[str, Any]) -> None:
self.event = event
Expand All @@ -60,6 +68,7 @@ def __init__(self, event: Mapping[str, Any]) -> None:
self._sdk: list[_SdkInfo] | None = None
self._family: list[_FamilyInfo] | None = None
self._release: list[_ReleaseInfo] | None = None
self._threads: list[_ThreadInfo] | None = None

def get_values(self, match_type: str) -> list[dict[str, Any]]:
"""
Expand Down Expand Up @@ -154,3 +163,18 @@ def _get_release(self) -> list[_ReleaseInfo]:
{"release": self.event["release"].strip() if self.event.get("release") else None}
]
return self._release

def _get_threads(self) -> list[_ThreadInfo]:
    """
    Return one `_ThreadInfo` per entry under the event's `threads.values`.

    The list is built on the first call and cached on `self._threads`;
    later calls return the cached list. An event without threads yields an
    empty list.
    """
    if self._threads is None:
        threads: list[_ThreadInfo] = []
        for thread in get_path(self.event, "threads", "values", filter=True) or ():
            raw_id = thread.get("id")
            threads.append(
                {
                    # Thread ids are frequently sent as integers. Normalise to
                    # str so the value honours the declared `str | None` type
                    # and can be compared against a textual glob pattern.
                    "thread_id": None if raw_id is None else str(raw_id),
                    "thread_name": thread.get("name"),
                    "thread_state": thread.get("state"),
                    "thread_crashed": bool(thread.get("crashed")),
                    "thread_current": bool(thread.get("current")),
                }
            )
        self._threads = threads
    return self._threads
Loading
Loading