feat(replays): Improve index page query performance #45098

Merged: 9 commits on Feb 28, 2023
2 changes: 1 addition & 1 deletion src/sentry/replays/post_process.py
@@ -25,7 +25,7 @@ def generate_normalized_output(
"""For each payload in the response strip "agg_" prefixes."""
for item in response:
item["id"] = item.pop("replay_id", None)
item["project_id"] = item.pop("projectId", None)
item["project_id"] = str(item["project_id"])
item["trace_ids"] = item.pop("traceIds", [])
item["error_ids"] = item.pop("errorIds", [])
item["environment"] = item.pop("agg_environment", None)
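
Note on the post_process hunk above: rows now carry the grouped `project_id` column directly (a number), so normalization stringifies it in place instead of popping the old `projectId` aggregation alias. A minimal sketch of the intended behavior, using a made-up row:

```python
# Hypothetical response row; only the keys relevant to this hunk are shown.
item = {"replay_id": "b58a67446c184f40a78bf1c099ea2b8f", "project_id": 42}

item["id"] = item.pop("replay_id", None)
item["project_id"] = str(item["project_id"])  # API keeps exposing a string id

assert item == {"id": "b58a67446c184f40a78bf1c099ea2b8f", "project_id": "42"}
```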
113 changes: 49 additions & 64 deletions src/sentry/replays/query.py
@@ -7,6 +7,7 @@
from snuba_sdk import (
Column,
Condition,
CurriedFunction,
Entity,
Function,
Granularity,
@@ -153,7 +154,7 @@ def query_replays_dataset(
*having,
],
orderby=sorting,
groupby=[Column("replay_id")],
groupby=[Column("project_id"), Column("replay_id")],
granularity=Granularity(3600),
**query_options,
),
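
Adding `project_id` to the GROUP BY above is what allows the select list to read the raw column instead of wrapping it in an aggregate. A rough snuba_sdk sketch of the shape (not the full query; the real one also carries select, where, having, orderby and pagination):

```python
from snuba_sdk import Column, Entity, Granularity, Query

# Sketch only: grouping on (project_id, replay_id) so project_id can be
# selected directly rather than reconstructed from an aggregation.
query = Query(
    match=Entity("replays"),
    select=[Column("project_id"), Column("replay_id")],
    groupby=[Column("project_id"), Column("replay_id")],
    granularity=Granularity(3600),
)
```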
@@ -308,17 +309,22 @@ def _grouped_unique_values(
)


def _grouped_unique_scalar_value(
column_name: str, alias: Optional[str] = None, aliased: bool = True
def take_first_from_aggregation(
column_name: str,
alias: Optional[str] = None,
aliased: bool = True,
) -> Function:
"""Returns the first value of a unique array.
"""Returns the first value encountered in an aggregated array.

E.g.
[1, 2, 2, 3, 3, 3, null] => [1, 2, 3] => 1
[1, 2, 2, 3, 3, 3, null] => [1] => 1
"""
return Function(
"arrayElement",
parameters=[_grouped_unique_values(column_name), 1],
parameters=[
CurriedFunction("groupArray", initializers=[1], parameters=[Column(column_name)]),
1,
],
alias=alias or column_name if aliased else None,
)
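
For reference, the helper above leans on a ClickHouse curried aggregate: `groupArray(1)(col)` keeps at most one non-null value per group, which is cheaper than building a full unique array and indexing into it. A sketch of what the helper produces for a single column:

```python
from snuba_sdk import Column, CurriedFunction, Function

# Roughly what take_first_from_aggregation("platform") builds; renders as
# arrayElement(groupArray(1)(platform), 1) AS platform in ClickHouse SQL.
first_platform = Function(
    "arrayElement",
    parameters=[
        CurriedFunction("groupArray", initializers=[1], parameters=[Column("platform")]),
        1,  # ClickHouse arrays are 1-indexed
    ],
    alias="platform",
)
```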

@@ -383,11 +389,11 @@ class ReplayQueryConfig(QueryConfig):
releases = ListField()
release = ListField(query_alias="releases")
dist = String()
error_ids = ListField(query_alias="error_ids", is_uuid=True)
error_id = ListField(query_alias="error_ids", is_uuid=True)
trace_ids = ListField(query_alias="trace_ids", is_uuid=True)
trace_id = ListField(query_alias="trace_ids", is_uuid=True)
trace = ListField(query_alias="trace_ids", is_uuid=True)
error_ids = ListField(query_alias="errorIds")
error_id = ListField(query_alias="errorIds")
trace_ids = ListField(query_alias="traceIds")
trace_id = ListField(query_alias="traceIds")
trace = ListField(query_alias="traceIds")
urls = ListField(query_alias="urls_sorted")
url = ListField(query_alias="urls_sorted")
user_id = String(field_alias="user.id", query_alias="user_id")
@@ -421,8 +427,8 @@ class ReplayQueryConfig(QueryConfig):
started_at = String(is_filterable=False)
finished_at = String(is_filterable=False)
# Dedicated url parameter should be used.
project_id = String(query_alias="projectId", is_filterable=False)
project = String(query_alias="projectId", is_filterable=False)
project_id = String(query_alias="project_id", is_filterable=False)
project = String(query_alias="project_id", is_filterable=False)


# Pagination.
@@ -467,20 +473,8 @@ def _activity_score():
# score = (count_errors * 25 + pagesVisited * 5 ) / 10;
# score = Math.floor(Math.min(10, Math.max(1, score)));

error_weight = Function(
"multiply",
parameters=[Column("count_errors"), 25],
)
pages_visited_weight = Function(
"multiply",
parameters=[
Function(
"length",
parameters=[Column("urls_sorted")],
),
5,
],
)
error_weight = Function("multiply", parameters=[Column("count_errors"), 25])
pages_visited_weight = Function("multiply", parameters=[Column("count_urls"), 5])

combined_weight = Function(
"plus",
@@ -549,10 +543,10 @@ def _activity_score():
"urls": ["urls_sorted", "agg_urls"],
"url": ["urls_sorted", "agg_urls"],
"count_errors": ["count_errors"],
"count_urls": ["count_urls", "urls_sorted", "agg_urls"],
"count_urls": ["count_urls"],
"count_segments": ["count_segments"],
"is_archived": ["is_archived"],
"activity": ["activity", "count_errors", "urls_sorted", "agg_urls"],
"activity": ["activity", "count_errors", "count_urls"],
"user": ["user_id", "user_email", "user_name", "user_ip"],
"os": ["os_name", "os_version"],
"browser": ["browser_name", "browser_version"],
@@ -582,18 +576,7 @@ def _activity_score():

QUERY_ALIAS_COLUMN_MAP = {
"replay_id": _strip_uuid_dashes("replay_id", Column("replay_id")),
"replay_type": _grouped_unique_scalar_value(column_name="replay_type", alias="replay_type"),
"project_id": Function(
"toString",
parameters=[_grouped_unique_scalar_value(column_name="project_id", alias="agg_pid")],
alias="projectId",
),
"platform": _grouped_unique_scalar_value(column_name="platform"),
"agg_environment": _grouped_unique_scalar_value(
column_name="environment", alias="agg_environment"
),
"releases": _grouped_unique_values(column_name="release", alias="releases", aliased=True),
"dist": _grouped_unique_scalar_value(column_name="dist"),
"project_id": Column("project_id"),
"trace_ids": Function(
"arrayMap",
parameters=[
@@ -637,13 +620,13 @@ def _activity_score():
),
"count_segments": Function("count", parameters=[Column("segment_id")], alias="count_segments"),
"count_errors": Function(
"uniqArray",
parameters=[Column("error_ids")],
"sum",
parameters=[Function("length", parameters=[Column("error_ids")])],
alias="count_errors",
),
"count_urls": Function(
"length",
parameters=[Column("urls_sorted")],
"sum",
parameters=[Function("length", parameters=[Column("urls")])],
alias="count_urls",
),
"is_archived": Function(
@@ -652,29 +635,31 @@
alias="isArchived",
),
"activity": _activity_score(),
"user_id": _grouped_unique_scalar_value(column_name="user_id"),
"user_email": _grouped_unique_scalar_value(column_name="user_email"),
"user_name": _grouped_unique_scalar_value(column_name="user_name"),
"releases": _grouped_unique_values(column_name="release", alias="releases", aliased=True),
"replay_type": take_first_from_aggregation(column_name="replay_type", alias="replay_type"),
"platform": take_first_from_aggregation(column_name="platform"),
"agg_environment": take_first_from_aggregation(
column_name="environment", alias="agg_environment"
),
"dist": take_first_from_aggregation(column_name="dist"),
"user_id": take_first_from_aggregation(column_name="user_id"),
"user_email": take_first_from_aggregation(column_name="user_email"),
"user_name": take_first_from_aggregation(column_name="user_name"),
"user_ip": Function(
"IPv4NumToString",
parameters=[
_grouped_unique_scalar_value(
column_name="ip_address_v4",
aliased=False,
)
],
parameters=[take_first_from_aggregation(column_name="ip_address_v4", aliased=False)],
alias="user_ip",
),
"os_name": _grouped_unique_scalar_value(column_name="os_name"),
"os_version": _grouped_unique_scalar_value(column_name="os_version"),
"browser_name": _grouped_unique_scalar_value(column_name="browser_name"),
"browser_version": _grouped_unique_scalar_value(column_name="browser_version"),
"device_name": _grouped_unique_scalar_value(column_name="device_name"),
"device_brand": _grouped_unique_scalar_value(column_name="device_brand"),
"device_family": _grouped_unique_scalar_value(column_name="device_family"),
"device_model": _grouped_unique_scalar_value(column_name="device_model"),
"sdk_name": _grouped_unique_scalar_value(column_name="sdk_name"),
"sdk_version": _grouped_unique_scalar_value(column_name="sdk_version"),
"os_name": take_first_from_aggregation(column_name="os_name"),
"os_version": take_first_from_aggregation(column_name="os_version"),
"browser_name": take_first_from_aggregation(column_name="browser_name"),
"browser_version": take_first_from_aggregation(column_name="browser_version"),
"device_name": take_first_from_aggregation(column_name="device_name"),
"device_brand": take_first_from_aggregation(column_name="device_brand"),
"device_family": take_first_from_aggregation(column_name="device_family"),
"device_model": take_first_from_aggregation(column_name="device_model"),
"sdk_name": take_first_from_aggregation(column_name="sdk_name"),
"sdk_version": take_first_from_aggregation(column_name="sdk_version"),
"tk": Function("groupArrayArray", parameters=[Column("tags.key")], alias="tk"),
"tv": Function("groupArrayArray", parameters=[Column("tags.value")], alias="tv"),
}
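
One behavioral note on the count columns in QUERY_ALIAS_COLUMN_MAP above: `count_errors` moves from `uniqArray(error_ids)` (distinct ids across the group) to `sum(length(error_ids))` (total ids, duplicates included), and `count_urls` likewise sums `length(urls)` per row instead of measuring the assembled `urls_sorted` array. A minimal sketch of the new expressions, assuming the same snuba_sdk imports the file already uses:

```python
from snuba_sdk import Column, Function

# sum(length(error_ids)) AS count_errors — a plain streaming aggregate.
count_errors = Function(
    "sum",
    parameters=[Function("length", parameters=[Column("error_ids")])],
    alias="count_errors",
)

# sum(length(urls)) AS count_urls — no longer depends on urls_sorted/agg_urls,
# which is why the QUERY_ALIAS_REQUIRED_FIELDS entries above could shrink.
count_urls = Function(
    "sum",
    parameters=[Function("length", parameters=[Column("urls")])],
    alias="count_urls",
)
```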
2 changes: 1 addition & 1 deletion src/sentry/replays/serializers.py
@@ -17,7 +17,7 @@ def serialize(self, obj: ReplayRecordingSegment, attrs, user):
VALID_FIELD_SET = {
"id",
"title",
"projectId",
"project_id",
"errorIds",
"traceIds",
"urls",
6 changes: 6 additions & 0 deletions tests/sentry/replays/test_organization_replay_index.py
@@ -68,6 +68,7 @@ def test_get_replays(self):
# error_ids=[uuid.uuid4().hex, replay1_id], # duplicate error-id
urls=["http://localhost:3000/"], # duplicate urls are okay
tags={"test": "world", "other": "hello"},
error_ids=[],
)
)

@@ -434,6 +435,10 @@ def test_get_replays_user_filters(self):
seq2_timestamp,
project.id,
replay1_id,
user_id=None,
user_name=None,
user_email=None,
ipv4=None,
os_name=None,
os_version=None,
browser_name=None,
device_family=None,
device_model=None,
tags={"a": "n", "b": "o"},
error_ids=[],
)
)

2 changes: 2 additions & 0 deletions tests/sentry/replays/test_project_replay_details.py
@@ -109,6 +109,7 @@ def test_get_replay_schema(self):
segment_id=1,
trace_ids=[trace_id_2],
urls=["http://www.sentry.io/"],
error_ids=[],
)
)
self.store_replays(
@@ -119,6 +120,7 @@ def test_get_replay_schema(self):
trace_ids=[trace_id_2],
urls=["http://localhost:3000/"],
error_ids=[],
)
)
