Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add metrics to track /messages response time by room size #13545

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13545.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update metrics to track `/messages` response time by room size.
55 changes: 53 additions & 2 deletions synapse/rest/client/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
""" This module contains REST servlets to do with rooms: /rooms/<paths> """
import logging
import re
from enum import Enum
from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple
from urllib import parse as urlparse

Expand Down Expand Up @@ -48,6 +49,7 @@
parse_strings_from_args,
)
from synapse.http.site import SynapseRequest
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.logging.opentracing import set_tag
from synapse.rest.client._base import client_patterns
from synapse.rest.client.transactions import HttpTransactionCache
Expand All @@ -62,6 +64,33 @@

logger = logging.getLogger(__name__)


class _RoomSize(Enum):
"""
Enum to differentiate sizes of rooms. This is a pretty good approximation
about how hard it will be to get events in the room. We could also look at
room "complexity".
"""

# This doesn't necessarily mean the room is a DM, just that there is a DM
# amount of people there.
DM_SIZE = "direct_message_size"
SMALL = "small"
SUBSTANTIAL = "substantial"
LARGE = "large"

@staticmethod
def from_member_count(member_count: int) -> "_RoomSize":
if member_count <= 2:
return _RoomSize.DM_SIZE
elif member_count < 100:
return _RoomSize.SMALL
elif member_count < 1000:
return _RoomSize.SUBSTANTIAL
else:
return _RoomSize.LARGE


# This is an extra metric on top of `synapse_http_server_response_time_seconds`
# which times the same sort of thing but this one allows us to see values
# greater than 10s. We use a separate dedicated histogram with its own buckets
Expand All @@ -70,7 +99,11 @@
messsages_response_timer = Histogram(
"synapse_room_message_list_rest_servlet_response_time_seconds",
"sec",
[],
# We have a label for room size so we can try to see a more realistic
# picture of /messages response time for bigger rooms. We don't want the
# tiny rooms that can always respond fast skewing our results when we're trying
# to optimize the bigger cases.
["room_size"],
buckets=(
0.005,
0.01,
Expand Down Expand Up @@ -587,14 +620,26 @@ class RoomMessageListRestServlet(RestServlet):
def __init__(self, hs: "HomeServer"):
super().__init__()
self._hs = hs
self.clock = hs.get_clock()
self.pagination_handler = hs.get_pagination_handler()
self.auth = hs.get_auth()
self.store = hs.get_datastores().main

@messsages_response_timer.time()
async def on_GET(
self, request: SynapseRequest, room_id: str
) -> Tuple[int, JsonDict]:
processing_start_time = self.clock.time_msec()
# Fire off and hope that we get a result by the end.
#
# We're using the mypy type ignore comment because the `@cached`
# decorator on `get_number_joined_users_in_room` doesn't play well with
# the type system. Maybe in the future, it can use some ParamSpec
# wizardry to fix it up.
room_member_count_deferred = run_in_background( # type: ignore[call-arg]
self.store.get_number_joined_users_in_room,
room_id, # type: ignore[arg-type]
)

requester = await self.auth.get_user_by_req(request, allow_guest=True)
pagination_config = await PaginationConfig.from_request(
self.store, request, default_limit=10
Expand Down Expand Up @@ -625,6 +670,12 @@ async def on_GET(
event_filter=event_filter,
)

processing_end_time = self.clock.time_msec()
room_member_count = await make_deferred_yieldable(room_member_count_deferred)
messsages_response_timer.labels(
room_size=_RoomSize.from_member_count(room_member_count)
).observe((processing_start_time - processing_end_time) / 1000)

return 200, msgs


Expand Down