diff --git a/docker/conf/homeserver.yaml b/docker/conf/homeserver.yaml
index f10f78a48cd2..d35be7650933 100644
--- a/docker/conf/homeserver.yaml
+++ b/docker/conf/homeserver.yaml
@@ -184,3 +184,23 @@ trusted_key_servers:
 
 password_config:
   enabled: true
+
+
+# Tracing configuration
+tracing:
+  enabled: true
+  sample_rate: 1
+  homeserver_whitelist:
+    - ".*"
+  jaeger_exporter_config:
+    agent_host_name: host.docker.internal
+    agent_port: 6831
+    # Split UDP packets (UDP_PACKET_MAX_LENGTH is set to 65k in OpenTelemetry)
+    udp_split_oversized_batches: true
+    # If you define a collector, it will communicate directly with the collector,
+    # bypassing the agent.
+    #
+    # It does not seem like the agent can keep up with the massive UDP load
+    # (1065 spans in one trace), so let's just use the HTTP collector endpoint
+    # instead, which seems to work.
+    collector_endpoint: "http://host.docker.internal:14268/api/traces?format=jaeger.thrift"
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index f9000d326aa4..79e792395f72 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -61,7 +61,13 @@
 from synapse.federation.federation_client import InvalidResponseError
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import nested_logging_context
-from synapse.logging.tracing import SynapseTags, set_attribute, tag_args, trace
+from synapse.logging.tracing import (
+    SynapseTags,
+    set_attribute,
+    start_active_span,
+    tag_args,
+    trace,
+)
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.module_api import NOT_SPAM
 from synapse.replication.http.federation import (
@@ -349,10 +355,14 @@ async def _maybe_backfill_inner(
         # attempting to paginate before backfill reached the visible history.
 
         extremities_to_request: List[str] = []
-        for bp in sorted_backfill_points:
+        for i, bp in enumerate(sorted_backfill_points):
             if len(extremities_to_request) >= 5:
                 break
 
+            set_attribute(
+                SynapseTags.RESULT_PREFIX + "backfill_point" + str(i), str(bp)
+            )
+
             # For regular backwards extremities, we don't have the extremity events
             # themselves, so we need to actually check the events that reference them -
             # their "successor" events.
@@ -409,14 +419,15 @@ async def _maybe_backfill_inner(
             str(len(extremities_to_request)),
         )
 
-        # Now we need to decide which hosts to hit first.
-        # First we try hosts that are already in the room.
-        # TODO: HEURISTIC ALERT.
-        likely_domains = (
-            await self._storage_controllers.state.get_current_hosts_in_room_ordered(
-                room_id
+        with start_active_span("getting likely_domains"):
+            # Now we need to decide which hosts to hit first.
+            # First we try hosts that are already in the room.
+            # TODO: HEURISTIC ALERT.
+            likely_domains = (
+                await self._storage_controllers.state.get_current_hosts_in_room_ordered(
+                    room_id
+                )
             )
-        )
 
         async def try_backfill(domains: Collection[str]) -> bool:
             # TODO: Should we try multiple of these at a time?
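The `with start_active_span("getting likely_domains")` block above wraps the host lookup in a dedicated tracing span so it shows up as its own unit in the Jaeger trace view. Below is a minimal sketch of the same pattern written against the plain OpenTelemetry API rather than Synapse's `synapse.logging.tracing` wrappers; the tracer setup and the `lookup_hosts_in_room` helper are illustrative stand-ins, not Synapse code:

    from typing import List

    from opentelemetry import trace

    tracer = trace.get_tracer(__name__)

    def lookup_hosts_in_room(room_id: str) -> List[str]:
        # Stand-in for the real storage lookup
        # (get_current_hosts_in_room_ordered in the diff above).
        return ["matrix.org", "example.com"]

    def get_likely_domains(room_id: str) -> List[str]:
        # Naming the span makes the lookup appear as a distinct block in
        # the trace, so a slow host lookup is easy to spot.
        with tracer.start_as_current_span("getting likely_domains") as span:
            domains = lookup_hosts_in_room(room_id)
            span.set_attribute("RESULT.likely_domains_count", len(domains))
            return domains

Synapse's `start_active_span` plays the role of `start_as_current_span` here.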
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 8357f6f84e83..378b863c5f8a 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1249,6 +1249,11 @@ async def _get_state_ids_after_missing_prev_event(
             str(len(failed_to_fetch)),
         )
 
+        set_attribute(
+            SynapseTags.RESULT_PREFIX + f"failed_to_fetch ({len(failed_to_fetch)})",
+            str(failed_to_fetch),
+        )
+
         if remote_event.is_state() and remote_event.rejected_reason is None:
             state_map[
                 (remote_event.type, remote_event.state_key)
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 2c990aab0a21..64d373e9d70c 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -20,7 +20,7 @@
 from synapse.api.constants import EventTypes, RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase, relation_from_event
-from synapse.logging.tracing import trace
+from synapse.logging.tracing import SynapseTags, set_attribute, trace
 from synapse.storage.databases.main.relations import ThreadsNextBatch, _RelatedEvent
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, Requester, StreamToken, UserID
@@ -172,6 +172,7 @@ async def get_relations(
 
         return return_value
 
+    @trace
     async def get_relations_for_event(
         self,
         event_id: str,
@@ -259,6 +260,7 @@ async def redact_events_related_to(
                 e.msg,
             )
 
+    @trace
    async def get_annotations_for_event(
         self,
         event_id: str,
@@ -304,6 +306,7 @@
 
         return filtered_results
 
+    @trace
     async def _get_threads_for_events(
         self,
         events_by_id: Dict[str, EventBase],
@@ -465,6 +468,11 @@ async def get_bundled_aggregations(
             # The event should get bundled aggregations.
             events_by_id[event.event_id] = event
 
+        set_attribute(
+            SynapseTags.FUNC_ARG_PREFIX + f"event_ids ({len(events_by_id)})",
+            str(events_by_id.keys()),
+        )
+
         # event ID -> bundled aggregation in non-serialized form.
         results: Dict[str, BundledAggregations] = {}
 
diff --git a/synapse/logging/tracing.py b/synapse/logging/tracing.py
index d455854467ee..c92769e1a042 100644
--- a/synapse/logging/tracing.py
+++ b/synapse/logging/tracing.py
@@ -291,7 +291,8 @@ class SynapseTags:
 
     # Used to tag function arguments
     #
-    # Tag a named arg. The name of the argument should be appended to this prefix.
+    # Tag a named arg. The name of the argument should be appended to this
+    # prefix.
     FUNC_ARG_PREFIX = "ARG."
     # Tag extra variadic number of positional arguments (`def foo(first, second, *extras)`)
     FUNC_ARGS = "args"
@@ -928,7 +929,6 @@ def trace(func: Callable[P, R]) -> Callable[P, R]:
 
     See the module's doc string for usage examples.
""" - return trace_with_opname(func.__name__)(func) diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index ca431002c8d7..8a1c206d9585 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -32,6 +32,7 @@ from synapse.api.constants import MAIN_TIMELINE, RelationTypes from synapse.api.errors import SynapseError from synapse.events import EventBase +from synapse.logging.tracing import trace from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -503,6 +504,10 @@ def _get_aggregation_groups_for_users_txn( def get_applicable_edit(self, event_id: str) -> Optional[EventBase]: raise NotImplementedError() + # TODO: What's the proper way to fix this so we can stack @trace on top of + # @cachedList + # + # @trace @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids") async def get_applicable_edits( self, event_ids: Collection[str]