diff --git a/changelog.d/9854.bugfix b/changelog.d/9854.bugfix new file mode 100644 index 000000000000..e39a3f991506 --- /dev/null +++ b/changelog.d/9854.bugfix @@ -0,0 +1 @@ +Fix a regression in Synapse 1.32.0 which caused Synapse to report large numbers of Prometheus time series, potentially overwhelming Prometheus instances. diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 78e9cfbc26ee..3f621539f367 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -16,7 +16,7 @@ import logging import threading from functools import wraps -from typing import TYPE_CHECKING, Dict, Optional, Set +from typing import TYPE_CHECKING, Dict, Optional, Set, Union from prometheus_client.core import REGISTRY, Counter, Gauge @@ -199,7 +199,7 @@ async def run(): _background_process_start_count.labels(desc).inc() _background_process_in_flight_count.labels(desc).inc() - with BackgroundProcessLoggingContext("%s-%s" % (desc, count)) as context: + with BackgroundProcessLoggingContext(desc, count) as context: try: ctx = noop_context_manager() if bg_start_span: @@ -244,8 +244,20 @@ class BackgroundProcessLoggingContext(LoggingContext): __slots__ = ["_proc"] - def __init__(self, name: str): - super().__init__(name) + def __init__(self, name: str, instance_id: Optional[Union[int, str]] = None): + """ + + Args: + name: The name of the background process. Each distinct `name` gets a + separate prometheus time series. + + instance_id: an identifer to add to `name` to distinguish this instance of + the named background process in the logs. If this is `None`, one is + made up based on id(self). + """ + if instance_id is None: + instance_id = id(self) + super().__init__("%s-%s" % (name, instance_id)) self._proc = _BackgroundProcess(name, self) def start(self, rusage: "Optional[resource._RUsage]"): diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index ba753318bd81..d10d57424618 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -185,7 +185,7 @@ def __init__(self, clock: Clock, handler: "ReplicationCommandHandler"): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. self._logging_context = BackgroundProcessLoggingContext( - "replication-conn-%s" % (self.conn_id,) + "replication-conn", self.conn_id ) def connectionMade(self):