Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Share some metrics between the Prometheus exporter and the phone home stats #13671

Merged
merged 13 commits into from
Sep 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13671.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Introduce a `CommonUsageMetrics` class to share some usage metrics between the Prometheus exporter and the phone home stats.
5 changes: 3 additions & 2 deletions synapse/app/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,9 +511,10 @@ def run_sighup(*args: Any, **kwargs: Any) -> None:
setup_sentry(hs)
setup_sdnotify(hs)

# If background tasks are running on the main process, start collecting the
# phone home stats.
# If background tasks are running on the main process or this is the worker in
# charge of them, start collecting the phone home stats and shared usage metrics.
if hs.config.worker.run_background_tasks:
await hs.get_common_usage_metrics_manager().setup()
start_phone_stats_home(hs)

# We now freeze all allocated objects in the hopes that (almost)
Expand Down
13 changes: 12 additions & 1 deletion synapse/app/phone_stats_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@ async def phone_stats_home(
stats: JsonDict,
stats_process: List[Tuple[int, "resource.struct_rusage"]] = _stats_process,
) -> None:
"""Collect usage statistics and send them to the configured endpoint.

Args:
hs: the HomeServer object to use for gathering usage data.
stats: the dict in which to store the statistics sent to the configured
endpoint. Mostly used in tests to figure out the data that is supposed to
be sent.
stats_process: statistics about resource usage of the process.
"""

logger.info("Gathering stats for reporting")
now = int(hs.get_clock().time())
# Ensure the homeserver has started.
Expand Down Expand Up @@ -83,6 +93,7 @@ async def phone_stats_home(
#

store = hs.get_datastores().main
common_metrics = await hs.get_common_usage_metrics_manager().get_metrics()

stats["homeserver"] = hs.config.server.server_name
stats["server_context"] = hs.config.server.server_context
Expand All @@ -104,7 +115,7 @@ async def phone_stats_home(
room_count = await store.get_room_count()
stats["total_room_count"] = room_count

stats["daily_active_users"] = await store.count_daily_users()
stats["daily_active_users"] = common_metrics.daily_active_users
stats["monthly_active_users"] = await store.count_monthly_users()
daily_active_e2ee_rooms = await store.count_daily_active_e2ee_rooms()
stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms
Expand Down
79 changes: 79 additions & 0 deletions synapse/metrics/common_usage_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Copyright 2022 The Matrix.org Foundation C.I.C
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

import attr

from synapse.metrics.background_process_metrics import run_as_background_process

if TYPE_CHECKING:
from synapse.server import HomeServer

from prometheus_client import Gauge

# Gauge to expose daily active users metrics
current_dau_gauge = Gauge(
"synapse_admin_daily_active_users",
"Current daily active users count",
)


@attr.s(auto_attribs=True)
class CommonUsageMetrics:
"""Usage metrics shared between the phone home stats and the prometheus exporter."""

daily_active_users: int


class CommonUsageMetricsManager:
"""Collects common usage metrics."""

def __init__(self, hs: "HomeServer") -> None:
self._store = hs.get_datastores().main
self._clock = hs.get_clock()

async def get_metrics(self) -> CommonUsageMetrics:
"""Get the CommonUsageMetrics object. If no collection has happened yet, do it
before returning the metrics.

Returns:
The CommonUsageMetrics object to read common metrics from.
"""
return await self._collect()

async def setup(self) -> None:
"""Keep the gauges for common usage metrics up to date."""
await self._update_gauges()
self._clock.looping_call(
run_as_background_process,
5 * 60 * 1000,
desc="common_usage_metrics_update_gauges",
func=self._update_gauges,
)

async def _collect(self) -> CommonUsageMetrics:
"""Collect the common metrics and either create the CommonUsageMetrics object to
use if it doesn't exist yet, or update it.
"""
dau_count = await self._store.count_daily_users()

return CommonUsageMetrics(
daily_active_users=dau_count,
)

async def _update_gauges(self) -> None:
"""Update the Prometheus gauges."""
metrics = await self._collect()

current_dau_gauge.set(float(metrics.daily_active_users))
6 changes: 6 additions & 0 deletions synapse/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
from synapse.handlers.user_directory import UserDirectoryHandler
from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpClient
from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
from synapse.module_api import ModuleApi
from synapse.notifier import Notifier
from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator
Expand Down Expand Up @@ -829,3 +830,8 @@ def get_request_ratelimiter(self) -> RequestRatelimiter:
self.config.ratelimiting.rc_message,
self.config.ratelimiting.rc_admin_redaction,
)

@cache_in_self
def get_common_usage_metrics_manager(self) -> CommonUsageMetricsManager:
"""Usage metrics shared between phone home stats and the prometheus exporter."""
return CommonUsageMetricsManager(self)
46 changes: 45 additions & 1 deletion tests/test_phone_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@
import resource
from unittest import mock

from twisted.test.proto_helpers import MemoryReactor

from synapse.app.phone_stats_home import phone_stats_home
from synapse.rest import admin
from synapse.rest.client import login, sync
from synapse.server import HomeServer
from synapse.types import JsonDict
from synapse.util import Clock

from tests.unittest import HomeserverTestCase

Expand Down Expand Up @@ -47,5 +53,43 @@ def test_performance_100(self) -> None:
stats: JsonDict = {}
self.reactor.advance(1)
# `old_resource` has type `Mock` instead of `struct_rusage`
self.get_success(phone_stats_home(self.hs, stats, past_stats)) # type: ignore[arg-type]
self.get_success(
phone_stats_home(self.hs, stats, past_stats) # type: ignore[arg-type]
)
self.assertApproximates(stats["cpu_average"], 100, tolerance=2.5)


class CommonMetricsTestCase(HomeserverTestCase):
servlets = [
admin.register_servlets,
login.register_servlets,
sync.register_servlets,
]

def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.metrics_manager = hs.get_common_usage_metrics_manager()
self.get_success(self.metrics_manager.setup())

def test_dau(self) -> None:
"""Tests that the daily active users count is correctly updated."""
self._assert_metric_value("daily_active_users", 0)

self.register_user("user", "password")
tok = self.login("user", "password")
self.make_request("GET", "/sync", access_token=tok)

self.pump(1)

self._assert_metric_value("daily_active_users", 1)

def _assert_metric_value(self, metric_name: str, expected: int) -> None:
"""Compare the given value to the current value of the common usage metric with
the given name.

Args:
metric_name: The metric to look up.
expected: Expected value for this metric.
"""
metrics = self.get_success(self.metrics_manager.get_metrics())
value = getattr(metrics, metric_name)
self.assertEqual(value, expected)