From b13836da7f5951bfe3973819b5affa12805185cf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 3 Sep 2018 17:22:49 +0100 Subject: [PATCH 1/3] Remove conn_id from repl prometheus metrics `conn_id` gets set to a random string, and so we end up filling up prometheus with tonnes of data series, which is bad. --- synapse/replication/tcp/protocol.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 74e892c10416..0d28152703bb 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -607,9 +607,9 @@ def transport_buffer_size(protocol): transport_send_buffer = LaterGauge( "synapse_replication_tcp_protocol_transport_send_buffer", "", - ["name", "conn_id"], + ["name"], lambda: { - (p.name, p.conn_id): transport_buffer_size(p) for p in connected_connections + (p.name,): transport_buffer_size(p) for p in connected_connections }, ) @@ -632,9 +632,9 @@ def transport_kernel_read_buffer_size(protocol, read=True): tcp_transport_kernel_send_buffer = LaterGauge( "synapse_replication_tcp_protocol_transport_kernel_send_buffer", "", - ["name", "conn_id"], + ["name"], lambda: { - (p.name, p.conn_id): transport_kernel_read_buffer_size(p, False) + (p.name,): transport_kernel_read_buffer_size(p, False) for p in connected_connections }, ) @@ -643,9 +643,9 @@ def transport_kernel_read_buffer_size(protocol, read=True): tcp_transport_kernel_read_buffer = LaterGauge( "synapse_replication_tcp_protocol_transport_kernel_read_buffer", "", - ["name", "conn_id"], + ["name"], lambda: { - (p.name, p.conn_id): transport_kernel_read_buffer_size(p, True) + (p.name,): transport_kernel_read_buffer_size(p, True) for p in connected_connections }, ) @@ -654,9 +654,9 @@ def transport_kernel_read_buffer_size(protocol, read=True): tcp_inbound_commands = LaterGauge( "synapse_replication_tcp_protocol_inbound_commands", "", - ["command", "name", "conn_id"], + ["command", "name"], lambda: { - (k[0], p.name, p.conn_id): count + (k[0], p.name,): count for p in connected_connections for k, count in iteritems(p.inbound_commands_counter) }, @@ -665,9 +665,9 @@ def transport_kernel_read_buffer_size(protocol, read=True): tcp_outbound_commands = LaterGauge( "synapse_replication_tcp_protocol_outbound_commands", "", - ["command", "name", "conn_id"], + ["command", "name"], lambda: { - (k[0], p.name, p.conn_id): count + (k[0], p.name,): count for p in connected_connections for k, count in iteritems(p.outbound_commands_counter) }, From 87b111f96a244cd212e8fff7f1a0f661fdd8073e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 3 Sep 2018 17:26:15 +0100 Subject: [PATCH 2/3] Newsfile --- changelog.d/3788.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3788.bugfix diff --git a/changelog.d/3788.bugfix b/changelog.d/3788.bugfix new file mode 100644 index 000000000000..72316fb881fa --- /dev/null +++ b/changelog.d/3788.bugfix @@ -0,0 +1 @@ +Remove connection ID for replication prometheus metrics, as it creates a large number of new series. From 3e242dc14976bf455717c5f631a997a2c0e5f2c4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 4 Sep 2018 11:45:52 +0100 Subject: [PATCH 3/3] Remove conn_id --- synapse/replication/tcp/protocol.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 0d28152703bb..5dc7b3fffcf3 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -590,9 +590,9 @@ def on_connection_closed(self): pending_commands = LaterGauge( "synapse_replication_tcp_protocol_pending_commands", "", - ["name", "conn_id"], + ["name"], lambda: { - (p.name, p.conn_id): len(p.pending_commands) for p in connected_connections + (p.name,): len(p.pending_commands) for p in connected_connections }, )