From 543c9e79dd634557a62cdc8cb24da82cbe572e17 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 28 Feb 2024 03:16:37 -0800 Subject: [PATCH] HDDS-9235. ReplicationManager metrics not collected after restart. (#6280) --- .../server-scm/dev-support/findbugsExcludeFile.xml | 5 +++++ .../container/replication/ReplicationManager.java | 1 + .../replication/ReplicationManagerMetrics.java | 13 +++++++++---- .../replication/TestContainerReplicaPendingOps.java | 8 ++++++++ .../replication/TestECUnderReplicationHandler.java | 8 ++++++++ .../replication/TestReplicationManager.java | 8 ++++++++ 6 files changed, 39 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/server-scm/dev-support/findbugsExcludeFile.xml b/hadoop-hdds/server-scm/dev-support/findbugsExcludeFile.xml index 50f34918608..dc08720c968 100644 --- a/hadoop-hdds/server-scm/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdds/server-scm/dev-support/findbugsExcludeFile.xml @@ -51,4 +51,9 @@ + + + + + diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index a3661243be6..32310ef9e7b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -294,6 +294,7 @@ public synchronized void start() { if (!isRunning()) { LOG.info("Starting Replication Monitor Thread."); running = true; + metrics = ReplicationManagerMetrics.create(this); if (rmConf.isLegacyEnabled()) { legacyReplicationManager.setMetrics(metrics); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java index 5c3ee4e29ae..eb75db9bd50 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerMetrics.java @@ -235,10 +235,15 @@ public ReplicationManagerMetrics(ReplicationManager manager) { } public static ReplicationManagerMetrics create(ReplicationManager manager) { - return DefaultMetricsSystem.instance().register(METRICS_SOURCE_NAME, - "SCM Replication manager (closed container replication) related " - + "metrics", - new ReplicationManagerMetrics(manager)); + ReplicationManagerMetrics replicationManagerMetrics = (ReplicationManagerMetrics) + DefaultMetricsSystem.instance().getSource(METRICS_SOURCE_NAME); + if (replicationManagerMetrics == null) { + return DefaultMetricsSystem.instance().register(METRICS_SOURCE_NAME, + "SCM Replication manager (closed container replication) related " + + "metrics", + new ReplicationManagerMetrics(manager)); + } + return replicationManagerMetrics; } @Override diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java index a97cdbddb8a..3775531d30d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestContainerReplicaPendingOps.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.ozone.test.TestClock; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -74,6 +75,13 @@ public void setup() { dn3 = MockDatanodeDetails.randomDatanodeDetails(); } + @AfterEach + void cleanup() { + if (metrics != null) { + metrics.unRegister(); + } + } + @Test public void testGetPendingOpsReturnsEmptyList() { List ops = diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java index 22c3630e0c6..f6982212936 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestECUnderReplicationHandler.java @@ -47,6 +47,7 @@ import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.ratis.protocol.exceptions.NotLeaderException; import org.assertj.core.util.Lists; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -174,6 +175,13 @@ public NodeStatus getNodeStatus(DatanodeDetails dd) { .thenReturn(new ContainerPlacementStatusDefault(2, 2, 3)); } + @AfterEach + void cleanup() { + if (metrics != null) { + metrics.unRegister(); + } + } + @ParameterizedTest @ValueSource(strings = {"rs-6-3-1024k", "rs-10-4-1024k"}) void defersNonCriticalPartialReconstruction(String rep) throws IOException { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java index 47844f32fb0..ecb3ce4b039 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java @@ -55,6 +55,7 @@ import org.apache.hadoop.util.Lists; import org.apache.ozone.test.TestClock; import org.apache.ratis.protocol.exceptions.NotLeaderException; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -190,6 +191,13 @@ public void setup() throws IOException { when(scmContext.getScm()).thenReturn(scm); } + @AfterEach + void cleanup() { + if (replicationManager.getMetrics() != null) { + replicationManager.getMetrics().unRegister(); + } + } + private ReplicationManager createReplicationManager() throws IOException { return new ReplicationManager( configuration,