From fed56e4a3c2e186abbedd5568acc3638301b5f83 Mon Sep 17 00:00:00 2001 From: Bhumika Sharma Date: Tue, 3 Jun 2025 13:07:35 +0530 Subject: [PATCH 1/6] Add node-left metric Signed-off-by: Bhumika Sharma --- .../cluster/ClusterManagerMetrics.java | 5 ++++- .../cluster/coordination/Coordinator.java | 21 +++++++++++++++++++ .../coordination/FollowersChecker.java | 10 ++++++--- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java index ba63b3fd2d6d0..c58607626d6da 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java @@ -23,6 +23,8 @@ */ public final class ClusterManagerMetrics { + public static final String NODE_ID_TAG = "node_id"; + public static final String REASON_TAG = "reason"; private static final String LATENCY_METRIC_UNIT_MS = "ms"; private static final String COUNTER_METRICS_UNIT = "1"; @@ -36,6 +38,7 @@ public final class ClusterManagerMetrics { public final Counter followerChecksFailureCounter; public final Counter asyncFetchFailureCounter; public final Counter asyncFetchSuccessCounter; + public final Counter nodeLeftCounter; public ClusterManagerMetrics(MetricsRegistry metricsRegistry) { clusterStateAppliersHistogram = metricsRegistry.createHistogram( @@ -83,7 +86,7 @@ public ClusterManagerMetrics(MetricsRegistry metricsRegistry) { "Counter for number of successful async fetches", COUNTER_METRICS_UNIT ); - + nodeLeftCounter = metricsRegistry.createCounter("node.left.count", "Counter for node left operation", COUNTER_METRICS_UNIT); } public void recordLatency(Histogram histogram, Double value) { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index ef0f49b8ae394..f8a26395f1fa3 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -90,6 +90,7 @@ import org.opensearch.monitor.NodeHealthService; import org.opensearch.monitor.StatusInfo; import org.opensearch.node.remotestore.RemoteStoreNodeService; +import org.opensearch.telemetry.metrics.tags.Tags; import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportService; @@ -111,6 +112,12 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import static org.opensearch.cluster.ClusterManagerMetrics.NODE_ID_TAG; +import static org.opensearch.cluster.ClusterManagerMetrics.REASON_TAG; +import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_DISCONNECTED; +import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL; +import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_HEALTHCHECK_FAIL; +import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_LAGGING; import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_ID; import static org.opensearch.cluster.decommission.DecommissionHelper.nodeCommissioned; import static org.opensearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered; @@ -193,6 +200,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery private final RemoteStoreNodeService remoteStoreNodeService; private NodeConnectionsService nodeConnectionsService; private final ClusterSettings clusterSettings; + private final ClusterManagerMetrics clusterManagerMetrics; /** * @param nodeName The name of the node, used to name the {@link java.util.concurrent.ExecutorService} of the {@link SeedHostsResolver}. @@ -250,6 +258,7 @@ public Coordinator( this.publishTimeout = PUBLISH_TIMEOUT_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer(PUBLISH_TIMEOUT_SETTING, this::setPublishTimeout); this.publishInfoTimeout = PUBLISH_INFO_TIMEOUT_SETTING.get(settings); + this.clusterManagerMetrics = clusterManagerMetrics; this.random = random; this.electionSchedulerFactory = new ElectionSchedulerFactory(settings, random, transportService.getThreadPool()); this.preVoteCollector = new PreVoteCollector( @@ -359,6 +368,18 @@ private void removeNode(DiscoveryNode discoveryNode, String reason) { nodeRemovalExecutor, nodeRemovalExecutor ); + String reasonToPublish = switch (reason) { + case NODE_LEFT_REASON_DISCONNECTED -> "disconnected"; + case NODE_LEFT_REASON_LAGGING -> "lagging"; + case NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL -> "follower.check.fail"; + case NODE_LEFT_REASON_HEALTHCHECK_FAIL -> "health.check.fail"; + default -> reason; + }; + clusterManagerMetrics.incrementCounter( + clusterManagerMetrics.nodeLeftCounter, + 1.0, + Optional.ofNullable(Tags.create().addTag(NODE_ID_TAG, discoveryNode.getId()).addTag(REASON_TAG, reasonToPublish)) + ); } } } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java index ca414ef7c4fc8..89f922b8f981b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java @@ -86,6 +86,10 @@ public class FollowersChecker { private static final Logger logger = LogManager.getLogger(FollowersChecker.class); public static final String FOLLOWER_CHECK_ACTION_NAME = "internal:coordination/fault_detection/follower_check"; + public static final String NODE_LEFT_REASON_LAGGING = "lagging"; + public static final String NODE_LEFT_REASON_DISCONNECTED = "disconnected"; + public static final String NODE_LEFT_REASON_HEALTHCHECK_FAIL = "health check failed"; + public static final String NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL = "followers check retry count exceeded"; // the time between checks sent to each node public static final Setting FOLLOWER_CHECK_INTERVAL_SETTING = Setting.timeSetting( @@ -398,13 +402,13 @@ public void handleException(TransportException exp) { final String reason; if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { logger.info(() -> new ParameterizedMessage("{} disconnected", FollowerChecker.this), exp); - reason = "disconnected"; + reason = NODE_LEFT_REASON_DISCONNECTED; } else if (exp.getCause() instanceof NodeHealthCheckFailureException) { logger.info(() -> new ParameterizedMessage("{} health check failed", FollowerChecker.this), exp); - reason = "health check failed"; + reason = NODE_LEFT_REASON_HEALTHCHECK_FAIL; } else if (failureCountSinceLastSuccess >= followerCheckRetryCount) { logger.info(() -> new ParameterizedMessage("{} failed too many times", FollowerChecker.this), exp); - reason = "followers check retry count exceeded"; + reason = NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL; } else { logger.info(() -> new ParameterizedMessage("{} failed, retrying", FollowerChecker.this), exp); scheduleNextWakeUp(); From a32a4c1193c5159a694f80be4d76a3f9284f31f6 Mon Sep 17 00:00:00 2001 From: Bhumika Sharma Date: Thu, 5 Jun 2025 17:23:25 +0530 Subject: [PATCH 2/6] Fix: updates tag name and removed lagging reason Signed-off-by: Bhumika Sharma --- .../main/java/org/opensearch/cluster/ClusterManagerMetrics.java | 2 +- .../java/org/opensearch/cluster/coordination/Coordinator.java | 2 -- .../org/opensearch/cluster/coordination/FollowersChecker.java | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java index c58607626d6da..e557c341240ef 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java @@ -23,7 +23,7 @@ */ public final class ClusterManagerMetrics { - public static final String NODE_ID_TAG = "node_id"; + public static final String NODE_ID_TAG = "follower_node_id"; public static final String REASON_TAG = "reason"; private static final String LATENCY_METRIC_UNIT_MS = "ms"; private static final String COUNTER_METRICS_UNIT = "1"; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index f8a26395f1fa3..e5edc209b55ed 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -117,7 +117,6 @@ import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_DISCONNECTED; import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL; import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_HEALTHCHECK_FAIL; -import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_LAGGING; import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_ID; import static org.opensearch.cluster.decommission.DecommissionHelper.nodeCommissioned; import static org.opensearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered; @@ -370,7 +369,6 @@ private void removeNode(DiscoveryNode discoveryNode, String reason) { ); String reasonToPublish = switch (reason) { case NODE_LEFT_REASON_DISCONNECTED -> "disconnected"; - case NODE_LEFT_REASON_LAGGING -> "lagging"; case NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL -> "follower.check.fail"; case NODE_LEFT_REASON_HEALTHCHECK_FAIL -> "health.check.fail"; default -> reason; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java index 89f922b8f981b..8171e62c1eb7d 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java @@ -86,7 +86,6 @@ public class FollowersChecker { private static final Logger logger = LogManager.getLogger(FollowersChecker.class); public static final String FOLLOWER_CHECK_ACTION_NAME = "internal:coordination/fault_detection/follower_check"; - public static final String NODE_LEFT_REASON_LAGGING = "lagging"; public static final String NODE_LEFT_REASON_DISCONNECTED = "disconnected"; public static final String NODE_LEFT_REASON_HEALTHCHECK_FAIL = "health check failed"; public static final String NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL = "followers check retry count exceeded"; From e9836fb1f0f7cc5cc6d9c8a7cd5163d697c841b7 Mon Sep 17 00:00:00 2001 From: Bhumika Sharma Date: Fri, 6 Jun 2025 10:12:23 +0530 Subject: [PATCH 3/6] Fix: tag name and adds lagging reason Signed-off-by: Bhumika Sharma --- .../java/org/opensearch/cluster/ClusterManagerMetrics.java | 2 +- .../org/opensearch/cluster/coordination/Coordinator.java | 6 ++++-- .../opensearch/cluster/coordination/FollowersChecker.java | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java index e557c341240ef..cad1194bd6b00 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java @@ -23,7 +23,7 @@ */ public final class ClusterManagerMetrics { - public static final String NODE_ID_TAG = "follower_node_id"; + public static final String FOLLOWER_NODE_ID_TAG = "follower_node_id"; public static final String REASON_TAG = "reason"; private static final String LATENCY_METRIC_UNIT_MS = "ms"; private static final String COUNTER_METRICS_UNIT = "1"; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index e5edc209b55ed..f212943695695 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -112,11 +112,12 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; -import static org.opensearch.cluster.ClusterManagerMetrics.NODE_ID_TAG; +import static org.opensearch.cluster.ClusterManagerMetrics.FOLLOWER_NODE_ID_TAG; import static org.opensearch.cluster.ClusterManagerMetrics.REASON_TAG; import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_DISCONNECTED; import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL; import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_HEALTHCHECK_FAIL; +import static org.opensearch.cluster.coordination.FollowersChecker.NODE_LEFT_REASON_LAGGING; import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_ID; import static org.opensearch.cluster.decommission.DecommissionHelper.nodeCommissioned; import static org.opensearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered; @@ -369,6 +370,7 @@ private void removeNode(DiscoveryNode discoveryNode, String reason) { ); String reasonToPublish = switch (reason) { case NODE_LEFT_REASON_DISCONNECTED -> "disconnected"; + case NODE_LEFT_REASON_LAGGING -> "lagging"; case NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL -> "follower.check.fail"; case NODE_LEFT_REASON_HEALTHCHECK_FAIL -> "health.check.fail"; default -> reason; @@ -376,7 +378,7 @@ private void removeNode(DiscoveryNode discoveryNode, String reason) { clusterManagerMetrics.incrementCounter( clusterManagerMetrics.nodeLeftCounter, 1.0, - Optional.ofNullable(Tags.create().addTag(NODE_ID_TAG, discoveryNode.getId()).addTag(REASON_TAG, reasonToPublish)) + Optional.ofNullable(Tags.create().addTag(FOLLOWER_NODE_ID_TAG, discoveryNode.getId()).addTag(REASON_TAG, reasonToPublish)) ); } } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java index 8171e62c1eb7d..89f922b8f981b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java @@ -86,6 +86,7 @@ public class FollowersChecker { private static final Logger logger = LogManager.getLogger(FollowersChecker.class); public static final String FOLLOWER_CHECK_ACTION_NAME = "internal:coordination/fault_detection/follower_check"; + public static final String NODE_LEFT_REASON_LAGGING = "lagging"; public static final String NODE_LEFT_REASON_DISCONNECTED = "disconnected"; public static final String NODE_LEFT_REASON_HEALTHCHECK_FAIL = "health check failed"; public static final String NODE_LEFT_REASON_FOLLOWER_CHECK_RETRY_FAIL = "followers check retry count exceeded"; From 569888a4f00e9f10a379fe4c1a4bc60bfa7502cc Mon Sep 17 00:00:00 2001 From: Bhumika Sharma Date: Mon, 9 Jun 2025 16:39:44 +0530 Subject: [PATCH 4/6] Fix: Updates UT to check the node.left.count metric Signed-off-by: Bhumika Sharma --- .../cluster/coordination/Coordinator.java | 4 ++- .../coordination/CoordinatorTests.java | 23 ++++++++++++++ .../coordination/FollowersCheckerTests.java | 2 +- .../coordination/LeaderCheckerTests.java | 2 +- test/framework/build.gradle | 1 + .../AbstractCoordinatorTestCase.java | 8 ++++- .../test}/telemetry/TestInMemoryCounter.java | 31 +++++++++++++------ .../telemetry/TestInMemoryHistogram.java | 15 ++++++++- .../TestInMemoryMetricsRegistry.java | 15 ++++++++- .../test/telemetry/package-info.java | 10 ++++++ 10 files changed, 96 insertions(+), 15 deletions(-) rename {server/src/test/java/org/opensearch => test/telemetry/src/main/java/org/opensearch/test}/telemetry/TestInMemoryCounter.java (62%) rename {server/src/test/java/org/opensearch => test/telemetry/src/main/java/org/opensearch/test}/telemetry/TestInMemoryHistogram.java (86%) rename {server/src/test/java/org/opensearch => test/telemetry/src/main/java/org/opensearch/test}/telemetry/TestInMemoryMetricsRegistry.java (91%) create mode 100644 test/telemetry/src/main/java/org/opensearch/test/telemetry/package-info.java diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index f212943695695..b25ac395c94cc 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -378,7 +378,9 @@ private void removeNode(DiscoveryNode discoveryNode, String reason) { clusterManagerMetrics.incrementCounter( clusterManagerMetrics.nodeLeftCounter, 1.0, - Optional.ofNullable(Tags.create().addTag(FOLLOWER_NODE_ID_TAG, discoveryNode.getId()).addTag(REASON_TAG, reasonToPublish)) + Optional.ofNullable( + Tags.create().addTag(FOLLOWER_NODE_ID_TAG, discoveryNode.getId()).addTag(REASON_TAG, reasonToPublish) + ) ); } } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java index 5eeebd2588416..300c6943e7cb0 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java @@ -57,13 +57,17 @@ import org.opensearch.gateway.GatewayService; import org.opensearch.monitor.StatusInfo; import org.opensearch.test.MockLogAppender; +import org.opensearch.test.telemetry.TestInMemoryCounter; +import org.opensearch.test.telemetry.TestInMemoryMetricsRegistry; import java.io.IOException; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; @@ -250,6 +254,25 @@ public void testUnhealthyNodesGetsRemoved() { assertThat(lastCommittedConfiguration + " should be 3 nodes", lastCommittedConfiguration.getNodeIds().size(), equalTo(3)); assertFalse(lastCommittedConfiguration.getNodeIds().contains(newNode1.getId())); assertFalse(lastCommittedConfiguration.getNodeIds().contains(newNode2.getId())); + + TestInMemoryMetricsRegistry clusterManagerMetricsRegistry = leader.getMetricsRegistry(); + TestInMemoryCounter nodeLeftCounter = clusterManagerMetricsRegistry.getCounterStore().get("node.left.count"); + assertNotNull("node.left.count counter should be present", nodeLeftCounter); + ConcurrentHashMap, Double> counterValuesByTags = nodeLeftCounter.getCounterValueForTags(); + + // Check for newNode1 + Map tags1 = new HashMap<>(); + tags1.put("follower_node_id", newNode1.getId()); + tags1.put("reason", "health.check.fail"); + assertTrue(counterValuesByTags.containsKey(tags1)); + assertEquals(Double.valueOf(1.0), counterValuesByTags.get(tags1)); + + // Check for newNode2 + Map tags2 = new HashMap<>(); + tags2.put("follower_node_id", newNode2.getId()); + tags2.put("reason", "health.check.fail"); + assertTrue(counterValuesByTags.containsKey(tags2)); + assertEquals(Double.valueOf(1.0), counterValuesByTags.get(tags2)); } } } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java index d0bc41b459cc3..39c3493d9ef89 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java @@ -48,13 +48,13 @@ import org.opensearch.core.transport.TransportResponse.Empty; import org.opensearch.monitor.NodeHealthService; import org.opensearch.monitor.StatusInfo; -import org.opensearch.telemetry.TestInMemoryMetricsRegistry; import org.opensearch.telemetry.metrics.MetricsRegistry; import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.EqualsHashCodeTestUtils; import org.opensearch.test.EqualsHashCodeTestUtils.CopyFunction; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.telemetry.TestInMemoryMetricsRegistry; import org.opensearch.test.transport.CapturingTransport; import org.opensearch.test.transport.MockTransport; import org.opensearch.threadpool.ThreadPool.Names; diff --git a/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java index decdeddfa37a1..ec17ed8937f2f 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java @@ -45,11 +45,11 @@ import org.opensearch.core.transport.TransportResponse; import org.opensearch.core.transport.TransportResponse.Empty; import org.opensearch.monitor.StatusInfo; -import org.opensearch.telemetry.TestInMemoryMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.EqualsHashCodeTestUtils; import org.opensearch.test.EqualsHashCodeTestUtils.CopyFunction; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.telemetry.TestInMemoryMetricsRegistry; import org.opensearch.test.transport.CapturingTransport; import org.opensearch.test.transport.MockTransport; import org.opensearch.threadpool.ThreadPool.Names; diff --git a/test/framework/build.gradle b/test/framework/build.gradle index fa334304775b6..943801fc29e51 100644 --- a/test/framework/build.gradle +++ b/test/framework/build.gradle @@ -39,6 +39,7 @@ dependencies { api project(':libs:opensearch-nio') api project(":server") api project(":libs:opensearch-cli") + api project(":libs:opensearch-telemetry") api project(":test:telemetry") api "com.carrotsearch.randomizedtesting:randomizedtesting-runner:${versions.randomizedrunner}" api "junit:junit:${versions.junit}" diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index 836a4cbffd54f..0a502430cb1b6 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -96,6 +96,7 @@ import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.disruption.DisruptableMockTransport; import org.opensearch.test.disruption.DisruptableMockTransport.ConnectionStatus; +import org.opensearch.test.telemetry.TestInMemoryMetricsRegistry; import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportInterceptor; @@ -1059,6 +1060,7 @@ class ClusterNode { private RepositoriesService repositoriesService; private RemoteStoreNodeService remoteStoreNodeService; List> extraJoinValidators = new ArrayList<>(); + private TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); ClusterNode(int nodeIndex, boolean clusterManagerEligible, Settings nodeSettings, NodeHealthService nodeHealthService) { this( @@ -1188,7 +1190,7 @@ protected Optional getDisruptableMockTransport(Transpo nodeHealthService, persistedStateRegistry, remoteStoreNodeService, - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + new ClusterManagerMetrics(metricsRegistry), null ); coordinator.setNodeConnectionsService(nodeConnectionsService); @@ -1211,6 +1213,10 @@ protected Optional getDisruptableMockTransport(Transpo coordinator.startInitialJoin(); } + public TestInMemoryMetricsRegistry getMetricsRegistry() { + return metricsRegistry; + } + void close() { assertThat("must add nodes to a cluster before closing them", clusterNodes, hasItem(ClusterNode.this)); onNode(() -> { diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java b/test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryCounter.java similarity index 62% rename from server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java rename to test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryCounter.java index d9aee5ebfa941..a546f31215e6c 100644 --- a/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java +++ b/test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryCounter.java @@ -6,12 +6,12 @@ * compatible open source license. */ -package org.opensearch.telemetry; +package org.opensearch.test.telemetry; import org.opensearch.telemetry.metrics.Counter; import org.opensearch.telemetry.metrics.tags.Tags; -import java.util.HashMap; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -24,13 +24,26 @@ public class TestInMemoryCounter implements Counter { private AtomicInteger counterValue = new AtomicInteger(0); - private ConcurrentHashMap, Double> counterValueForTags = new ConcurrentHashMap<>(); + private final ConcurrentHashMap, Double> counterValueForTags = new ConcurrentHashMap<>(); + /** + * Constructor. + */ + public TestInMemoryCounter() {} + + /** + * returns the counter value. + * @return + */ public Integer getCounterValue() { return this.counterValue.get(); } - public ConcurrentHashMap, Double> getCounterValueForTags() { + /** + * returns the counter value tags + * @return + */ + public ConcurrentHashMap, Double> getCounterValueForTags() { return this.counterValueForTags; } @@ -41,12 +54,12 @@ public void add(double value) { @Override public synchronized void add(double value, Tags tags) { - HashMap hashMap = (HashMap) tags.getTagsMap(); - if (counterValueForTags.get(hashMap) == null) { - counterValueForTags.put(hashMap, value); + Map tagsMap = tags.getTagsMap(); + if (counterValueForTags.get(tagsMap) == null) { + counterValueForTags.put(tagsMap, value); } else { - value = counterValueForTags.get(hashMap) + value; - counterValueForTags.put(hashMap, value); + value = counterValueForTags.get(tagsMap) + value; + counterValueForTags.put(tagsMap, value); } } } diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java b/test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryHistogram.java similarity index 86% rename from server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java rename to test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryHistogram.java index ff28df2b6529d..6028fcb43114f 100644 --- a/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java +++ b/test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryHistogram.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.telemetry; +package org.opensearch.test.telemetry; import org.opensearch.telemetry.metrics.Histogram; import org.opensearch.telemetry.metrics.tags.Tags; @@ -26,10 +26,23 @@ public class TestInMemoryHistogram implements Histogram { private AtomicInteger histogramValue = new AtomicInteger(0); private ConcurrentHashMap, Double> histogramValueForTags = new ConcurrentHashMap<>(); + /** + * Constructor. + */ + public TestInMemoryHistogram() {} + + /** + * Returns the Histogram value. + * @return + */ public Integer getHistogramValue() { return this.histogramValue.get(); } + /** + * Returns the Histogram value for tags + * @return + */ public ConcurrentHashMap, Double> getHistogramValueForTags() { return this.histogramValueForTags; } diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java b/test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryMetricsRegistry.java similarity index 91% rename from server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java rename to test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryMetricsRegistry.java index ceda373df1357..2c44f50abf872 100644 --- a/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java +++ b/test/telemetry/src/main/java/org/opensearch/test/telemetry/TestInMemoryMetricsRegistry.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.telemetry; +package org.opensearch.test.telemetry; import org.opensearch.telemetry.metrics.Counter; import org.opensearch.telemetry.metrics.Histogram; @@ -29,10 +29,23 @@ public class TestInMemoryMetricsRegistry implements MetricsRegistry { private ConcurrentHashMap counterStore = new ConcurrentHashMap<>(); private ConcurrentHashMap histogramStore = new ConcurrentHashMap<>(); + /** + * Constructor. + */ + public TestInMemoryMetricsRegistry() {} + + /** + * Returns counterStore + * @return + */ public ConcurrentHashMap getCounterStore() { return this.counterStore; } + /** + * Returns the histogramStore. + * @return + */ public ConcurrentHashMap getHistogramStore() { return this.histogramStore; } diff --git a/test/telemetry/src/main/java/org/opensearch/test/telemetry/package-info.java b/test/telemetry/src/main/java/org/opensearch/test/telemetry/package-info.java new file mode 100644 index 0000000000000..6cf33afe934fd --- /dev/null +++ b/test/telemetry/src/main/java/org/opensearch/test/telemetry/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Base opensearch package. */ +package org.opensearch.test.telemetry; From eb8e836c4ce599b8a6c576f1c54d55388aa0c4fe Mon Sep 17 00:00:00 2001 From: Bhumika Sharma Date: Tue, 10 Jun 2025 09:03:34 +0530 Subject: [PATCH 5/6] Fix: Update changelog Signed-off-by: Bhumika Sharma --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 273c4c3164b71..1e1dc6aa8bb0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added File Cache Pinning ([#17617](https://github.com/opensearch-project/OpenSearch/issues/13648)) - Support consumer reset in Resume API for pull-based ingestion. This PR includes a breaking change for the experimental pull-based ingestion feature. ([#18332](https://github.com/opensearch-project/OpenSearch/pull/18332)) - Add FIPS build tooling ([#4254](https://github.com/opensearch-project/security/issues/4254)) +- Added node-left metric to cluster manager ([#18421](https://github.com/opensearch-project/OpenSearch/pull/18421)) ### Changed - Create generic DocRequest to better categorize ActionRequests ([#18269](https://github.com/opensearch-project/OpenSearch/pull/18269))) From 6f560bdfaa1dc6308a70c5e3b39536b8d9e852fe Mon Sep 17 00:00:00 2001 From: Bhumika Sharma Date: Wed, 11 Jun 2025 01:24:57 +0530 Subject: [PATCH 6/6] Trigger Build Signed-off-by: Bhumika Sharma