From de25c9e2b6441a56234d3a0437733eb1de9c6e63 Mon Sep 17 00:00:00 2001 From: Suraj Singh Date: Sat, 24 Jun 2023 10:13:07 -0700 Subject: [PATCH] [Segment Replication] Update segrep bwc tests to verify replica checkpoints and skip tests for 1.x bwc versions (#8203) * [Segment Replication] Verify segment replication stats in bwc test Signed-off-by: Suraj Singh * Log cleanup Signed-off-by: Suraj Singh * Spotless check Signed-off-by: Suraj Singh * Add version check to skip test for 1.x bwc branches Signed-off-by: Suraj Singh * Add version check to skip test for 1.x bwc branches for mixed clusters Signed-off-by: Suraj Singh * Add version string in build to identify bwc version Signed-off-by: Suraj Singh * Use correct bwc version string Signed-off-by: Suraj Singh * Address review comments from https://github.com/opensearch-project/OpenSearch/pull/7626 Signed-off-by: Suraj Singh --------- Signed-off-by: Suraj Singh [Segment Replication] Use _cat/segments vs index stats + _search to verify doc count Signed-off-by: Suraj Singh Self review Signed-off-by: Suraj Singh remove unused imports Signed-off-by: Suraj Singh Handle 0 doc count segments Signed-off-by: Suraj Singh Add missing import statement Signed-off-by: Suraj Singh --- qa/mixed-cluster/build.gradle | 2 + .../org/opensearch/backwards/IndexingIT.java | 17 ++- .../org/opensearch/upgrades/IndexingIT.java | 108 ++++++++++-------- 3 files changed, 74 insertions(+), 53 deletions(-) diff --git a/qa/mixed-cluster/build.gradle b/qa/mixed-cluster/build.gradle index e24e4e17adcb6..511dca3c102ca 100644 --- a/qa/mixed-cluster/build.gradle +++ b/qa/mixed-cluster/build.gradle @@ -55,6 +55,7 @@ for (Version bwcVersion : BuildParams.bwcVersions.wireCompatible) { } String baseName = "v${bwcVersion}" + String bwcVersionStr = "${bwcVersion}" /* This project runs the core REST tests against a 4 node cluster where two of the nodes has a different minor. 
*/ @@ -88,6 +89,7 @@ for (Version bwcVersion : BuildParams.bwcVersions.wireCompatible) { nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") } + systemProperty 'tests.upgrade_from_version', bwcVersionStr systemProperty 'tests.path.repo', "${buildDir}/cluster/shared/repo/${baseName}" onlyIf { project.bwc_tests_enabled } } diff --git a/qa/mixed-cluster/src/test/java/org/opensearch/backwards/IndexingIT.java b/qa/mixed-cluster/src/test/java/org/opensearch/backwards/IndexingIT.java index 7c869ccb89a2a..888cbb6c7fcd4 100644 --- a/qa/mixed-cluster/src/test/java/org/opensearch/backwards/IndexingIT.java +++ b/qa/mixed-cluster/src/test/java/org/opensearch/backwards/IndexingIT.java @@ -67,6 +67,9 @@ public class IndexingIT extends OpenSearchRestTestCase { + protected static final Version UPGRADE_FROM_VERSION = Version.fromString(System.getProperty("tests.upgrade_from_version")); + + private int indexDocs(String index, final int idStart, final int numDocs) throws IOException { for (int i = 0; i < numDocs; i++) { final int id = idStart + i; @@ -115,12 +118,16 @@ private void printClusterRouting() throws IOException, ParseException { * @throws Exception */ public void testIndexingWithPrimaryOnBwcNodes() throws Exception { + if (UPGRADE_FROM_VERSION.before(Version.V_2_4_0)) { + logger.info("--> Skip test for version {} where segment replication feature is not available", UPGRADE_FROM_VERSION); + return; + } Nodes nodes = buildNodeAndVersions(); assumeFalse("new nodes is empty", nodes.getNewNodes().isEmpty()); logger.info("cluster discovered:\n {}", nodes.toString()); final List bwcNamesList = nodes.getBWCNodes().stream().map(Node::getNodeName).collect(Collectors.toList()); final String bwcNames = bwcNamesList.stream().collect(Collectors.joining(",")); - // Exclude bwc nodes from allocation so that primaries gets 
allocated on current version + // Update allocation settings so that primaries gets allocated only on nodes running on older version Settings.Builder settings = Settings.builder() .put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) .put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0) @@ -134,7 +141,7 @@ public void testIndexingWithPrimaryOnBwcNodes() throws Exception { try (RestClient nodeClient = buildClient(restClientSettings(), nodes.getNewNodes().stream().map(Node::getPublishAddress).toArray(HttpHost[]::new))) { - logger.info("allowing replica shards assignment on bwc nodes"); + logger.info("Remove allocation include settings so that shards can be allocated on current version nodes"); updateIndexSettings(index, Settings.builder().putNull("index.routing.allocation.include._name")); // Add replicas so that it can be assigned on higher OS version nodes. updateIndexSettings(index, Settings.builder().put("index.number_of_replicas", 2)); @@ -155,13 +162,17 @@ public void testIndexingWithPrimaryOnBwcNodes() throws Exception { /** - * This test creates a cluster with primary on older version but due to {@link org.opensearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider}; + * This test creates a cluster with primary on higher version but due to {@link org.opensearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider}; * replica shard allocation on lower OpenSearch version is prevented. Thus, this test though cover the use case where * primary shard containing nodes are running on higher OS version while replicas are unassigned. 
* * @throws Exception */ public void testIndexingWithReplicaOnBwcNodes() throws Exception { + if (UPGRADE_FROM_VERSION.before(Version.V_2_4_0)) { + logger.info("--> Skip test for version {} where segment replication feature is not available", UPGRADE_FROM_VERSION); + return; + } Nodes nodes = buildNodeAndVersions(); assumeFalse("new nodes is empty", nodes.getNewNodes().isEmpty()); logger.info("cluster discovered:\n {}", nodes.toString()); diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/upgrades/IndexingIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/upgrades/IndexingIT.java index 543508d59d45b..d66eeea19eafb 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/upgrades/IndexingIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/upgrades/IndexingIT.java @@ -41,17 +41,18 @@ import org.opensearch.client.ResponseException; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.Booleans; +import org.opensearch.common.io.Streams; import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.CodecService; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.test.rest.yaml.ObjectPath; import java.io.IOException; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.TimeUnit; import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING; import static org.opensearch.rest.action.search.RestSearchAction.TOTAL_HITS_AS_INT_PARAM; @@ -87,54 +88,46 @@ private void printClusterNodes() throws IOException, ParseException, URISyntaxEx } // Verifies that for each shard copy holds same document count across all containing nodes. 
- private void waitForSearchableDocs(String index, int shardCount) throws Exception { - Map primaryShardToNodeIDMap = new HashMap<>(); - Map replicaShardToNodeIDMap = new HashMap<>(); + private void waitForSearchableDocs(String index, int shardCount, int replicaCount) throws Exception { + assertTrue(shardCount > 0); + assertTrue(replicaCount > 0); waitForClusterHealthWithNoShardMigration(index, "green"); logger.info("--> _cat/shards before search \n{}", EntityUtils.toString(client().performRequest(new Request("GET", "/_cat/shards?v")).getEntity())); - Request request = new Request("GET", index + "/_stats"); - request.addParameter("level", "shards"); - Response response = client().performRequest(request); - for (int shardNumber = 0; shardNumber < shardCount; shardNumber++) { - List shardStats = ObjectPath.createFromResponse(response).evaluate("indices." + index + ".shards." + shardNumber); - for (Object shard : shardStats) { - final String nodeId = ObjectPath.evaluate(shard, "routing.node"); - final Boolean primary = ObjectPath.evaluate(shard, "routing.primary"); - if (primary) { - primaryShardToNodeIDMap.putIfAbsent(shardNumber, nodeId); - } else { - replicaShardToNodeIDMap.putIfAbsent(shardNumber, nodeId); + // Verify segment replication stats + verifySegmentStats(index); + + // Verify segment store + assertBusy(() -> { + Request segrepStatsRequest = new Request("GET", "/_cat/segments/" + index + "?s=shard,segment,primaryOrReplica"); + segrepStatsRequest.addParameter("h", "index,shard,primaryOrReplica,segment,docs.count"); + Response segrepStatsResponse = client().performRequest(segrepStatsRequest); + logger.info("--> _cat/segments response\n {}", EntityUtils.toString(segrepStatsResponse.getEntity())); + List responseList = Streams.readAllLines(segrepStatsResponse.getEntity().getContent()); + for (int segmentsIndex=0; segmentsIndex < responseList.size();) { + String[] primaryRow = responseList.get(segmentsIndex++).split(" +"); + String shardId = primaryRow[0] + 
primaryRow[1]; + assertTrue(primaryRow[2].equals("p")); + for(int replicaIndex = 1; replicaIndex <= replicaCount; replicaIndex++) { + String[] replicaRow = responseList.get(segmentsIndex).split(" +"); + String replicaShardId = replicaRow[0] + replicaRow[1]; + // When segment has 0 doc count, not all replica copies possess that segment. Skip to next segment + if (replicaRow[2].equals("p")) { + assertTrue(primaryRow[4].equals("0")); + break; + } + // verify same shard id + assertTrue(replicaShardId.equals(shardId)); + // verify replica row + assertTrue(replicaRow[2].equals("r")); + // Verify segment name matches e.g. _0 + assertTrue(replicaRow[3].equals(primaryRow[3])); + // Verify doc count matches + assertTrue(replicaRow[4].equals(primaryRow[4])); + segmentsIndex++; + } + } - } - logger.info("--> primaryShardToNodeIDMap {}", primaryShardToNodeIDMap); - logger.info("--> replicaShardToNodeIDMap {}", replicaShardToNodeIDMap); - - for (int shardNumber = 0; shardNumber < shardCount; shardNumber++) { - logger.info("--> Verify doc count for shard number {}", shardNumber); - Request searchTestIndexRequest = new Request("POST", "/" + index + "/_search"); - searchTestIndexRequest.addParameter(TOTAL_HITS_AS_INT_PARAM, "true"); - searchTestIndexRequest.addParameter("filter_path", "hits.total"); - searchTestIndexRequest.addParameter("preference", "_shards:" + shardNumber + "|_only_nodes:" + primaryShardToNodeIDMap.get(shardNumber)); - Response searchTestIndexResponse = client().performRequest(searchTestIndexRequest); - final int primaryHits = ObjectPath.createFromResponse(searchTestIndexResponse).evaluate("hits.total"); - logger.info("--> primaryHits {}", primaryHits); - final int shardNum = shardNumber; - // Verify replica shard doc count only when available. 
- if (replicaShardToNodeIDMap.get(shardNum) != null) { - assertBusy(() -> { - Request replicaRequest = new Request("POST", "/" + index + "/_search"); - replicaRequest.addParameter(TOTAL_HITS_AS_INT_PARAM, "true"); - replicaRequest.addParameter("filter_path", "hits.total"); - replicaRequest.addParameter("preference", "_shards:" + shardNum + "|_only_nodes:" + replicaShardToNodeIDMap.get(shardNum)); - Response replicaResponse = client().performRequest(replicaRequest); - int replicaHits = ObjectPath.createFromResponse(replicaResponse).evaluate("hits.total"); - logger.info("--> ReplicaHits {}", replicaHits); - assertEquals(primaryHits, replicaHits); - }, 1, TimeUnit.MINUTES); - } - } + }); } private void waitForClusterHealthWithNoShardMigration(String indexName, String status) throws IOException { @@ -148,6 +141,18 @@ private void waitForClusterHealthWithNoShardMigration(String indexName, String s client().performRequest(waitForStatus); } + private void verifySegmentStats(String indexName) throws Exception { + assertBusy(() -> { + Request segrepStatsRequest = new Request("GET", "/_cat/segment_replication/" + indexName); + segrepStatsRequest.addParameter("h", "shardId,target_node,checkpoints_behind"); + Response segrepStatsResponse = client().performRequest(segrepStatsRequest); + for (String statLine : Streams.readAllLines(segrepStatsResponse.getEntity().getContent())) { + String[] elements = statLine.split(" +"); + assertEquals("Replica shard " + elements[0] + "not upto date with primary ", 0, Integer.parseInt(elements[2])); + } + }); + } + public void testIndexing() throws IOException, ParseException { switch (CLUSTER_TYPE) { case OLD: @@ -248,7 +253,7 @@ public void testIndexingWithSegRep() throws Exception { } final String indexName = "test-index-segrep"; final int shardCount = 3; - final int replicaCount = 1; + final int replicaCount = 2; logger.info("--> Case {}", CLUSTER_TYPE); printClusterNodes(); logger.info("--> _cat/shards before test execution \n{}", 
EntityUtils.toString(client().performRequest(new Request("GET", "/_cat/shards?v")).getEntity())); @@ -258,6 +263,10 @@ public void testIndexingWithSegRep() throws Exception { .put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), shardCount) .put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), replicaCount) .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put( + EngineConfig.INDEX_CODEC_SETTING.getKey(), + randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC, CodecService.LUCENE_DEFAULT_CODEC) + ) .put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms"); createIndex(indexName, settings.build()); waitForClusterHealthWithNoShardMigration(indexName, "green"); @@ -292,7 +301,7 @@ public void testIndexingWithSegRep() throws Exception { throw new UnsupportedOperationException("Unknown cluster type [" + CLUSTER_TYPE + "]"); } - waitForSearchableDocs(indexName, shardCount); + waitForSearchableDocs(indexName, shardCount, replicaCount); assertCount(indexName, expectedCount); if (CLUSTER_TYPE != ClusterType.OLD) { @@ -303,17 +312,16 @@ public void testIndexingWithSegRep() throws Exception { toBeDeleted.addParameter("refresh", "true"); toBeDeleted.setJsonEntity("{\"f1\": \"delete-me\"}"); client().performRequest(toBeDeleted); - waitForSearchableDocs(indexName, shardCount); + waitForSearchableDocs(indexName, shardCount, replicaCount); assertCount(indexName, expectedCount + 6); logger.info("--> Delete previously added doc and verify doc count"); Request delete = new Request("DELETE", "/" + indexName + "/_doc/to_be_deleted"); delete.addParameter("refresh", "true"); client().performRequest(delete); - waitForSearchableDocs(indexName, shardCount); + waitForSearchableDocs(indexName, shardCount, replicaCount); assertCount(indexName, expectedCount + 5); } - logger.info("--> _cat/shards post execution \n{}", EntityUtils.toString(client().performRequest(new Request("GET", "/_cat/shards?v")).getEntity())); } public void 
testAutoIdWithOpTypeCreate() throws IOException {