Skip to content

Commit

Permalink
[Segment Replication] Update segrep bwc tests to verify replica check…
Browse files Browse the repository at this point in the history
…points and skip tests for 1.x bwc versions (opensearch-project#8203)

* [Segment Replication] Verify segment replication stats in bwc test

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Log cleanup

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Spotless check

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Add version check to skip test for 1.x bwc branches

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Add version check to skip test for 1.x bwc branches for mixed clusters

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Add version string in build to identify bwc version

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Use correct bwc version string

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Address review comments from opensearch-project#7626

Signed-off-by: Suraj Singh <surajrider@gmail.com>

---------

Signed-off-by: Suraj Singh <surajrider@gmail.com>

[Segment Replication] Use _cat/segments vs index stats + _search to verify doc count

Signed-off-by: Suraj Singh <surajrider@gmail.com>

Self review

Signed-off-by: Suraj Singh <surajrider@gmail.com>

remove unused imports

Signed-off-by: Suraj Singh <surajrider@gmail.com>

Handle 0 doc count segments

Signed-off-by: Suraj Singh <surajrider@gmail.com>

Add missing import statement

Signed-off-by: Suraj Singh <surajrider@gmail.com>
  • Loading branch information
dreamer-89 committed Jul 11, 2023
1 parent 6c82937 commit de25c9e
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 53 deletions.
2 changes: 2 additions & 0 deletions qa/mixed-cluster/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ for (Version bwcVersion : BuildParams.bwcVersions.wireCompatible) {
}

String baseName = "v${bwcVersion}"
String bwcVersionStr = "${bwcVersion}"

/* This project runs the core REST tests against a 4 node cluster where two of
the nodes has a different minor. */
Expand Down Expand Up @@ -88,6 +89,7 @@ for (Version bwcVersion : BuildParams.bwcVersions.wireCompatible) {
nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
}
systemProperty 'tests.upgrade_from_version', bwcVersionStr
systemProperty 'tests.path.repo', "${buildDir}/cluster/shared/repo/${baseName}"
onlyIf { project.bwc_tests_enabled }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@

public class IndexingIT extends OpenSearchRestTestCase {

protected static final Version UPGRADE_FROM_VERSION = Version.fromString(System.getProperty("tests.upgrade_from_version"));


private int indexDocs(String index, final int idStart, final int numDocs) throws IOException {
for (int i = 0; i < numDocs; i++) {
final int id = idStart + i;
Expand Down Expand Up @@ -115,12 +118,16 @@ private void printClusterRouting() throws IOException, ParseException {
* @throws Exception
*/
public void testIndexingWithPrimaryOnBwcNodes() throws Exception {
if (UPGRADE_FROM_VERSION.before(Version.V_2_4_0)) {
logger.info("--> Skip test for version {} where segment replication feature is not available", UPGRADE_FROM_VERSION);
return;
}
Nodes nodes = buildNodeAndVersions();
assumeFalse("new nodes is empty", nodes.getNewNodes().isEmpty());
logger.info("cluster discovered:\n {}", nodes.toString());
final List<String> bwcNamesList = nodes.getBWCNodes().stream().map(Node::getNodeName).collect(Collectors.toList());
final String bwcNames = bwcNamesList.stream().collect(Collectors.joining(","));
// Exclude bwc nodes from allocation so that primaries gets allocated on current version
// Update allocation settings so that primaries gets allocated only on nodes running on older version
Settings.Builder settings = Settings.builder()
.put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
.put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0)
Expand All @@ -134,7 +141,7 @@ public void testIndexingWithPrimaryOnBwcNodes() throws Exception {
try (RestClient nodeClient = buildClient(restClientSettings(),
nodes.getNewNodes().stream().map(Node::getPublishAddress).toArray(HttpHost[]::new))) {

logger.info("allowing replica shards assignment on bwc nodes");
logger.info("Remove allocation include settings so that shards can be allocated on current version nodes");
updateIndexSettings(index, Settings.builder().putNull("index.routing.allocation.include._name"));
// Add replicas so that it can be assigned on higher OS version nodes.
updateIndexSettings(index, Settings.builder().put("index.number_of_replicas", 2));
Expand All @@ -155,13 +162,17 @@ public void testIndexingWithPrimaryOnBwcNodes() throws Exception {


/**
* This test creates a cluster with primary on older version but due to {@link org.opensearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider};
* This test creates a cluster with primary on higher version but due to {@link org.opensearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider};
* replica shard allocation on lower OpenSearch version is prevented. Thus, this test though cover the use case where
* primary shard containing nodes are running on higher OS version while replicas are unassigned.
*
* @throws Exception
*/
public void testIndexingWithReplicaOnBwcNodes() throws Exception {
if (UPGRADE_FROM_VERSION.before(Version.V_2_4_0)) {
logger.info("--> Skip test for version {} where segment replication feature is not available", UPGRADE_FROM_VERSION);
return;
}
Nodes nodes = buildNodeAndVersions();
assumeFalse("new nodes is empty", nodes.getNewNodes().isEmpty());
logger.info("cluster discovered:\n {}", nodes.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,18 @@
import org.opensearch.client.ResponseException;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.Booleans;
import org.opensearch.common.io.Streams;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.codec.CodecService;
import org.opensearch.index.engine.EngineConfig;
import org.opensearch.indices.replication.common.ReplicationType;
import org.opensearch.test.rest.yaml.ObjectPath;

import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING;
import static org.opensearch.rest.action.search.RestSearchAction.TOTAL_HITS_AS_INT_PARAM;
Expand Down Expand Up @@ -87,54 +88,46 @@ private void printClusterNodes() throws IOException, ParseException, URISyntaxEx
}

// Verifies that for each shard copy holds same document count across all containing nodes.
private void waitForSearchableDocs(String index, int shardCount) throws Exception {
Map<Integer,String> primaryShardToNodeIDMap = new HashMap<>();
Map<Integer,String> replicaShardToNodeIDMap = new HashMap<>();
private void waitForSearchableDocs(String index, int shardCount, int replicaCount) throws Exception {
assertTrue(shardCount > 0);
assertTrue(replicaCount > 0);
waitForClusterHealthWithNoShardMigration(index, "green");
logger.info("--> _cat/shards before search \n{}", EntityUtils.toString(client().performRequest(new Request("GET", "/_cat/shards?v")).getEntity()));

Request request = new Request("GET", index + "/_stats");
request.addParameter("level", "shards");
Response response = client().performRequest(request);
for (int shardNumber = 0; shardNumber < shardCount; shardNumber++) {
List<Object> shardStats = ObjectPath.createFromResponse(response).evaluate("indices." + index + ".shards." + shardNumber);
for (Object shard : shardStats) {
final String nodeId = ObjectPath.evaluate(shard, "routing.node");
final Boolean primary = ObjectPath.evaluate(shard, "routing.primary");
if (primary) {
primaryShardToNodeIDMap.putIfAbsent(shardNumber, nodeId);
} else {
replicaShardToNodeIDMap.putIfAbsent(shardNumber, nodeId);
// Verify segment replication stats
verifySegmentStats(index);

// Verify segment store
assertBusy(() -> {
Request segrepStatsRequest = new Request("GET", "/_cat/segments/" + index + "?s=shard,segment,primaryOrReplica");
segrepStatsRequest.addParameter("h", "index,shard,primaryOrReplica,segment,docs.count");
Response segrepStatsResponse = client().performRequest(segrepStatsRequest);
logger.info("--> _cat/segments response\n {}", EntityUtils.toString(segrepStatsResponse.getEntity()));
List<String> responseList = Streams.readAllLines(segrepStatsResponse.getEntity().getContent());
for (int segmentsIndex=0; segmentsIndex < responseList.size();) {
String[] primaryRow = responseList.get(segmentsIndex++).split(" +");
String shardId = primaryRow[0] + primaryRow[1];
assertTrue(primaryRow[2].equals("p"));
for(int replicaIndex = 1; replicaIndex <= replicaCount; replicaIndex++) {
String[] replicaRow = responseList.get(segmentsIndex).split(" +");
String replicaShardId = replicaRow[0] + replicaRow[1];
// When segment has 0 doc count, not all replica copies posses that segment. Skip to next segment
if (replicaRow[2].equals("p")) {
assertTrue(primaryRow[4].equals("0"));
break;
}
// verify same shard id
assertTrue(replicaShardId.equals(shardId));
// verify replica row
assertTrue(replicaRow[2].equals("r"));
// Verify segment name matches e.g. _0
assertTrue(replicaRow[3].equals(primaryRow[3]));
// Verify doc count matches
assertTrue(replicaRow[4].equals(primaryRow[4]));
segmentsIndex++;
}
}
}
logger.info("--> primaryShardToNodeIDMap {}", primaryShardToNodeIDMap);
logger.info("--> replicaShardToNodeIDMap {}", replicaShardToNodeIDMap);

for (int shardNumber = 0; shardNumber < shardCount; shardNumber++) {
logger.info("--> Verify doc count for shard number {}", shardNumber);
Request searchTestIndexRequest = new Request("POST", "/" + index + "/_search");
searchTestIndexRequest.addParameter(TOTAL_HITS_AS_INT_PARAM, "true");
searchTestIndexRequest.addParameter("filter_path", "hits.total");
searchTestIndexRequest.addParameter("preference", "_shards:" + shardNumber + "|_only_nodes:" + primaryShardToNodeIDMap.get(shardNumber));
Response searchTestIndexResponse = client().performRequest(searchTestIndexRequest);
final int primaryHits = ObjectPath.createFromResponse(searchTestIndexResponse).evaluate("hits.total");
logger.info("--> primaryHits {}", primaryHits);
final int shardNum = shardNumber;
// Verify replica shard doc count only when available.
if (replicaShardToNodeIDMap.get(shardNum) != null) {
assertBusy(() -> {
Request replicaRequest = new Request("POST", "/" + index + "/_search");
replicaRequest.addParameter(TOTAL_HITS_AS_INT_PARAM, "true");
replicaRequest.addParameter("filter_path", "hits.total");
replicaRequest.addParameter("preference", "_shards:" + shardNum + "|_only_nodes:" + replicaShardToNodeIDMap.get(shardNum));
Response replicaResponse = client().performRequest(replicaRequest);
int replicaHits = ObjectPath.createFromResponse(replicaResponse).evaluate("hits.total");
logger.info("--> ReplicaHits {}", replicaHits);
assertEquals(primaryHits, replicaHits);
}, 1, TimeUnit.MINUTES);
}
}
});
}

private void waitForClusterHealthWithNoShardMigration(String indexName, String status) throws IOException {
Expand All @@ -148,6 +141,18 @@ private void waitForClusterHealthWithNoShardMigration(String indexName, String s
client().performRequest(waitForStatus);
}

private void verifySegmentStats(String indexName) throws Exception {
assertBusy(() -> {
Request segrepStatsRequest = new Request("GET", "/_cat/segment_replication/" + indexName);
segrepStatsRequest.addParameter("h", "shardId,target_node,checkpoints_behind");
Response segrepStatsResponse = client().performRequest(segrepStatsRequest);
for (String statLine : Streams.readAllLines(segrepStatsResponse.getEntity().getContent())) {
String[] elements = statLine.split(" +");
assertEquals("Replica shard " + elements[0] + "not upto date with primary ", 0, Integer.parseInt(elements[2]));
}
});
}

public void testIndexing() throws IOException, ParseException {
switch (CLUSTER_TYPE) {
case OLD:
Expand Down Expand Up @@ -248,7 +253,7 @@ public void testIndexingWithSegRep() throws Exception {
}
final String indexName = "test-index-segrep";
final int shardCount = 3;
final int replicaCount = 1;
final int replicaCount = 2;
logger.info("--> Case {}", CLUSTER_TYPE);
printClusterNodes();
logger.info("--> _cat/shards before test execution \n{}", EntityUtils.toString(client().performRequest(new Request("GET", "/_cat/shards?v")).getEntity()));
Expand All @@ -258,6 +263,10 @@ public void testIndexingWithSegRep() throws Exception {
.put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), shardCount)
.put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), replicaCount)
.put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT)
.put(
EngineConfig.INDEX_CODEC_SETTING.getKey(),
randomFrom(CodecService.DEFAULT_CODEC, CodecService.BEST_COMPRESSION_CODEC, CodecService.LUCENE_DEFAULT_CODEC)
)
.put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms");
createIndex(indexName, settings.build());
waitForClusterHealthWithNoShardMigration(indexName, "green");
Expand Down Expand Up @@ -292,7 +301,7 @@ public void testIndexingWithSegRep() throws Exception {
throw new UnsupportedOperationException("Unknown cluster type [" + CLUSTER_TYPE + "]");
}

waitForSearchableDocs(indexName, shardCount);
waitForSearchableDocs(indexName, shardCount, replicaCount);
assertCount(indexName, expectedCount);

if (CLUSTER_TYPE != ClusterType.OLD) {
Expand All @@ -303,17 +312,16 @@ public void testIndexingWithSegRep() throws Exception {
toBeDeleted.addParameter("refresh", "true");
toBeDeleted.setJsonEntity("{\"f1\": \"delete-me\"}");
client().performRequest(toBeDeleted);
waitForSearchableDocs(indexName, shardCount);
waitForSearchableDocs(indexName, shardCount, replicaCount);
assertCount(indexName, expectedCount + 6);

logger.info("--> Delete previously added doc and verify doc count");
Request delete = new Request("DELETE", "/" + indexName + "/_doc/to_be_deleted");
delete.addParameter("refresh", "true");
client().performRequest(delete);
waitForSearchableDocs(indexName, shardCount);
waitForSearchableDocs(indexName, shardCount, replicaCount);
assertCount(indexName, expectedCount + 5);
}
logger.info("--> _cat/shards post execution \n{}", EntityUtils.toString(client().performRequest(new Request("GET", "/_cat/shards?v")).getEntity()));
}

public void testAutoIdWithOpTypeCreate() throws IOException {
Expand Down

0 comments on commit de25c9e

Please sign in to comment.