diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java index 33cea42eadc76..6966964dd722e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java @@ -1283,7 +1283,15 @@ private void verifyShardInfo(XContentParser parser, boolean primary, boolean inc assertTrue(parser.currentName().equals("id") || parser.currentName().equals("name") || parser.currentName().equals("transport_address") - || parser.currentName().equals("weight_ranking")); + || parser.currentName().equals("weight_ranking") + || parser.currentName().equals("attributes")); + // Skip past all the attributes object + if (parser.currentName().equals("attributes")) { + while(!parser.nextToken().equals(Token.END_OBJECT)) { + parser.nextToken(); + } + break; + } } else { assertTrue(token.isValue()); assertNotNull(parser.text()); @@ -1403,6 +1411,11 @@ private String verifyNodeDecisionPrologue(XContentParser parser) throws IOExcept parser.nextToken(); assertNotNull(parser.text()); parser.nextToken(); + assertEquals("node_attributes", parser.currentName()); + // skip past all the node_attributes object + while (!parser.currentName().equals("node_decision")) { + parser.nextToken(); + } assertEquals("node_decision", parser.currentName()); parser.nextToken(); return nodeName; diff --git a/server/src/internalClusterTest/java/org/opensearch/index/IndexingPressureIT.java b/server/src/internalClusterTest/java/org/opensearch/index/IndexingPressureIT.java index 41248f0ae7bc9..910f85d6556c7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/IndexingPressureIT.java +++ 
b/server/src/internalClusterTest/java/org/opensearch/index/IndexingPressureIT.java @@ -148,24 +148,27 @@ public void testWriteBytesAreIncremented() throws Exception { final ActionFuture successFuture = client(coordinatingOnlyNode).bulk(bulkRequest); replicationSendPointReached.await(); - IndexingPressure primaryWriteLimits = internalCluster().getInstance(IndexingPressure.class, primaryName); - IndexingPressure replicaWriteLimits = internalCluster().getInstance(IndexingPressure.class, replicaName); - IndexingPressure coordinatingWriteLimits = internalCluster().getInstance(IndexingPressure.class, coordinatingOnlyNode); - - assertThat(primaryWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes(), greaterThan(bulkShardRequestSize)); - assertThat(primaryWriteLimits.getCurrentPrimaryBytes(), greaterThan(bulkShardRequestSize)); - assertEquals(0, primaryWriteLimits.getCurrentCoordinatingBytes()); - assertEquals(0, primaryWriteLimits.getCurrentReplicaBytes()); - - assertEquals(0, replicaWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, replicaWriteLimits.getCurrentCoordinatingBytes()); - assertEquals(0, replicaWriteLimits.getCurrentPrimaryBytes()); - assertEquals(0, replicaWriteLimits.getCurrentReplicaBytes()); - - assertEquals(bulkRequestSize, coordinatingWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(bulkRequestSize, coordinatingWriteLimits.getCurrentCoordinatingBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentPrimaryBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentReplicaBytes()); + IndexingPressureService primaryWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, primaryName); + IndexingPressureService replicaWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, replicaName); + IndexingPressureService coordinatingWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, coordinatingOnlyNode); + + 
assertThat(primaryWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes(), greaterThan(bulkShardRequestSize)); + assertThat(primaryWriteLimits.nodeStats().getCurrentPrimaryBytes(), greaterThan(bulkShardRequestSize)); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentReplicaBytes()); + + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentPrimaryBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentReplicaBytes()); + + assertEquals(bulkRequestSize, coordinatingWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(bulkRequestSize, coordinatingWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentPrimaryBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentReplicaBytes()); latchBlockingReplicationSend.countDown(); @@ -188,25 +191,26 @@ public void testWriteBytesAreIncremented() throws Exception { if (usePrimaryAsCoordinatingNode) { assertBusy(() -> { - assertThat(primaryWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes(), + assertThat(primaryWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes(), greaterThan(bulkShardRequestSize + secondBulkRequestSize)); - assertEquals(secondBulkRequestSize, primaryWriteLimits.getCurrentCoordinatingBytes()); - assertThat(primaryWriteLimits.getCurrentPrimaryBytes(), + assertEquals(secondBulkRequestSize, primaryWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertThat(primaryWriteLimits.nodeStats().getCurrentPrimaryBytes(), greaterThan(bulkShardRequestSize + secondBulkRequestSize)); - assertEquals(0, replicaWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - 
assertEquals(0, replicaWriteLimits.getCurrentCoordinatingBytes()); - assertEquals(0, replicaWriteLimits.getCurrentPrimaryBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentPrimaryBytes()); }); } else { - assertThat(primaryWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes(), greaterThan(bulkShardRequestSize)); + assertThat(primaryWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes(), + greaterThan(bulkShardRequestSize)); - assertEquals(secondBulkRequestSize, replicaWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(secondBulkRequestSize, replicaWriteLimits.getCurrentCoordinatingBytes()); - assertEquals(0, replicaWriteLimits.getCurrentPrimaryBytes()); + assertEquals(secondBulkRequestSize, replicaWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(secondBulkRequestSize, replicaWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentPrimaryBytes()); } - assertEquals(bulkRequestSize, coordinatingWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertBusy(() -> assertThat(replicaWriteLimits.getCurrentReplicaBytes(), + assertEquals(bulkRequestSize, coordinatingWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertBusy(() -> assertThat(replicaWriteLimits.nodeStats().getCurrentReplicaBytes(), greaterThan(bulkShardRequestSize + secondBulkShardRequestSize))); replicaRelease.close(); @@ -214,20 +218,20 @@ public void testWriteBytesAreIncremented() throws Exception { successFuture.actionGet(); secondFuture.actionGet(); - assertEquals(0, primaryWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, primaryWriteLimits.getCurrentCoordinatingBytes()); - assertEquals(0, 
primaryWriteLimits.getCurrentPrimaryBytes()); - assertEquals(0, primaryWriteLimits.getCurrentReplicaBytes()); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentPrimaryBytes()); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentReplicaBytes()); - assertEquals(0, replicaWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, replicaWriteLimits.getCurrentCoordinatingBytes()); - assertEquals(0, replicaWriteLimits.getCurrentPrimaryBytes()); - assertEquals(0, replicaWriteLimits.getCurrentReplicaBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentPrimaryBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentReplicaBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentCoordinatingBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentPrimaryBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentReplicaBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentCoordinatingBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentPrimaryBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentReplicaBytes()); } finally { if (replicationSendPointReached.getCount() > 0) { replicationSendPointReached.countDown(); @@ -271,17 +275,21 @@ public void testWriteCanBeRejectedAtCoordinatingLevel() throws Exception { try (Releasable replicaRelease = blockReplicas(replicaThreadPool)) { final 
ActionFuture successFuture = client(coordinatingOnlyNode).bulk(bulkRequest); - IndexingPressure primaryWriteLimits = internalCluster().getInstance(IndexingPressure.class, primaryName); - IndexingPressure replicaWriteLimits = internalCluster().getInstance(IndexingPressure.class, replicaName); - IndexingPressure coordinatingWriteLimits = internalCluster().getInstance(IndexingPressure.class, coordinatingOnlyNode); + IndexingPressureService primaryWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, primaryName); + IndexingPressureService replicaWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, replicaName); + IndexingPressureService coordinatingWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, coordinatingOnlyNode); assertBusy(() -> { - assertThat(primaryWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes(), greaterThan(bulkShardRequestSize)); - assertEquals(0, primaryWriteLimits.getCurrentReplicaBytes()); - assertEquals(0, replicaWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertThat(replicaWriteLimits.getCurrentReplicaBytes(), greaterThan(bulkShardRequestSize)); - assertEquals(bulkRequestSize, coordinatingWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentReplicaBytes()); + assertThat(primaryWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes(), + greaterThan(bulkShardRequestSize)); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentReplicaBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertThat(replicaWriteLimits.nodeStats().getCurrentReplicaBytes(), greaterThan(bulkShardRequestSize)); + assertEquals(bulkRequestSize, coordinatingWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentReplicaBytes()); }); 
expectThrows(OpenSearchRejectedExecutionException.class, () -> { @@ -298,12 +306,12 @@ public void testWriteCanBeRejectedAtCoordinatingLevel() throws Exception { successFuture.actionGet(); - assertEquals(0, primaryWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, primaryWriteLimits.getCurrentReplicaBytes()); - assertEquals(0, replicaWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, replicaWriteLimits.getCurrentReplicaBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentReplicaBytes()); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentReplicaBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentReplicaBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentReplicaBytes()); } } @@ -335,17 +343,21 @@ public void testWriteCanBeRejectedAtPrimaryLevel() throws Exception { try (Releasable replicaRelease = blockReplicas(replicaThreadPool)) { final ActionFuture successFuture = client(primaryName).bulk(bulkRequest); - IndexingPressure primaryWriteLimits = internalCluster().getInstance(IndexingPressure.class, primaryName); - IndexingPressure replicaWriteLimits = internalCluster().getInstance(IndexingPressure.class, replicaName); - IndexingPressure coordinatingWriteLimits = internalCluster().getInstance(IndexingPressure.class, coordinatingOnlyNode); + IndexingPressureService primaryWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, primaryName); + IndexingPressureService replicaWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, 
replicaName); + IndexingPressureService coordinatingWriteLimits = internalCluster() + .getInstance(IndexingPressureService.class, coordinatingOnlyNode); assertBusy(() -> { - assertThat(primaryWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes(), greaterThan(bulkShardRequestSize)); - assertEquals(0, primaryWriteLimits.getCurrentReplicaBytes()); - assertEquals(0, replicaWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertThat(replicaWriteLimits.getCurrentReplicaBytes(), greaterThan(bulkShardRequestSize)); - assertEquals(0, coordinatingWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentReplicaBytes()); + assertThat(primaryWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes(), + greaterThan(bulkShardRequestSize)); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentReplicaBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertThat(replicaWriteLimits.nodeStats().getCurrentReplicaBytes(), greaterThan(bulkShardRequestSize)); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentReplicaBytes()); }); BulkResponse responses = client(coordinatingOnlyNode).bulk(bulkRequest).actionGet(); @@ -356,12 +368,12 @@ public void testWriteCanBeRejectedAtPrimaryLevel() throws Exception { successFuture.actionGet(); - assertEquals(0, primaryWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, primaryWriteLimits.getCurrentReplicaBytes()); - assertEquals(0, replicaWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, replicaWriteLimits.getCurrentReplicaBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentCombinedCoordinatingAndPrimaryBytes()); - assertEquals(0, coordinatingWriteLimits.getCurrentReplicaBytes()); + assertEquals(0, 
primaryWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, primaryWriteLimits.nodeStats().getCurrentReplicaBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, replicaWriteLimits.nodeStats().getCurrentReplicaBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, coordinatingWriteLimits.nodeStats().getCurrentReplicaBytes()); } } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java index 5b0c4ce0f6ec4..56ebd703df336 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java @@ -36,6 +36,7 @@ import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.io.stream.Writeable; +import org.opensearch.index.ShardIndexingPressureSettings; import java.io.IOException; import java.util.Collections; @@ -53,6 +54,8 @@ public class CommonStatsFlags implements Writeable, Cloneable { private String[] completionDataFields = null; private boolean includeSegmentFileSizes = false; private boolean includeUnloadedSegments = false; + private boolean includeAllShardIndexingPressureTrackers = false; + private boolean includeOnlyTopIndexingPressureMetrics = false; /** * @param flags flags to set. If no flags are supplied, default flags will be set. 
@@ -80,6 +83,15 @@ public CommonStatsFlags(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(LegacyESVersion.V_7_2_0)) { includeUnloadedSegments = in.readBoolean(); } + if (in.getVersion().onOrAfter(LegacyESVersion.V_7_10_2)) { + includeAllShardIndexingPressureTrackers = in.readBoolean(); + includeOnlyTopIndexingPressureMetrics = in.readBoolean(); + } else if (in.getVersion().onOrAfter(LegacyESVersion.V_7_9_0)) { + if (ShardIndexingPressureSettings.isShardIndexingPressureAttributeEnabled()) { + includeAllShardIndexingPressureTrackers = in.readBoolean(); + includeOnlyTopIndexingPressureMetrics = in.readBoolean(); + } + } } @Override @@ -98,6 +110,15 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(LegacyESVersion.V_7_2_0)) { out.writeBoolean(includeUnloadedSegments); } + if (out.getVersion().onOrAfter(LegacyESVersion.V_7_10_2)) { + out.writeBoolean(includeAllShardIndexingPressureTrackers); + out.writeBoolean(includeOnlyTopIndexingPressureMetrics); + } else if (out.getVersion().onOrAfter(LegacyESVersion.V_7_9_0)) { + if (ShardIndexingPressureSettings.isShardIndexingPressureAttributeEnabled()) { + out.writeBoolean(includeAllShardIndexingPressureTrackers); + out.writeBoolean(includeOnlyTopIndexingPressureMetrics); + } + } } /** @@ -111,6 +132,8 @@ public CommonStatsFlags all() { completionDataFields = null; includeSegmentFileSizes = false; includeUnloadedSegments = false; + includeAllShardIndexingPressureTrackers = false; + includeOnlyTopIndexingPressureMetrics = false; return this; } @@ -125,6 +148,8 @@ public CommonStatsFlags clear() { completionDataFields = null; includeSegmentFileSizes = false; includeUnloadedSegments = false; + includeAllShardIndexingPressureTrackers = false; + includeOnlyTopIndexingPressureMetrics = false; return this; } @@ -198,10 +223,28 @@ public CommonStatsFlags includeUnloadedSegments(boolean includeUnloadedSegments) return this; } + public CommonStatsFlags 
includeAllShardIndexingPressureTrackers(boolean includeAllShardPressureTrackers) { + this.includeAllShardIndexingPressureTrackers = includeAllShardPressureTrackers; + return this; + } + + public CommonStatsFlags includeOnlyTopIndexingPressureMetrics(boolean includeOnlyTopIndexingPressureMetrics) { + this.includeOnlyTopIndexingPressureMetrics = includeOnlyTopIndexingPressureMetrics; + return this; + } + public boolean includeUnloadedSegments() { return this.includeUnloadedSegments; } + public boolean includeAllShardIndexingPressureTrackers() { + return this.includeAllShardIndexingPressureTrackers; + } + + public boolean includeOnlyTopIndexingPressureMetrics() { + return this.includeOnlyTopIndexingPressureMetrics; + } + public boolean includeSegmentFileSizes() { return this.includeSegmentFileSizes; } diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java index 7aba9a40f05a0..64c040f2449ba 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java @@ -75,7 +75,7 @@ import org.opensearch.common.util.concurrent.AtomicArray; import org.opensearch.index.Index; import org.opensearch.index.IndexNotFoundException; -import org.opensearch.index.IndexingPressure; +import org.opensearch.index.IndexingPressureService; import org.opensearch.index.VersionType; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.ShardId; @@ -127,25 +127,26 @@ public class TransportBulkAction extends HandledTransportAction docWriteReque protected void doExecute(Task task, BulkRequest bulkRequest, ActionListener listener) { final long indexingBytes = bulkRequest.ramBytesUsed(); final boolean isOnlySystem = isOnlySystem(bulkRequest, clusterService.state().metadata().getIndicesLookup(), systemIndices); - final Releasable releasable = 
indexingPressure.markCoordinatingOperationStarted(indexingBytes, isOnlySystem); + final Releasable releasable = indexingPressureService.markCoordinatingOperationStarted(indexingBytes, isOnlySystem); final ActionListener releasingListener = ActionListener.runBefore(listener, releasable::close); final String executorName = isOnlySystem ? Names.SYSTEM_WRITE : Names.WRITE; try { @@ -562,7 +563,12 @@ protected void doRun() { if (task != null) { bulkShardRequest.setParentTask(nodeId, task.getId()); } - shardBulkAction.execute(bulkShardRequest, new ActionListener() { + // Add the shard level accounting for coordinating and supply the listener + final boolean isOnlySystem = isOnlySystem(bulkRequest, clusterService.state().metadata().getIndicesLookup(), systemIndices); + final Releasable releasable = indexingPressureService.markCoordinatingOperationStarted(shardId, + bulkShardRequest.ramBytesUsed(), isOnlySystem); + + shardBulkAction.execute(bulkShardRequest, ActionListener.runBefore(new ActionListener() { @Override public void onResponse(BulkShardResponse bulkShardResponse) { for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { @@ -595,7 +601,7 @@ private void finishHim() { listener.onResponse(new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]), buildTookInMillis(startTimeNanos))); } - }); + }, releasable::close)); } bulkRequest = null; // allow memory for bulk request items to be reclaimed before all items have been completed } diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java index 29b41f39851ab..2a34037d32fe4 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java @@ -68,7 +68,7 @@ import org.opensearch.common.xcontent.ToXContent; import org.opensearch.common.xcontent.XContentHelper; 
import org.opensearch.common.xcontent.XContentType; -import org.opensearch.index.IndexingPressure; +import org.opensearch.index.IndexingPressureService; import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.VersionConflictEngineException; import org.opensearch.index.get.GetResult; @@ -115,9 +115,9 @@ public class TransportShardBulkAction extends TransportWriteAction extends TransportReplicationAction { - protected final IndexingPressure indexingPressure; + protected final IndexingPressureService indexingPressureService; protected final SystemIndices systemIndices; private final Function executorFunction; @@ -85,13 +85,14 @@ protected TransportWriteAction(Settings settings, String actionName, TransportSe ClusterService clusterService, IndicesService indicesService, ThreadPool threadPool, ShardStateAction shardStateAction, ActionFilters actionFilters, Writeable.Reader request, Writeable.Reader replicaRequest, Function executorFunction, - boolean forceExecutionOnPrimary, IndexingPressure indexingPressure, SystemIndices systemIndices) { + boolean forceExecutionOnPrimary, IndexingPressureService indexingPressureService, + SystemIndices systemIndices) { // We pass ThreadPool.Names.SAME to the super class as we control the dispatching to the // ThreadPool.Names.WRITE/ThreadPool.Names.SYSTEM_WRITE thread pools in this class. 
super(settings, actionName, transportService, clusterService, indicesService, threadPool, shardStateAction, actionFilters, request, replicaRequest, ThreadPool.Names.SAME, true, forceExecutionOnPrimary); this.executorFunction = executorFunction; - this.indexingPressure = indexingPressure; + this.indexingPressureService = indexingPressureService; this.systemIndices = systemIndices; } @@ -101,7 +102,7 @@ protected String executor(IndexShard shard) { @Override protected Releasable checkOperationLimits(Request request) { - return indexingPressure.markPrimaryOperationStarted(primaryOperationSize(request), force(request)); + return indexingPressureService.markPrimaryOperationStarted(request.shardId, primaryOperationSize(request), force(request)); } protected boolean force(ReplicatedWriteRequest request) { @@ -119,7 +120,8 @@ protected Releasable checkPrimaryLimits(Request request, boolean rerouteWasLocal // If this primary request was received from a local reroute initiated by the node client, we // must mark a new primary operation local to the coordinating node. if (localRerouteInitiatedByNodeClient) { - return indexingPressure.markPrimaryOperationLocalToCoordinatingNodeStarted(primaryOperationSize(request)); + return indexingPressureService.markPrimaryOperationLocalToCoordinatingNodeStarted(request.shardId, + primaryOperationSize(request)); } else { return () -> {}; } @@ -127,7 +129,7 @@ protected Releasable checkPrimaryLimits(Request request, boolean rerouteWasLocal // If this primary request was received directly from the network, we must mark a new primary // operation. This happens if the write action skips the reroute step (ex: rsync) or during // primary delegation, after the primary relocation hand-off. 
- return indexingPressure.markPrimaryOperationStarted(primaryOperationSize(request), force(request)); + return indexingPressureService.markPrimaryOperationStarted(request.shardId, primaryOperationSize(request), force(request)); } } @@ -137,7 +139,7 @@ protected long primaryOperationSize(Request request) { @Override protected Releasable checkReplicaLimits(ReplicaRequest request) { - return indexingPressure.markReplicaOperationStarted(replicaOperationSize(request), force(request)); + return indexingPressureService.markReplicaOperationStarted(request.shardId, replicaOperationSize(request), force(request)); } protected long replicaOperationSize(ReplicaRequest request) { diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java index 205eb050562e2..f52a2db41ac93 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java @@ -49,6 +49,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; +import org.opensearch.index.IndexingPressureService; import org.opensearch.node.Node; import org.opensearch.threadpool.ThreadPool; @@ -78,6 +79,8 @@ public class ClusterService extends AbstractLifecycleComponent { private RerouteService rerouteService; + private IndexingPressureService indexingPressureService; + public ClusterService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { this(settings, clusterSettings, new MasterService(settings, clusterSettings, threadPool), new ClusterApplierService(Node.NODE_NAME_SETTING.get(settings), settings, clusterSettings, threadPool)); @@ -203,6 +206,21 @@ public MasterService getMasterService() { return masterService; } + /** + * Getter and Setter for IndexingPressureService, This method exposes IndexingPressureService 
stats to other plugins. + * Indexing Pressure instances can be accessed via Node and NodeService class but none of them are + * present in the createComponents signature of Plugin interface currently. + * {@link org.opensearch.plugins.Plugin#createComponents} + */ + + public void setIndexingPressureService(IndexingPressureService indexingPressureService) { + this.indexingPressureService = indexingPressureService; + } + + public IndexingPressureService getIndexingPressureService() { + return indexingPressureService; + } + public ClusterApplierService getClusterApplierService() { return clusterApplierService; } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index d1174acca5099..79ad479c8d865 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -101,6 +101,9 @@ import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; import org.opensearch.index.IndexingPressure; +import org.opensearch.index.ShardIndexingPressureMemoryManager; +import org.opensearch.index.ShardIndexingPressureSettings; +import org.opensearch.index.ShardIndexingPressureStore; import org.opensearch.indices.IndexingMemoryController; import org.opensearch.indices.IndicesQueryCache; import org.opensearch.indices.IndicesRequestCache; @@ -266,6 +269,18 @@ public void apply(Settings value, Settings current, Settings previous) { DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING, SameShardAllocationDecider.CLUSTER_ROUTING_ALLOCATION_SAME_HOST_SETTING, ShardStateAction.FOLLOW_UP_REROUTE_PRIORITY_SETTING, + ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED, + ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED, + ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW, + ShardIndexingPressureSettings.SHARD_MIN_LIMIT, + 
ShardIndexingPressureStore.MAX_CACHE_STORE_SIZE, + ShardIndexingPressureMemoryManager.LOWER_OPERATING_FACTOR, + ShardIndexingPressureMemoryManager.OPTIMAL_OPERATING_FACTOR, + ShardIndexingPressureMemoryManager.UPPER_OPERATING_FACTOR, + ShardIndexingPressureMemoryManager.NODE_SOFT_LIMIT, + ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS, + ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT, + ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS, InternalClusterInfoService.INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING, InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING, InternalSnapshotsInfoService.INTERNAL_SNAPSHOT_INFO_MAX_CONCURRENT_FETCHES_SETTING, diff --git a/server/src/main/java/org/opensearch/index/IndexingPressure.java b/server/src/main/java/org/opensearch/index/IndexingPressure.java index 4c9cab1ea27ef..ddc27f30a7333 100644 --- a/server/src/main/java/org/opensearch/index/IndexingPressure.java +++ b/server/src/main/java/org/opensearch/index/IndexingPressure.java @@ -34,6 +34,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.common.inject.Inject; import org.opensearch.common.lease.Releasable; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; @@ -51,29 +52,29 @@ public class IndexingPressure { private static final Logger logger = LogManager.getLogger(IndexingPressure.class); - private final AtomicLong currentCombinedCoordinatingAndPrimaryBytes = new AtomicLong(0); - private final AtomicLong currentCoordinatingBytes = new AtomicLong(0); - private final AtomicLong currentPrimaryBytes = new AtomicLong(0); - private final AtomicLong currentReplicaBytes = new AtomicLong(0); + protected final AtomicLong currentCombinedCoordinatingAndPrimaryBytes = new AtomicLong(0); + protected final AtomicLong currentCoordinatingBytes = new AtomicLong(0); + protected final AtomicLong currentPrimaryBytes = new 
AtomicLong(0); + protected final AtomicLong currentReplicaBytes = new AtomicLong(0); - private final AtomicLong totalCombinedCoordinatingAndPrimaryBytes = new AtomicLong(0); - private final AtomicLong totalCoordinatingBytes = new AtomicLong(0); - private final AtomicLong totalPrimaryBytes = new AtomicLong(0); - private final AtomicLong totalReplicaBytes = new AtomicLong(0); + protected final AtomicLong totalCombinedCoordinatingAndPrimaryBytes = new AtomicLong(0); + protected final AtomicLong totalCoordinatingBytes = new AtomicLong(0); + protected final AtomicLong totalPrimaryBytes = new AtomicLong(0); + protected final AtomicLong totalReplicaBytes = new AtomicLong(0); - private final AtomicLong coordinatingRejections = new AtomicLong(0); - private final AtomicLong primaryRejections = new AtomicLong(0); - private final AtomicLong replicaRejections = new AtomicLong(0); + protected final AtomicLong coordinatingRejections = new AtomicLong(0); + protected final AtomicLong primaryRejections = new AtomicLong(0); + protected final AtomicLong replicaRejections = new AtomicLong(0); - private final long primaryAndCoordinatingLimits; - private final long replicaLimits; + protected final long primaryAndCoordinatingLimits; + protected final long replicaLimits; + @Inject public IndexingPressure(Settings settings) { this.primaryAndCoordinatingLimits = MAX_INDEXING_BYTES.get(settings).getBytes(); this.replicaLimits = (long) (this.primaryAndCoordinatingLimits * 1.5); } - private static Releasable wrapReleasable(Releasable releasable) { final AtomicBoolean called = new AtomicBoolean(); return () -> { diff --git a/server/src/main/java/org/opensearch/index/IndexingPressureService.java b/server/src/main/java/org/opensearch/index/IndexingPressureService.java new file mode 100644 index 0000000000000..105e1128913c1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/IndexingPressureService.java @@ -0,0 +1,79 @@ +/* + * Copyright OpenSearch Contributors. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index; + +import org.opensearch.action.admin.indices.stats.CommonStatsFlags; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.shard.ShardId; +import org.opensearch.index.stats.IndexingPressureStats; +import org.opensearch.index.stats.ShardIndexingPressureStats; + +/** + * Sets up classes for node/shard level indexing pressure. + * Provides abstraction and orchestration for indexing pressure methods when called from Transport Actions and Stats. + */ +public class IndexingPressureService { + + private final ShardIndexingPressure shardIndexingPressure; + + public IndexingPressureService(Settings settings, ClusterService clusterService) { + shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + } + + public Releasable markCoordinatingOperationStarted(long bytes, boolean forceExecution) { + if (isShardIndexingPressureEnabled() == false) { + return shardIndexingPressure.markCoordinatingOperationStarted(bytes, forceExecution); + } else { + return () -> {}; + } + } + + public Releasable markCoordinatingOperationStarted(ShardId shardId, long bytes, boolean forceExecution) { + if (isShardIndexingPressureEnabled()) { + return shardIndexingPressure.markCoordinatingOperationStarted(shardId, bytes, forceExecution); + } else { + return () -> {}; + } + } + + public Releasable markPrimaryOperationStarted(ShardId shardId, long bytes, boolean forceExecution) { + if (isShardIndexingPressureEnabled()) { + return shardIndexingPressure.markPrimaryOperationStarted(shardId, bytes, forceExecution); + } else { + return shardIndexingPressure.markPrimaryOperationStarted(bytes, forceExecution); + } + } + + public Releasable markPrimaryOperationLocalToCoordinatingNodeStarted(ShardId shardId, long bytes) { + if (isShardIndexingPressureEnabled()) { + return 
shardIndexingPressure.markPrimaryOperationLocalToCoordinatingNodeStarted(shardId, bytes); + } else { + return shardIndexingPressure.markPrimaryOperationLocalToCoordinatingNodeStarted(bytes); + } + } + + public Releasable markReplicaOperationStarted(ShardId shardId, long bytes, boolean forceExecution) { + if (isShardIndexingPressureEnabled()) { + return shardIndexingPressure.markReplicaOperationStarted(shardId, bytes, forceExecution); + } else { + return shardIndexingPressure.markReplicaOperationStarted(bytes, forceExecution); + } + } + + public IndexingPressureStats nodeStats() { + return shardIndexingPressure.stats(); + } + + public ShardIndexingPressureStats shardStats(CommonStatsFlags statsFlags) { + return shardIndexingPressure.shardStats(statsFlags); + } + + private boolean isShardIndexingPressureEnabled() { + return shardIndexingPressure.isShardIndexingPressureEnabled(); + } +} diff --git a/server/src/main/java/org/opensearch/index/ShardIndexingPressure.java b/server/src/main/java/org/opensearch/index/ShardIndexingPressure.java new file mode 100644 index 0000000000000..e1f56306c8c85 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/ShardIndexingPressure.java @@ -0,0 +1,423 @@ +/* + * Copyright OpenSearch Contributors. 
/*
 * Copyright OpenSearch Contributors.
 * SPDX-License-Identifier: Apache-2.0
 */

package org.opensearch.index;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.action.admin.indices.stats.CommonStatsFlags;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.lease.Releasable;
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException;
import org.opensearch.index.shard.ShardId;
import org.opensearch.index.stats.ShardIndexingPressureStats;
import org.opensearch.index.stats.IndexingPressurePerShardStats;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

/**
 * Shard Indexing Pressure is the uber level class derived from IndexingPressure.
 * The methods of this class will be invoked from Transport Action to start the memory accounting and as a response
 * it provides Releasable which will remove those memory accounting values or perform necessary actions once the request
 * completes.
 *
 * This class will be responsible for
 * 1. Memory Accounting at shard level.
 * 2. Memory Accounting at Node level. The tracking happens in the same variables defined in IndexingPressure to support
 * consistency even after feature toggle.
 * 3. Instantiating new tracker objects for new shards and moving the shard tracker object to cold store from hot when
 * the respective criteria meet via {@link ShardIndexingPressureStore}
 * 4. Calling methods of {@link ShardIndexingPressureMemoryManager} to evaluate if a request can be processed successfully
 * and can increase the memory limits for a shard under certain scenarios
 *
 * Accounting pattern used throughout: bytes are added optimistically to the atomic counters, the
 * limit checks run on the post-add totals, and the add is rolled back (with a rejection recorded)
 * if the request is refused. Shard-level breaches reject only in "enforced" mode; in shadow mode
 * they are admitted but tracked separately so they do not skew latency/throughput statistics.
 */
public class ShardIndexingPressure extends IndexingPressure {

    private final Logger logger = LogManager.getLogger(getClass());

    private final ShardIndexingPressureSettings shardIndexingPressureSettings;
    private final ShardIndexingPressureMemoryManager memoryManager;
    private final ShardIndexingPressureStore shardIndexingPressureStore;

    // Package-private: constructed only by IndexingPressureService.
    ShardIndexingPressure(Settings settings, ClusterService clusterService) {
        super(settings);
        // primaryAndCoordinatingLimits is the node-level byte budget computed by the superclass ctor.
        shardIndexingPressureSettings = new ShardIndexingPressureSettings(clusterService, settings, primaryAndCoordinatingLimits);
        ClusterSettings clusterSettings = clusterService.getClusterSettings();

        this.memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, clusterSettings, settings);
        this.shardIndexingPressureStore = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings);
    }

    /**
     * Starts coordinating-operation accounting for {@code bytes} against both the node-level
     * counters inherited from {@link IndexingPressure} and the per-shard tracker.
     *
     * @param shardId shard the coordinating request targets
     * @param bytes request size; 0 short-circuits to a no-op releasable
     * @param forceExecution when true, all limit checks are bypassed
     * @return releasable to be invoked exactly once when the operation completes
     * @throws OpenSearchRejectedExecutionException if the node limit is breached, or the shard
     *         limit is breached while shard indexing pressure is enforced
     */
    public Releasable markCoordinatingOperationStarted(ShardId shardId, long bytes, boolean forceExecution) {
        if(0 == bytes) { return () -> {}; }

        long requestStartTime = System.currentTimeMillis();
        ShardIndexingPressureTracker tracker = getShardIndexingPressureTracker(shardId);
        // Optimistic add: counters include this request while the limits are evaluated below.
        long nodeCombinedBytes = currentCombinedCoordinatingAndPrimaryBytes.addAndGet(bytes);
        long nodeReplicaBytes = currentReplicaBytes.get();
        long nodeTotalBytes = nodeCombinedBytes + nodeReplicaBytes;
        long shardCombinedBytes = tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(bytes);

        boolean shardLevelLimitBreached = false;
        if (forceExecution == false) {
            boolean nodeLevelLimitBreached = memoryManager.isCoordinatingNodeLimitBreached(tracker, nodeTotalBytes);
            if (nodeLevelLimitBreached == false) {
                // Shard check may also grow the shard's limit; only consulted when the node is healthy.
                shardLevelLimitBreached = memoryManager.isCoordinatingShardLimitBreached(tracker, requestStartTime,
                    shardIndexingPressureStore.getShardIndexingPressureHotStore(), nodeTotalBytes);
            }
            // Node breach always rejects; shard breach rejects only in enforced mode.
            boolean shouldRejectRequest = nodeLevelLimitBreached ||
                (shardLevelLimitBreached && shardIndexingPressureSettings.isShardIndexingPressureEnforced());

            if (shouldRejectRequest) {
                long nodeBytesWithoutOperation = nodeCombinedBytes - bytes;
                long nodeTotalBytesWithoutOperation = nodeTotalBytes - bytes;
                long shardBytesWithoutOperation = shardCombinedBytes - bytes;

                // Roll back the optimistic add and record the rejection at node and shard level.
                currentCombinedCoordinatingAndPrimaryBytes.addAndGet(-bytes);
                coordinatingRejections.getAndIncrement();
                tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().getAndAdd(-bytes);
                tracker.rejection().getCoordinatingRejections().getAndIncrement();

                throw new OpenSearchRejectedExecutionException("rejected execution of coordinating operation [" +
                    "shard_detail=[" + shardId.getIndexName() + "][" + shardId.id() + "][C], " +
                    "shard_coordinating_and_primary_bytes=" + shardBytesWithoutOperation + ", " +
                    "shard_operation_bytes=" + bytes + ", " +
                    "shard_max_coordinating_and_primary_bytes=" + tracker.getPrimaryAndCoordinatingLimits() + "] OR [" +
                    "node_coordinating_and_primary_bytes=" + nodeBytesWithoutOperation + ", " +
                    "node_replica_bytes=" + nodeReplicaBytes + ", " +
                    "node_all_bytes=" + nodeTotalBytesWithoutOperation + ", " +
                    "node_operation_bytes=" + bytes + ", " +
                    "node_max_coordinating_and_primary_bytes=" + primaryAndCoordinatingLimits + "]", false);
            }
        }
        currentCoordinatingBytes.addAndGet(bytes);
        totalCombinedCoordinatingAndPrimaryBytes.addAndGet(bytes);
        totalCoordinatingBytes.addAndGet(bytes);
        tracker.memory().getCurrentCoordinatingBytes().getAndAdd(bytes);
        tracker.count().getCoordinatingCount().incrementAndGet();
        tracker.outstandingRequest().getTotalOutstandingCoordinatingRequests().incrementAndGet();

        // In shadow mode, a request that would have been rejected should only contribute to the
        // accounting limits and should not influence dynamic parameters such as throughput.
        if (shardLevelLimitBreached) {
            return () -> {
                currentCombinedCoordinatingAndPrimaryBytes.addAndGet(-bytes);
                currentCoordinatingBytes.addAndGet(-bytes);
                tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(-bytes);
                tracker.memory().getCurrentCoordinatingBytes().addAndGet(-bytes);
                tracker.memory().getTotalCombinedCoordinatingAndPrimaryBytes().getAndAdd(bytes);
                tracker.memory().getTotalCoordinatingBytes().getAndAdd(bytes);

                memoryManager.decreaseShardPrimaryAndCoordinatingLimits(tracker);
                shardIndexingPressureStore.tryIndexingPressureTrackerCleanup(tracker);
            };
        }

        // Normal completion path: release memory, then update latency / throughput statistics.
        return () -> {
            long requestEndTime = System.currentTimeMillis();
            long requestLatency = requestEndTime - requestStartTime;

            currentCombinedCoordinatingAndPrimaryBytes.addAndGet(-bytes);
            currentCoordinatingBytes.addAndGet(-bytes);
            tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(-bytes);
            tracker.memory().getCurrentCoordinatingBytes().addAndGet(-bytes);
            tracker.latency().getCoordinatingTimeInMillis().addAndGet(requestLatency);
            tracker.memory().getTotalCombinedCoordinatingAndPrimaryBytes().getAndAdd(bytes);
            tracker.memory().getTotalCoordinatingBytes().getAndAdd(bytes);
            tracker.timeStamp().getLastSuccessfulCoordinatingRequestTimestamp().set(requestEndTime);
            // A successful completion resets the outstanding-request count used by the
            // last-successful-request secondary parameter.
            tracker.outstandingRequest().getTotalOutstandingCoordinatingRequests().set(0);

            if(requestLatency > 0) {
                double requestThroughput = (double) bytes / requestLatency;
                tracker.throughput().getCoordinatingThroughputMovingQueue().offer(requestThroughput);
                if (tracker.throughput().getCoordinatingThroughputMovingQueue().size() >
                        shardIndexingPressureSettings.getRequestSizeWindow()) {
                    // Window full: slide it — drop the oldest sample and fold in the new one.
                    double front = tracker.throughput().getCoordinatingThroughputMovingQueue().poll();
                    double movingAverage = calculateMovingAverage(tracker.throughput().getCoordinatingThroughputMovingAverage().get(),
                        front, requestThroughput, shardIndexingPressureSettings.getRequestSizeWindow());
                    // Moving average (a double) is stored bit-cast inside an AtomicLong.
                    tracker.throughput().getCoordinatingThroughputMovingAverage().set(Double.doubleToLongBits(movingAverage));
                } else {
                    // Window not yet full: use lifetime bytes/time as the average so far.
                    double movingAverage = (double) tracker.memory().getTotalCoordinatingBytes().get() /
                        tracker.latency().getCoordinatingTimeInMillis().get();
                    tracker.throughput().getCoordinatingThroughputMovingAverage().set(Double.doubleToLongBits(movingAverage));
                }
            }
            memoryManager.decreaseShardPrimaryAndCoordinatingLimits(tracker);
            shardIndexingPressureStore.tryIndexingPressureTrackerCleanup(tracker);
        };
    }

    /**
     * Accounting for a primary operation running on the node that also coordinated it. The
     * coordinating marker already holds the combined bytes for this request, so only the
     * primary-specific counters are touched and no limit checks are performed here.
     */
    public Releasable markPrimaryOperationLocalToCoordinatingNodeStarted(ShardId shardId, long bytes) {
        if(bytes == 0) { return () -> {}; }

        ShardIndexingPressureTracker tracker = getShardIndexingPressureTracker(shardId);

        currentPrimaryBytes.addAndGet(bytes);
        totalPrimaryBytes.addAndGet(bytes);
        tracker.memory().getCurrentPrimaryBytes().getAndAdd(bytes);
        tracker.memory().getTotalPrimaryBytes().getAndAdd(bytes);

        return () -> {
            currentPrimaryBytes.addAndGet(-bytes);
            tracker.memory().getCurrentPrimaryBytes().addAndGet(-bytes);
        };
    }

    /**
     * Starts primary-operation accounting. Mirrors
     * {@link #markCoordinatingOperationStarted(ShardId, long, boolean)} with the primary-side
     * counters, limits and statistics.
     *
     * @throws OpenSearchRejectedExecutionException if the node limit is breached, or the shard
     *         limit is breached while shard indexing pressure is enforced
     */
    public Releasable markPrimaryOperationStarted(ShardId shardId, long bytes, boolean forceExecution) {
        if(0 == bytes) { return () -> {}; }

        long requestStartTime = System.currentTimeMillis();
        ShardIndexingPressureTracker tracker = getShardIndexingPressureTracker(shardId);
        // Optimistic add, identical pattern to the coordinating path.
        long nodeCombinedBytes = currentCombinedCoordinatingAndPrimaryBytes.addAndGet(bytes);
        long nodeReplicaBytes = currentReplicaBytes.get();
        long nodeTotalBytes = nodeCombinedBytes + nodeReplicaBytes;
        long shardCombinedBytes = tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(bytes);

        boolean shardLevelLimitBreached = false;
        if (forceExecution == false) {
            boolean nodeLevelLimitBreached = memoryManager.isPrimaryNodeLimitBreached(tracker, nodeTotalBytes);
            if (nodeLevelLimitBreached == false) {
                shardLevelLimitBreached = memoryManager.isPrimaryShardLimitBreached(tracker, requestStartTime,
                    shardIndexingPressureStore.getShardIndexingPressureHotStore(), nodeTotalBytes);
            }
            boolean shouldRejectRequest = nodeLevelLimitBreached ||
                (shardLevelLimitBreached && shardIndexingPressureSettings.isShardIndexingPressureEnforced());

            if (shouldRejectRequest) {
                long nodeBytesWithoutOperation = nodeCombinedBytes - bytes;
                long nodeTotalBytesWithoutOperation = nodeTotalBytes - bytes;
                long shardBytesWithoutOperation = shardCombinedBytes - bytes;

                currentCombinedCoordinatingAndPrimaryBytes.addAndGet(-bytes);
                primaryRejections.getAndIncrement();
                tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().getAndAdd(-bytes);
                tracker.rejection().getPrimaryRejections().getAndIncrement();

                throw new OpenSearchRejectedExecutionException("rejected execution of primary operation [" +
                    "shard_detail=[" + shardId.getIndexName() + "][" + shardId.id() + "][P], " +
                    "shard_coordinating_and_primary_bytes=" + shardBytesWithoutOperation + ", " +
                    "shard_operation_bytes=" + bytes + ", " +
                    "shard_max_coordinating_and_primary_bytes=" + tracker.getPrimaryAndCoordinatingLimits() + "] OR [" +
                    "node_coordinating_and_primary_bytes=" + nodeBytesWithoutOperation + ", " +
                    "node_replica_bytes=" + nodeReplicaBytes + ", " +
                    "node_all_bytes=" + nodeTotalBytesWithoutOperation + ", " +
                    "node_operation_bytes=" + bytes + ", " +
                    "node_max_coordinating_and_primary_bytes=" + this.primaryAndCoordinatingLimits + "]", false);
            }
        }
        currentPrimaryBytes.addAndGet(bytes);
        totalCombinedCoordinatingAndPrimaryBytes.addAndGet(bytes);
        totalPrimaryBytes.addAndGet(bytes);
        tracker.memory().getCurrentPrimaryBytes().getAndAdd(bytes);
        tracker.count().getPrimaryCount().incrementAndGet();
        tracker.outstandingRequest().getTotalOutstandingPrimaryRequests().incrementAndGet();

        // In shadow mode, a request that would have been rejected should only contribute to the
        // accounting limits and should not influence dynamic parameters such as throughput.
        if (shardLevelLimitBreached) {
            return () -> {
                currentCombinedCoordinatingAndPrimaryBytes.addAndGet(-bytes);
                currentPrimaryBytes.addAndGet(-bytes);
                tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(-bytes);
                tracker.memory().getCurrentPrimaryBytes().addAndGet(-bytes);
                tracker.memory().getTotalCombinedCoordinatingAndPrimaryBytes().getAndAdd(bytes);
                tracker.memory().getTotalPrimaryBytes().getAndAdd(bytes);

                memoryManager.decreaseShardPrimaryAndCoordinatingLimits(tracker);
                shardIndexingPressureStore.tryIndexingPressureTrackerCleanup(tracker);
            };
        }

        // Normal completion path: release memory, then update latency / throughput statistics.
        return () -> {
            long requestEndTime = System.currentTimeMillis();
            long requestLatency = requestEndTime - requestStartTime;

            currentCombinedCoordinatingAndPrimaryBytes.addAndGet(-bytes);
            currentPrimaryBytes.addAndGet(-bytes);
            tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(-bytes);
            tracker.memory().getCurrentPrimaryBytes().addAndGet(-bytes);
            tracker.latency().getPrimaryTimeInMillis().addAndGet(requestLatency);
            tracker.memory().getTotalCombinedCoordinatingAndPrimaryBytes().getAndAdd(bytes);
            tracker.memory().getTotalPrimaryBytes().getAndAdd(bytes);
            tracker.timeStamp().getLastSuccessfulPrimaryRequestTimestamp().set(requestEndTime);
            tracker.outstandingRequest().getTotalOutstandingPrimaryRequests().set(0);

            if(requestLatency > 0) {
                double requestThroughput = (double)bytes / requestLatency;
                tracker.throughput().getPrimaryThroughputMovingQueue().offer(requestThroughput);
                if(tracker.throughput().getPrimaryThroughputMovingQueue().size() > shardIndexingPressureSettings.getRequestSizeWindow()) {
                    // Window full: slide it — drop the oldest sample and fold in the new one.
                    double front = tracker.throughput().getPrimaryThroughputMovingQueue().poll();
                    double movingAverage = calculateMovingAverage(tracker.throughput().getPrimaryThroughputMovingAverage().get(), front,
                        requestThroughput, shardIndexingPressureSettings.getRequestSizeWindow());
                    tracker.throughput().getPrimaryThroughputMovingAverage().set(Double.doubleToLongBits(movingAverage));
                } else {
                    // Window not yet full: use lifetime bytes/time as the average so far.
                    double movingAverage = (double) tracker.memory().getTotalPrimaryBytes().get() /
                        tracker.latency().getPrimaryTimeInMillis().get();
                    tracker.throughput().getPrimaryThroughputMovingAverage().set(Double.doubleToLongBits(movingAverage));
                }
            }
            memoryManager.decreaseShardPrimaryAndCoordinatingLimits(tracker);
            shardIndexingPressureStore.tryIndexingPressureTrackerCleanup(tracker);
        };
    }

    /**
     * Starts replica-operation accounting against the replica counters and limits.
     *
     * NOTE(review): the rejection message labels the limit values as
     * "max_coordinating_and_primary_bytes" while the values concatenated are the replica limits
     * ({@code tracker.getReplicaLimits()} / {@code this.replicaLimits}) — likely a copy/paste
     * label; confirm before relying on this message for diagnostics.
     *
     * @throws OpenSearchRejectedExecutionException if the node replica limit is breached, or the
     *         shard replica limit is breached while shard indexing pressure is enforced
     */
    public Releasable markReplicaOperationStarted(ShardId shardId, long bytes, boolean forceExecution) {
        if(0 == bytes) { return () -> {}; }

        long requestStartTime = System.currentTimeMillis();
        ShardIndexingPressureTracker tracker = getShardIndexingPressureTracker(shardId);
        // Optimistic add on the replica-only counters.
        long nodeReplicaBytes = currentReplicaBytes.addAndGet(bytes);
        long shardReplicaBytes = tracker.memory().getCurrentReplicaBytes().addAndGet(bytes);

        boolean shardLevelLimitBreached = false;
        if (forceExecution == false) {
            boolean nodeLevelLimitBreached = memoryManager.isReplicaNodeLimitBreached(tracker, nodeReplicaBytes);
            if (nodeLevelLimitBreached == false) {
                shardLevelLimitBreached = memoryManager.isReplicaShardLimitBreached(tracker, requestStartTime,
                    shardIndexingPressureStore.getShardIndexingPressureHotStore(), nodeReplicaBytes);
            }
            boolean shouldRejectRequest = nodeLevelLimitBreached ||
                (shardLevelLimitBreached && shardIndexingPressureSettings.isShardIndexingPressureEnforced());

            if (shouldRejectRequest) {
                long nodeReplicaBytesWithoutOperation = nodeReplicaBytes - bytes;
                long shardReplicaBytesWithoutOperation = shardReplicaBytes - bytes;

                currentReplicaBytes.addAndGet(-bytes);
                replicaRejections.getAndIncrement();
                tracker.memory().getCurrentReplicaBytes().getAndAdd(-bytes);
                tracker.rejection().getReplicaRejections().getAndIncrement();

                throw new OpenSearchRejectedExecutionException("rejected execution of replica operation [" +
                    "shard_detail=[" + shardId.getIndexName() + "][" + shardId.id() + "][R], " +
                    "shard_replica_bytes=" + shardReplicaBytesWithoutOperation + ", " +
                    "operation_bytes=" + bytes + ", " +
                    "max_coordinating_and_primary_bytes=" + tracker.getReplicaLimits() + "] OR [" +
                    "replica_bytes=" + nodeReplicaBytesWithoutOperation + ", " +
                    "operation_bytes=" + bytes + ", " +
                    "max_coordinating_and_primary_bytes=" + this.replicaLimits + "]", false);
            }
        }
        totalReplicaBytes.addAndGet(bytes);
        tracker.count().getReplicaCount().incrementAndGet();
        tracker.outstandingRequest().getTotalOutstandingReplicaRequests().incrementAndGet();

        // In shadow mode, a request that would have been rejected should only contribute to the
        // accounting limits and should not influence dynamic parameters such as throughput.
        if (shardLevelLimitBreached) {
            return () -> {
                currentReplicaBytes.addAndGet(-bytes);
                tracker.memory().getCurrentReplicaBytes().addAndGet(-bytes);
                tracker.memory().getTotalReplicaBytes().getAndAdd(bytes);

                memoryManager.decreaseShardReplicaLimits(tracker);
                shardIndexingPressureStore.tryIndexingPressureTrackerCleanup(tracker);
            };
        }

        // Normal completion path: release memory, then update latency / throughput statistics.
        return () -> {
            long requestEndTime = System.currentTimeMillis();
            long requestLatency = requestEndTime - requestStartTime;

            currentReplicaBytes.addAndGet(-bytes);
            tracker.memory().getCurrentReplicaBytes().addAndGet(-bytes);
            tracker.latency().getReplicaTimeInMillis().addAndGet(requestLatency);
            tracker.memory().getTotalReplicaBytes().getAndAdd(bytes);
            tracker.timeStamp().getLastSuccessfulReplicaRequestTimestamp().set(requestEndTime);
            tracker.outstandingRequest().getTotalOutstandingReplicaRequests().set(0);

            if(requestLatency > 0) {
                double requestThroughput = (double) bytes / requestLatency;
                tracker.throughput().getReplicaThroughputMovingQueue().offer(requestThroughput);
                if (tracker.throughput().getReplicaThroughputMovingQueue().size() > shardIndexingPressureSettings.getRequestSizeWindow()) {
                    // Window full: slide it — drop the oldest sample and fold in the new one.
                    double front = tracker.throughput().getReplicaThroughputMovingQueue().poll();
                    double movingAverage = calculateMovingAverage(tracker.throughput().getReplicaThroughputMovingAverage().get(), front,
                        requestThroughput, shardIndexingPressureSettings.getRequestSizeWindow());
                    tracker.throughput().getReplicaThroughputMovingAverage().set(Double.doubleToLongBits(movingAverage));
                } else {
                    // Window not yet full: use lifetime bytes/time as the average so far.
                    double movingAverage = (double) tracker.memory().getTotalReplicaBytes().get() /
                        tracker.latency().getReplicaTimeInMillis().get();
                    tracker.throughput().getReplicaThroughputMovingAverage().set(Double.doubleToLongBits(movingAverage));
                }
            }
            memoryManager.decreaseShardReplicaLimits(tracker);
            shardIndexingPressureStore.tryIndexingPressureTrackerCleanup(tracker);
        };
    }

    /**
     * Sliding-window average update: replaces {@code frontValue} (the evicted oldest sample) with
     * {@code currentValue} in an average of {@code count} samples.
     *
     * @param currentAverage previous average, bit-cast to long via {@link Double#doubleToLongBits}
     * @param count window size; assumes the window held exactly {@code count} samples — TODO confirm
     *        callers only invoke this once the queue exceeds the window size (they appear to)
     */
    private double calculateMovingAverage(long currentAverage, double frontValue, double currentValue, int count) {
        if(count > 0) {
            return ((Double.longBitsToDouble(currentAverage) * count) + currentValue - frontValue) / count;
        } else {
            return currentValue;
        }
    }

    /**
     * Returns shard-level stats filtered by {@code statsFlags}: top-level rejection counters only,
     * hot-store shards, or hot + cold store shards.
     */
    public ShardIndexingPressureStats shardStats(CommonStatsFlags statsFlags) {

        if (statsFlags.includeOnlyTopIndexingPressureMetrics()) {
            return topStats();
        } else {
            ShardIndexingPressureStats allStats = shardStats();
            if (statsFlags.includeAllShardIndexingPressureTrackers()) {
                allStats.addAll(coldStats());
            }
            return allStats;
        }
    }

    // Stats for shards currently in the hot store (actively tracked shards).
    ShardIndexingPressureStats shardStats() {
        Map<ShardId, IndexingPressurePerShardStats> statsPerShard = new HashMap<>();
        boolean isEnforcedMode = shardIndexingPressureSettings.isShardIndexingPressureEnforced();

        for (Map.Entry<ShardId, ShardIndexingPressureTracker> shardEntry :
                this.shardIndexingPressureStore.getShardIndexingPressureHotStore().entrySet()) {
            IndexingPressurePerShardStats shardStats = new IndexingPressurePerShardStats(shardEntry.getValue(),
                isEnforcedMode);
            statsPerShard.put(shardEntry.getKey(), shardStats);
        }
        return new ShardIndexingPressureStats(statsPerShard, memoryManager.totalNodeLimitsBreachedRejections.get(),
            memoryManager.totalLastSuccessfulRequestLimitsBreachedRejections.get(),
            memoryManager.totalThroughputDegradationLimitsBreachedRejections.get(),
            shardIndexingPressureSettings.isShardIndexingPressureEnabled(),
            isEnforcedMode);
    }

    // Stats for shards demoted to the cold store (historical trackers).
    ShardIndexingPressureStats coldStats() {
        Map<ShardId, IndexingPressurePerShardStats> statsPerShard = new HashMap<>();
        boolean isEnforcedMode = shardIndexingPressureSettings.isShardIndexingPressureEnforced();

        for (Map.Entry<ShardId, ShardIndexingPressureTracker> shardEntry :
                this.shardIndexingPressureStore.getShardIndexingPressureColdStore().entrySet()) {
            IndexingPressurePerShardStats shardStats = new IndexingPressurePerShardStats(shardEntry.getValue(),
                isEnforcedMode);
            statsPerShard.put(shardEntry.getKey(), shardStats);
        }
        return new ShardIndexingPressureStats(statsPerShard, memoryManager.totalNodeLimitsBreachedRejections.get(),
            memoryManager.totalLastSuccessfulRequestLimitsBreachedRejections.get(),
            memoryManager.totalThroughputDegradationLimitsBreachedRejections.get(),
            shardIndexingPressureSettings.isShardIndexingPressureEnabled(),
            isEnforcedMode);
    }

    // Node-wide rejection counters only, with an empty per-shard map.
    ShardIndexingPressureStats topStats() {
        return new ShardIndexingPressureStats(Collections.emptyMap(), memoryManager.totalNodeLimitsBreachedRejections.get(),
            memoryManager.totalLastSuccessfulRequestLimitsBreachedRejections.get(),
            memoryManager.totalThroughputDegradationLimitsBreachedRejections.get(),
            shardIndexingPressureSettings.isShardIndexingPressureEnabled(),
            shardIndexingPressureSettings.isShardIndexingPressureEnforced());
    }

    // Fetches (or lazily creates — see ShardIndexingPressureStore) the tracker for a shard.
    ShardIndexingPressureTracker getShardIndexingPressureTracker(ShardId shardId) {
        return shardIndexingPressureStore.getShardIndexingPressureTracker(shardId);
    }

    /** @return true when the dynamic shard-indexing-pressure feature toggle is enabled. */
    public boolean isShardIndexingPressureEnabled() {
        return shardIndexingPressureSettings.isShardIndexingPressureEnabled();
    }
}
ThroughputDegradationLimitsBreached - When the moving window throughput average has increased by some factor than + * the historical throughput average. If the factor by which it has increased is greater than the degradation limit this + * parameter is said to be breached. + * 2. LastSuccessfulRequestDurationLimitsBreached - When the difference between last successful request timestamp and + * current request timestamp is greater than the max timeout value and the number of outstanding requests is greater + * than the max outstanding requests then this parameter is said to be breached. + * + * Note : Every time we try to increase of decrease the shard limits. In case the shard utilization goes below operating_factor.lower or + * goes above operating_factor.upper of current shard limits then we try to set the new shard limit to be operating_factor.optimal of + * current shard utilization. + * + */ +public class ShardIndexingPressureMemoryManager { + private final Logger logger = LogManager.getLogger(getClass()); + + /* + Operating factor can be evaluated using currentShardBytes/shardLimits. Outcome of this expression is categorized as + lower, optimal and upper and appropriate action is taken once they breach the value mentioned below. + */ + public static final Setting LOWER_OPERATING_FACTOR = + Setting.doubleSetting("shard_indexing_pressure.operating_factor.lower", 0.75d, 0.0d, Property.NodeScope, Property.Dynamic); + public static final Setting OPTIMAL_OPERATING_FACTOR = + Setting.doubleSetting("shard_indexing_pressure.operating_factor.optimal", 0.85d, 0.0d, Property.NodeScope, Property.Dynamic); + public static final Setting UPPER_OPERATING_FACTOR = + Setting.doubleSetting("shard_indexing_pressure.operating_factor.upper", 0.95d, 0.0d, Property.NodeScope, Property.Dynamic); + + /* + This is the max time that can be elapsed after any request is processed successfully. Appropriate action is taken + once the below mentioned value is breached. 
+ */ + public static final Setting SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT = + Setting.intSetting("shard_indexing_pressure.secondary_parameter.successful_request.elapsed_timeout", 300000, + Property.NodeScope, Property.Dynamic); + + /* + This is the max outstanding request that are present after any request is processed successfully. Appropriate + action is taken once the below mentioned value is breached. + */ + public static final Setting MAX_OUTSTANDING_REQUESTS = + Setting.intSetting("shard_indexing_pressure.secondary_parameter.successful_request.max_outstanding_requests", + 100, Property.NodeScope, Property.Dynamic); + + /* + Degradation limits can be evaluated using average throughput last N requests + and N being {@link ShardIndexingPressure#WINDOW_SIZE} divided by lifetime average throughput. + Appropriate action is taken once the outcome of above expression breaches the below mentioned factor + */ + public static final Setting THROUGHPUT_DEGRADATION_LIMITS = + Setting.doubleSetting("shard_indexing_pressure.secondary_parameter.throughput.degradation_factor", 5.0d, 1.0d, + Property.NodeScope, Property.Dynamic); + + /* + The secondary parameter accounting factor tells when the secondary parameter is considered. i.e. If the current + node level memory utilization divided by the node limits is greater than 70% then appropriate action is taken. 
+ */ + public static final Setting NODE_SOFT_LIMIT = + Setting.doubleSetting("shard_indexing_pressure.primary_parameter.node.soft_limit", 0.7d, 0.0d, + Property.NodeScope, Property.Dynamic); + + public final AtomicLong totalNodeLimitsBreachedRejections = new AtomicLong(); + public final AtomicLong totalLastSuccessfulRequestLimitsBreachedRejections = new AtomicLong(); + public final AtomicLong totalThroughputDegradationLimitsBreachedRejections = new AtomicLong(); + + private final ShardIndexingPressureSettings shardIndexingPressureSettings; + + private volatile double lowerOperatingFactor; + private volatile double optimalOperatingFactor; + private volatile double upperOperatingFactor; + + private volatile int successfulRequestElapsedTimeout; + private volatile int maxOutstandingRequests; + + private volatile double primaryAndCoordinatingThroughputDegradationLimits; + private volatile double replicaThroughputDegradationLimits; + + private volatile double nodeSoftLimit; + + public ShardIndexingPressureMemoryManager(ShardIndexingPressureSettings shardIndexingPressureSettings, + ClusterSettings clusterSettings, Settings settings) { + this.shardIndexingPressureSettings = shardIndexingPressureSettings; + + this.lowerOperatingFactor = LOWER_OPERATING_FACTOR.get(settings).doubleValue(); + clusterSettings.addSettingsUpdateConsumer(LOWER_OPERATING_FACTOR, this::setLowerOperatingFactor); + + this.optimalOperatingFactor = OPTIMAL_OPERATING_FACTOR.get(settings).doubleValue(); + clusterSettings.addSettingsUpdateConsumer(OPTIMAL_OPERATING_FACTOR, this::setOptimalOperatingFactor); + + this.upperOperatingFactor = UPPER_OPERATING_FACTOR.get(settings).doubleValue(); + clusterSettings.addSettingsUpdateConsumer(UPPER_OPERATING_FACTOR, this::setUpperOperatingFactor); + + this.successfulRequestElapsedTimeout = SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.get(settings).intValue(); + clusterSettings.addSettingsUpdateConsumer(SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT, 
this::setSuccessfulRequestElapsedTimeout); + + this.maxOutstandingRequests = MAX_OUTSTANDING_REQUESTS.get(settings).intValue(); + clusterSettings.addSettingsUpdateConsumer(MAX_OUTSTANDING_REQUESTS, this::setMaxOutstandingRequests); + + this.primaryAndCoordinatingThroughputDegradationLimits = THROUGHPUT_DEGRADATION_LIMITS.get(settings).doubleValue(); + this.replicaThroughputDegradationLimits = this.primaryAndCoordinatingThroughputDegradationLimits * 1.5; + clusterSettings.addSettingsUpdateConsumer(THROUGHPUT_DEGRADATION_LIMITS, this::setThroughputDegradationLimits); + + this.nodeSoftLimit = NODE_SOFT_LIMIT.get(settings).doubleValue(); + clusterSettings.addSettingsUpdateConsumer(NODE_SOFT_LIMIT, this::setNodeSoftLimit); + } + + boolean isPrimaryNodeLimitBreached(ShardIndexingPressureTracker tracker, long nodeTotalBytes) { + + //Checks if the node level threshold is breached. + if(nodeTotalBytes > this.shardIndexingPressureSettings.getNodePrimaryAndCoordinatingLimits()) { + logger.debug("Node limits breached for primary operation [node_total_bytes={}, " + + "node_primary_and_coordinating_limits={}]", nodeTotalBytes, + this.shardIndexingPressureSettings.getNodePrimaryAndCoordinatingLimits()); + tracker.rejection().getPrimaryNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + + return true; + } + return false; + } + + boolean isPrimaryShardLimitBreached(ShardIndexingPressureTracker tracker, long requestStartTime, + Map shardIndexingPressureStore, long nodeTotalBytes) { + + /* Memory limits is breached when the current utilization is greater than operating_factor.upper of total shard limits. 
*/ + long shardCombinedBytes = tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().get(); + long shardPrimaryAndCoordinatingLimits = tracker.getPrimaryAndCoordinatingLimits().get(); + boolean shardMemoryLimitsBreached = + ((double)shardCombinedBytes / shardPrimaryAndCoordinatingLimits) > this.upperOperatingFactor; + + if(shardMemoryLimitsBreached) { + /* + Secondary Parameters(i.e. LastSuccessfulRequestDuration and Throughput) is taken into consideration when + the current node utilization is greater than primary_parameter.node.soft_limit of total node limits. + */ + if(((double)nodeTotalBytes / this.shardIndexingPressureSettings.getNodePrimaryAndCoordinatingLimits()) < this.nodeSoftLimit) { + boolean isShardLimitsIncreased = + this.increaseShardPrimaryAndCoordinatingLimits(tracker, shardIndexingPressureStore); + if(isShardLimitsIncreased == false) { + tracker.rejection().getPrimaryNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + } + + return !isShardLimitsIncreased; + } else { + boolean shardLastSuccessfulRequestDurationLimitsBreached = + this.evaluateLastSuccessfulRequestDurationLimitsBreached(tracker.timeStamp().getLastSuccessfulPrimaryRequestTimestamp() + .get(), requestStartTime, tracker.outstandingRequest().getTotalOutstandingPrimaryRequests().get()); + + boolean shardThroughputDegradationLimitsBreached = + this.evaluateThroughputDegradationLimitsBreached( + Double.longBitsToDouble(tracker.throughput().getPrimaryThroughputMovingAverage().get()), + tracker.memory().getTotalPrimaryBytes().get(), tracker.latency().getPrimaryTimeInMillis().get(), + tracker.throughput().getPrimaryThroughputMovingQueue().size(), primaryAndCoordinatingThroughputDegradationLimits); + + if(shardLastSuccessfulRequestDurationLimitsBreached || shardThroughputDegradationLimitsBreached) { + if(shardLastSuccessfulRequestDurationLimitsBreached) { + 
tracker.rejection().getPrimaryLastSuccessfulRequestLimitsBreachedRejections().incrementAndGet(); + totalLastSuccessfulRequestLimitsBreachedRejections.incrementAndGet(); + } else { + tracker.rejection().getPrimaryThroughputDegradationLimitsBreachedRejections().incrementAndGet(); + totalThroughputDegradationLimitsBreachedRejections.incrementAndGet(); + } + + return true; + } else { + boolean isShardLimitsIncreased = + this.increaseShardPrimaryAndCoordinatingLimits(tracker, shardIndexingPressureStore); + if(isShardLimitsIncreased == false) { + tracker.rejection().getPrimaryNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + } + + return !isShardLimitsIncreased; + } + } + } else { + return false; + } + } + + boolean isCoordinatingNodeLimitBreached(ShardIndexingPressureTracker tracker, long nodeTotalBytes) { + + //Checks if the node level threshold is breached. + if(nodeTotalBytes > this.shardIndexingPressureSettings.getNodePrimaryAndCoordinatingLimits()) { + logger.debug("Node limits breached for coordinating operation [node_total_bytes={} , " + + "node_primary_and_coordinating_limits={}]", nodeTotalBytes, + this.shardIndexingPressureSettings.getNodePrimaryAndCoordinatingLimits()); + tracker.rejection().getCoordinatingNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + + return true; + } + return false; + } + + boolean isCoordinatingShardLimitBreached(ShardIndexingPressureTracker tracker, long requestStartTime, + Map shardIndexingPressureStore, long nodeTotalBytes) { + + //Shard memory limit is breached when the current utilization is greater than operating_factor.upper of total shard limits. 
+ long shardCombinedBytes = tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().get(); + long shardPrimaryAndCoordinatingLimits = tracker.getPrimaryAndCoordinatingLimits().get(); + boolean shardMemoryLimitsBreached = + ((double)shardCombinedBytes / shardPrimaryAndCoordinatingLimits) > this.upperOperatingFactor; + + if(shardMemoryLimitsBreached) { + /* + Secondary Parameters(i.e. LastSuccessfulRequestDuration and Throughput) is taken into consideration when + the current node utilization is greater than primary_parameter.node.soft_limit of total node limits. + */ + if(((double)nodeTotalBytes / this.shardIndexingPressureSettings.getNodePrimaryAndCoordinatingLimits()) < this.nodeSoftLimit) { + boolean isShardLimitsIncreased = + this.increaseShardPrimaryAndCoordinatingLimits(tracker, shardIndexingPressureStore); + if(isShardLimitsIncreased == false) { + tracker.rejection().getCoordinatingNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + } + + return !isShardLimitsIncreased; + } else { + boolean shardLastSuccessfulRequestDurationLimitsBreached = + this.evaluateLastSuccessfulRequestDurationLimitsBreached( + tracker.timeStamp().getLastSuccessfulCoordinatingRequestTimestamp().get(), requestStartTime, + tracker.outstandingRequest().getTotalOutstandingCoordinatingRequests().get()); + + boolean shardThroughputDegradationLimitsBreached = + this.evaluateThroughputDegradationLimitsBreached( + Double.longBitsToDouble(tracker.throughput().getCoordinatingThroughputMovingAverage().get()), + tracker.memory().getTotalCoordinatingBytes().get(), tracker.latency().getCoordinatingTimeInMillis().get(), + tracker.throughput().getCoordinatingThroughputMovingQueue().size(), + primaryAndCoordinatingThroughputDegradationLimits); + + if (shardLastSuccessfulRequestDurationLimitsBreached || shardThroughputDegradationLimitsBreached) { + if(shardLastSuccessfulRequestDurationLimitsBreached) { + 
tracker.rejection().getCoordinatingLastSuccessfulRequestLimitsBreachedRejections().incrementAndGet(); + totalLastSuccessfulRequestLimitsBreachedRejections.incrementAndGet(); + } else { + tracker.rejection().getCoordinatingThroughputDegradationLimitsBreachedRejections().incrementAndGet(); + totalThroughputDegradationLimitsBreachedRejections.incrementAndGet(); + } + + return true; + } else { + boolean isShardLimitsIncreased = + this.increaseShardPrimaryAndCoordinatingLimits(tracker, shardIndexingPressureStore); + if(isShardLimitsIncreased == false) { + tracker.rejection().getCoordinatingNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + } + + return !isShardLimitsIncreased; + } + } + } else { + return false; + } + } + + boolean isReplicaNodeLimitBreached(ShardIndexingPressureTracker tracker, long nodeReplicaBytes) { + + //Checks if the node level threshold is breached. + if(nodeReplicaBytes > this.shardIndexingPressureSettings.getNodeReplicaLimits()) { + logger.debug("Node limits breached for replica operation [node_replica_bytes={} , " + + "node_replica_limits={}]", nodeReplicaBytes, this.shardIndexingPressureSettings.getNodeReplicaLimits()); + tracker.rejection().getReplicaNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + + return true; + } + return false; + } + + boolean isReplicaShardLimitBreached(ShardIndexingPressureTracker tracker, long requestStartTime, + Map shardIndexingPressureStore, long nodeReplicaBytes) { + + //Memory limits is breached when the current utilization is greater than operating_factor.upper of total shard limits. 
+ long shardReplicaBytes = tracker.memory().getCurrentReplicaBytes().get(); + long shardReplicaLimits = tracker.getReplicaLimits().get(); + final boolean shardMemoryLimitsBreached = + ((double)shardReplicaBytes / shardReplicaLimits) > this.upperOperatingFactor; + + if(shardMemoryLimitsBreached) { + /* + Secondary Parameters(i.e. LastSuccessfulRequestDuration and Throughput) is taken into consideration when + the current node utilization is greater than primary_parameter.node.soft_limit of total node limits. + */ + if(((double)nodeReplicaBytes / this.shardIndexingPressureSettings.getNodeReplicaLimits()) < this.nodeSoftLimit) { + boolean isShardLimitsIncreased = + this.increaseShardReplicaLimits(tracker, shardIndexingPressureStore); + if(isShardLimitsIncreased == false) { + tracker.rejection().getReplicaNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + } + + return !isShardLimitsIncreased; + } else { + boolean shardLastSuccessfulRequestDurationLimitsBreached = + this.evaluateLastSuccessfulRequestDurationLimitsBreached( + tracker.timeStamp().getLastSuccessfulReplicaRequestTimestamp().get(), + requestStartTime, tracker.outstandingRequest().getTotalOutstandingReplicaRequests().get()); + + boolean shardThroughputDegradationLimitsBreached = + this.evaluateThroughputDegradationLimitsBreached( + Double.longBitsToDouble(tracker.throughput().getReplicaThroughputMovingAverage().get()), + tracker.memory().getTotalReplicaBytes().get(), tracker.latency().getReplicaTimeInMillis().get(), + tracker.throughput().getReplicaThroughputMovingQueue().size(), replicaThroughputDegradationLimits); + + if (shardLastSuccessfulRequestDurationLimitsBreached || shardThroughputDegradationLimitsBreached) { + if(shardLastSuccessfulRequestDurationLimitsBreached) { + tracker.rejection().getReplicaLastSuccessfulRequestLimitsBreachedRejections().incrementAndGet(); + totalLastSuccessfulRequestLimitsBreachedRejections.incrementAndGet(); + } else { + 
tracker.rejection().getReplicaThroughputDegradationLimitsBreachedRejections().incrementAndGet(); + totalThroughputDegradationLimitsBreachedRejections.incrementAndGet(); + } + + return true; + } else { + boolean isShardLimitsIncreased = + this.increaseShardReplicaLimits(tracker, shardIndexingPressureStore); + if(isShardLimitsIncreased == false) { + tracker.rejection().getReplicaNodeLimitsBreachedRejections().incrementAndGet(); + totalNodeLimitsBreachedRejections.incrementAndGet(); + } + + return !isShardLimitsIncreased; + } + } + } else { + return false; + } + } + + private boolean increaseShardPrimaryAndCoordinatingLimits(ShardIndexingPressureTracker tracker, + Map shardIndexingPressureStore) { + long shardPrimaryAndCoordinatingLimits; + long expectedShardPrimaryAndCoordinatingLimits; + do { + shardPrimaryAndCoordinatingLimits = tracker.getPrimaryAndCoordinatingLimits().get(); + long shardCombinedBytes = tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().get(); + expectedShardPrimaryAndCoordinatingLimits = (long)(shardCombinedBytes / this.optimalOperatingFactor); + + long totalPrimaryAndCoordinatingLimitsExceptCurrentShard = shardIndexingPressureStore.entrySet().stream() + .filter(entry -> !(tracker.getShardId().hashCode() == entry.getKey())) + .map(Map.Entry::getValue) + .mapToLong(entry -> entry.getPrimaryAndCoordinatingLimits().get()).sum(); + + if(((double)shardCombinedBytes / shardPrimaryAndCoordinatingLimits) > this.upperOperatingFactor) { + if (totalPrimaryAndCoordinatingLimitsExceptCurrentShard + expectedShardPrimaryAndCoordinatingLimits < + this.shardIndexingPressureSettings.getNodePrimaryAndCoordinatingLimits()) { + logger.debug("Increasing the Primary And Coordinating Limits [" + + "shard_detail=[{}][{}], shard_max_primary_and_coordinating_bytes={}, " + + "expected_shard_max_primary_and_coordinating_bytes={}]", + tracker.getShardId().getIndexName(), tracker.getShardId().id(), + shardPrimaryAndCoordinatingLimits, 
expectedShardPrimaryAndCoordinatingLimits); + } else { + logger.debug("Failed to increase the Primary And Coordinating Limits [shard_detail=[{}][{}}], " + + "shard_max_primary_and_coordinating_bytes={}, " + + "total_max_primary_and_coordinating_bytes_except_current_shard={}, " + + "expected_shard_max_primary_and_coordinating_bytes={}, node_max_coordinating_and_primary_bytes={}]", + tracker.getShardId().getIndexName(), tracker.getShardId().id(), shardPrimaryAndCoordinatingLimits, + totalPrimaryAndCoordinatingLimitsExceptCurrentShard, expectedShardPrimaryAndCoordinatingLimits, + this.shardIndexingPressureSettings.getNodePrimaryAndCoordinatingLimits()); + return false; + } + } else { + return true; + } + } while(!tracker.getPrimaryAndCoordinatingLimits().compareAndSet(shardPrimaryAndCoordinatingLimits, + expectedShardPrimaryAndCoordinatingLimits)); + return true; + } + + void decreaseShardPrimaryAndCoordinatingLimits(ShardIndexingPressureTracker tracker) { + long shardPrimaryAndCoordinatingLimits; + long expectedShardPrimaryAndCoordinatingLimits; + do { + shardPrimaryAndCoordinatingLimits = tracker.getPrimaryAndCoordinatingLimits().get(); + long shardCombinedBytes = tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().get(); + expectedShardPrimaryAndCoordinatingLimits = Math.max((long) (shardCombinedBytes / this.optimalOperatingFactor), + this.shardIndexingPressureSettings.getShardPrimaryAndCoordinatingBaseLimits()); + + if (((double)shardCombinedBytes / shardPrimaryAndCoordinatingLimits) < this.lowerOperatingFactor) { + logger.debug("Decreasing the Primary And Coordinating Limits [shard_detail=[{}][{}], " + + "shard_max_primary_and_coordinating_bytes={}, expected_shard_max_primary_and_coordinating_bytes={}]", + tracker.getShardId().getIndexName(), tracker.getShardId().id(), + shardPrimaryAndCoordinatingLimits, expectedShardPrimaryAndCoordinatingLimits); + } else { + logger.debug("Primary And Coordinating Limits Already Increased [" + + 
"shard_detail=[{}][{}], " + "shard_max_primary_and_coordinating_bytes={}, " + + "expected_shard_max_primary_and_coordinating_bytes={}]", + tracker.getShardId().getIndexName(), tracker.getShardId().id(), shardPrimaryAndCoordinatingLimits, + expectedShardPrimaryAndCoordinatingLimits); + return; + } + } while(!tracker.getPrimaryAndCoordinatingLimits().compareAndSet(shardPrimaryAndCoordinatingLimits, + expectedShardPrimaryAndCoordinatingLimits)); + } + + private boolean increaseShardReplicaLimits(ShardIndexingPressureTracker tracker, + Map shardIndexingPressureStore) { + long shardReplicaLimits; + long expectedShardReplicaLimits; + do { + shardReplicaLimits = tracker.getReplicaLimits().get(); + long shardReplicaBytes = tracker.memory().getCurrentReplicaBytes().get(); + expectedShardReplicaLimits = (long)(shardReplicaBytes / this.optimalOperatingFactor); + + long totalReplicaLimitsExceptCurrentShard = shardIndexingPressureStore.entrySet().stream() + .filter(entry -> !(tracker.getShardId().hashCode() == entry.getKey())) + .map(Map.Entry::getValue) + .mapToLong(entry -> entry.getReplicaLimits().get()).sum(); + + if(((double)shardReplicaBytes / shardReplicaLimits) > this.upperOperatingFactor) { + if (totalReplicaLimitsExceptCurrentShard + expectedShardReplicaLimits < + this.shardIndexingPressureSettings.getNodeReplicaLimits()) { + logger.debug("Increasing the Replica Limits [shard_detail=[{}][{}], " + + "shard_max_replica_bytes={}, expected_shard_max_replica_bytes={}]", + tracker.getShardId().getIndexName(), tracker.getShardId().id(), + shardReplicaLimits, expectedShardReplicaLimits); + } else { + logger.debug("Failed to increase the Replica Limits [shard_detail=[{}][{}], " + + "shard_max_replica_bytes={}, total_max_replica_except_current_shard={}}, " + + "expected_shard_max_replica_bytes={}, node_max_replica_bytes={}]", + tracker.getShardId().getIndexName(), tracker.getShardId().id(), shardReplicaLimits, + totalReplicaLimitsExceptCurrentShard, expectedShardReplicaLimits, 
+                        this.shardIndexingPressureSettings.getNodeReplicaLimits());
+                    return false;
+                }
+            } else {
+                return true;
+            }
+        } while(!tracker.getReplicaLimits().compareAndSet(shardReplicaLimits, expectedShardReplicaLimits));
+        return true;
+    }
+
+    void decreaseShardReplicaLimits(ShardIndexingPressureTracker tracker) {
+
+        long shardReplicaLimits;
+        long expectedShardReplicaLimits;
+        do {
+            shardReplicaLimits = tracker.getReplicaLimits().get();
+            long shardReplicaBytes = tracker.memory().getCurrentReplicaBytes().get();
+            expectedShardReplicaLimits = Math.max((long) (shardReplicaBytes / this.optimalOperatingFactor),
+                this.shardIndexingPressureSettings.getShardReplicaBaseLimits());
+
+            if (((double)shardReplicaBytes / shardReplicaLimits) < this.lowerOperatingFactor) {
+                logger.debug("Decreasing the Replica Limits [shard_detail=[{}}][{}}], " +
+                    "shard_max_replica_bytes={}, expected_shard_max_replica_bytes={}]",
+                    tracker.getShardId().getIndexName(), tracker.getShardId().id(), shardReplicaLimits,
+                    expectedShardReplicaLimits);
+            } else {
+                logger.debug("Replica Limits Already Increased [shard_detail=[{}][{}], " +
+                    "shard_max_replica_bytes={}, expected_shard_max_replica_bytes={}]",
+                    tracker.getShardId().getIndexName(), tracker.getShardId().id(), shardReplicaLimits,
+                    expectedShardReplicaLimits);
+                return;
+            }
+        } while(!tracker.getReplicaLimits().compareAndSet(shardReplicaLimits, expectedShardReplicaLimits));
+    }
+
+    /**
+     * If the lifetime average throughput divided by the moving average throughput of the last N requests is greater
+     * than the acceptable degradation limit then we say this parameter has breached the threshold.
+ */ + private boolean evaluateThroughputDegradationLimitsBreached(double throughputMovingAverage, + long totalBytes, long totalLatency, + long queueSize, double degradationLimits) { + double throughputHistoricalAverage = (double)totalBytes / totalLatency; + return throughputMovingAverage > 0 && queueSize >= shardIndexingPressureSettings.getRequestSizeWindow() + && throughputHistoricalAverage / throughputMovingAverage > degradationLimits; + } + + /** + * The difference in the current timestamp and last successful request timestamp is greater than + * successful request elapsed timeout value and the total number of outstanding requests is greater than + * the maximum outstanding request count value then we say this parameter has breached the threshold. + */ + private boolean evaluateLastSuccessfulRequestDurationLimitsBreached(long lastSuccessfulRequestTimestamp, + long requestStartTime, + long totalOutstandingRequests) { + return (lastSuccessfulRequestTimestamp > 0) && + (((requestStartTime - lastSuccessfulRequestTimestamp) > this.successfulRequestElapsedTimeout + && totalOutstandingRequests > this.maxOutstandingRequests)); + } + + private void setLowerOperatingFactor(double lowerOperatingFactor) { + this.lowerOperatingFactor = lowerOperatingFactor; + } + + private void setOptimalOperatingFactor(double optimalOperatingFactor) { + this.optimalOperatingFactor = optimalOperatingFactor; + } + + private void setUpperOperatingFactor(double upperOperatingFactor) { + this.upperOperatingFactor = upperOperatingFactor; + } + + private void setSuccessfulRequestElapsedTimeout(int successfulRequestElapsedTimeout) { + this.successfulRequestElapsedTimeout = successfulRequestElapsedTimeout; + } + + private void setMaxOutstandingRequests(int maxOutstandingRequests) { + this.maxOutstandingRequests = maxOutstandingRequests; + } + + private void setThroughputDegradationLimits(double throughputDegradationLimits) { + this.primaryAndCoordinatingThroughputDegradationLimits = 
throughputDegradationLimits;
+        this.replicaThroughputDegradationLimits = this.primaryAndCoordinatingThroughputDegradationLimits * 1.5;
+    }
+
+    private void setNodeSoftLimit(double nodeSoftLimit) {
+        this.nodeSoftLimit = nodeSoftLimit;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/ShardIndexingPressureSettings.java b/server/src/main/java/org/opensearch/index/ShardIndexingPressureSettings.java
new file mode 100644
index 0000000000000..93c642d083c63
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/ShardIndexingPressureSettings.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright OpenSearch Contributors.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.index;
+
+import org.opensearch.cluster.node.DiscoveryNode;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+
+import java.util.Iterator;
+
+/**
+ * This class contains all the settings whose owner class is ShardIndexingPressure. They are used in
+ * ShardIndexingPressure as well as in the classes instantiated by ShardIndexingPressure, i.e.
+ * ShardIndexingPressureMemoryManager and ShardIndexingPressureStore.
+ */
+public final class ShardIndexingPressureSettings {
+
+    public static final String SHARD_INDEXING_PRESSURE_ENABLED_ATTRIBUTE_KEY = "shard_indexing_pressure_enabled";
+
+    public static final Setting SHARD_INDEXING_PRESSURE_ENABLED =
+        Setting.boolSetting("shard_indexing_pressure.enabled", false, Setting.Property.Dynamic, Setting.Property.NodeScope);
+
+    /**
+     * Feature level setting to operate in shadow-mode or in enforced-mode. If the enforced field is set to true, shard level
+     * rejection will be performed, otherwise only rejection metrics will be populated.
+ */ + public static final Setting SHARD_INDEXING_PRESSURE_ENFORCED = + Setting.boolSetting("shard_indexing_pressure.enforced", false, Setting.Property.Dynamic, Setting.Property.NodeScope); + + // This represents the last N request samples that will be considered for secondary parameter evaluation. + public static final Setting REQUEST_SIZE_WINDOW = + Setting.intSetting("shard_indexing_pressure.secondary_parameter.throughput.request_size_window", 2000, + Setting.Property.NodeScope, Setting.Property.Dynamic); + + //Each shard will be initially given 1/1000th bytes of node limits. + public static final Setting SHARD_MIN_LIMIT = + Setting.doubleSetting("shard_indexing_pressure.primary_parameter.shard.min_limit", 0.001d, 0.0d, + Setting.Property.NodeScope, Setting.Property.Dynamic); + + private volatile boolean shardIndexingPressureEnabled; + private volatile boolean shardIndexingPressureEnforced; + private volatile long shardPrimaryAndCoordinatingBaseLimits; + private volatile long shardReplicaBaseLimits; + private volatile int requestSizeWindow; + private volatile double shardMinLimit; + private final long primaryAndCoordinatingNodeLimits; + private static ClusterService clusterService; + + public ShardIndexingPressureSettings(ClusterService clusterService, Settings settings, long primaryAndCoordinatingLimits) { + ShardIndexingPressureSettings.clusterService = clusterService; + ClusterSettings clusterSettings = clusterService.getClusterSettings(); + + this.shardIndexingPressureEnabled = SHARD_INDEXING_PRESSURE_ENABLED.get(settings); + clusterSettings.addSettingsUpdateConsumer(SHARD_INDEXING_PRESSURE_ENABLED, this::setShardIndexingPressureEnabled); + + this.shardIndexingPressureEnforced = SHARD_INDEXING_PRESSURE_ENFORCED.get(settings); + clusterSettings.addSettingsUpdateConsumer(SHARD_INDEXING_PRESSURE_ENFORCED, this::setShardIndexingPressureEnforced); + + this.requestSizeWindow = REQUEST_SIZE_WINDOW.get(settings).intValue(); + 
clusterSettings.addSettingsUpdateConsumer(REQUEST_SIZE_WINDOW, this::setRequestSizeWindow); + + this.primaryAndCoordinatingNodeLimits = primaryAndCoordinatingLimits; + + this.shardMinLimit = SHARD_MIN_LIMIT.get(settings).floatValue(); + this.shardPrimaryAndCoordinatingBaseLimits = (long) (primaryAndCoordinatingLimits * shardMinLimit); + this.shardReplicaBaseLimits = (long) (shardPrimaryAndCoordinatingBaseLimits * 1.5); + clusterSettings.addSettingsUpdateConsumer(SHARD_MIN_LIMIT, this::setShardMinLimit); + } + + public static boolean isShardIndexingPressureAttributeEnabled() { + Iterator nodes = clusterService.state().getNodes().getNodes().valuesIt(); + while (nodes.hasNext()) { + if (Boolean.parseBoolean(nodes.next().getAttributes().get(SHARD_INDEXING_PRESSURE_ENABLED_ATTRIBUTE_KEY)) == false) { + return false; + } + } + return true; + } + + private void setShardIndexingPressureEnabled(Boolean shardIndexingPressureEnableValue) { + this.shardIndexingPressureEnabled = shardIndexingPressureEnableValue; + } + + private void setShardIndexingPressureEnforced(Boolean shardIndexingPressureEnforcedValue) { + this.shardIndexingPressureEnforced = shardIndexingPressureEnforcedValue; + } + + private void setRequestSizeWindow(int requestSizeWindow) { + this.requestSizeWindow = requestSizeWindow; + } + + private void setShardMinLimit(double shardMinLimit) { + this.shardMinLimit = shardMinLimit; + + //Updating the dependent value once when the dynamic settings update + this.setShardPrimaryAndCoordinatingBaseLimits(); + this.setShardReplicaBaseLimits(); + } + + private void setShardPrimaryAndCoordinatingBaseLimits() { + shardPrimaryAndCoordinatingBaseLimits = (long) (primaryAndCoordinatingNodeLimits * shardMinLimit); + } + + private void setShardReplicaBaseLimits() { + shardReplicaBaseLimits = (long) (shardPrimaryAndCoordinatingBaseLimits * 1.5); + } + + public boolean isShardIndexingPressureEnabled() { + return shardIndexingPressureEnabled; + } + + public boolean 
isShardIndexingPressureEnforced() { + return shardIndexingPressureEnforced; + } + + public int getRequestSizeWindow() { + return requestSizeWindow; + } + + public long getShardPrimaryAndCoordinatingBaseLimits() { + return shardPrimaryAndCoordinatingBaseLimits; + } + + public long getShardReplicaBaseLimits() { + return shardReplicaBaseLimits; + } + + public long getNodePrimaryAndCoordinatingLimits() { + return primaryAndCoordinatingNodeLimits; + } + + public long getNodeReplicaLimits() { + return (long) (primaryAndCoordinatingNodeLimits * 1.5); + } +} diff --git a/server/src/main/java/org/opensearch/index/ShardIndexingPressureStore.java b/server/src/main/java/org/opensearch/index/ShardIndexingPressureStore.java new file mode 100644 index 0000000000000..9a96998c88a18 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/ShardIndexingPressureStore.java @@ -0,0 +1,115 @@ +/* + * Copyright OpenSearch Contributors. + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.concurrent.ConcurrentCollections; +import org.opensearch.index.shard.ShardId; + +import java.util.Collections; +import java.util.Map; + +import static java.util.Objects.isNull; + +/** + * The Shard indexing pressure store acts as a central repository for all the shard-level tracking objects being + * used at a node level in order to track indexing pressure. It manages the tracker lifecycle. + * + * The shardIndexingPressureHotStore is a primary (hot) store and holds all the shard tracking object which are + * currently live i.e. they are performing request level tracking for in-flight requests. + * + * The shardIndexingPressureColdStore acts as the cold storage for all the shard tracking objects which were created, + * but are not currently live i.e. 
they are not tracking any in-flight requests currently.
+ *
+ * Tracking objects when created are part of both the hot store as well as the cold store. However, once the object
+ * is no longer live it is removed from the hot store. Objects in the cold store are evicted once the cold store
+ * reaches its maximum limit. Think of it like a periodic archival purge.
+ * During get, if the tracking object is not present in the hot store, a lookup is made into the cold store. If found,
+ * the object is brought into the hot store again, until it remains active. If not present in either store, a fresh
+ * object is instantiated and registered in both the stores.
+ *
+ * Note: The implementation of shardIndexingPressureColdStore methods is such that get,
+ * update and evict are abstracted out such that LRU logic can be plugged into it, if a need is discovered later.
+ */
+public class ShardIndexingPressureStore {
+
+    // This represents the maximum size of the cold store (dynamic setting; the value below is the initial value).
+    public static final Setting MAX_CACHE_STORE_SIZE =
+        Setting.intSetting("shard_indexing_pressure.cache_store.max_size", 200, 100, 1000,
+            Setting.Property.NodeScope, Setting.Property.Dynamic);
+
+    private final Map shardIndexingPressureHotStore =
+        ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
+    private final Map shardIndexingPressureColdStore =
+        ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
+    private final ShardIndexingPressureSettings shardIndexingPressureSettings;
+
+    private volatile int maxColdStoreSize;
+
+    public ShardIndexingPressureStore(ShardIndexingPressureSettings shardIndexingPressureSettings,
+                                      ClusterSettings clusterSettings, Settings settings) {
+        this.shardIndexingPressureSettings = shardIndexingPressureSettings;
+        this.maxColdStoreSize = MAX_CACHE_STORE_SIZE.get(settings).intValue();
+        clusterSettings.addSettingsUpdateConsumer(MAX_CACHE_STORE_SIZE, this::setMaxColdStoreSize);
+    }
+
+    public ShardIndexingPressureTracker
getShardIndexingPressureTracker(ShardId shardId) { + ShardIndexingPressureTracker tracker = shardIndexingPressureHotStore.get((long)shardId.hashCode()); + if (isNull(tracker)) { + // Attempt from Indexing pressure cold store + tracker = shardIndexingPressureColdStore.get((long)shardId.hashCode()); + // If not present in cold store so instantiate a new one + if (isNull(tracker)) { + ShardIndexingPressureTracker newShardIndexingPressureTracker = new ShardIndexingPressureTracker(shardId, + this.shardIndexingPressureSettings.getShardPrimaryAndCoordinatingBaseLimits(), + this.shardIndexingPressureSettings.getShardReplicaBaseLimits()); + // Try update the new shard stat to the hot store + tracker = shardIndexingPressureHotStore.putIfAbsent((long) shardId.hashCode(), newShardIndexingPressureTracker); + // Update the tracker so that we use the one actual in the hot store + tracker = tracker == null ? newShardIndexingPressureTracker : tracker; + // Write through into the cold store for future reference + updateIndexingPressureColdStore(tracker); + } else { + // Attempt update tracker to the primary store and return tracker finally in the store to avoid any race + ShardIndexingPressureTracker newTracker = shardIndexingPressureHotStore.putIfAbsent((long) shardId.hashCode(), tracker); + tracker = newTracker == null ? 
tracker : newTracker; + } + } + return tracker; + } + + public Map getShardIndexingPressureHotStore() { + return Collections.unmodifiableMap(shardIndexingPressureHotStore); + } + + public Map getShardIndexingPressureColdStore() { + return Collections.unmodifiableMap(shardIndexingPressureColdStore); + } + + public void tryIndexingPressureTrackerCleanup(ShardIndexingPressureTracker tracker) { + if (tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().get() == 0 && + tracker.memory().getCurrentReplicaBytes().get() == 0) { + // Try inserting into cache again in case there was an eviction earlier + shardIndexingPressureColdStore.putIfAbsent((long)tracker.getShardId().hashCode(), tracker); + // Remove from the active store + shardIndexingPressureHotStore.remove((long)tracker.getShardId().hashCode(), tracker); + } + } + + private void updateIndexingPressureColdStore(ShardIndexingPressureTracker tracker) { + if (shardIndexingPressureColdStore.size() > maxColdStoreSize) { + shardIndexingPressureColdStore.clear(); + } + shardIndexingPressureColdStore.put((long)tracker.getShardId().hashCode(), tracker); + } + + private void setMaxColdStoreSize(int maxColdStoreSize) { + this.maxColdStoreSize = maxColdStoreSize; + } + +} diff --git a/server/src/main/java/org/opensearch/index/ShardIndexingPressureTracker.java b/server/src/main/java/org/opensearch/index/ShardIndexingPressureTracker.java new file mode 100644 index 0000000000000..7b4663c29dc86 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/ShardIndexingPressureTracker.java @@ -0,0 +1,356 @@ +/* + * Copyright OpenSearch Contributors. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index; + +import org.opensearch.index.shard.ShardId; + +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicLong; + +/** + * This class contains all the tracking objects that will be maintained against a shard and will be used and modified + * while evaluating shard indexing pressure related information for a shard. + * + * This class tracks these parameters at coordinating, primary and replica indexing stage. + * 1. MemoryTracker + * a. CurrentBytes - Bytes of data that is inflight/processing for a shard. + * b. TotalBytes - Total bytes that was processed successfully for a shard. + * + * 2. CountTracker + * a. Counts - Total number of requests that were processed successfully for a shard. + * + * 3. LatencyTracker + * a. TimeInMillis - Total indexing time take by requests that were processed successfully for a shard. + * + * 4. RejectionTracker + * a. TotalRejections - Total number of requests that were rejected for a shard. + * b. NodeLimitsBreachedRejections - Total number of requests that were rejected due to the node level limits breached. + * i.e. when a request for a shard came and there was no scope for the shard to grow as + * node level limit was already reached. + * c. LastSuccessfulRequestLimitsBreachedRejections - Total number of requests that were rejected due to the + * last successful request limits breached for a shard. + * d. ThroughputDegradationLimitsBreachedRejections - Total number of requests that were rejected due to the + * last successful request limits breached for a shard. + * + * 5. TimeStampTracker + * a. LastSuccessfulRequestTimestamp - Timestamp of last successful request for a shard. + * + * 6. OutstandingRequestTracker + * a. TotalOutstandingRequests - At any given point how many requests are outstanding for a shard. + * + * 7. ThroughputTracker + * a. 
ThroughputMovingAverage - Hold the average throughput value for last N requests. + * b. ThroughputMovingQueue - Queue that holds the last N requests throughput such that we have a sliding window + * which keeps moving everytime a new request comes such that at any given point we are looking + * at last N requests only. EWMA cannot be used here as it evaluate the historical average + * and here we need the average of just last N requests. + * + * see {@link ShardIndexingPressureMemoryManager} + * + * ShardIndexingPressureTracker is the construct to track all the write requests targeted for a ShardId on the node, across all possible + * transport-actions i.e. Coordinator, Primary and Replica. Tracker is uniquely identified against a Shard-Id and contains explicit tracking + * fields for different kind of tracking needs against a Shard-Id, such as in-flight Coordinating requests, Coordinator + Primary requests, + * Primary requests and Replica requests. + * Currently the knowledge of shard roles (such as primary vs replica) is not explicit to the tracker, and it tracks different values based + * on the interaction hooks of the above layers, which are separate for different type of operations at the transport-action layers. + * + * There is room for introducing more unique identity to the trackers based on Shard-Role or Shard-Allocation-Id, but that will also + * increase the complexity of handling shard-lister events and handling other scenarios such as request-draining etc. + * + * To prefer simplicity for now we have modelled by keeping explicit fields for different operation tracking, while tracker by itself is + * agnostic of the role today. We can revisit this modelling in future as and when we add more tracking information here. 
+ */ +public class ShardIndexingPressureTracker { + + private final ShardId shardId; + private final MemoryTracker memoryTracker = new MemoryTracker(); + private final CountTracker countTracker = new CountTracker(); + private final LatencyTracker latencyTracker = new LatencyTracker(); + private final RejectionTracker rejectionTracker = new RejectionTracker(); + private final TimeStampTracker timeStampTracker = new TimeStampTracker(); + private final OutstandingRequestTracker outstandingRequestTracker = new OutstandingRequestTracker(); + private final ThroughputTracker throughputTracker = new ThroughputTracker(); + + private final AtomicLong primaryAndCoordinatingLimits; + private final AtomicLong replicaLimits; + + public ShardId getShardId() { + return shardId; + } + + public ShardIndexingPressureTracker(ShardId shardId, long primaryAndCoordinatingLimits, long replicaLimits) { + this.shardId = shardId; + this.primaryAndCoordinatingLimits = new AtomicLong(primaryAndCoordinatingLimits); + this.replicaLimits = new AtomicLong(replicaLimits); + } + + public MemoryTracker memory() { + return memoryTracker; + } + + public CountTracker count() { + return countTracker; + } + + public RejectionTracker rejection() { + return rejectionTracker; + } + + public LatencyTracker latency() { + return latencyTracker; + } + + public TimeStampTracker timeStamp() { + return timeStampTracker; + } + + public OutstandingRequestTracker outstandingRequest() { + return outstandingRequestTracker; + } + + public ThroughputTracker throughput() { + return throughputTracker; + } + + public AtomicLong getPrimaryAndCoordinatingLimits() { + return primaryAndCoordinatingLimits; + } + + public AtomicLong getReplicaLimits() { + return replicaLimits; + } + + //Memory tracker + public static class MemoryTracker { + private final AtomicLong currentCombinedCoordinatingAndPrimaryBytes = new AtomicLong(); + private final AtomicLong currentCoordinatingBytes = new AtomicLong(); + private final AtomicLong 
currentPrimaryBytes = new AtomicLong(); + private final AtomicLong currentReplicaBytes = new AtomicLong(); + + private final AtomicLong totalCombinedCoordinatingAndPrimaryBytes = new AtomicLong(); + private final AtomicLong totalCoordinatingBytes = new AtomicLong(); + private final AtomicLong totalPrimaryBytes = new AtomicLong(); + private final AtomicLong totalReplicaBytes = new AtomicLong(); + + public AtomicLong getCurrentCombinedCoordinatingAndPrimaryBytes() { + return currentCombinedCoordinatingAndPrimaryBytes; + } + + public AtomicLong getCurrentCoordinatingBytes() { + return currentCoordinatingBytes; + } + + public AtomicLong getCurrentPrimaryBytes() { + return currentPrimaryBytes; + } + + public AtomicLong getCurrentReplicaBytes() { + return currentReplicaBytes; + } + + public AtomicLong getTotalCombinedCoordinatingAndPrimaryBytes() { + return totalCombinedCoordinatingAndPrimaryBytes; + } + + public AtomicLong getTotalCoordinatingBytes() { + return totalCoordinatingBytes; + } + + public AtomicLong getTotalPrimaryBytes() { + return totalPrimaryBytes; + } + + public AtomicLong getTotalReplicaBytes() { + return totalReplicaBytes; + } + } + + //Count based tracker + public static class CountTracker { + private final AtomicLong coordinatingCount = new AtomicLong(); + private final AtomicLong primaryCount = new AtomicLong(); + private final AtomicLong replicaCount = new AtomicLong(); + + public AtomicLong getCoordinatingCount() { + return coordinatingCount; + } + + public AtomicLong getPrimaryCount() { + return primaryCount; + } + + public AtomicLong getReplicaCount() { + return replicaCount; + } + } + + //Latency Tracker + public static class LatencyTracker { + private final AtomicLong coordinatingTimeInMillis = new AtomicLong(); + private final AtomicLong primaryTimeInMillis = new AtomicLong(); + private final AtomicLong replicaTimeInMillis = new AtomicLong(); + + public AtomicLong getCoordinatingTimeInMillis() { + return coordinatingTimeInMillis; + } + + 
public AtomicLong getPrimaryTimeInMillis() { + return primaryTimeInMillis; + } + + public AtomicLong getReplicaTimeInMillis() { + return replicaTimeInMillis; + } + } + + //Rejection Count tracker + public static class RejectionTracker { + //Coordinating Rejection Count + private final AtomicLong coordinatingRejections = new AtomicLong(); + private final AtomicLong coordinatingNodeLimitsBreachedRejections = new AtomicLong(); + private final AtomicLong coordinatingLastSuccessfulRequestLimitsBreachedRejections = new AtomicLong(); + private final AtomicLong coordinatingThroughputDegradationLimitsBreachedRejections = new AtomicLong(); + + //Primary Rejection Count + private final AtomicLong primaryRejections = new AtomicLong(); + private final AtomicLong primaryNodeLimitsBreachedRejections = new AtomicLong(); + private final AtomicLong primaryLastSuccessfulRequestLimitsBreachedRejections = new AtomicLong(); + private final AtomicLong primaryThroughputDegradationLimitsBreachedRejections = new AtomicLong(); + + //Replica Rejection Count + private final AtomicLong replicaRejections = new AtomicLong(); + private final AtomicLong replicaNodeLimitsBreachedRejections = new AtomicLong(); + private final AtomicLong replicaLastSuccessfulRequestLimitsBreachedRejections = new AtomicLong(); + private final AtomicLong replicaThroughputDegradationLimitsBreachedRejections = new AtomicLong(); + + public AtomicLong getCoordinatingRejections() { + return coordinatingRejections; + } + + public AtomicLong getCoordinatingNodeLimitsBreachedRejections() { + return coordinatingNodeLimitsBreachedRejections; + } + + public AtomicLong getCoordinatingLastSuccessfulRequestLimitsBreachedRejections() { + return coordinatingLastSuccessfulRequestLimitsBreachedRejections; + } + + public AtomicLong getCoordinatingThroughputDegradationLimitsBreachedRejections() { + return coordinatingThroughputDegradationLimitsBreachedRejections; + } + + public AtomicLong getPrimaryRejections() { + return 
primaryRejections; + } + + public AtomicLong getPrimaryNodeLimitsBreachedRejections() { + return primaryNodeLimitsBreachedRejections; + } + + public AtomicLong getPrimaryLastSuccessfulRequestLimitsBreachedRejections() { + return primaryLastSuccessfulRequestLimitsBreachedRejections; + } + + public AtomicLong getPrimaryThroughputDegradationLimitsBreachedRejections() { + return primaryThroughputDegradationLimitsBreachedRejections; + } + + public AtomicLong getReplicaRejections() { + return replicaRejections; + } + + public AtomicLong getReplicaNodeLimitsBreachedRejections() { + return replicaNodeLimitsBreachedRejections; + } + + public AtomicLong getReplicaLastSuccessfulRequestLimitsBreachedRejections() { + return replicaLastSuccessfulRequestLimitsBreachedRejections; + } + + public AtomicLong getReplicaThroughputDegradationLimitsBreachedRejections() { + return replicaThroughputDegradationLimitsBreachedRejections; + } + } + + //Last Successful TimeStamp Tracker + public static class TimeStampTracker { + private final AtomicLong lastSuccessfulCoordinatingRequestTimestamp = new AtomicLong(); + private final AtomicLong lastSuccessfulPrimaryRequestTimestamp = new AtomicLong(); + private final AtomicLong lastSuccessfulReplicaRequestTimestamp = new AtomicLong(); + + public AtomicLong getLastSuccessfulCoordinatingRequestTimestamp() { + return lastSuccessfulCoordinatingRequestTimestamp; + } + + public AtomicLong getLastSuccessfulPrimaryRequestTimestamp() { + return lastSuccessfulPrimaryRequestTimestamp; + } + + public AtomicLong getLastSuccessfulReplicaRequestTimestamp() { + return lastSuccessfulReplicaRequestTimestamp; + } + } + + //Total Outstanding requests after last successful request + public static class OutstandingRequestTracker { + private final AtomicLong totalOutstandingCoordinatingRequests = new AtomicLong(); + private final AtomicLong totalOutstandingPrimaryRequests = new AtomicLong(); + private final AtomicLong totalOutstandingReplicaRequests = new AtomicLong(); 
+ + public AtomicLong getTotalOutstandingCoordinatingRequests() { + return totalOutstandingCoordinatingRequests; + } + + public AtomicLong getTotalOutstandingPrimaryRequests() { + return totalOutstandingPrimaryRequests; + } + + public AtomicLong getTotalOutstandingReplicaRequests() { + return totalOutstandingReplicaRequests; + } + } + + // Throughput/Moving avg Tracker + public static class ThroughputTracker { + /* + Shard Window Throughput Tracker. + We will be using atomic long to track double values as mentioned here - + https://docs.oracle.com/javase/6/docs/api/java/util/concurrent/atomic/package-summary.html + */ + private final AtomicLong coordinatingThroughputMovingAverage = new AtomicLong(); + private final AtomicLong primaryThroughputMovingAverage = new AtomicLong(); + private final AtomicLong replicaThroughputMovingAverage = new AtomicLong(); + + //Shard Window Throughput Queue + private final ConcurrentLinkedQueue coordinatingThroughputMovingQueue = new ConcurrentLinkedQueue(); + private final ConcurrentLinkedQueue primaryThroughputMovingQueue = new ConcurrentLinkedQueue(); + private final ConcurrentLinkedQueue replicaThroughputMovingQueue = new ConcurrentLinkedQueue(); + + public AtomicLong getCoordinatingThroughputMovingAverage() { + return coordinatingThroughputMovingAverage; + } + + public AtomicLong getPrimaryThroughputMovingAverage() { + return primaryThroughputMovingAverage; + } + + public AtomicLong getReplicaThroughputMovingAverage() { + return replicaThroughputMovingAverage; + } + + public ConcurrentLinkedQueue getCoordinatingThroughputMovingQueue() { + return coordinatingThroughputMovingQueue; + } + + public ConcurrentLinkedQueue getPrimaryThroughputMovingQueue() { + return primaryThroughputMovingQueue; + } + + public ConcurrentLinkedQueue getReplicaThroughputMovingQueue() { + return replicaThroughputMovingQueue; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java 
b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java index 135b47cd4af8c..2ae5ff4ff9df5 100644 --- a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java +++ b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java @@ -38,7 +38,6 @@ import org.apache.lucene.store.AlreadyClosedException; import org.opensearch.ExceptionsHelper; import org.opensearch.action.ActionListener; -import org.opensearch.index.IndexingPressure; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.ActiveShardCount; import org.opensearch.action.support.WriteResponse; @@ -55,6 +54,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.index.IndexNotFoundException; +import org.opensearch.index.IndexingPressureService; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardClosedException; import org.opensearch.index.shard.ShardId; @@ -94,7 +94,7 @@ public RetentionLeaseSyncAction( final ThreadPool threadPool, final ShardStateAction shardStateAction, final ActionFilters actionFilters, - final IndexingPressure indexingPressure, + final IndexingPressureService indexingPressureService, final SystemIndices systemIndices) { super( settings, @@ -107,7 +107,7 @@ public RetentionLeaseSyncAction( actionFilters, RetentionLeaseSyncAction.Request::new, RetentionLeaseSyncAction.Request::new, - ignore -> ThreadPool.Names.MANAGEMENT, false, indexingPressure, systemIndices); + ignore -> ThreadPool.Names.MANAGEMENT, false, indexingPressureService, systemIndices); } @Override diff --git a/server/src/main/java/org/opensearch/index/stats/IndexingPressurePerShardStats.java b/server/src/main/java/org/opensearch/index/stats/IndexingPressurePerShardStats.java new file mode 100644 index 0000000000000..3307069614194 --- /dev/null +++ 
b/server/src/main/java/org/opensearch/index/stats/IndexingPressurePerShardStats.java @@ -0,0 +1,416 @@ +/* + * Copyright OpenSearch Contributors. + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index.stats; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.unit.ByteSizeValue; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.ToXContentFragment; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.index.ShardIndexingPressureTracker; + +import java.io.IOException; + +public class IndexingPressurePerShardStats implements Writeable, ToXContentFragment { + + private final String shardId; + + private final long totalCombinedCoordinatingAndPrimaryBytes; + private final long totalCoordinatingBytes; + private final long totalPrimaryBytes; + private final long totalReplicaBytes; + + private final long currentCombinedCoordinatingAndPrimaryBytes; + private final long currentCoordinatingBytes; + private final long currentPrimaryBytes; + private final long currentReplicaBytes; + + private final long totalCoordinatingCount; + private final long totalPrimaryCount; + private final long totalReplicaCount; + + private final long coordinatingRejections; + private final long coordinatingNodeLimitsBreachedRejections; + private final long coordinatingLastSuccessfulRequestLimitsBreachedRejections; + private final long coordinatingThroughputDegradationLimitsBreachedRejections; + + private final long primaryRejections; + private final long primaryNodeLimitsBreachedRejections; + private final long primaryLastSuccessfulRequestLimitsBreachedRejections; + private final long primaryThroughputDegradationLimitsBreachedRejections; + + private final long replicaRejections; + private final long replicaNodeLimitsBreachedRejections; + private final long 
replicaLastSuccessfulRequestLimitsBreachedRejections; + private final long replicaThroughputDegradationLimitsBreachedRejections; + + private final long coordinatingTimeInMillis; + private final long primaryTimeInMillis; + private final long replicaTimeInMillis; + + private final long coordinatingLastSuccessfulRequestTimestampInMillis; + private final long primaryLastSuccessfulRequestTimestampInMillis; + private final long replicaLastSuccessfulRequestTimestampInMillis; + + private final long currentPrimaryAndCoordinatingLimits; + private final long currentReplicaLimits; + + private final boolean shardIndexingPressureEnforced; + + public IndexingPressurePerShardStats(StreamInput in) throws IOException { + shardId = in.readString(); + shardIndexingPressureEnforced = in.readBoolean(); + + totalCombinedCoordinatingAndPrimaryBytes = in.readVLong(); + totalCoordinatingBytes = in.readVLong(); + totalPrimaryBytes = in.readVLong(); + totalReplicaBytes = in.readVLong(); + + currentCombinedCoordinatingAndPrimaryBytes = in.readVLong(); + currentCoordinatingBytes = in.readVLong(); + currentPrimaryBytes = in.readVLong(); + currentReplicaBytes = in.readVLong(); + + totalCoordinatingCount = in.readVLong(); + totalPrimaryCount = in.readVLong(); + totalReplicaCount = in.readVLong(); + + coordinatingRejections = in.readVLong(); + coordinatingNodeLimitsBreachedRejections = in.readVLong(); + coordinatingLastSuccessfulRequestLimitsBreachedRejections = in.readVLong(); + coordinatingThroughputDegradationLimitsBreachedRejections = in.readVLong(); + + primaryRejections = in.readVLong(); + primaryNodeLimitsBreachedRejections = in.readVLong(); + primaryLastSuccessfulRequestLimitsBreachedRejections = in.readVLong(); + primaryThroughputDegradationLimitsBreachedRejections = in.readVLong(); + + replicaRejections = in.readVLong(); + replicaNodeLimitsBreachedRejections = in.readVLong(); + replicaLastSuccessfulRequestLimitsBreachedRejections = in.readVLong(); + 
replicaThroughputDegradationLimitsBreachedRejections = in.readVLong(); + + coordinatingTimeInMillis = in.readVLong(); + primaryTimeInMillis = in.readVLong(); + replicaTimeInMillis = in.readVLong(); + + coordinatingLastSuccessfulRequestTimestampInMillis = in.readVLong(); + primaryLastSuccessfulRequestTimestampInMillis = in.readVLong(); + replicaLastSuccessfulRequestTimestampInMillis = in.readVLong(); + + currentPrimaryAndCoordinatingLimits = in.readVLong(); + currentReplicaLimits = in.readVLong(); + } + + public IndexingPressurePerShardStats(ShardIndexingPressureTracker shardIndexingPressureTracker, + boolean shardIndexingPressureEnforced) { + + shardId = shardIndexingPressureTracker.getShardId().toString(); + this.shardIndexingPressureEnforced = shardIndexingPressureEnforced; + + totalCombinedCoordinatingAndPrimaryBytes = + shardIndexingPressureTracker.memory().getTotalCombinedCoordinatingAndPrimaryBytes().get(); + totalCoordinatingBytes = shardIndexingPressureTracker.memory().getTotalCoordinatingBytes().get(); + totalPrimaryBytes = shardIndexingPressureTracker.memory().getTotalPrimaryBytes().get(); + totalReplicaBytes = shardIndexingPressureTracker.memory().getTotalReplicaBytes().get(); + + currentCombinedCoordinatingAndPrimaryBytes = + shardIndexingPressureTracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().get(); + currentCoordinatingBytes = shardIndexingPressureTracker.memory().getCurrentCoordinatingBytes().get(); + currentPrimaryBytes = shardIndexingPressureTracker.memory().getCurrentPrimaryBytes().get(); + currentReplicaBytes = shardIndexingPressureTracker.memory().getCurrentReplicaBytes().get(); + + totalCoordinatingCount = shardIndexingPressureTracker.count().getCoordinatingCount().get(); + totalPrimaryCount = shardIndexingPressureTracker.count().getPrimaryCount().get(); + totalReplicaCount = shardIndexingPressureTracker.count().getReplicaCount().get(); + + coordinatingRejections = 
shardIndexingPressureTracker.rejection().getCoordinatingRejections().get(); + coordinatingNodeLimitsBreachedRejections = + shardIndexingPressureTracker.rejection().getCoordinatingNodeLimitsBreachedRejections().get(); + coordinatingLastSuccessfulRequestLimitsBreachedRejections = + shardIndexingPressureTracker.rejection().getCoordinatingLastSuccessfulRequestLimitsBreachedRejections().get(); + coordinatingThroughputDegradationLimitsBreachedRejections = + shardIndexingPressureTracker.rejection().getCoordinatingThroughputDegradationLimitsBreachedRejections().get(); + + primaryRejections = shardIndexingPressureTracker.rejection().getPrimaryRejections().get(); + primaryNodeLimitsBreachedRejections = shardIndexingPressureTracker.rejection().getPrimaryNodeLimitsBreachedRejections().get(); + primaryLastSuccessfulRequestLimitsBreachedRejections = + shardIndexingPressureTracker.rejection().getPrimaryLastSuccessfulRequestLimitsBreachedRejections().get(); + primaryThroughputDegradationLimitsBreachedRejections = + shardIndexingPressureTracker.rejection().getPrimaryThroughputDegradationLimitsBreachedRejections().get(); + + replicaRejections = shardIndexingPressureTracker.rejection().getReplicaRejections().get(); + replicaNodeLimitsBreachedRejections = shardIndexingPressureTracker.rejection().getReplicaNodeLimitsBreachedRejections().get(); + replicaLastSuccessfulRequestLimitsBreachedRejections = + shardIndexingPressureTracker.rejection().getReplicaLastSuccessfulRequestLimitsBreachedRejections().get(); + replicaThroughputDegradationLimitsBreachedRejections = + shardIndexingPressureTracker.rejection().getReplicaThroughputDegradationLimitsBreachedRejections().get(); + + coordinatingTimeInMillis = shardIndexingPressureTracker.latency().getCoordinatingTimeInMillis().get(); + primaryTimeInMillis = shardIndexingPressureTracker.latency().getPrimaryTimeInMillis().get(); + replicaTimeInMillis = shardIndexingPressureTracker.latency().getReplicaTimeInMillis().get(); + + 
coordinatingLastSuccessfulRequestTimestampInMillis = + shardIndexingPressureTracker.timeStamp().getLastSuccessfulCoordinatingRequestTimestamp().get(); + primaryLastSuccessfulRequestTimestampInMillis = + shardIndexingPressureTracker.timeStamp().getLastSuccessfulPrimaryRequestTimestamp().get(); + replicaLastSuccessfulRequestTimestampInMillis = + shardIndexingPressureTracker.timeStamp().getLastSuccessfulReplicaRequestTimestamp().get(); + + currentPrimaryAndCoordinatingLimits = shardIndexingPressureTracker.getPrimaryAndCoordinatingLimits().get(); + currentReplicaLimits = shardIndexingPressureTracker.getReplicaLimits().get(); + + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(shardId); + out.writeBoolean(shardIndexingPressureEnforced); + + out.writeVLong(totalCombinedCoordinatingAndPrimaryBytes); + out.writeVLong(totalCoordinatingBytes); + out.writeVLong(totalPrimaryBytes); + out.writeVLong(totalReplicaBytes); + + out.writeVLong(currentCombinedCoordinatingAndPrimaryBytes); + out.writeVLong(currentCoordinatingBytes); + out.writeVLong(currentPrimaryBytes); + out.writeVLong(currentReplicaBytes); + + out.writeVLong(totalCoordinatingCount); + out.writeVLong(totalPrimaryCount); + out.writeVLong(totalReplicaCount); + + out.writeVLong(coordinatingRejections); + out.writeVLong(coordinatingNodeLimitsBreachedRejections); + out.writeVLong(coordinatingLastSuccessfulRequestLimitsBreachedRejections); + out.writeVLong(coordinatingThroughputDegradationLimitsBreachedRejections); + + out.writeVLong(primaryRejections); + out.writeVLong(primaryNodeLimitsBreachedRejections); + out.writeVLong(primaryLastSuccessfulRequestLimitsBreachedRejections); + out.writeVLong(primaryThroughputDegradationLimitsBreachedRejections); + + out.writeVLong(replicaRejections); + out.writeVLong(replicaNodeLimitsBreachedRejections); + out.writeVLong(replicaLastSuccessfulRequestLimitsBreachedRejections); + 
out.writeVLong(replicaThroughputDegradationLimitsBreachedRejections); + + out.writeVLong(coordinatingTimeInMillis); + out.writeVLong(primaryTimeInMillis); + out.writeVLong(replicaTimeInMillis); + + out.writeVLong(coordinatingLastSuccessfulRequestTimestampInMillis); + out.writeVLong(primaryLastSuccessfulRequestTimestampInMillis); + out.writeVLong(replicaLastSuccessfulRequestTimestampInMillis); + + out.writeVLong(currentPrimaryAndCoordinatingLimits); + out.writeVLong(currentReplicaLimits); + } + + public long getTotalCombinedCoordinatingAndPrimaryBytes() { + return totalCombinedCoordinatingAndPrimaryBytes; + } + + public long getTotalCoordinatingBytes() { + return totalCoordinatingBytes; + } + + public long getTotalPrimaryBytes() { + return totalPrimaryBytes; + } + + public long getTotalReplicaBytes() { + return totalReplicaBytes; + } + + public long getCurrentCombinedCoordinatingAndPrimaryBytes() { + return currentCombinedCoordinatingAndPrimaryBytes; + } + + public long getCurrentCoordinatingBytes() { + return currentCoordinatingBytes; + } + + public long getCurrentPrimaryBytes() { + return currentPrimaryBytes; + } + + public long getCurrentReplicaBytes() { + return currentReplicaBytes; + } + + public long getCoordinatingRejections() { + return coordinatingRejections; + } + + public long getCoordinatingNodeLimitsBreachedRejections() { + return coordinatingNodeLimitsBreachedRejections; + } + + public long getCoordinatingLastSuccessfulRequestLimitsBreachedRejections() { + return coordinatingLastSuccessfulRequestLimitsBreachedRejections; + } + + public long getCoordinatingThroughputDegradationLimitsBreachedRejections() { + return coordinatingThroughputDegradationLimitsBreachedRejections; + } + + public long getPrimaryRejections() { + return primaryRejections; + } + + public long getPrimaryNodeLimitsBreachedRejections() { + return primaryNodeLimitsBreachedRejections; + } + + public long getPrimaryLastSuccessfulRequestLimitsBreachedRejections() { + return 
primaryLastSuccessfulRequestLimitsBreachedRejections; + } + + public long getPrimaryThroughputDegradationLimitsBreachedRejections() { + return primaryThroughputDegradationLimitsBreachedRejections; + } + + public long getReplicaRejections() { + return replicaRejections; + } + + public long getReplicaNodeLimitsBreachedRejections() { + return replicaNodeLimitsBreachedRejections; + } + + public long getReplicaLastSuccessfulRequestLimitsBreachedRejections() { + return replicaLastSuccessfulRequestLimitsBreachedRejections; + } + + public long getReplicaThroughputDegradationLimitsBreachedRejections() { + return replicaThroughputDegradationLimitsBreachedRejections; + } + + public long getCurrentPrimaryAndCoordinatingLimits() { + return currentPrimaryAndCoordinatingLimits; + } + + public long getCurrentReplicaLimits() { + return currentReplicaLimits; + } + + private static final String COORDINATING = "coordinating"; + private static final String COORDINATING_IN_BYTES = "coordinating_in_bytes"; + private static final String COORDINATING_COUNT = "coordinating_count"; + private static final String PRIMARY = "primary"; + private static final String PRIMARY_IN_BYTES = "primary_in_bytes"; + private static final String PRIMARY_COUNT = "primary_count"; + private static final String REPLICA = "replica"; + private static final String REPLICA_IN_BYTES = "replica_in_bytes"; + private static final String REPLICA_COUNT = "replica_count"; + private static final String COORDINATING_REJECTIONS = "coordinating_rejections"; + private static final String PRIMARY_REJECTIONS = "primary_rejections"; + private static final String REPLICA_REJECTIONS = "replica_rejections"; + private static final String BREAKUP_NODE_LIMITS = "node_limits"; + private static final String BREAKUP_NO_SUCCESSFUL_REQUEST_LIMITS = "no_successful_request_limits"; + private static final String BREAKUP_THROUGHPUT_DEGRADATION_LIMIT = "throughput_degradation_limits"; + private static final String COORDINATING_TIME_IN_MILLIS = 
"coordinating_time_in_millis"; + private static final String PRIMARY_TIME_IN_MILLIS = "primary_time_in_millis"; + private static final String REPLICA_TIME_IN_MILLIS = "replica_time_in_millis"; + private static final String COORDINATING_LAST_SUCCESSFUL_REQUEST_TIMESTAMP_IN_MILLIS = + "coordinating_last_successful_request_timestamp_in_millis"; + private static final String PRIMARY_LAST_SUCCESSFUL_REQUEST_TIMESTAMP_IN_MILLIS = + "primary_last_successful_request_timestamp_in_millis"; + private static final String REPLICA_LAST_SUCCESSFUL_REQUEST_TIMESTAMP_IN_MILLIS = "replica_last_successful_request_timestamp_in_millis"; + private static final String CURRENT_COORDINATING_AND_PRIMARY_LIMITS_IN_BYTES = "current_coordinating_and_primary_limits_in_bytes"; + private static final String CURRENT_REPLICA_LIMITS_IN_BYTES = "current_replica_limits_in_bytes"; + private static final String CURRENT_COORDINATING_AND_PRIMARY_IN_BYTES = "current_coordinating_and_primary_bytes"; + private static final String CURRENT_REPLICA_IN_BYTES = "current_replica_bytes"; + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(shardId); + + builder.startObject("memory"); + builder.startObject("current"); + builder.humanReadableField(COORDINATING_IN_BYTES, COORDINATING, new ByteSizeValue(currentCoordinatingBytes)); + builder.humanReadableField(PRIMARY_IN_BYTES, PRIMARY, new ByteSizeValue(currentPrimaryBytes)); + builder.humanReadableField(REPLICA_IN_BYTES, REPLICA, new ByteSizeValue(currentReplicaBytes)); + builder.endObject(); + builder.startObject("total"); + builder.humanReadableField(COORDINATING_IN_BYTES, COORDINATING, new ByteSizeValue(totalCoordinatingBytes)); + builder.humanReadableField(PRIMARY_IN_BYTES, PRIMARY, new ByteSizeValue(totalPrimaryBytes)); + builder.humanReadableField(REPLICA_IN_BYTES, REPLICA, new ByteSizeValue(totalReplicaBytes)); + builder.endObject(); + builder.endObject(); + + 
builder.startObject("rejection"); + builder.startObject("coordinating"); + builder.field(COORDINATING_REJECTIONS, coordinatingRejections); + if (shardIndexingPressureEnforced) { + builder.startObject("breakup"); + } else { + builder.startObject("breakup_shadow_mode"); + } + builder.field(BREAKUP_NODE_LIMITS, coordinatingNodeLimitsBreachedRejections); + builder.field(BREAKUP_NO_SUCCESSFUL_REQUEST_LIMITS, coordinatingLastSuccessfulRequestLimitsBreachedRejections); + builder.field(BREAKUP_THROUGHPUT_DEGRADATION_LIMIT, coordinatingThroughputDegradationLimitsBreachedRejections); + builder.endObject(); + builder.endObject(); + builder.startObject("primary"); + builder.field(PRIMARY_REJECTIONS, primaryRejections); + if (shardIndexingPressureEnforced) { + builder.startObject("breakup"); + } else { + builder.startObject("breakup_shadow_mode"); + } + builder.field(BREAKUP_NODE_LIMITS, primaryNodeLimitsBreachedRejections); + builder.field(BREAKUP_NO_SUCCESSFUL_REQUEST_LIMITS, primaryLastSuccessfulRequestLimitsBreachedRejections); + builder.field(BREAKUP_THROUGHPUT_DEGRADATION_LIMIT, primaryThroughputDegradationLimitsBreachedRejections); + builder.endObject(); + builder.endObject(); + builder.startObject("replica"); + builder.field(REPLICA_REJECTIONS, replicaRejections); + if (shardIndexingPressureEnforced) { + builder.startObject("breakup"); + } else { + builder.startObject("breakup_shadow_mode"); + } + builder.field(BREAKUP_NODE_LIMITS, replicaNodeLimitsBreachedRejections); + builder.field(BREAKUP_NO_SUCCESSFUL_REQUEST_LIMITS, replicaLastSuccessfulRequestLimitsBreachedRejections); + builder.field(BREAKUP_THROUGHPUT_DEGRADATION_LIMIT, replicaThroughputDegradationLimitsBreachedRejections); + builder.endObject(); + builder.endObject(); + builder.endObject(); + + builder.startObject("last_successful_timestamp"); + builder.field(COORDINATING_LAST_SUCCESSFUL_REQUEST_TIMESTAMP_IN_MILLIS, coordinatingLastSuccessfulRequestTimestampInMillis); + 
builder.field(PRIMARY_LAST_SUCCESSFUL_REQUEST_TIMESTAMP_IN_MILLIS, primaryLastSuccessfulRequestTimestampInMillis); + builder.field(REPLICA_LAST_SUCCESSFUL_REQUEST_TIMESTAMP_IN_MILLIS, replicaLastSuccessfulRequestTimestampInMillis); + builder.endObject(); + + builder.startObject("indexing"); + builder.field(COORDINATING_TIME_IN_MILLIS, coordinatingTimeInMillis); + builder.field(COORDINATING_COUNT, totalCoordinatingCount); + builder.field(PRIMARY_TIME_IN_MILLIS, primaryTimeInMillis); + builder.field(PRIMARY_COUNT, totalPrimaryCount); + builder.field(REPLICA_TIME_IN_MILLIS, replicaTimeInMillis); + builder.field(REPLICA_COUNT, totalReplicaCount); + builder.endObject(); + + builder.startObject("memory_allocation"); + builder.startObject("current"); + builder.field(CURRENT_COORDINATING_AND_PRIMARY_IN_BYTES, currentCombinedCoordinatingAndPrimaryBytes); + builder.field(CURRENT_REPLICA_IN_BYTES, currentReplicaBytes); + builder.endObject(); + builder.startObject("limit"); + builder.field(CURRENT_COORDINATING_AND_PRIMARY_LIMITS_IN_BYTES, currentPrimaryAndCoordinatingLimits); + builder.field(CURRENT_REPLICA_LIMITS_IN_BYTES, currentReplicaLimits); + builder.endObject(); + builder.endObject(); + + return builder.endObject(); + } +} diff --git a/server/src/main/java/org/opensearch/index/stats/ShardIndexingPressureStats.java b/server/src/main/java/org/opensearch/index/stats/ShardIndexingPressureStats.java new file mode 100644 index 0000000000000..73e7301ba7345 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/stats/ShardIndexingPressureStats.java @@ -0,0 +1,104 @@ +/* + * Copyright OpenSearch Contributors. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index.stats; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.ToXContentFragment; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.index.shard.ShardId; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class ShardIndexingPressureStats implements Writeable, ToXContentFragment { + + private final Map shardIndexingPressureStore; + private final long totalNodeLimitsBreachedRejections; + private final long totalLastSuccessfulRequestLimitsBreachedRejections; + private final long totalThroughputDegradationLimitsBreachedRejections; + private final boolean shardIndexingPressureEnabled; + private final boolean shardIndexingPressureEnforced; + + public ShardIndexingPressureStats(StreamInput in) throws IOException { + int shardEntries = in.readInt(); + shardIndexingPressureStore = new HashMap<>(); + for (int i = 0; i < shardEntries; i++) { + Long hashCode = in.readLong(); + IndexingPressurePerShardStats shardStats = new IndexingPressurePerShardStats(in); + shardIndexingPressureStore.put(hashCode, shardStats); + } + totalNodeLimitsBreachedRejections = in.readVLong(); + totalLastSuccessfulRequestLimitsBreachedRejections = in.readVLong(); + totalThroughputDegradationLimitsBreachedRejections = in.readVLong(); + shardIndexingPressureEnabled = in.readBoolean(); + shardIndexingPressureEnforced = in.readBoolean(); + } + + public ShardIndexingPressureStats(Map shardIndexingPressureStore, + long totalNodeLimitsBreachedRejections, + long totalLastSuccessfulRequestLimitsBreachedRejections, + long totalThroughputDegradationLimitsBreachedRejections, + boolean shardIndexingPressureEnabled, + boolean shardIndexingPressureEnforced) { + 
this.shardIndexingPressureStore = shardIndexingPressureStore; + this.totalNodeLimitsBreachedRejections = totalNodeLimitsBreachedRejections; + this.totalLastSuccessfulRequestLimitsBreachedRejections = totalLastSuccessfulRequestLimitsBreachedRejections; + this.totalThroughputDegradationLimitsBreachedRejections = totalThroughputDegradationLimitsBreachedRejections; + this.shardIndexingPressureEnabled = shardIndexingPressureEnabled; + this.shardIndexingPressureEnforced = shardIndexingPressureEnforced; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeInt(shardIndexingPressureStore.size()); + for (Map.Entry entry : shardIndexingPressureStore.entrySet()) { + out.writeLong(entry.getKey()); + entry.getValue().writeTo(out); + } + out.writeVLong(totalNodeLimitsBreachedRejections); + out.writeVLong(totalLastSuccessfulRequestLimitsBreachedRejections); + out.writeVLong(totalThroughputDegradationLimitsBreachedRejections); + out.writeBoolean(shardIndexingPressureEnabled); + out.writeBoolean(shardIndexingPressureEnforced); + } + + public IndexingPressurePerShardStats getIndexingPressureShardStats(ShardId shardId) { + IndexingPressurePerShardStats value = shardIndexingPressureStore.get((long)shardId.hashCode()); + return value; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject("shard_indexing_pressure"); + builder.startObject("stats"); + for (Map.Entry entry : shardIndexingPressureStore.entrySet()) { + entry.getValue().toXContent(builder, params); + } + builder.endObject(); + if (shardIndexingPressureEnforced) { + builder.startObject("total_rejections_breakup"); + } else { + builder.startObject("total_rejections_breakup_shadow_mode"); + } + builder.field("node_limits", totalNodeLimitsBreachedRejections); + builder.field("no_successful_request_limits", totalLastSuccessfulRequestLimitsBreachedRejections); + 
builder.field("throughput_degradation_limits", totalThroughputDegradationLimitsBreachedRejections); + builder.endObject(); + builder.field("enabled", shardIndexingPressureEnabled); + builder.field("enforced", shardIndexingPressureEnforced); + return builder.endObject(); + } + + public void addAll(ShardIndexingPressureStats shardIndexingPressureStats) { + if (this.shardIndexingPressureStore != null) { + this.shardIndexingPressureStore.putAll(shardIndexingPressureStats.shardIndexingPressureStore); + } + } +} diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 87d8d516c4bd2..9444489d61d59 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -36,6 +36,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.util.Constants; import org.apache.lucene.util.SetOnce; +import org.opensearch.index.IndexingPressureService; import org.opensearch.watcher.ResourceWatcherService; import org.opensearch.Assertions; import org.opensearch.Build; @@ -120,7 +121,6 @@ import org.opensearch.gateway.PersistedClusterStateService; import org.opensearch.http.HttpServerTransport; import org.opensearch.index.IndexSettings; -import org.opensearch.index.IndexingPressure; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.indices.IndicesModule; @@ -218,6 +218,7 @@ import java.util.stream.Stream; import static java.util.stream.Collectors.toList; +import static org.opensearch.index.ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED_ATTRIBUTE_KEY; /** * A node represent a node within a cluster ({@code cluster.name}). 
The {@link #client()} can be used @@ -317,6 +318,9 @@ protected Node(final Environment initialEnvironment, Settings tmpSettings = Settings.builder().put(initialEnvironment.settings()) .put(Client.CLIENT_TYPE_SETTING_S.getKey(), CLIENT_TYPE).build(); + // Enabling shard indexing backpressure + tmpSettings = addShardIndexingBackPressureAttributeSettings(tmpSettings); + final JvmInfo jvmInfo = JvmInfo.jvmInfo(); logger.info( "version[{}], pid[{}], build[{}/{}/{}], OS[{}/{}/{}], JVM[{}/{}/{}/{}]", @@ -599,7 +603,8 @@ protected Node(final Environment initialEnvironment, final SearchTransportService searchTransportService = new SearchTransportService(transportService, SearchExecutionStatsCollector.makeWrapper(responseCollectorService)); final HttpServerTransport httpServerTransport = newHttpTransport(networkModule); - final IndexingPressure indexingLimits = new IndexingPressure(settings); + final IndexingPressureService indexingPressureService = new IndexingPressureService(settings, clusterService); + clusterService.setIndexingPressureService(indexingPressureService); final RecoverySettings recoverySettings = new RecoverySettings(settings, settingsModule.getClusterSettings()); RepositoriesModule repositoriesModule = new RepositoriesModule(this.environment, @@ -628,7 +633,7 @@ protected Node(final Environment initialEnvironment, this.nodeService = new NodeService(settings, threadPool, monitorService, discoveryModule.getDiscovery(), transportService, indicesService, pluginsService, circuitBreakerService, scriptService, httpServerTransport, ingestService, clusterService, settingsModule.getSettingsFilter(), responseCollectorService, - searchTransportService, indexingLimits, searchModule.getValuesSourceRegistry().getUsageService()); + searchTransportService, indexingPressureService, searchModule.getValuesSourceRegistry().getUsageService()); final SearchService searchService = newSearchService(clusterService, indicesService, threadPool, scriptService, bigArrays, 
searchModule.getFetchPhase(), @@ -664,7 +669,7 @@ protected Node(final Environment initialEnvironment, b.bind(ScriptService.class).toInstance(scriptService); b.bind(AnalysisRegistry.class).toInstance(analysisModule.getAnalysisRegistry()); b.bind(IngestService.class).toInstance(ingestService); - b.bind(IndexingPressure.class).toInstance(indexingLimits); + b.bind(IndexingPressureService.class).toInstance(indexingPressureService); b.bind(UsageService.class).toInstance(usageService); b.bind(AggregationUsageService.class).toInstance(searchModule.getValuesSourceRegistry().getUsageService()); b.bind(NamedWriteableRegistry.class).toInstance(namedWriteableRegistry); @@ -745,6 +750,13 @@ protected Node(final Environment initialEnvironment, } } + private static Settings addShardIndexingBackPressureAttributeSettings(Settings settings) { + String ShardIndexingBackPressureEnabledValue = "true"; + return Settings.builder().put(settings) + .put(NODE_ATTRIBUTES.getKey() + SHARD_INDEXING_PRESSURE_ENABLED_ATTRIBUTE_KEY, ShardIndexingBackPressureEnabledValue) + .build(); + } + protected TransportService newTransportService(Settings settings, Transport transport, ThreadPool threadPool, TransportInterceptor interceptor, Function localNodeFactory, diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index c20874009c56a..188a5a9c821bf 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -32,7 +32,6 @@ package org.opensearch.node; -import org.opensearch.index.IndexingPressure; import org.opensearch.core.internal.io.IOUtils; import org.opensearch.Build; import org.opensearch.Version; @@ -46,6 +45,7 @@ import org.opensearch.common.settings.SettingsFilter; import org.opensearch.discovery.Discovery; import org.opensearch.http.HttpServerTransport; +import org.opensearch.index.IndexingPressureService; import 
org.opensearch.indices.IndicesService; import org.opensearch.indices.breaker.CircuitBreakerService; import org.opensearch.ingest.IngestService; @@ -74,7 +74,7 @@ public class NodeService implements Closeable { private final HttpServerTransport httpServerTransport; private final ResponseCollectorService responseCollectorService; private final SearchTransportService searchTransportService; - private final IndexingPressure indexingPressure; + private final IndexingPressureService indexingPressureService; private final AggregationUsageService aggregationUsageService; private final Discovery discovery; @@ -84,7 +84,7 @@ public class NodeService implements Closeable { CircuitBreakerService circuitBreakerService, ScriptService scriptService, @Nullable HttpServerTransport httpServerTransport, IngestService ingestService, ClusterService clusterService, SettingsFilter settingsFilter, ResponseCollectorService responseCollectorService, - SearchTransportService searchTransportService, IndexingPressure indexingPressure, + SearchTransportService searchTransportService, IndexingPressureService indexingPressureService, AggregationUsageService aggregationUsageService) { this.settings = settings; this.threadPool = threadPool; @@ -100,7 +100,7 @@ public class NodeService implements Closeable { this.scriptService = scriptService; this.responseCollectorService = responseCollectorService; this.searchTransportService = searchTransportService; - this.indexingPressure = indexingPressure; + this.indexingPressureService = indexingPressureService; this.aggregationUsageService = aggregationUsageService; clusterService.addStateApplier(ingestService); } @@ -143,7 +143,7 @@ public NodeStats stats(CommonStatsFlags indices, boolean os, boolean process, bo ingest ? ingestService.stats() : null, adaptiveSelection ? responseCollectorService.getAdaptiveStats(searchTransportService.getPendingSearchRequests()) : null, scriptCache ? scriptService.cacheStats() : null, - indexingPressure ? 
this.indexingPressure.stats() : null + indexingPressure ? this.indexingPressureService.nodeStats() : null ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java index bdb45e66678c9..79d62b8242224 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java @@ -44,13 +44,14 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.AtomicArray; import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.index.IndexNotFoundException; +import org.opensearch.index.IndexingPressureService; import org.opensearch.index.VersionType; -import org.opensearch.index.IndexingPressure; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; import org.opensearch.test.OpenSearchTestCase; @@ -138,7 +139,9 @@ private void indicesThatCannotBeCreatedTestCase(Set expected, final ExecutorService direct = OpenSearchExecutors.newDirectExecutorService(); when(threadPool.executor(anyString())).thenReturn(direct); TransportBulkAction action = new TransportBulkAction(threadPool, mock(TransportService.class), clusterService, - null, null, null, mock(ActionFilters.class), null, null, new IndexingPressure(Settings.EMPTY), new SystemIndices(emptyMap())) { + null, null, null, mock(ActionFilters.class), null, null, + new IndexingPressureService(Settings.EMPTY, new ClusterService(Settings.EMPTY, new 
ClusterSettings(Settings.EMPTY, + ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null)), new SystemIndices(emptyMap())) { @Override void executeBulk(Task task, BulkRequest bulkRequest, long startTimeNanos, ActionListener listener, AtomicArray responses, Map indicesThatCannotBeCreated) { diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java index 529078100e4bd..084d3b060c5db 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java @@ -67,7 +67,7 @@ import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexSettings; -import org.opensearch.index.IndexingPressure; +import org.opensearch.index.IndexingPressureService; import org.opensearch.indices.SystemIndices; import org.opensearch.ingest.IngestService; import org.opensearch.tasks.Task; @@ -163,7 +163,8 @@ null, null, new ActionFilters(Collections.emptySet()), null, SETTINGS, new ClusterSettings(SETTINGS, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)), new SystemIndices(emptyMap()) - ), new IndexingPressure(SETTINGS), new SystemIndices(emptyMap()) + ), new IndexingPressureService(SETTINGS, new ClusterService(SETTINGS, new ClusterSettings(SETTINGS, + ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null)), new SystemIndices(emptyMap()) ); } @Override diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java index a4c2203208aa6..dc2afe2d084df 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java 
@@ -58,7 +58,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException; import org.opensearch.index.IndexNotFoundException; -import org.opensearch.index.IndexingPressure; +import org.opensearch.index.IndexingPressureService; import org.opensearch.index.VersionType; import org.opensearch.indices.SystemIndexDescriptor; import org.opensearch.indices.SystemIndices; @@ -105,7 +105,8 @@ class TestTransportBulkAction extends TransportBulkAction { super(TransportBulkActionTests.this.threadPool, transportService, clusterService, null, null, null, new ActionFilters(Collections.emptySet()), new Resolver(), new AutoCreateIndex(Settings.EMPTY, clusterService.getClusterSettings(), new Resolver(), new SystemIndices(emptyMap())), - new IndexingPressure(Settings.EMPTY), new SystemIndices(emptyMap())); + new IndexingPressureService(Settings.EMPTY, clusterService), + new SystemIndices(emptyMap())); } @Override diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java index 3a8afe70a03f6..d02df4575b7c1 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java @@ -55,8 +55,8 @@ import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.common.xcontent.XContentType; import org.opensearch.index.IndexNotFoundException; +import org.opensearch.index.IndexingPressureService; import org.opensearch.rest.action.document.RestBulkAction; -import org.opensearch.index.IndexingPressure; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; import org.opensearch.test.OpenSearchTestCase; @@ -69,11 +69,6 @@ import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; -import org.opensearch.action.bulk.BulkItemResponse; 
-import org.opensearch.action.bulk.BulkRequest; -import org.opensearch.action.bulk.BulkResponse; -import org.opensearch.action.bulk.TransportBulkAction; -import org.opensearch.action.bulk.TransportShardBulkAction; import java.nio.charset.StandardCharsets; import java.util.Collections; @@ -266,7 +261,7 @@ static class TestTransportBulkAction extends TransportBulkAction { actionFilters, indexNameExpressionResolver, autoCreateIndex, - new IndexingPressure(Settings.EMPTY), + new IndexingPressureService(Settings.EMPTY, clusterService), new SystemIndices(emptyMap()), relativeTimeProvider); } diff --git a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java index 4e93551a147e2..ada79679a98b1 100644 --- a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java +++ b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java @@ -33,8 +33,10 @@ import org.opensearch.Version; import org.opensearch.action.ActionListener; +import org.opensearch.index.Index; +import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; -import org.opensearch.index.IndexingPressure; +import org.opensearch.index.IndexingPressureService; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.ClusterState; @@ -51,8 +53,6 @@ import org.opensearch.common.network.NetworkService; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.PageCacheRecycler; -import org.opensearch.index.Index; -import org.opensearch.index.IndexService; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ReplicationGroup; import org.opensearch.index.shard.ShardId; @@ -162,7 +162,8 @@ public void testResyncDoesNotBlockOnPrimaryAction() throws Exception { final 
TransportResyncReplicationAction action = new TransportResyncReplicationAction(Settings.EMPTY, transportService, clusterService, indexServices, threadPool, shardStateAction, new ActionFilters(new HashSet<>()), - new IndexingPressure(Settings.EMPTY), new SystemIndices(emptyMap())); + new IndexingPressureService(Settings.EMPTY, clusterService), + new SystemIndices(emptyMap())); assertThat(action.globalBlockLevel(), nullValue()); assertThat(action.indexBlockLevel(), nullValue()); diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java new file mode 100644 index 0000000000000..10769016c8cdd --- /dev/null +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java @@ -0,0 +1,489 @@ +/* + * Copyright OpenSearch Contributors. + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.action.support.replication; + +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.indices.stats.CommonStatsFlags; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.action.support.WriteResponse; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Nullable; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import 
org.opensearch.index.Index; +import org.opensearch.index.IndexService; +import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.ShardIndexingPressureSettings; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.IndexShardState; +import org.opensearch.index.shard.ReplicationGroup; +import org.opensearch.index.shard.ShardId; +import org.opensearch.index.shard.ShardNotFoundException; +import org.opensearch.index.shard.ShardNotInPrimaryModeException; +import org.opensearch.index.stats.IndexingPressurePerShardStats; +import org.opensearch.index.translog.Translog; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.SystemIndices; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.CapturingTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportChannel; +import org.opensearch.transport.TransportResponse; +import org.opensearch.transport.TransportService; +import org.hamcrest.Matcher; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Locale; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import static java.util.Collections.emptyMap; +import static org.opensearch.action.support.replication.ClusterStateCreationUtils.state; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anyObject; 
+import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TransportWriteActionForIndexingPressureTests extends OpenSearchTestCase { + private static ThreadPool threadPool; + + private ClusterService clusterService; + private TransportService transportService; + private CapturingTransport transport; + private ShardStateAction shardStateAction; + private Translog.Location location; + private Releasable releasable; + private IndexingPressureService indexingPressureService; + + public static final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + @BeforeClass + public static void beforeClass() { + threadPool = new TestThreadPool("ShardReplicationTests"); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + transport = new CapturingTransport(); + clusterService = createClusterService(threadPool); + transportService = transport.createTransportService(clusterService.getSettings(), threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, x -> clusterService.localNode(), null, Collections.emptySet()); + transportService.start(); + transportService.acceptIncomingRequests(); + shardStateAction = new ShardStateAction(clusterService, transportService, null, null, threadPool); + releasable = mock(Releasable.class); + location = mock(Translog.Location.class); + } + + @Override + @After + public void tearDown() throws Exception { + super.tearDown(); + clusterService.close(); + } + + @AfterClass + public static void afterClass() { + ThreadPool.terminate(threadPool, 30, TimeUnit.SECONDS); + threadPool = null; + } + + public void testIndexingPressureOperationStartedForReplicaNode() { + final ShardId shardId = new ShardId("test", "_na_", 0); + final ClusterState state = state(shardId.getIndexName(), true, 
ShardRoutingState.STARTED, ShardRoutingState.STARTED); + setState(clusterService, state); + final ShardRouting replicaRouting = state.getRoutingTable().shardRoutingTable(shardId).replicaShards().get(0); + final ReplicationTask task = maybeTask(); + final Settings settings = Settings.builder().put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), false) + .build(); + this.indexingPressureService = new IndexingPressureService(settings, clusterService); + + TestAction action = new TestAction(settings, "internal:testAction", transportService, clusterService, + shardStateAction, threadPool); + + action.handleReplicaRequest( + new TransportReplicationAction.ConcreteReplicaRequest<>( + new TestRequest(), replicaRouting.allocationId().getId(), randomNonNegativeLong(), + randomNonNegativeLong(), randomNonNegativeLong()), + createTransportChannel(new PlainActionFuture<>()), task); + + IndexingPressurePerShardStats shardStats = + this.indexingPressureService.shardStats(CommonStatsFlags.ALL).getIndexingPressureShardStats(shardId); + + assertPhase(task, "finished"); + assertTrue(Objects.isNull(shardStats)); + } + + public void testIndexingPressureOperationStartedForReplicaShard() { + final ShardId shardId = new ShardId("test", "_na_", 0); + final ClusterState state = state(shardId.getIndexName(), true, ShardRoutingState.STARTED, ShardRoutingState.STARTED); + setState(clusterService, state); + final ShardRouting replicaRouting = state.getRoutingTable().shardRoutingTable(shardId).replicaShards().get(0); + final ReplicationTask task = maybeTask(); + final Settings settings = Settings.builder().put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .build(); + this.indexingPressureService = new IndexingPressureService(settings, clusterService); + + TestAction action = new TestAction(settings, "internal:testAction", transportService, clusterService, + shardStateAction, threadPool); + + action.handleReplicaRequest( + new 
TransportReplicationAction.ConcreteReplicaRequest<>( + new TestRequest(), replicaRouting.allocationId().getId(), randomNonNegativeLong(), + randomNonNegativeLong(), randomNonNegativeLong()), + createTransportChannel(new PlainActionFuture<>()), task); + + CommonStatsFlags statsFlag = new CommonStatsFlags(); + statsFlag.includeAllShardIndexingPressureTrackers(true); + IndexingPressurePerShardStats shardStats = + this.indexingPressureService.shardStats(statsFlag).getIndexingPressureShardStats(shardId); + + assertPhase(task, "finished"); + assertTrue(!Objects.isNull(shardStats)); + assertEquals(100, shardStats.getTotalReplicaBytes()); + } + + public void testIndexingPressureOperationStartedForPrimaryNode() { + final ShardId shardId = new ShardId("test", "_na_", 0); + final ClusterState state = state(shardId.getIndexName(), true, ShardRoutingState.STARTED, ShardRoutingState.STARTED); + setState(clusterService, state); + final ShardRouting replicaRouting = state.getRoutingTable().shardRoutingTable(shardId).replicaShards().get(0); + final ReplicationTask task = maybeTask(); + final Settings settings = + Settings.builder().put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), false).build(); + this.indexingPressureService = new IndexingPressureService(settings, clusterService); + + TestAction action = new TestAction(settings, "internal:testActionWithExceptions", transportService, clusterService, + shardStateAction, threadPool); + + action.handlePrimaryRequest( + new TransportReplicationAction.ConcreteReplicaRequest<>( + new TestRequest(), replicaRouting.allocationId().getId(), randomNonNegativeLong(), + randomNonNegativeLong(), randomNonNegativeLong()), + createTransportChannel(new PlainActionFuture<>()), task); + + IndexingPressurePerShardStats shardStats = + this.indexingPressureService.shardStats(CommonStatsFlags.ALL).getIndexingPressureShardStats(shardId); + + assertPhase(task, "finished"); + assertTrue(Objects.isNull(shardStats)); + } + + public 
void testIndexingPressureOperationStartedForPrimaryShard() { + final ShardId shardId = new ShardId("test", "_na_", 0); + final ClusterState state = state(shardId.getIndexName(), true, ShardRoutingState.STARTED, ShardRoutingState.STARTED); + setState(clusterService, state); + final ShardRouting replicaRouting = state.getRoutingTable().shardRoutingTable(shardId).replicaShards().get(0); + final ReplicationTask task = maybeTask(); + final Settings settings = + Settings.builder().put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true).build(); + this.indexingPressureService = new IndexingPressureService(settings, clusterService); + + TestAction action = new TestAction(settings, "internal:testActionWithExceptions", transportService, clusterService, + shardStateAction, threadPool); + + action.handlePrimaryRequest( + new TransportReplicationAction.ConcreteReplicaRequest<>( + new TestRequest(), replicaRouting.allocationId().getId(), randomNonNegativeLong(), + randomNonNegativeLong(), randomNonNegativeLong()), + createTransportChannel(new PlainActionFuture<>()), task); + + CommonStatsFlags statsFlag = new CommonStatsFlags(); + statsFlag.includeAllShardIndexingPressureTrackers(true); + IndexingPressurePerShardStats shardStats = + this.indexingPressureService.shardStats(statsFlag).getIndexingPressureShardStats(shardId); + + assertPhase(task, "finished"); + assertTrue(!Objects.isNull(shardStats)); + assertEquals(100, shardStats.getTotalPrimaryBytes()); + } + + public void testIndexingPressureOperationStartedForLocalPrimaryNode() { + final ShardId shardId = new ShardId("test", "_na_", 0); + final ClusterState state = state(shardId.getIndexName(), true, ShardRoutingState.STARTED, ShardRoutingState.STARTED); + setState(clusterService, state); + final ShardRouting replicaRouting = state.getRoutingTable().shardRoutingTable(shardId).replicaShards().get(0); + final ReplicationTask task = maybeTask(); + final Settings settings = 
Settings.builder().put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), false) + .build(); + this.indexingPressureService = new IndexingPressureService(settings, clusterService); + + TestAction action = new TestAction(settings, "internal:testAction", transportService, clusterService, + shardStateAction, threadPool); + + action.handlePrimaryRequest( + new TransportReplicationAction.ConcreteShardRequest<>( + new TestRequest(), replicaRouting.allocationId().getId(), randomNonNegativeLong(), + true, true), + createTransportChannel(new PlainActionFuture<>()), task); + + IndexingPressurePerShardStats shardStats = + this.indexingPressureService.shardStats(CommonStatsFlags.ALL).getIndexingPressureShardStats(shardId); + + assertPhase(task, "finished"); + assertTrue(Objects.isNull(shardStats)); + } + + public void testIndexingPressureOperationStartedForLocalPrimaryShard() { + final ShardId shardId = new ShardId("test", "_na_", 0); + final ClusterState state = state(shardId.getIndexName(), true, ShardRoutingState.STARTED, ShardRoutingState.STARTED); + setState(clusterService, state); + final ShardRouting replicaRouting = state.getRoutingTable().shardRoutingTable(shardId).replicaShards().get(0); + final ReplicationTask task = maybeTask(); + final Settings settings = Settings.builder().put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .build(); + this.indexingPressureService = new IndexingPressureService(settings, clusterService); + + TestAction action = new TestAction(settings, "internal:testAction", transportService, clusterService, + shardStateAction, threadPool); + + action.handlePrimaryRequest( + new TransportReplicationAction.ConcreteShardRequest<>( + new TestRequest(), replicaRouting.allocationId().getId(), randomNonNegativeLong(), + true, true), + createTransportChannel(new PlainActionFuture<>()), task); + + CommonStatsFlags statsFlag = new CommonStatsFlags(); + 
statsFlag.includeAllShardIndexingPressureTrackers(true); + IndexingPressurePerShardStats shardStats = + this.indexingPressureService.shardStats(statsFlag).getIndexingPressureShardStats(shardId); + + assertPhase(task, "finished"); + assertTrue(!Objects.isNull(shardStats)); + } + + private final AtomicInteger count = new AtomicInteger(0); + + private final AtomicBoolean isRelocated = new AtomicBoolean(false); + + private final AtomicBoolean isPrimaryMode = new AtomicBoolean(true); + + /** + * Sometimes build a ReplicationTask for tracking the phase of the + * TransportReplicationAction. Since TransportReplicationAction has to work + * if the task as null just as well as if it is supplied this returns null + * half the time. + */ + ReplicationTask maybeTask() { + return random().nextBoolean() ? new ReplicationTask(0, null, null, null, null, null) : null; + } + + /** + * If the task is non-null this asserts that the phrase matches. + */ + void assertPhase(@Nullable ReplicationTask task, String phase) { + assertPhase(task, equalTo(phase)); + } + + private void assertPhase(@Nullable ReplicationTask task, Matcher phaseMatcher) { + if (task != null) { + assertThat(task.getPhase(), phaseMatcher); + } + } + + private class TestAction extends TransportWriteAction { + protected TestAction(Settings settings, String actionName, TransportService transportService, + ClusterService clusterService, ShardStateAction shardStateAction, ThreadPool threadPool) { + super(settings, actionName, transportService, clusterService, + mockIndicesService(clusterService), threadPool, shardStateAction, + new ActionFilters(new HashSet<>()), TestRequest::new, TestRequest::new, ignore -> ThreadPool.Names.SAME, false, + TransportWriteActionForIndexingPressureTests.this.indexingPressureService, new SystemIndices(emptyMap())); + } + + + @Override + protected TestResponse newResponseInstance(StreamInput in) throws IOException { + return new TestResponse(); + } + + @Override + protected long 
primaryOperationSize(TestRequest request) { + return 100; + } + + @Override + protected long replicaOperationSize(TestRequest request) { + return 100; + } + + @Override + protected void dispatchedShardOperationOnPrimary( + TestRequest request, IndexShard primary, ActionListener> listener) { + ActionListener.completeWith(listener, () -> new WritePrimaryResult<>(request, new TestResponse(), location, null, primary, + logger)); + } + + @Override + protected void dispatchedShardOperationOnReplica(TestRequest request, IndexShard replica, ActionListener listener) { + ActionListener.completeWith(listener, () -> new WriteReplicaResult<>(request, location, null, replica, logger)); + } + + } + + private static class TestRequest extends ReplicatedWriteRequest { + TestRequest(StreamInput in) throws IOException { + super(in); + } + + TestRequest() { + super(new ShardId("test", "_na_", 0)); + } + + @Override + public String toString() { + return "TestRequest{}"; + } + } + + private static class TestResponse extends ReplicationResponse implements WriteResponse { + boolean forcedRefresh; + + @Override + public void setForcedRefresh(boolean forcedRefresh) { + this.forcedRefresh = forcedRefresh; + } + } + + private IndicesService mockIndicesService(ClusterService clusterService) { + final IndicesService indicesService = mock(IndicesService.class); + when(indicesService.indexServiceSafe(any(Index.class))).then(invocation -> { + Index index = (Index)invocation.getArguments()[0]; + final ClusterState state = clusterService.state(); + final IndexMetadata indexSafe = state.metadata().getIndexSafe(index); + return mockIndexService(indexSafe, clusterService); + }); + when(indicesService.indexService(any(Index.class))).then(invocation -> { + Index index = (Index) invocation.getArguments()[0]; + final ClusterState state = clusterService.state(); + if (state.metadata().hasIndex(index.getName())) { + return mockIndexService(clusterService.state().metadata().getIndexSafe(index), 
clusterService); + } else { + return null; + } + }); + return indicesService; + } + + private IndexService mockIndexService(final IndexMetadata indexMetaData, ClusterService clusterService) { + final IndexService indexService = mock(IndexService.class); + when(indexService.getShard(anyInt())).then(invocation -> { + int shard = (Integer) invocation.getArguments()[0]; + final ShardId shardId = new ShardId(indexMetaData.getIndex(), shard); + if (shard > indexMetaData.getNumberOfShards()) { + throw new ShardNotFoundException(shardId); + } + return mockIndexShard(shardId, clusterService); + }); + return indexService; + } + + @SuppressWarnings("unchecked") + private IndexShard mockIndexShard(ShardId shardId, ClusterService clusterService) { + final IndexShard indexShard = mock(IndexShard.class); + when(indexShard.shardId()).thenReturn(shardId); + when(indexShard.state()).thenReturn(IndexShardState.STARTED); + doAnswer(invocation -> { + ActionListener callback = (ActionListener) invocation.getArguments()[0]; + if (isPrimaryMode.get()) { + count.incrementAndGet(); + callback.onResponse(count::decrementAndGet); + + } else { + callback.onFailure(new ShardNotInPrimaryModeException(shardId, IndexShardState.STARTED)); + } + return null; + }).when(indexShard).acquirePrimaryOperationPermit(any(ActionListener.class), anyString(), anyObject()); + doAnswer(invocation -> { + long term = (Long)invocation.getArguments()[0]; + ActionListener callback = (ActionListener) invocation.getArguments()[3]; + final long primaryTerm = indexShard.getPendingPrimaryTerm(); + if (term < primaryTerm) { + throw new IllegalArgumentException(String.format(Locale.ROOT, "%s operation term [%d] is too old (current [%d])", + shardId, term, primaryTerm)); + } + count.incrementAndGet(); + callback.onResponse(count::decrementAndGet); + return null; + }).when(indexShard) + .acquireReplicaOperationPermit(anyLong(), anyLong(), anyLong(), any(ActionListener.class), anyString(), anyObject()); + 
when(indexShard.getActiveOperationsCount()).thenAnswer(i -> count.get()); + + when(indexShard.routingEntry()).thenAnswer(invocationOnMock -> { + final ClusterState state = clusterService.state(); + final RoutingNode node = state.getRoutingNodes().node(state.nodes().getLocalNodeId()); + final ShardRouting routing = node.getByShardId(shardId); + if (routing == null) { + throw new ShardNotFoundException(shardId, "shard is no longer assigned to current node"); + } + return routing; + }); + when(indexShard.isRelocatedPrimary()).thenAnswer(invocationOnMock -> isRelocated.get()); + doThrow(new AssertionError("failed shard is not supported")).when(indexShard).failShard(anyString(), any(Exception.class)); + when(indexShard.getPendingPrimaryTerm()).thenAnswer(i -> + clusterService.state().metadata().getIndexSafe(shardId.getIndex()).primaryTerm(shardId.id())); + + ReplicationGroup replicationGroup = mock(ReplicationGroup.class); + when(indexShard.getReplicationGroup()).thenReturn(replicationGroup); + return indexShard; + } + + /** + * Transport channel that is needed for testing. 
+ */ + public TransportChannel createTransportChannel(final PlainActionFuture listener) { + return new TransportChannel() { + + @Override + public String getProfileName() { + return ""; + } + + @Override + public void sendResponse(TransportResponse response) { + listener.onResponse(((TestResponse) response)); + } + + @Override + public void sendResponse(Exception exception) { + listener.onFailure(exception); + } + + @Override + public String getChannelType() { + return "replica_test"; + } + }; + } + +} diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java index 3ff154caec6de..31d4172cdb372 100644 --- a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java @@ -34,7 +34,6 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; -import org.opensearch.index.IndexingPressure; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.ActionTestUtils; import org.opensearch.action.support.PlainActionFuture; @@ -56,6 +55,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.index.Index; import org.opensearch.index.IndexService; +import org.opensearch.index.IndexingPressureService; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; import org.opensearch.index.shard.ShardNotFoundException; @@ -382,7 +382,8 @@ protected TestAction(boolean withDocumentFailureOnPrimary, boolean withDocumentF new TransportService(Settings.EMPTY, mock(Transport.class), null, TransportService.NOOP_TRANSPORT_INTERCEPTOR, x -> null, null, Collections.emptySet()), TransportWriteActionTests.this.clusterService, null, null, null, new ActionFilters(new HashSet<>()), TestRequest::new, 
TestRequest::new, ignore -> ThreadPool.Names.SAME, false, - new IndexingPressure(Settings.EMPTY), new SystemIndices(emptyMap())); + new IndexingPressureService(Settings.EMPTY, TransportWriteActionTests.this.clusterService), + new SystemIndices(emptyMap())); this.withDocumentFailureOnPrimary = withDocumentFailureOnPrimary; this.withDocumentFailureOnReplica = withDocumentFailureOnReplica; } @@ -392,7 +393,7 @@ protected TestAction(Settings settings, String actionName, TransportService tran super(settings, actionName, transportService, clusterService, mockIndicesService(clusterService), threadPool, shardStateAction, new ActionFilters(new HashSet<>()), TestRequest::new, TestRequest::new, ignore -> ThreadPool.Names.SAME, false, - new IndexingPressure(settings), new SystemIndices(emptyMap())); + new IndexingPressureService(settings, clusterService), new SystemIndices(emptyMap())); this.withDocumentFailureOnPrimary = false; this.withDocumentFailureOnReplica = false; } diff --git a/server/src/test/java/org/opensearch/index/IndexingPressureTests.java b/server/src/test/java/org/opensearch/index/IndexingPressureTests.java index 58f492b86ac21..b74792333685d 100644 --- a/server/src/test/java/org/opensearch/index/IndexingPressureTests.java +++ b/server/src/test/java/org/opensearch/index/IndexingPressureTests.java @@ -40,7 +40,8 @@ public class IndexingPressureTests extends OpenSearchTestCase { - private final Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB").build(); + private final Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), false).build(); public void testMemoryBytesMarkedAndReleased() { IndexingPressure indexingPressure = new IndexingPressure(settings); diff --git a/server/src/test/java/org/opensearch/index/ShardIndexingPressureMemoryManagerTests.java 
b/server/src/test/java/org/opensearch/index/ShardIndexingPressureMemoryManagerTests.java new file mode 100644 index 0000000000000..e95264b740618 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/ShardIndexingPressureMemoryManagerTests.java @@ -0,0 +1,561 @@ +/* + * Copyright OpenSearch Contributors. + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.shard.ShardId; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Collections; +import java.util.Map; + +public class ShardIndexingPressureMemoryManagerTests extends OpenSearchTestCase { + + private final Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 2) + .build(); + private final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + private final ShardIndexingPressureSettings shardIndexingPressureSettings = + new ShardIndexingPressureSettings(new ClusterService(settings, clusterSettings, null), settings, + IndexingPressure.MAX_INDEXING_BYTES.get(settings).getBytes()); + + private final Index index = new Index("IndexName", "UUID"); + private final ShardId shardId1 = new ShardId(index, 0); + private final ShardId shardId2 = new ShardId(index, 1); + + public void testCoordinatingPrimaryShardLimitsNotBreached() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new 
ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker = store.getShardIndexingPressureTracker(shardId1); + tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(1); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertFalse(memoryManager.isCoordinatingShardLimitBreached(tracker, requestStartTime, hotStore, 1 * 1024)); + assertFalse(memoryManager.isPrimaryShardLimitBreached(tracker, requestStartTime, hotStore, 1 * 1024)); + } + + public void testReplicaShardLimitsNotBreached() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker = store.getShardIndexingPressureTracker(shardId1); + tracker.memory().getCurrentReplicaBytes().addAndGet(1); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = Collections.singletonMap((long) shardId1.hashCode(), tracker); + + assertFalse(memoryManager.isReplicaShardLimitBreached(tracker, requestStartTime, hotStore, 1 * 1024)); + } + + public void testCoordinatingPrimaryShardLimitsIncreasedAndSoftLimitNotBreached() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker = store.getShardIndexingPressureTracker(shardId1); + tracker.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(10); + long baseLimit = tracker.getPrimaryAndCoordinatingLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = 
store.getShardIndexingPressureHotStore(); + + assertFalse(memoryManager.isCoordinatingShardLimitBreached(tracker, requestStartTime, hotStore, 1 * 1024)); + assertFalse(memoryManager.isPrimaryShardLimitBreached(tracker, requestStartTime, hotStore, 1 * 1024)); + + assertTrue(tracker.getPrimaryAndCoordinatingLimits().get() > baseLimit); + assertEquals(tracker.getPrimaryAndCoordinatingLimits().get(), (long)(baseLimit/0.85)); + } + + public void testReplicaShardLimitsIncreasedAndSoftLimitNotBreached() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker = store.getShardIndexingPressureTracker(shardId1); + tracker.memory().getCurrentReplicaBytes().addAndGet(15); + long baseLimit = tracker.getReplicaLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertFalse(memoryManager.isReplicaShardLimitBreached(tracker, requestStartTime, hotStore, 1 * 1024)); + assertTrue(tracker.getReplicaLimits().get() > baseLimit); + assertEquals(tracker.getReplicaLimits().get(), (long)(baseLimit/0.85)); + } + + public void testCoordinatingPrimarySoftLimitNotBreachedAndNodeLevelRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(4 * 1024); + 
tracker2.getPrimaryAndCoordinatingLimits().addAndGet(6 * 1024); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + long limit2 = tracker2.getPrimaryAndCoordinatingLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertTrue(memoryManager.isCoordinatingShardLimitBreached(tracker1, requestStartTime, hotStore, 6 * 1024)); + assertEquals(1, tracker1.rejection().getCoordinatingNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getCoordinatingNodeLimitsBreachedRejections().get()); + + assertTrue(memoryManager.isPrimaryShardLimitBreached(tracker1, requestStartTime, hotStore, 6 * 1024)); + assertEquals(1, tracker1.rejection().getPrimaryNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getPrimaryNodeLimitsBreachedRejections().get()); + + assertEquals(limit1, tracker1.getPrimaryAndCoordinatingLimits().get()); + assertEquals(limit2, tracker2.getPrimaryAndCoordinatingLimits().get()); + assertEquals(2, memoryManager.totalNodeLimitsBreachedRejections.get()); + } + + public void testReplicaShardLimitsSoftLimitNotBreachedAndNodeLevelRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentReplicaBytes().addAndGet(5 * 1024); + tracker2.getReplicaLimits().addAndGet(10 * 1024); + long limit1 = tracker1.getReplicaLimits().get(); + long limit2 = tracker2.getReplicaLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + 
+ assertTrue(memoryManager.isReplicaShardLimitBreached(tracker1, requestStartTime, hotStore, 10 * 1024)); + assertEquals(limit1, tracker1.getReplicaLimits().get()); + assertEquals(limit2, tracker2.getReplicaLimits().get()); + assertEquals(1, tracker1.rejection().getReplicaNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getReplicaNodeLimitsBreachedRejections().get()); + assertEquals(1, memoryManager.totalNodeLimitsBreachedRejections.get()); + } + + public void testCoordinatingPrimarySoftLimitBreachedAndNodeLevelRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(4 * 1024); + tracker2.getPrimaryAndCoordinatingLimits().addAndGet(6 * 1024); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + long limit2 = tracker2.getPrimaryAndCoordinatingLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertTrue(memoryManager.isCoordinatingShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(1, tracker1.rejection().getCoordinatingNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getCoordinatingNodeLimitsBreachedRejections().get()); + + assertTrue(memoryManager.isPrimaryShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(1, tracker1.rejection().getPrimaryNodeLimitsBreachedRejections().get()); + assertEquals(0, 
tracker2.rejection().getPrimaryNodeLimitsBreachedRejections().get()); + + assertEquals(limit1, tracker1.getPrimaryAndCoordinatingLimits().get()); + assertEquals(limit2, tracker2.getPrimaryAndCoordinatingLimits().get()); + assertEquals(2, memoryManager.totalNodeLimitsBreachedRejections.get()); + } + + public void testReplicaShardLimitsSoftLimitBreachedAndNodeLevelRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentReplicaBytes().addAndGet(5 * 1024); + tracker2.getReplicaLimits().addAndGet(12 * 1024); + long limit1 = tracker1.getReplicaLimits().get(); + long limit2 = tracker2.getReplicaLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertTrue(memoryManager.isReplicaShardLimitBreached(tracker1, requestStartTime, hotStore, 12 * 1024)); + assertEquals(limit1, tracker1.getReplicaLimits().get()); + assertEquals(limit2, tracker2.getReplicaLimits().get()); + assertEquals(1, tracker1.rejection().getReplicaNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getReplicaNodeLimitsBreachedRejections().get()); + assertEquals(1, memoryManager.totalNodeLimitsBreachedRejections.get()); + } + + public void testCoordinatingPrimarySoftLimitBreachedAndLastSuccessfulRequestLimitRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new 
ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(4 * 1024); + tracker2.getPrimaryAndCoordinatingLimits().addAndGet(6 * 1024); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + long limit2 = tracker2.getPrimaryAndCoordinatingLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + + tracker1.timeStamp().getLastSuccessfulCoordinatingRequestTimestamp().addAndGet(requestStartTime - 100); + tracker1.outstandingRequest().getTotalOutstandingCoordinatingRequests().addAndGet(2); + + assertTrue(memoryManager.isCoordinatingShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(1, tracker1.rejection().getCoordinatingLastSuccessfulRequestLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getCoordinatingLastSuccessfulRequestLimitsBreachedRejections().get()); + + tracker1.timeStamp().getLastSuccessfulPrimaryRequestTimestamp().addAndGet(requestStartTime - 100); + tracker1.outstandingRequest().getTotalOutstandingPrimaryRequests().addAndGet(2); + + assertTrue(memoryManager.isPrimaryShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(1, tracker1.rejection().getPrimaryLastSuccessfulRequestLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getPrimaryLastSuccessfulRequestLimitsBreachedRejections().get()); + + assertEquals(limit1, tracker1.getPrimaryAndCoordinatingLimits().get()); + assertEquals(limit2, tracker2.getPrimaryAndCoordinatingLimits().get()); + assertEquals(2, memoryManager.totalLastSuccessfulRequestLimitsBreachedRejections.get()); + } + + public void 
testReplicaShardLimitsSoftLimitBreachedAndLastSuccessfulRequestLimitRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentReplicaBytes().addAndGet(5 * 1024); + tracker2.getReplicaLimits().addAndGet(12 * 1024); + long limit1 = tracker1.getReplicaLimits().get(); + long limit2 = tracker2.getReplicaLimits().get(); + long requestStartTime = System.currentTimeMillis(); + tracker1.timeStamp().getLastSuccessfulReplicaRequestTimestamp().addAndGet(requestStartTime - 100); + tracker1.outstandingRequest().getTotalOutstandingReplicaRequests().addAndGet(2); + + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertTrue(memoryManager.isReplicaShardLimitBreached(tracker1, requestStartTime, hotStore, 12 * 1024)); + assertEquals(limit1, tracker1.getReplicaLimits().get()); + assertEquals(limit2, tracker2.getReplicaLimits().get()); + assertEquals(1, tracker1.rejection().getReplicaLastSuccessfulRequestLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getReplicaLastSuccessfulRequestLimitsBreachedRejections().get()); + assertEquals(1, memoryManager.totalLastSuccessfulRequestLimitsBreachedRejections.get()); + } + + public void testCoordinatingPrimarySoftLimitBreachedAndLessOutstandingRequestsAndNoLastSuccessfulRequestLimitRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, 
settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(1 * 1024); + tracker2.getPrimaryAndCoordinatingLimits().addAndGet(6 * 1024); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + long limit2 = tracker2.getPrimaryAndCoordinatingLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + tracker1.timeStamp().getLastSuccessfulCoordinatingRequestTimestamp().addAndGet(requestStartTime - 100); + tracker1.outstandingRequest().getTotalOutstandingCoordinatingRequests().addAndGet(1); + + assertFalse(memoryManager.isCoordinatingShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(0, tracker1.rejection().getCoordinatingLastSuccessfulRequestLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getCoordinatingLastSuccessfulRequestLimitsBreachedRejections().get()); + + tracker1.timeStamp().getLastSuccessfulPrimaryRequestTimestamp().addAndGet(requestStartTime - 100); + tracker1.outstandingRequest().getTotalOutstandingPrimaryRequests().addAndGet(1); + + assertFalse(memoryManager.isPrimaryShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(0, tracker1.rejection().getPrimaryLastSuccessfulRequestLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getPrimaryLastSuccessfulRequestLimitsBreachedRejections().get()); + + assertTrue(tracker1.getPrimaryAndCoordinatingLimits().get() > limit1); + assertEquals((long)(1 * 1024/0.85), tracker1.getPrimaryAndCoordinatingLimits().get()); + assertEquals(limit2, tracker2.getPrimaryAndCoordinatingLimits().get()); + assertEquals(0, memoryManager.totalLastSuccessfulRequestLimitsBreachedRejections.get()); + } + + public void 
testReplicaShardLimitsSoftLimitBreachedAndLessOutstandingRequestsAndNoLastSuccessfulRequestLimitRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentReplicaBytes().addAndGet(2 * 1024); + tracker2.getReplicaLimits().addAndGet(12 * 1024); + long limit1 = tracker1.getReplicaLimits().get(); + long limit2 = tracker2.getReplicaLimits().get(); + long requestStartTime = System.currentTimeMillis(); + tracker1.timeStamp().getLastSuccessfulReplicaRequestTimestamp().addAndGet(requestStartTime - 100); + tracker1.outstandingRequest().getTotalOutstandingReplicaRequests().addAndGet(1); + + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertFalse(memoryManager.isReplicaShardLimitBreached(tracker1, requestStartTime, hotStore, 12 * 1024)); + assertTrue(tracker1.getReplicaLimits().get() > limit1); + assertEquals((long)(2 * 1024/0.85), tracker1.getReplicaLimits().get()); + assertEquals(limit2, tracker2.getReplicaLimits().get()); + assertEquals(0, tracker1.rejection().getReplicaLastSuccessfulRequestLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getReplicaLastSuccessfulRequestLimitsBreachedRejections().get()); + assertEquals(0, memoryManager.totalLastSuccessfulRequestLimitsBreachedRejections.get()); + } + + public void testCoordinatingPrimarySoftLimitBreachedAndThroughputDegradationLimitRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new 
ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(4 * 1024); + tracker2.getPrimaryAndCoordinatingLimits().addAndGet(6 * 1024); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + long limit2 = tracker2.getPrimaryAndCoordinatingLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + tracker1.throughput().getCoordinatingThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingCoordinatingRequests().addAndGet(2); + tracker1.memory().getTotalCoordinatingBytes().addAndGet(60); + tracker1.latency().getCoordinatingTimeInMillis().addAndGet(10); + tracker1.throughput().getCoordinatingThroughputMovingQueue().offer(1d); + tracker1.throughput().getCoordinatingThroughputMovingQueue().offer(2d); + + assertTrue(memoryManager.isCoordinatingShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(1, tracker1.rejection().getCoordinatingThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getCoordinatingThroughputDegradationLimitsBreachedRejections().get()); + + tracker1.throughput().getPrimaryThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingPrimaryRequests().addAndGet(2); + tracker1.memory().getTotalPrimaryBytes().addAndGet(60); + tracker1.latency().getPrimaryTimeInMillis().addAndGet(10); + tracker1.throughput().getPrimaryThroughputMovingQueue().offer(1d); + tracker1.throughput().getPrimaryThroughputMovingQueue().offer(2d); + + assertTrue(memoryManager.isPrimaryShardLimitBreached(tracker1, requestStartTime, 
hotStore, 8 * 1024)); + assertEquals(1, tracker1.rejection().getPrimaryThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getPrimaryThroughputDegradationLimitsBreachedRejections().get()); + + assertEquals(limit1, tracker1.getPrimaryAndCoordinatingLimits().get()); + assertEquals(limit2, tracker2.getPrimaryAndCoordinatingLimits().get()); + assertEquals(2, memoryManager.totalThroughputDegradationLimitsBreachedRejections.get()); + } + + public void testReplicaShardLimitsSoftLimitBreachedAndThroughputDegradationLimitRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentReplicaBytes().addAndGet(5 * 1024); + tracker2.getReplicaLimits().addAndGet(12 * 1024); + long limit1 = tracker1.getReplicaLimits().get(); + long limit2 = tracker2.getReplicaLimits().get(); + long requestStartTime = System.currentTimeMillis(); + tracker1.throughput().getReplicaThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingReplicaRequests().addAndGet(2); + tracker1.memory().getTotalReplicaBytes().addAndGet(80); + tracker1.latency().getReplicaTimeInMillis().addAndGet(10); + tracker1.throughput().getReplicaThroughputMovingQueue().offer(1d); + tracker1.throughput().getReplicaThroughputMovingQueue().offer(2d); + + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertTrue(memoryManager.isReplicaShardLimitBreached(tracker1, requestStartTime, hotStore, 12 * 1024)); + assertEquals(limit1, tracker1.getReplicaLimits().get()); + 
assertEquals(limit2, tracker2.getReplicaLimits().get()); + assertEquals(1, tracker1.rejection().getReplicaThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getReplicaThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(1, memoryManager.totalThroughputDegradationLimitsBreachedRejections.get()); + } + + public void testCoordinatingPrimarySoftLimitBreachedAndMovingAverageQueueNotBuildUpAndNoThroughputDegradationLimitRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(1 * 1024); + tracker2.getPrimaryAndCoordinatingLimits().addAndGet(6 * 1024); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + long limit2 = tracker2.getPrimaryAndCoordinatingLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + tracker1.throughput().getCoordinatingThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingCoordinatingRequests().addAndGet(1); + tracker1.memory().getTotalCoordinatingBytes().addAndGet(60); + tracker1.latency().getCoordinatingTimeInMillis().addAndGet(10); + tracker1.throughput().getCoordinatingThroughputMovingQueue().offer(1d); + + assertFalse(memoryManager.isCoordinatingShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(0, tracker1.rejection().getCoordinatingThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, 
tracker2.rejection().getCoordinatingThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, tracker1.rejection().getCoordinatingNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getCoordinatingNodeLimitsBreachedRejections().get()); + + tracker1.throughput().getPrimaryThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingPrimaryRequests().addAndGet(1); + tracker1.memory().getTotalPrimaryBytes().addAndGet(60); + tracker1.latency().getPrimaryTimeInMillis().addAndGet(10); + tracker1.throughput().getPrimaryThroughputMovingQueue().offer(1d); + + assertFalse(memoryManager.isPrimaryShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(0, tracker1.rejection().getPrimaryThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getPrimaryThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, tracker1.rejection().getPrimaryNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getPrimaryNodeLimitsBreachedRejections().get()); + + assertTrue(tracker1.getPrimaryAndCoordinatingLimits().get() > limit1); + assertEquals((long)(1 * 1024/0.85), tracker1.getPrimaryAndCoordinatingLimits().get()); + assertEquals(limit2, tracker2.getPrimaryAndCoordinatingLimits().get()); + assertEquals(0, memoryManager.totalThroughputDegradationLimitsBreachedRejections.get()); + } + + public void testReplicaShardLimitsSoftLimitBreachedAndMovingAverageQueueNotBuildUpAndNThroughputDegradationLimitRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + 
ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentReplicaBytes().addAndGet(2 * 1024); + tracker2.getReplicaLimits().addAndGet(12 * 1024); + long limit1 = tracker1.getReplicaLimits().get(); + long limit2 = tracker2.getReplicaLimits().get(); + long requestStartTime = System.currentTimeMillis(); + tracker1.throughput().getReplicaThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingReplicaRequests().addAndGet(1); + tracker1.memory().getTotalReplicaBytes().addAndGet(80); + tracker1.latency().getReplicaTimeInMillis().addAndGet(10); + tracker1.throughput().getReplicaThroughputMovingQueue().offer(1d); + + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertFalse(memoryManager.isReplicaShardLimitBreached(tracker1, requestStartTime, hotStore, 12 * 1024)); + assertTrue(tracker1.getReplicaLimits().get() > limit1); + assertEquals((long)(2 * 1024/0.85), tracker1.getReplicaLimits().get()); + assertEquals(limit2, tracker2.getReplicaLimits().get()); + assertEquals(0, tracker1.rejection().getReplicaThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getReplicaThroughputDegradationLimitsBreachedRejections().get()); + assertEquals(0, tracker1.rejection().getReplicaNodeLimitsBreachedRejections().get()); + assertEquals(0, memoryManager.totalThroughputDegradationLimitsBreachedRejections.get()); + } + + public void testCoordinatingPrimarySoftLimitBreachedAndNoSecondaryParameterBreachedAndNodeLevelRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + 
ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(4 * 1024); + tracker2.getPrimaryAndCoordinatingLimits().addAndGet(6 * 1024); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + long limit2 = tracker2.getPrimaryAndCoordinatingLimits().get(); + long requestStartTime = System.currentTimeMillis(); + Map hotStore = store.getShardIndexingPressureHotStore(); + + tracker1.throughput().getCoordinatingThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingCoordinatingRequests().addAndGet(1); + tracker1.memory().getTotalCoordinatingBytes().addAndGet(60); + tracker1.latency().getCoordinatingTimeInMillis().addAndGet(10); + tracker1.throughput().getCoordinatingThroughputMovingQueue().offer(1d); + + assertTrue(memoryManager.isCoordinatingShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(1, tracker1.rejection().getCoordinatingNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getCoordinatingNodeLimitsBreachedRejections().get()); + + tracker1.throughput().getPrimaryThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingPrimaryRequests().addAndGet(1); + tracker1.memory().getTotalPrimaryBytes().addAndGet(60); + tracker1.latency().getPrimaryTimeInMillis().addAndGet(10); + tracker1.throughput().getPrimaryThroughputMovingQueue().offer(1d); + + assertTrue(memoryManager.isPrimaryShardLimitBreached(tracker1, requestStartTime, hotStore, 8 * 1024)); + assertEquals(1, tracker1.rejection().getPrimaryNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getPrimaryNodeLimitsBreachedRejections().get()); + + assertEquals(limit1, tracker1.getPrimaryAndCoordinatingLimits().get()); + assertEquals(limit2, tracker2.getPrimaryAndCoordinatingLimits().get()); + 
assertEquals(2, memoryManager.totalNodeLimitsBreachedRejections.get()); + } + + public void testReplicaShardLimitsSoftLimitBreachedAndNoSecondaryParameterBreachedAndNodeLevelRejections() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + ShardIndexingPressureTracker tracker2 = store.getShardIndexingPressureTracker(shardId2); + tracker1.memory().getCurrentReplicaBytes().addAndGet(5 * 1024); + tracker2.getReplicaLimits().addAndGet(12 * 1024); + long limit1 = tracker1.getReplicaLimits().get(); + long limit2 = tracker2.getReplicaLimits().get(); + long requestStartTime = System.currentTimeMillis(); + tracker1.throughput().getReplicaThroughputMovingAverage().addAndGet(Double.doubleToLongBits(1d)); + tracker1.outstandingRequest().getTotalOutstandingReplicaRequests().addAndGet(1); + tracker1.memory().getTotalReplicaBytes().addAndGet(80); + tracker1.latency().getReplicaTimeInMillis().addAndGet(10); + tracker1.throughput().getReplicaThroughputMovingQueue().offer(1d); + + Map hotStore = store.getShardIndexingPressureHotStore(); + + assertTrue(memoryManager.isReplicaShardLimitBreached(tracker1, requestStartTime, hotStore, 12 * 1024)); + assertEquals(limit1, tracker1.getReplicaLimits().get()); + assertEquals(limit2, tracker2.getReplicaLimits().get()); + assertEquals(1, tracker1.rejection().getReplicaNodeLimitsBreachedRejections().get()); + assertEquals(0, tracker2.rejection().getReplicaNodeLimitsBreachedRejections().get()); + assertEquals(1, memoryManager.totalNodeLimitsBreachedRejections.get()); + } + + public void testDecreaseShardPrimaryAndCoordinatingLimitsToBaseLimit() { + ShardIndexingPressureMemoryManager memoryManager = new 
ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + tracker1.getPrimaryAndCoordinatingLimits().addAndGet(1 * 1024); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(0); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + memoryManager.decreaseShardPrimaryAndCoordinatingLimits(tracker1); + + assertTrue(tracker1.getPrimaryAndCoordinatingLimits().get() < limit1); + assertEquals(10, tracker1.getPrimaryAndCoordinatingLimits().get()); + } + + public void testDecreaseShardReplicaLimitsToBaseLimit() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + + tracker1.getReplicaLimits().addAndGet(1 * 1024); + tracker1.memory().getCurrentReplicaBytes().addAndGet(0); + long limit1 = tracker1.getReplicaLimits().get(); + memoryManager.decreaseShardReplicaLimits(tracker1); + + assertTrue(tracker1.getReplicaLimits().get() < limit1); + assertEquals(15, tracker1.getReplicaLimits().get()); + } + + public void testDecreaseShardPrimaryAndCoordinatingLimits() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + 
tracker1.getPrimaryAndCoordinatingLimits().addAndGet(1 * 1024); + tracker1.memory().getCurrentCombinedCoordinatingAndPrimaryBytes().addAndGet(512); + long limit1 = tracker1.getPrimaryAndCoordinatingLimits().get(); + memoryManager.decreaseShardPrimaryAndCoordinatingLimits(tracker1); + + assertTrue(tracker1.getPrimaryAndCoordinatingLimits().get() < limit1); + assertEquals((long)(512/0.85), tracker1.getPrimaryAndCoordinatingLimits().get()); + } + + public void testDecreaseShardReplicaLimits() { + ShardIndexingPressureMemoryManager memoryManager = new ShardIndexingPressureMemoryManager(shardIndexingPressureSettings, + clusterSettings, settings); + ShardIndexingPressureStore store = new ShardIndexingPressureStore(shardIndexingPressureSettings, clusterSettings, settings); + ShardIndexingPressureTracker tracker1 = store.getShardIndexingPressureTracker(shardId1); + + tracker1.getReplicaLimits().addAndGet(1 * 1024); + tracker1.memory().getCurrentReplicaBytes().addAndGet(512); + long limit1 = tracker1.getReplicaLimits().get(); + memoryManager.decreaseShardReplicaLimits(tracker1); + + assertTrue(tracker1.getReplicaLimits().get() < limit1); + assertEquals((long)(512/0.85), tracker1.getReplicaLimits().get()); + } +} diff --git a/server/src/test/java/org/opensearch/index/ShardIndexingPressureMultiThreadedTests.java b/server/src/test/java/org/opensearch/index/ShardIndexingPressureMultiThreadedTests.java new file mode 100644 index 0000000000000..97b55dbcbb6da --- /dev/null +++ b/server/src/test/java/org/opensearch/index/ShardIndexingPressureMultiThreadedTests.java @@ -0,0 +1,969 @@ +/* + * Copyright OpenSearch Contributors. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index; + +import org.opensearch.action.admin.indices.stats.CommonStatsFlags; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException; +import org.opensearch.index.shard.ShardId; +import org.opensearch.index.stats.IndexingPressurePerShardStats; +import org.opensearch.index.stats.IndexingPressureStats; +import org.opensearch.index.stats.ShardIndexingPressureStats; +import org.opensearch.test.OpenSearchTestCase; +import org.hamcrest.Matchers; + +import java.util.concurrent.atomic.AtomicInteger; + +public class ShardIndexingPressureMultiThreadedTests extends OpenSearchTestCase { + + private final Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 100) + .build(); + + final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + final ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + + public void testCoordinatingPrimaryThreadedUpdateToShardLimits() throws Exception { + final int NUM_THREADS = scaledRandomIntBetween(100, 500); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, 
clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + if(randomBoolean){ + releasables[counter] = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 15, false); + } else { + releasables[counter] = shardIndexingPressure.markPrimaryOperationStarted(shardId1, 15, false); + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + if(randomBoolean) { + assertEquals(NUM_THREADS * 15, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + } else { + assertEquals(NUM_THREADS * 15, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryBytes()); + } + assertEquals(NUM_THREADS * 15, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertTrue((double) (NUM_THREADS * 15) / shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits() < 0.95); + assertTrue((double) (NUM_THREADS * 15) / shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits() > 0.75); + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + + if(randomBoolean) { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + } else { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryBytes()); + } + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + 
.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaThreadedUpdateToShardLimits() throws Exception { + final int NUM_THREADS = scaledRandomIntBetween(100, 500); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + releasables[counter] = shardIndexingPressure.markReplicaOperationStarted(shardId1, 15, false); + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + assertEquals(NUM_THREADS * 15, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentReplicaBytes()); + assertTrue((double)(NUM_THREADS * 15) / shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentReplicaLimits() < 0.95); + assertTrue((double)(NUM_THREADS * 15) / shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentReplicaLimits() > 0.75); + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes()); + assertEquals(15, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits()); + } + + public void testCoordinatingPrimaryThreadedSimultaneousUpdateToShardLimits() throws Exception { + final int NUM_THREADS = scaledRandomIntBetween(100, 500); 
+ final Thread[] threads = new Thread[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + for (int i = 0; i < NUM_THREADS; i++) { + threads[i] = new Thread(() -> { + if(randomBoolean) { + Releasable coodinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 100, false); + coodinating.close(); + } else { + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId1, 100, false); + primary.close(); + } + }); + try { + Thread.sleep(randomIntBetween(5, 15)); + } catch (InterruptedException e) { + //Do Nothing + } + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + if(randomBoolean) { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + } else { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryBytes()); + } + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaThreadedSimultaneousUpdateToShardLimits() throws Exception { + final int NUM_THREADS = scaledRandomIntBetween(100, 500); + final Thread[] threads = new Thread[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + for (int i = 0; i < NUM_THREADS; i++) { + 
threads[i] = new Thread(() -> { + Releasable coodinating = shardIndexingPressure.markReplicaOperationStarted(shardId1, 100, false); + coodinating.close(); + }); + try { + Thread.sleep(randomIntBetween(5, 15)); + } catch (InterruptedException e) { + //Do Nothing + } + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes()); + assertEquals(15, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits()); + } + + public void testCoordinatingPrimaryThreadedUpdateToShardLimitsWithRandomBytes() throws Exception { + final int NUM_THREADS = scaledRandomIntBetween(100, 400); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + if(randomBoolean) { + releasables[counter] = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, + scaledRandomIntBetween(1, 20), false); + } else { + releasables[counter] = shardIndexingPressure.markPrimaryOperationStarted(shardId1, + scaledRandomIntBetween(1, 20), false); + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + + if(randomBoolean) { + assertEquals(0, 
shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + } else { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryBytes()); + } + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaThreadedUpdateToShardLimitsWithRandomBytes() throws Exception { + final int NUM_THREADS = scaledRandomIntBetween(100, 400); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + releasables[counter] = shardIndexingPressure.markReplicaOperationStarted(shardId1, + scaledRandomIntBetween(1, 20), false); + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes()); + assertEquals(15, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits()); + } + + public void testCoordinatingPrimaryThreadedUpdateToShardLimitsAndRejections() throws Exception { + final int NUM_THREADS = 100; + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new 
Releasable[NUM_THREADS]; + AtomicInteger rejectionCount = new AtomicInteger(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + try { + if(randomBoolean) { + releasables[counter] = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 200, false); + } else { + releasables[counter] = shardIndexingPressure.markPrimaryOperationStarted(shardId1, 200, false); + } + } catch (OpenSearchRejectedExecutionException e) { + rejectionCount.addAndGet(1); + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + ShardIndexingPressureStats shardStats = shardIndexingPressure.shardStats(); + if(randomBoolean) { + assertEquals(rejectionCount.get(), nodeStats.getCoordinatingRejections()); + assertTrue(shardStats.getIndexingPressureShardStats(shardId1).getCurrentCoordinatingBytes() < 50 * 200); + } else { + assertTrue(shardStats.getIndexingPressureShardStats(shardId1).getCurrentPrimaryBytes() < 50 * 200); + assertEquals(rejectionCount.get(), nodeStats.getPrimaryRejections()); + } + assertTrue(nodeStats.getCurrentCombinedCoordinatingAndPrimaryBytes() < 50 * 200); + assertTrue(shardStats.getIndexingPressureShardStats(shardId1).getCurrentCombinedCoordinatingAndPrimaryBytes() < 50 * 200); + + for (int i = 0; i < NUM_THREADS - rejectionCount.get(); i++) { + releasables[i].close(); + } + + nodeStats = shardIndexingPressure.stats(); + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + shardStats = shardIndexingPressure.coldStats(); + if(randomBoolean) { + assertEquals(rejectionCount.get(), 
nodeStats.getCoordinatingRejections()); + assertEquals(rejectionCount.get(), shardStats.getIndexingPressureShardStats(shardId1) + .getCoordinatingNodeLimitsBreachedRejections()); + assertEquals(0, shardStats.getIndexingPressureShardStats(shardId1).getCurrentCoordinatingBytes()); + } else { + assertEquals(rejectionCount.get(), nodeStats.getPrimaryRejections()); + assertEquals(rejectionCount.get(), shardStats.getIndexingPressureShardStats(shardId1) + .getPrimaryNodeLimitsBreachedRejections()); + assertEquals(0, shardStats.getIndexingPressureShardStats(shardId1).getCurrentPrimaryBytes()); + } + + assertEquals(0, nodeStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, shardStats.getIndexingPressureShardStats(shardId1).getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardStats.getIndexingPressureShardStats(shardId1).getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaThreadedUpdateToShardLimitsAndRejections() throws Exception { + final int NUM_THREADS = 100; + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + AtomicInteger rejectionCount = new AtomicInteger(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + try { + releasables[counter] = shardIndexingPressure.markReplicaOperationStarted(shardId1, 300, false); + } catch (OpenSearchRejectedExecutionException e) { + rejectionCount.addAndGet(1); + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(rejectionCount.get(), nodeStats.getReplicaRejections()); + assertTrue(nodeStats.getCurrentReplicaBytes() < 50 * 300); + + ShardIndexingPressureStats 
shardStats = shardIndexingPressure.shardStats(); + assertTrue(shardStats.getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes() < 50 * 300); + + for (int i = 0; i < releasables.length - 1; i++) { + if(releasables[i] != null) { + releasables[i].close(); + } + } + + nodeStats = shardIndexingPressure.stats(); + assertEquals(rejectionCount.get(), nodeStats.getReplicaRejections()); + assertEquals(0, nodeStats.getCurrentReplicaBytes()); + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + + shardStats = shardIndexingPressure.coldStats(); + assertEquals(rejectionCount.get(), shardStats.getIndexingPressureShardStats(shardId1) + .getReplicaNodeLimitsBreachedRejections()); + assertEquals(0, shardStats.getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes()); + assertEquals(15, shardStats.getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits()); + } + + public void testCoordinatingPrimaryConcurrentUpdatesOnShardIndexingPressureTrackerObjects() throws Exception { + final int NUM_THREADS = scaledRandomIntBetween(100, 400); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "new_uuid"); + ShardId shardId1 = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + if(randomBoolean) { + releasables[counter] = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, + scaledRandomIntBetween(1, 20), false); + } else { + releasables[counter] = shardIndexingPressure.markPrimaryOperationStarted(shardId1, + scaledRandomIntBetween(1, 20), false); + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + 
IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats() + .getIndexingPressureShardStats(shardId1); + assertThat(shardStoreStats.getCurrentPrimaryAndCoordinatingLimits(), Matchers.greaterThan(100L)); + + CommonStatsFlags statsFlag = new CommonStatsFlags(); + statsFlag.includeAllShardIndexingPressureTrackers(true); + IndexingPressurePerShardStats shardStoreStats2 = shardIndexingPressure.shardStats(statsFlag) + .getIndexingPressureShardStats(shardId1);; + assertEquals(shardStoreStats.getCurrentPrimaryAndCoordinatingLimits(), shardStoreStats2 + .getCurrentPrimaryAndCoordinatingLimits()); + + statsFlag.includeOnlyTopIndexingPressureMetrics(true); + assertNull(shardIndexingPressure.shardStats(statsFlag).getIndexingPressureShardStats(shardId1)); + statsFlag.includeOnlyTopIndexingPressureMetrics(false); + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + //No object in host store as no active shards + shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + + if(randomBoolean) { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + } else { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryBytes()); + } + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits()); + + shardStoreStats2 = shardIndexingPressure.shardStats(statsFlag).getIndexingPressureShardStats(shardId1); + assertEquals(shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits(), + shardStoreStats2.getCurrentPrimaryAndCoordinatingLimits()); + + 
statsFlag.includeAllShardIndexingPressureTrackers(false); + assertNull(shardIndexingPressure.shardStats(statsFlag).getIndexingPressureShardStats(shardId1)); + } + + public void testReplicaConcurrentUpdatesOnShardIndexingPressureTrackerObjects() throws Exception { + final int NUM_THREADS = scaledRandomIntBetween(100, 400); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "new_uuid"); + ShardId shardId1 = new ShardId(index, 0); + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + releasables[counter] = shardIndexingPressure.markReplicaOperationStarted(shardId1, + scaledRandomIntBetween(1, 20), false); + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats() + .getIndexingPressureShardStats(shardId1); + assertThat(shardStoreStats.getCurrentReplicaLimits(), Matchers.greaterThan(100L)); + + CommonStatsFlags statsFlag = new CommonStatsFlags(); + statsFlag.includeAllShardIndexingPressureTrackers(true); + IndexingPressurePerShardStats shardStoreStats2 = shardIndexingPressure.shardStats(statsFlag) + .getIndexingPressureShardStats(shardId1);; + assertEquals(shardStoreStats.getCurrentReplicaLimits(), shardStoreStats2.getCurrentReplicaLimits()); + + statsFlag.includeOnlyTopIndexingPressureMetrics(true); + assertNull(shardIndexingPressure.shardStats(statsFlag).getIndexingPressureShardStats(shardId1)); + statsFlag.includeOnlyTopIndexingPressureMetrics(false); + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + //No object in host store as no active shards + shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1); + assertNull(shardStoreStats); + + assertEquals(0, 
shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes()); + assertEquals(15, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits()); + + shardStoreStats2 = shardIndexingPressure.shardStats(statsFlag).getIndexingPressureShardStats(shardId1);; + assertEquals(shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits(), + shardStoreStats2.getCurrentReplicaLimits()); + + statsFlag.includeAllShardIndexingPressureTrackers(false); + assertNull(shardIndexingPressure.shardStats(statsFlag).getIndexingPressureShardStats(shardId1)); + } + + public void testCoordinatingPrimaryThreadedThroughputDegradationAndRejection() throws Exception { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "15KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 80) + .build(); + final int NUM_THREADS = scaledRandomIntBetween(80, 100); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + + //Generating a load to have a fair throughput + for (int i = 0; i < NUM_THREADS; i++) { + threads[i] = new Thread(() -> { + for (int j = 0; j < randomIntBetween(400, 500); j++) { + Releasable releasable; + if(randomBoolean) { + releasable = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 100, false); + } else { + releasable = 
shardIndexingPressure.markPrimaryOperationStarted(shardId1, 100, false); + } + try { + Thread.sleep(100); + } catch (Exception e) { + //Do Nothing + } + releasable.close(); + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + //Generating a load to such that the requests in the window shows degradation in throughput. + for (int i = 0; i < ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.get(settings).intValue(); i++) { + int counter = i; + threads[i] = new Thread(() -> { + if(randomBoolean) { + releasables[counter] = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 100, false); + } else { + releasables[counter] = shardIndexingPressure.markPrimaryOperationStarted(shardId1, 100, false); + } + try { + Thread.sleep(200); + } catch (Exception e) { + //Do Nothing + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + for (int i = 0; i < ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.get(settings).intValue(); i++) { + releasables[i].close(); + } + + //Generate a load which breaches both primary parameter + if(randomBoolean) { + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 11 * 1024, false)); + + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingNodeLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingLastSuccessfulRequestLimitsBreachedRejections()); + } else 
{ + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markPrimaryOperationStarted(shardId1, 11 * 1024, false)); + + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryBytes()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryNodeLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryLastSuccessfulRequestLimitsBreachedRejections()); + } + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(15, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaThreadedThroughputDegradationAndRejection() throws Exception { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 100) + .build(); + final int NUM_THREADS = scaledRandomIntBetween(100, 120); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId 
shardId1 = new ShardId(index, 0); + + //Generating a load to have a fair throughput + for (int i = 0; i < NUM_THREADS; i++) { + threads[i] = new Thread(() -> { + for (int j = 0; j < randomIntBetween(400, 500); j++) { + Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId1, 100, false); + try { + Thread.sleep(100); + } catch (Exception e) { + //Do Nothing + } + replica.close(); + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + //Generating a load to such that the requests in the window shows degradation in throughput. + for (int i = 0; i < ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.get(settings).intValue(); i++) { + int counter = i; + threads[i] = new Thread(() -> { + releasables[counter] = shardIndexingPressure.markReplicaOperationStarted(shardId1, 100, false); + try { + Thread.sleep(200); + } catch (Exception e) { + //Do Nothing + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + for (int i = 0; i < ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.get(settings).intValue(); i++) { + releasables[i].close(); + } + + //Generate a load which breaches both primary parameter + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markReplicaOperationStarted(shardId1, 11 * 1024, false)); + + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes()); + assertEquals(15, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getReplicaRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaNodeLimitsBreachedRejections()); + 
assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaLastSuccessfulRequestLimitsBreachedRejections()); + } + + public void testCoordinatingPrimaryThreadedLastSuccessfulRequestsAndRejection() throws Exception { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "250KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 100) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .build(); + final int NUM_THREADS = scaledRandomIntBetween(110, 150); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + + //One request being successful + if(randomBoolean) { + Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 10, false); + coordinating.close(); + } else { + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId1, 10, false); + primary.close(); + } + + //Generating a load such that requests are blocked requests. 
+ for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + if(randomBoolean) { + releasables[counter] = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 10, false); + } else { + releasables[counter] = shardIndexingPressure.markPrimaryOperationStarted(shardId1, 10, false); + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + //Mimic the time elapsed after requests being stuck + Thread.sleep(randomIntBetween(50, 100)); + + //Generate a load which breaches both primary parameter + if(randomBoolean) { + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 200 * 1024, false)); + } else { + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markPrimaryOperationStarted(shardId1, 200 * 1024, false)); + } + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + if(randomBoolean) { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingNodeLimitsBreachedRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingLastSuccessfulRequestLimitsBreachedRejections()); + } else { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryBytes()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryRejections()); + assertEquals(0, 
shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryNodeLimitsBreachedRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryLastSuccessfulRequestLimitsBreachedRejections()); + } + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(256, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaThreadedLastSuccessfulRequestsAndRejection() throws Exception { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "250KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 100) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .build(); + final int NUM_THREADS = scaledRandomIntBetween(110, 150); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + + //One request being successful + Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId1, 10, false); + replica.close(); + + //Generating a load such that requests are blocked requests. 
+ for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + releasables[counter] = shardIndexingPressure.markReplicaOperationStarted(shardId1, 10, false); + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + //Mimic the time elapsed after requests being stuck + Thread.sleep(randomIntBetween(50, 100)); + + //Generate a load which breaches both primary parameter + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markReplicaOperationStarted(shardId1, 300 * 1024, false)); + + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getReplicaRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaNodeLimitsBreachedRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaLastSuccessfulRequestLimitsBreachedRejections()); + assertEquals(384, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits()); + } + + public void testCoordinatingPrimaryThreadedNodeLimitsAndRejection() throws Exception { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "250KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 100) + 
.put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .build(); + final int NUM_THREADS = scaledRandomIntBetween(100, 150); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + + //Generating a load to such that the requests in the window shows degradation in throughput. + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + if(randomBoolean) { + releasables[counter] = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 10, false); + } else { + releasables[counter] = shardIndexingPressure.markPrimaryOperationStarted(shardId1, 10, false); + } + try { + Thread.sleep(randomIntBetween(50, 100)); + } catch (Exception e) { + //Do Nothing + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + //Generate a load which breaches both primary parameter + if(randomBoolean) { + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 240 * 1024, false)); + } else { + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markPrimaryOperationStarted(shardId1, 240 * 1024, false)); + } + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + if(randomBoolean) { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + 
.getCoordinatingThroughputDegradationLimitsBreachedRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingNodeLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCoordinatingLastSuccessfulRequestLimitsBreachedRejections()); + } else { + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryBytes()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryThroughputDegradationLimitsBreachedRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryNodeLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getPrimaryLastSuccessfulRequestLimitsBreachedRejections()); + } + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(256, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaThreadedNodeLimitsAndRejection() throws Exception { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "250KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 100) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .build(); + final int 
NUM_THREADS = scaledRandomIntBetween(100, 150); + final Thread[] threads = new Thread[NUM_THREADS]; + final Releasable[] releasables = new Releasable[NUM_THREADS]; + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + + //Generating a load to such that the requests in the window shows degradation in throughput. + for (int i = 0; i < NUM_THREADS; i++) { + int counter = i; + threads[i] = new Thread(() -> { + releasables[counter] = shardIndexingPressure.markReplicaOperationStarted(shardId1, 10, false); + try { + Thread.sleep(randomIntBetween(50, 100)); + } catch (Exception e) { + //Do Nothing + } + }); + threads[i].start(); + } + + for (Thread t : threads) { + t.join(); + } + + //Generate a load which breaches both primary parameter + expectThrows(OpenSearchRejectedExecutionException.class, + () -> shardIndexingPressure.markReplicaOperationStarted(shardId1, 340 * 1024, false)); + + + for (int i = 0; i < NUM_THREADS; i++) { + releasables[i].close(); + } + + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaBytes()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getReplicaRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaThroughputDegradationLimitsBreachedRejections()); + assertEquals(1, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaNodeLimitsBreachedRejections()); + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1) + .getReplicaLastSuccessfulRequestLimitsBreachedRejections()); + assertEquals(384, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1).getCurrentReplicaLimits()); + } + +} diff --git 
a/server/src/test/java/org/opensearch/index/ShardIndexingPressureTests.java b/server/src/test/java/org/opensearch/index/ShardIndexingPressureTests.java new file mode 100644 index 0000000000000..16c5707fd96b8 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/ShardIndexingPressureTests.java @@ -0,0 +1,795 @@ +/* + * Copyright OpenSearch Contributors. + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException; +import org.opensearch.index.shard.ShardId; +import org.opensearch.index.stats.IndexingPressurePerShardStats; +import org.opensearch.index.stats.IndexingPressureStats; +import org.opensearch.test.OpenSearchTestCase; + +public class ShardIndexingPressureTests extends OpenSearchTestCase { + + private final Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 100) + .build(); + + final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + final ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + + public void testMemoryBytesMarkedAndReleased() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = 
new ShardId(index, 0); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 10, false); + Releasable coordinating2 = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 50, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 15, true); + Releasable primary2 = shardIndexingPressure.markPrimaryOperationStarted(shardId, 5, false); + Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 25, true); + Releasable replica2 = shardIndexingPressure.markReplicaOperationStarted(shardId, 10, false)) { + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(60, nodeStats.getCurrentCoordinatingBytes()); + assertEquals(20, nodeStats.getCurrentPrimaryBytes()); + assertEquals(80, nodeStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(35, nodeStats.getCurrentReplicaBytes()); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertEquals(60, shardStats.getCurrentCoordinatingBytes()); + assertEquals(20, shardStats.getCurrentPrimaryBytes()); + assertEquals(80, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(35, shardStats.getCurrentReplicaBytes()); + + } + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(0, nodeStats.getCurrentCoordinatingBytes()); + assertEquals(0, nodeStats.getCurrentPrimaryBytes()); + assertEquals(0, nodeStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, nodeStats.getCurrentReplicaBytes()); + assertEquals(60, nodeStats.getTotalCoordinatingBytes()); + assertEquals(20, nodeStats.getTotalPrimaryBytes()); + assertEquals(80, nodeStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + assertEquals(35, nodeStats.getTotalReplicaBytes()); + + IndexingPressurePerShardStats shardHotStoreStats = 
shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertNull(shardHotStoreStats); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(60, shardStats.getTotalCoordinatingBytes()); + assertEquals(20, shardStats.getTotalPrimaryBytes()); + assertEquals(80, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + assertEquals(35, shardStats.getTotalReplicaBytes()); + } + + public void testAvoidDoubleAccounting() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 10, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationLocalToCoordinatingNodeStarted(shardId, 15)) { + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(10, nodeStats.getCurrentCoordinatingBytes()); + assertEquals(15, nodeStats.getCurrentPrimaryBytes()); + assertEquals(10, nodeStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertEquals(10, shardStats.getCurrentCoordinatingBytes()); + assertEquals(15, shardStats.getCurrentPrimaryBytes()); + assertEquals(10, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + } + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(0, nodeStats.getCurrentCoordinatingBytes()); + assertEquals(0, nodeStats.getCurrentPrimaryBytes()); + assertEquals(0, 
nodeStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, nodeStats.getTotalCoordinatingBytes()); + assertEquals(15, nodeStats.getTotalPrimaryBytes()); + assertEquals(10, nodeStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertNull(shardStoreStats); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardStats.getTotalCoordinatingBytes()); + assertEquals(15, shardStats.getTotalPrimaryBytes()); + assertEquals(10, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + } + + public void testCoordinatingPrimaryRejections() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1024 * 3, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1024 * 3, false); + Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 1024 * 3, false)) { + if (randomBoolean()) { + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markCoordinatingOperationStarted(shardId, 1024 * 2, false)); + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(1, nodeStats.getCoordinatingRejections()); + assertEquals(1024 * 6, nodeStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + 
assertEquals(1, shardStats.getCoordinatingRejections()); + assertEquals(1024 * 6, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(1, shardStats.getCoordinatingNodeLimitsBreachedRejections()); + } else { + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markPrimaryOperationStarted(shardId, 1024 * 2, false)); + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(1, nodeStats.getPrimaryRejections()); + assertEquals(1024 * 6, nodeStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.shardStats() + .getIndexingPressureShardStats(shardId); + assertEquals(1, shardStats.getPrimaryRejections()); + assertEquals(1024 * 6, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(1, shardStats.getPrimaryNodeLimitsBreachedRejections()); + } + long preForceRejections = shardIndexingPressure.stats().getPrimaryRejections(); + long preForcedShardRejections = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getPrimaryRejections(); + // Primary can be forced + Releasable forced = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1024 * 2, true); + assertEquals(preForceRejections, shardIndexingPressure.stats().getPrimaryRejections()); + assertEquals(1024 * 8, shardIndexingPressure.stats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + + assertEquals(preForcedShardRejections, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getPrimaryRejections()); + assertEquals(1024 * 8, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(preForcedShardRejections, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getPrimaryNodeLimitsBreachedRejections()); + forced.close(); + + // Local to coordinating node primary actions not rejected + 
IndexingPressureStats preLocalNodeStats = shardIndexingPressure.stats(); + IndexingPressurePerShardStats preLocalShardStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + Releasable local = shardIndexingPressure.markPrimaryOperationLocalToCoordinatingNodeStarted(shardId, 1024 * 2); + assertEquals(preLocalNodeStats.getPrimaryRejections(), shardIndexingPressure.stats().getPrimaryRejections()); + assertEquals(1024 * 6, shardIndexingPressure.stats().getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(preLocalNodeStats.getCurrentPrimaryBytes() + 1024 * 2, shardIndexingPressure.stats().getCurrentPrimaryBytes()); + + assertEquals(preLocalShardStats.getPrimaryRejections(), shardIndexingPressure.shardStats() + .getIndexingPressureShardStats(shardId).getPrimaryRejections()); + assertEquals(1024 * 6, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(preLocalShardStats.getCurrentPrimaryBytes() + 1024 * 2, shardIndexingPressure.shardStats() + .getIndexingPressureShardStats(shardId).getCurrentPrimaryBytes()); + assertEquals(preLocalShardStats.getPrimaryNodeLimitsBreachedRejections(), shardIndexingPressure.shardStats() + .getIndexingPressureShardStats(shardId).getPrimaryNodeLimitsBreachedRejections()); + local.close(); + } + + assertEquals(1024 * 8, shardIndexingPressure.stats().getTotalCombinedCoordinatingAndPrimaryBytes()); + assertNull(shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId)); + assertEquals(1024 * 8, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId) + .getTotalCombinedCoordinatingAndPrimaryBytes()); + } + + public void testReplicaRejections() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable coordinating = 
shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1024 * 3, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1024 * 3, false); + Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 1024 * 3, false)) { + // Replica will not be rejected until replica bytes > 15KB + Releasable replica2 = shardIndexingPressure.markReplicaOperationStarted(shardId, 1024 * 9, false); + assertEquals(1024 * 12, shardIndexingPressure.stats().getCurrentReplicaBytes()); + assertEquals(1024 * 12, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + // Replica will be rejected once we cross 15KB Shard Limit + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markReplicaOperationStarted(shardId, 1024 * 2, false)); + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(1, nodeStats.getReplicaRejections()); + assertEquals(1024 * 12, nodeStats.getCurrentReplicaBytes()); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertEquals(1, shardStats.getReplicaRejections()); + assertEquals(1024 * 12, shardStats.getCurrentReplicaBytes()); + assertEquals(1, shardStats.getReplicaNodeLimitsBreachedRejections()); + + // Replica can be forced + Releasable forced = shardIndexingPressure.markReplicaOperationStarted(shardId, 1024 * 2, true); + assertEquals(1, shardIndexingPressure.stats().getReplicaRejections()); + assertEquals(1024 * 14, shardIndexingPressure.stats().getCurrentReplicaBytes()); + + assertEquals(1, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getReplicaRejections()); + assertEquals(1024 * 14, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + assertEquals(1, shardStats.getReplicaNodeLimitsBreachedRejections()); + forced.close(); + + replica2.close(); 
+ } + + assertEquals(1024 * 14, shardIndexingPressure.stats().getTotalReplicaBytes()); + assertNull(shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId)); + assertEquals(1024 * 14, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId).getTotalReplicaBytes()); + } + + public void testCoordinatingPrimaryShardLimitIncrease() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 2, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 2, false)) { + assertEquals(2, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentCoordinatingBytes()); + assertEquals(4, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentPrimaryAndCoordinatingLimits()); // Base Limit + if (randomBoolean) { + Releasable coordinating1 = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 6, false); + assertEquals(8, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentCoordinatingBytes()); + assertEquals(10, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(11, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentPrimaryAndCoordinatingLimits()); // Increased Limit + coordinating1.close(); + } else { + Releasable primary1 = shardIndexingPressure.markPrimaryOperationStarted(shardId, 6, false); + assertEquals(8, 
shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentPrimaryBytes()); + assertEquals(10, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(11, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentPrimaryAndCoordinatingLimits()); // Increased Limit + primary1.close(); + } + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertNull(shardStoreStats); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + if(randomBoolean){ + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(8, shardStats.getTotalCoordinatingBytes()); + } else { + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(8, shardStats.getTotalPrimaryBytes()); + } + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardStats.getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaShardLimitIncrease() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 2, false)) { + assertEquals(2, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + assertEquals(15, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentReplicaLimits()); // Base Limit + + Releasable replica1 = shardIndexingPressure.markReplicaOperationStarted(shardId, 14, false); + assertEquals(16, 
shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + assertEquals(18, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentReplicaLimits()); // Increased Limit + replica1.close(); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertNull(shardStoreStats); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(16, shardStats.getTotalReplicaBytes()); + assertEquals(15, shardStats.getCurrentReplicaLimits()); + } + + public void testCoordinatingPrimaryShardLimitIncreaseEvaluateSecondaryParam() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 4 * 1024, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 4 * 1024, false)) { + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCoordinatingBytes()); + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentPrimaryBytes()); + assertEquals(8 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals((long)(8*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertNull(shardStoreStats); + + IndexingPressurePerShardStats shardStats = 
shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(4 * 1024, shardStats.getTotalCoordinatingBytes()); + assertEquals(4 * 1024, shardStats.getTotalPrimaryBytes()); + assertEquals(8 * 1024, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardStats.getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testReplicaShardLimitIncreaseEvaluateSecondaryParam() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 11 * 1024, false)) { + assertEquals(11 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + assertEquals((long)(11 * 1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentReplicaLimits()); + } + + IndexingPressurePerShardStats shardStoreStats = shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId); + assertNull(shardStoreStats); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(11 * 1024, shardStats.getTotalReplicaBytes()); + assertEquals(15, shardStats.getCurrentReplicaLimits()); + } + + public void testCoordinatingPrimaryShardRejectionViaSuccessfulRequestsParam() throws InterruptedException { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + 
.put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .build(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1 * 1024, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1 * 1024, false)) { + assertEquals(1 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentCoordinatingBytes()); + assertEquals(1 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentPrimaryBytes()); + assertEquals(2 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals((long)(2*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(1 * 1024, shardStats.getTotalCoordinatingBytes()); + assertEquals(1 * 1024, shardStats.getTotalPrimaryBytes()); + assertEquals(2 * 1024, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardStats.getCurrentPrimaryAndCoordinatingLimits()); + + Thread.sleep(25); + //Total Bytes are 9*1024 and node limit is 10*1024 + if(randomBoolean) { + try 
(Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 7 * 1024, false); + Releasable coordinating1 = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1 * 1024, false)) { + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markCoordinatingOperationStarted(shardId, 1 * 1024, false)); + } + } else { + try (Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 7 * 1024, false); + Releasable primary1 = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1 * 1024, false)) { + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markPrimaryOperationStarted(shardId, 1 * 1024, false)); + } + } + + shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + if(randomBoolean) { + assertEquals(1, shardStats.getCoordinatingRejections()); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(1, shardStats.getCoordinatingLastSuccessfulRequestLimitsBreachedRejections()); + } else { + assertEquals(1, shardStats.getPrimaryRejections()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(1, shardStats.getPrimaryLastSuccessfulRequestLimitsBreachedRejections()); + } + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + if(randomBoolean) { + assertEquals(1, nodeStats.getCoordinatingRejections()); + assertEquals(0, nodeStats.getCurrentCoordinatingBytes()); + } else { + assertEquals(1, nodeStats.getPrimaryRejections()); + assertEquals(0, nodeStats.getCurrentPrimaryBytes()); + } + } + + public void testReplicaShardRejectionViaSuccessfulRequestsParam() throws InterruptedException { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 1) + 
.put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .build(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 1 * 1024, false)) { + assertEquals(1 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + assertEquals((long)(1*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentReplicaLimits()); + } + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(1 * 1024, shardStats.getTotalReplicaBytes()); + assertEquals(15, shardStats.getCurrentReplicaLimits()); + + Thread.sleep(25); + //Total Bytes are 14*1024 and node limit is 15*1024 + try (Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 10 * 1024, false); + Releasable replica1 = shardIndexingPressure.markReplicaOperationStarted(shardId, 2 * 1024, false)) { + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markReplicaOperationStarted(shardId, 2 * 1024, false)); + } + + shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(1, shardStats.getReplicaRejections()); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(1, shardStats.getReplicaLastSuccessfulRequestLimitsBreachedRejections()); + + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(1, nodeStats.getReplicaRejections()); + assertEquals(0, nodeStats.getCurrentReplicaBytes()); + } + + public void 
testCoordinatingPrimaryShardRejectionSkippedInShadowModeViaSuccessfulRequestsParam() throws InterruptedException { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), false) + .build(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1 * 1024, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1 * 1024, false)) { + assertEquals(1 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentCoordinatingBytes()); + assertEquals(1 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentPrimaryBytes()); + assertEquals(2 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals((long)(2*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(1 * 1024, shardStats.getTotalCoordinatingBytes()); + assertEquals(1 * 1024, 
shardStats.getTotalPrimaryBytes()); + assertEquals(2 * 1024, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardStats.getCurrentPrimaryAndCoordinatingLimits()); + + Thread.sleep(25); + //Total Bytes are 9*1024 and node limit is 10*1024 + if(randomBoolean) { + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 7 * 1024, false); + Releasable coordinating1 = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1 * 1024, false)) { + Releasable coordinating2 = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1 * 1024, false); + coordinating2.close(); + } + } else { + try (Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 7 * 1024, false); + Releasable primary1 = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1 * 1024, false)) { + Releasable primary2 = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1 * 1024, false); + primary2.close(); + } + } + + shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + if(randomBoolean) { + assertEquals(0, shardStats.getCoordinatingRejections()); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(1, shardStats.getCoordinatingLastSuccessfulRequestLimitsBreachedRejections()); + } else { + assertEquals(0, shardStats.getPrimaryRejections()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(1, shardStats.getPrimaryLastSuccessfulRequestLimitsBreachedRejections()); + } + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + if(randomBoolean) { + assertEquals(0, nodeStats.getCoordinatingRejections()); + assertEquals(0, nodeStats.getCurrentCoordinatingBytes()); + } else { + assertEquals(0, nodeStats.getPrimaryRejections()); + assertEquals(0, nodeStats.getCurrentPrimaryBytes()); + } + } + + public void testReplicaShardRejectionSkippedInShadowModeViaSuccessfulRequestsParam() throws InterruptedException { 
+ Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.MAX_OUTSTANDING_REQUESTS.getKey(), 1) + .put(ShardIndexingPressureMemoryManager.SUCCESSFUL_REQUEST_ELAPSED_TIMEOUT.getKey(), 20) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), false) + .build(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 1 * 1024, false)) { + assertEquals(1 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + assertEquals((long)(1*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentReplicaLimits()); + } + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(1 * 1024, shardStats.getTotalReplicaBytes()); + assertEquals(15, shardStats.getCurrentReplicaLimits()); + + Thread.sleep(25); + //Total Bytes are 14*1024 and node limit is 15*1024 + try (Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 10 * 1024, false); + Releasable replica1 = shardIndexingPressure.markReplicaOperationStarted(shardId, 2 * 1024, false)) { + Releasable replica2 = shardIndexingPressure.markReplicaOperationStarted(shardId, 2 * 1024, false); + replica2.close(); + } + + shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getReplicaRejections()); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(1, 
shardStats.getReplicaLastSuccessfulRequestLimitsBreachedRejections()); + + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(0, nodeStats.getReplicaRejections()); + assertEquals(0, nodeStats.getCurrentReplicaBytes()); + } + + public void testCoordinatingPrimaryShardRejectionViaThroughputDegradationParam() throws InterruptedException { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 1) + .build(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1 * 1024, false); + Releasable coordinating1 = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 3 * 1024, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1 * 1024, false); + Releasable primary1 = shardIndexingPressure.markPrimaryOperationStarted(shardId, 3 * 1024, false)) { + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentCoordinatingBytes()); + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentPrimaryBytes()); + assertEquals(8 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals((long)(8*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + 
.getCurrentPrimaryAndCoordinatingLimits()); + //Adding delay in the current in flight request to mimic throughput degradation + Thread.sleep(100); + } + if(randomBoolean) { + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markCoordinatingOperationStarted(shardId, 8 * 1024, false)); + } else { + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markPrimaryOperationStarted(shardId, 8 * 1024, false)); + } + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + if(randomBoolean) { + assertEquals(1, shardStats.getCoordinatingRejections()); + assertEquals(1, shardStats.getCoordinatingThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(4 * 1024, shardStats.getTotalCoordinatingBytes()); + } else { + assertEquals(1, shardStats.getPrimaryRejections()); + assertEquals(1, shardStats.getPrimaryThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(4 * 1024, shardStats.getTotalPrimaryBytes()); + } + + assertEquals(10, shardStats.getCurrentPrimaryAndCoordinatingLimits()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(8 * 1024, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + if(randomBoolean) { + assertEquals(1, nodeStats.getCoordinatingRejections()); + assertEquals(0, nodeStats.getCurrentCoordinatingBytes()); + } else { + assertEquals(1, nodeStats.getPrimaryRejections()); + assertEquals(0, nodeStats.getCurrentPrimaryBytes()); + } + } + + public void testReplicaShardRejectionViaThroughputDegradationParam() throws InterruptedException { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + 
.put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), true) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 1) + .build(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 1 * 1024, false); + Releasable replica1 = shardIndexingPressure.markReplicaOperationStarted(shardId, 3 * 1024, false)) { + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + assertEquals((long)(4*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentReplicaLimits()); + //Adding delay in the current in flight request to mimic throughput degradation + Thread.sleep(100); + } + + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markReplicaOperationStarted(shardId, 12 * 1024, false)); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(1, shardStats.getReplicaRejections()); + assertEquals(1, shardStats.getReplicaThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(4 * 1024, shardStats.getTotalReplicaBytes()); + assertEquals(15, shardStats.getCurrentReplicaLimits()); + + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(1, nodeStats.getReplicaRejections()); + assertEquals(0, nodeStats.getCurrentReplicaBytes()); + } + + public void testCoordinatingPrimaryShardRejectionSkippedInShadowModeViaThroughputDegradationParam() throws 
InterruptedException { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), false) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 1) + .build(); + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + boolean randomBoolean = randomBoolean(); + try (Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 1 * 1024, false); + Releasable coordinating1 = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 3 * 1024, false); + Releasable primary = shardIndexingPressure.markPrimaryOperationStarted(shardId, 1 * 1024, false); + Releasable primary1 = shardIndexingPressure.markPrimaryOperationStarted(shardId, 3 * 1024, false)) { + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentCoordinatingBytes()); + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentPrimaryBytes()); + assertEquals(8 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals((long)(8*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentPrimaryAndCoordinatingLimits()); + //Adding delay in the current in flight request to mimic throughput degradation + Thread.sleep(100); + } + if(randomBoolean) { + Releasable coordinating = shardIndexingPressure.markCoordinatingOperationStarted(shardId, 8 * 1024, false); + coordinating.close(); + } else { + Releasable primary = 
shardIndexingPressure.markPrimaryOperationStarted(shardId, 8 * 1024, false); + primary.close(); + } + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + if(randomBoolean) { + assertEquals(0, shardStats.getCoordinatingRejections()); + assertEquals(1, shardStats.getCoordinatingThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(12 * 1024, shardStats.getTotalCoordinatingBytes()); + } else { + assertEquals(0, shardStats.getPrimaryRejections()); + assertEquals(1, shardStats.getPrimaryThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(12 * 1024, shardStats.getTotalPrimaryBytes()); + } + + assertEquals(10, shardStats.getCurrentPrimaryAndCoordinatingLimits()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(16 * 1024, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + if(randomBoolean) { + assertEquals(0, nodeStats.getCoordinatingRejections()); + assertEquals(0, nodeStats.getCurrentCoordinatingBytes()); + } else { + assertEquals(0, nodeStats.getPrimaryRejections()); + assertEquals(0, nodeStats.getCurrentPrimaryBytes()); + } + } + + public void testReplicaShardRejectionSkippedInShadowModeViaThroughputDegradationParam() throws InterruptedException { + Settings settings = Settings.builder().put(IndexingPressure.MAX_INDEXING_BYTES.getKey(), "10KB") + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENABLED.getKey(), true) + .put(ShardIndexingPressureSettings.SHARD_INDEXING_PRESSURE_ENFORCED.getKey(), false) + .put(ShardIndexingPressureMemoryManager.THROUGHPUT_DEGRADATION_LIMITS.getKey(), 1) + .put(ShardIndexingPressureSettings.REQUEST_SIZE_WINDOW.getKey(), 1) + .build(); + ShardIndexingPressure shardIndexingPressure = new 
ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + try (Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 1 * 1024, false); + Releasable replica1 = shardIndexingPressure.markReplicaOperationStarted(shardId, 3 * 1024, false)) { + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId).getCurrentReplicaBytes()); + assertEquals((long)(4*1024/0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentReplicaLimits()); + //Adding delay in the current in flight request to mimic throughput degradation + Thread.sleep(100); + } + + Releasable replica = shardIndexingPressure.markReplicaOperationStarted(shardId, 12 * 1024, false); + replica.close(); + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId); + assertEquals(0, shardStats.getReplicaRejections()); + assertEquals(1, shardStats.getReplicaThroughputDegradationLimitsBreachedRejections()); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(16 * 1024, shardStats.getTotalReplicaBytes()); + assertEquals(15, shardStats.getCurrentReplicaLimits()); + + IndexingPressureStats nodeStats = shardIndexingPressure.stats(); + assertEquals(0, nodeStats.getReplicaRejections()); + assertEquals(0, nodeStats.getCurrentReplicaBytes()); + } + + public void testShardLimitIncreaseMultipleShards() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId1 = new ShardId(index, 0); + ShardId shardId2 = new ShardId(index, 1); + try (Releasable coordinating1 = shardIndexingPressure.markCoordinatingOperationStarted(shardId1, 4 * 1024, false); + Releasable coordinating2 = shardIndexingPressure.markCoordinatingOperationStarted(shardId2, 4 * 1024, false);) { + 
assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentCoordinatingBytes()); + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals((long)(4 * 1024 / 0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId1) + .getCurrentPrimaryAndCoordinatingLimits()); + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId2) + .getCurrentCoordinatingBytes()); + assertEquals(4 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId2) + .getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals((long)(4 * 1024 / 0.85), shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId2) + .getCurrentPrimaryAndCoordinatingLimits()); + } + + IndexingPressurePerShardStats shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId1); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(4 * 1024, shardStats.getTotalCoordinatingBytes()); + assertEquals(4 * 1024, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + assertEquals(10, shardStats.getCurrentPrimaryAndCoordinatingLimits()); + + shardStats = shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId2); + assertEquals(0, shardStats.getCurrentCoordinatingBytes()); + assertEquals(0, shardStats.getCurrentPrimaryBytes()); + assertEquals(0, shardStats.getCurrentCombinedCoordinatingAndPrimaryBytes()); + assertEquals(0, shardStats.getCurrentReplicaBytes()); + assertEquals(4 * 1024, shardStats.getTotalCoordinatingBytes()); + assertEquals(4 * 1024, shardStats.getTotalCombinedCoordinatingAndPrimaryBytes()); + 
assertEquals(10, shardStats.getCurrentPrimaryAndCoordinatingLimits()); + } + + public void testForceExecutionOnCoordinating() { + ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); + Index index = new Index("IndexName", "UUID"); + ShardId shardId = new ShardId(index, 0); + expectThrows(OpenSearchRejectedExecutionException.class, () -> shardIndexingPressure + .markCoordinatingOperationStarted(shardId,1024 * 11, false)); + try (Releasable ignore = shardIndexingPressure.markCoordinatingOperationStarted(shardId,11 * 1024, true)) { + assertEquals(11 * 1024, shardIndexingPressure.shardStats().getIndexingPressureShardStats(shardId) + .getCurrentCoordinatingBytes()); + } + assertEquals(0, shardIndexingPressure.coldStats().getIndexingPressureShardStats(shardId).getCurrentCoordinatingBytes()); + } +} diff --git a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java index b2e2b96d2c629..289957863027a 100644 --- a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java +++ b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java @@ -33,7 +33,6 @@ package org.opensearch.index.seqno; import org.opensearch.action.ActionListener; -import org.opensearch.index.IndexingPressure; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.ActionTestUtils; import org.opensearch.action.support.PlainActionFuture; @@ -44,6 +43,7 @@ import org.opensearch.core.internal.io.IOUtils; import org.opensearch.index.Index; import org.opensearch.index.IndexService; +import org.opensearch.index.IndexingPressureService; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardId; import org.opensearch.indices.IndicesService; @@ -120,7 +120,7 @@ public void testRetentionLeaseSyncActionOnPrimary() { threadPool, shardStateAction, new 
ActionFilters(Collections.emptySet()), - new IndexingPressure(Settings.EMPTY), + new IndexingPressureService(Settings.EMPTY, clusterService), new SystemIndices(emptyMap())); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -158,7 +158,7 @@ public void testRetentionLeaseSyncActionOnReplica() throws Exception { threadPool, shardStateAction, new ActionFilters(Collections.emptySet()), - new IndexingPressure(Settings.EMPTY), + new IndexingPressureService(Settings.EMPTY, clusterService), new SystemIndices(emptyMap())); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -199,7 +199,7 @@ public void testBlocks() { threadPool, shardStateAction, new ActionFilters(Collections.emptySet()), - new IndexingPressure(Settings.EMPTY), + new IndexingPressureService(Settings.EMPTY, clusterService), new SystemIndices(emptyMap())); assertNull(action.indexBlockLevel()); diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index a621faf5473b9..a97fb27789c34 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -166,7 +166,7 @@ import org.opensearch.gateway.MetaStateService; import org.opensearch.gateway.TransportNodesListGatewayStartedShards; import org.opensearch.index.Index; -import org.opensearch.index.IndexingPressure; +import org.opensearch.index.IndexingPressureService; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.seqno.GlobalCheckpointSyncAction; import org.opensearch.index.seqno.RetentionLeaseSyncer; @@ -1572,7 +1572,7 @@ 
public void onFailure(final Exception e) { threadPool, shardStateAction, actionFilters, - new IndexingPressure(settings), + new IndexingPressureService(settings, clusterService), new SystemIndices(emptyMap()))), new GlobalCheckpointSyncAction( settings, @@ -1599,7 +1599,7 @@ allocationService, new AliasValidator(), shardLimitValidator, environment, index mappingUpdatedAction.setClient(client); final TransportShardBulkAction transportShardBulkAction = new TransportShardBulkAction(settings, transportService, clusterService, indicesService, threadPool, shardStateAction, mappingUpdatedAction, new UpdateHelper(scriptService), - actionFilters, new IndexingPressure(settings), new SystemIndices(emptyMap())); + actionFilters, new IndexingPressureService(settings, clusterService), new SystemIndices(emptyMap())); actions.put(BulkAction.INSTANCE, new TransportBulkAction(threadPool, transportService, clusterService, new IngestService( @@ -1608,7 +1608,7 @@ clusterService, indicesService, threadPool, shardStateAction, mappingUpdatedActi Collections.emptyList(), client), transportShardBulkAction, client, actionFilters, indexNameExpressionResolver, new AutoCreateIndex(settings, clusterSettings, indexNameExpressionResolver, new SystemIndices(emptyMap())), - new IndexingPressure(settings), + new IndexingPressureService(settings, clusterService), new SystemIndices(emptyMap()) )); final RestoreService restoreService = new RestoreService(