From ed0d0f7ea04197b1e1f36a351699b64a0d71efd7 Mon Sep 17 00:00:00 2001 From: kh3ra Date: Wed, 6 Aug 2025 00:13:00 +0530 Subject: [PATCH 01/30] Adding stats for merged segment warmer Signed-off-by: kh3ra Signed-off-by: Aditya Khera --- .../admin/indices/stats/CommonStats.java | 28 ++- .../admin/indices/stats/CommonStatsFlags.java | 3 +- .../index/engine/MergedSegmentWarmer.java | 40 ++-- .../MergedSegmentReplicationTracker.java | 89 ++++++++ .../index/merge/MergedSegmentWarmerStats.java | 191 ++++++++++++++++++ .../opensearch/index/shard/IndexShard.java | 13 ++ .../opensearch/indices/IndicesService.java | 3 + .../opensearch/indices/NodeIndicesStats.java | 6 + .../AbstractSegmentReplicationTarget.java | 5 + .../MergedSegmentReplicationTarget.java | 9 +- .../RemoteStoreReplicationSource.java | 19 +- ...RemoteStorePublishMergedSegmentAction.java | 4 + .../rest/action/cat/RestIndicesAction.java | 99 +++++++++ .../rest/action/cat/RestNodesAction.java | 44 ++++ .../rest/action/cat/RestShardsAction.java | 47 +++++ 15 files changed, 571 insertions(+), 29 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/merge/MergedSegmentReplicationTracker.java create mode 100644 server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStats.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStats.java index 8bfeb13b253c3..1eb6a0a42009e 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStats.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStats.java @@ -49,6 +49,7 @@ import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import 
org.opensearch.index.search.stats.SearchStats; @@ -92,6 +93,9 @@ public class CommonStats implements Writeable, ToXContentFragment { @Nullable public MergeStats merge; + @Nullable + public MergedSegmentWarmerStats mergedSegmentWarmerStats; + @Nullable public RefreshStats refresh; @@ -179,6 +183,9 @@ public CommonStats(CommonStatsFlags flags) { case Recovery: recoveryStats = new RecoveryStats(); break; + case MergedSegmentWarmer: + mergedSegmentWarmerStats = new MergedSegmentWarmerStats(); + break; default: throw new IllegalStateException("Unknown Flag: " + flag); } @@ -238,6 +245,9 @@ public CommonStats(IndicesQueryCache indicesQueryCache, IndexShard indexShard, C case Recovery: recoveryStats = indexShard.recoveryStats(); break; + case MergedSegmentWarmer: + mergedSegmentWarmerStats = indexShard.mergedSegmentWarmerStats(); + break; default: throw new IllegalStateException("Unknown Flag: " + flag); } @@ -264,6 +274,7 @@ public CommonStats(StreamInput in) throws IOException { translog = in.readOptionalWriteable(TranslogStats::new); requestCache = in.readOptionalWriteable(RequestCacheStats::new); recoveryStats = in.readOptionalWriteable(RecoveryStats::new); + mergedSegmentWarmerStats = in.readOptionalWriteable(MergedSegmentWarmerStats::new); } @Override @@ -284,6 +295,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(translog); out.writeOptionalWriteable(requestCache); out.writeOptionalWriteable(recoveryStats); + out.writeOptionalWriteable(mergedSegmentWarmerStats); } public void add(CommonStats stats) { @@ -416,6 +428,14 @@ public void add(CommonStats stats) { } else { recoveryStats.add(stats.getRecoveryStats()); } + if (mergedSegmentWarmerStats == null) { + if (stats.getMergedSegmentWarmer() != null) { + mergedSegmentWarmerStats = new MergedSegmentWarmerStats(); + mergedSegmentWarmerStats.add(stats.getMergedSegmentWarmer()); + } + } else { + mergedSegmentWarmerStats.add(stats.getMergedSegmentWarmer()); + } } @Nullable @@ 
-498,6 +518,11 @@ public RecoveryStats getRecoveryStats() { return recoveryStats; } + @Nullable + public MergedSegmentWarmerStats getMergedSegmentWarmer() { + return mergedSegmentWarmerStats; + } + /** * Utility method which computes total memory by adding * FieldData, PercolatorCache, Segments (index writer, version map) @@ -537,7 +562,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws segments, translog, requestCache, - recoveryStats } + recoveryStats, + mergedSegmentWarmerStats } ).filter(Objects::nonNull); for (ToXContent toXContent : ((Iterable) stream::iterator)) { toXContent.toXContent(builder, params); diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java index 03fb55323feec..30dbf481ed27d 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java @@ -332,7 +332,8 @@ public enum Flag { Translog("translog", 13), // 14 was previously used for Suggest RequestCache("request_cache", 15), - Recovery("recovery", 16); + Recovery("recovery", 16), + MergedSegmentWarmer("merged_segment_warmer", 17); private final String restName; private final int index; diff --git a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java index b3bec459eb1aa..b8d669c46e2a0 100644 --- a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java +++ b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java @@ -16,6 +16,7 @@ import org.apache.lucene.index.SegmentReader; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.logging.Loggers; +import org.opensearch.index.merge.MergedSegmentReplicationTracker; import 
org.opensearch.index.shard.IndexShard; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.transport.TransportService; @@ -33,7 +34,7 @@ public class MergedSegmentWarmer implements IndexWriter.IndexReaderWarmer { private final RecoverySettings recoverySettings; private final ClusterService clusterService; private final IndexShard indexShard; - + private final MergedSegmentReplicationTracker mergedSegmentReplicationTracker; private final Logger logger; public MergedSegmentWarmer( @@ -46,23 +47,28 @@ public MergedSegmentWarmer( this.recoverySettings = recoverySettings; this.clusterService = clusterService; this.indexShard = indexShard; + this.mergedSegmentReplicationTracker = indexShard.mergedSegmentReplicationTracker(); this.logger = Loggers.getLogger(getClass(), indexShard.shardId()); } @Override public void warm(LeafReader leafReader) throws IOException { + if (shouldWarm() == false) { + return; + } + // IndexWriter.IndexReaderWarmer#warm is called by IndexWriter#mergeMiddle. The type of leafReader should be SegmentReader. + assert leafReader instanceof SegmentReader; + // Count only warms that are actually attempted, so every ongoing-warm increment is paired with the decrement in finally. + mergedSegmentReplicationTracker.incrementTotalWarmInvocationsCount(); + mergedSegmentReplicationTracker.incrementOngoingWarms(); + long startTime = System.currentTimeMillis(); + long elapsedTime = 0; try { - if (shouldWarm() == false) { - return; - } - // IndexWriter.IndexReaderWarmer#warm is called by IndexWriter#mergeMiddle. The type of leafReader should be SegmentReader.
- assert leafReader instanceof SegmentReader; - assert indexShard.indexSettings().isSegRepLocalEnabled() || indexShard.indexSettings().isRemoteStoreEnabled(); - - long startTime = System.currentTimeMillis(); SegmentCommitInfo segmentCommitInfo = ((SegmentReader) leafReader).getSegmentInfo(); - logger.info(() -> new ParameterizedMessage("Warming segment: {}", segmentCommitInfo)); + logger.trace(() -> new ParameterizedMessage("Warming segment: {}", segmentCommitInfo)); indexShard.publishMergedSegment(segmentCommitInfo); + elapsedTime = System.currentTimeMillis() - startTime; + long finalElapsedTime = elapsedTime; logger.trace(() -> { long segmentSize = -1; try { @@ -72,17 +78,22 @@ public void warm(LeafReader leafReader) throws IOException { "Completed segment warming for {}. Size: {}B, Timing: {}ms", segmentCommitInfo.info.name, segmentSize, - (System.currentTimeMillis() - startTime) + finalElapsedTime ); }); } catch (Exception e) { + mergedSegmentReplicationTracker.incrementTotalWarmFailureCount(); logger.warn( () -> new ParameterizedMessage( "Throw exception during merged segment warmer, skip merged segment {} warmer", ((SegmentReader) leafReader).getSegmentName() ), e ); + } finally { + // Measure here so that failed warms also contribute their elapsed time. + mergedSegmentReplicationTracker.addTotalWarmTimeMillis(System.currentTimeMillis() - startTime); + mergedSegmentReplicationTracker.decrementOngoingWarms(); } } diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentReplicationTracker.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentReplicationTracker.java new file mode 100644 index 0000000000000..2c33ed932a4c7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentReplicationTracker.java @@ -0,0 +1,89 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.index.merge; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.metrics.CounterMetric; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.shard.AbstractIndexShardComponent; + +/** + * A component that tracks stats related to merged segment replication operations. + * This includes metrics for pre-copy(warm) invocations, failures, bytes transferred, and timing information. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class MergedSegmentReplicationTracker extends AbstractIndexShardComponent { + + private final CounterMetric totalWarmInvocationsCount = new CounterMetric(); + private final CounterMetric totalWarmTimeMillis = new CounterMetric(); + private final CounterMetric totalWarmFailureCount = new CounterMetric(); + private final CounterMetric totalBytesUploaded = new CounterMetric(); + private final CounterMetric totalBytesDownloaded = new CounterMetric(); + private final CounterMetric totalUploadTimeMillis = new CounterMetric(); + private final CounterMetric totalDownloadTimeMillis = new CounterMetric(); + private final CounterMetric ongoingWarms = new CounterMetric(); + + public MergedSegmentReplicationTracker(ShardId shardId, IndexSettings indexSettings) { + super(shardId, indexSettings); + } + + public void incrementTotalWarmInvocationsCount() { + totalWarmInvocationsCount.inc(); + } + + public void incrementOngoingWarms() { + ongoingWarms.inc(); + } + + public void decrementOngoingWarms() { + ongoingWarms.dec(); + } + + public void incrementTotalWarmFailureCount() { + totalWarmFailureCount.inc(); + } + + public void addTotalWarmTimeMillis(long time) { + totalWarmTimeMillis.inc(time); + } + + public void addTotalUploadTimeMillis(long time) { + totalUploadTimeMillis.inc(time); + } + + public void addTotalDownloadTimeMillis(long time) { + totalDownloadTimeMillis.inc(time); + } + + public void 
addTotalBytesUploaded(long bytes) { + totalBytesUploaded.inc(bytes); + } + + public void addTotalBytesDownloaded(long bytes) { + totalBytesDownloaded.inc(bytes); + } + + public MergedSegmentWarmerStats stats() { + final MergedSegmentWarmerStats stats = new MergedSegmentWarmerStats(); + stats.add( + totalWarmInvocationsCount.count(), + totalWarmTimeMillis.count(), + totalWarmFailureCount.count(), + totalBytesUploaded.count(), + totalBytesDownloaded.count(), + totalUploadTimeMillis.count(), + totalDownloadTimeMillis.count(), + ongoingWarms.count() + ); + return stats; + } +} diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java new file mode 100644 index 0000000000000..16b5a23c62b0e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java @@ -0,0 +1,191 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. 
See + * GitHub history for details. + */ + +package org.opensearch.index.merge; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Stores stats about merged segment warming: warm invocations, failures, ongoing warms, and upload/download bytes and timings + * + * @opensearch.experimental + */ +@ExperimentalApi +public class MergedSegmentWarmerStats implements Writeable, ToXContentFragment { + private long totalWarmInvocationsCount; + private long totalWarmTimeMillis; + private long totalWarmFailureCount; + private long totalBytesUploaded; + private long totalBytesDownloaded; + private long totalUploadTimeMillis; + private long totalDownloadTimeMillis; + private long ongoingWarms; + + public MergedSegmentWarmerStats() {} + + public MergedSegmentWarmerStats(StreamInput in) throws IOException { + totalWarmInvocationsCount = in.readVLong(); + totalWarmTimeMillis = in.readVLong(); + totalWarmFailureCount = in.readVLong(); + totalBytesUploaded = in.readVLong(); + totalBytesDownloaded = in.readVLong(); + totalUploadTimeMillis = in.readVLong(); + totalDownloadTimeMillis = in.readVLong(); + ongoingWarms = in.readVLong(); + } + + public synchronized void add( + long totalWarmInvocationsCount, + long totalWarmTimeMillis, + long totalWarmFailureCount, + long totalBytesUploaded, + long totalBytesDownloaded, + long totalUploadTimeMillis, + long totalDownloadTimeMillis, + long ongoingWarms + ) { + this.totalWarmInvocationsCount += totalWarmInvocationsCount; + this.totalWarmTimeMillis += totalWarmTimeMillis; + this.totalWarmFailureCount += totalWarmFailureCount; + this.totalBytesUploaded += totalBytesUploaded; + this.totalBytesDownloaded += totalBytesDownloaded; + this.totalUploadTimeMillis
+= totalUploadTimeMillis; + this.totalDownloadTimeMillis += totalDownloadTimeMillis; + this.ongoingWarms += ongoingWarms; + } + + public void add(MergedSegmentWarmerStats mergedSegmentWarmerStats) { + if (mergedSegmentWarmerStats == null) { + return; + } + this.ongoingWarms += mergedSegmentWarmerStats.ongoingWarms; + + addTotals(mergedSegmentWarmerStats); + } + + public synchronized void addTotals(MergedSegmentWarmerStats mergedSegmentWarmerStats) { + if (mergedSegmentWarmerStats == null) { + return; + } + this.totalWarmInvocationsCount += mergedSegmentWarmerStats.totalWarmInvocationsCount; + this.totalWarmTimeMillis += mergedSegmentWarmerStats.totalWarmTimeMillis; + this.totalWarmFailureCount += mergedSegmentWarmerStats.totalWarmFailureCount; + this.totalBytesUploaded += mergedSegmentWarmerStats.totalBytesUploaded; + this.totalBytesDownloaded += mergedSegmentWarmerStats.totalBytesDownloaded; + this.totalUploadTimeMillis += mergedSegmentWarmerStats.totalUploadTimeMillis; + this.totalDownloadTimeMillis += mergedSegmentWarmerStats.totalDownloadTimeMillis; + } + + public long getTotalWarmInvocationsCount() { + return this.totalWarmInvocationsCount; + } + + public long getTotalWarmTimeMillis() { + return this.totalWarmTimeMillis; + } + + public long getOngoingWarms() { + return ongoingWarms; + } + + public long getTotalBytesDownloaded() { + return totalBytesDownloaded; + } + + public long getTotalBytesUploaded() { + return totalBytesUploaded; + } + + public long getTotalDownloadTimeMillis() { + return totalDownloadTimeMillis; + } + + public long getTotalWarmFailureCount() { + return totalWarmFailureCount; + } + + public long getTotalUploadTimeMillis() { + return totalUploadTimeMillis; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(Fields.MERGED_SEGMENT_WARMER); + builder.field(Fields.TOTAL_WARM_INVOCATIONS_COUNT, totalWarmInvocationsCount); + 
builder.field(Fields.TOTAL_WARM_TIME_MILLIS, totalWarmTimeMillis); + builder.field(Fields.TOTAL_WARM_FAILURE_COUNT, totalWarmFailureCount); + builder.humanReadableField(Fields.TOTAL_BYTES_UPLOADED, Fields.TOTAL_BYTES_UPLOADED, new ByteSizeValue(totalBytesUploaded)); + builder.humanReadableField(Fields.TOTAL_BYTES_DOWNLOADED, Fields.TOTAL_BYTES_DOWNLOADED, new ByteSizeValue(totalBytesDownloaded)); + builder.field(Fields.TOTAL_UPLOAD_TIME_MILLIS, totalUploadTimeMillis); + builder.field(Fields.TOTAL_DOWNLOAD_TIME_MILLIS, totalDownloadTimeMillis); + builder.field(Fields.ONGOING_WARMS, ongoingWarms); + builder.endObject(); + return builder; + } + + /** + * Fields used for merge statistics + * + * @opensearch.internal + */ + static final class Fields { + static final String MERGED_SEGMENT_WARMER = "merged_segment_warmer"; + static final String TOTAL_WARM_INVOCATIONS_COUNT = "total_warm_invocations_count"; + static final String TOTAL_WARM_TIME_MILLIS = "total_warm_time_millis"; + static final String TOTAL_WARM_FAILURE_COUNT = "total_warm_failure_count"; + static final String TOTAL_BYTES_UPLOADED = "total_bytes_uploaded"; + static final String TOTAL_BYTES_DOWNLOADED = "total_bytes_downloaded"; + static final String TOTAL_UPLOAD_TIME_MILLIS = "total_upload_time_millis"; + static final String TOTAL_DOWNLOAD_TIME_MILLIS = "total_download_time_millis"; + static final String ONGOING_WARMS = "ongoing_warms"; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(totalWarmInvocationsCount); + out.writeVLong(totalWarmTimeMillis); + out.writeVLong(totalWarmFailureCount); + out.writeVLong(totalBytesUploaded); + out.writeVLong(totalBytesDownloaded); + out.writeVLong(totalUploadTimeMillis); + out.writeVLong(totalDownloadTimeMillis); + out.writeVLong(ongoingWarms); + } +} diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 
0a365e4d756d8..86f9d39c84309 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -157,6 +157,8 @@ import org.opensearch.index.mapper.SourceToParse; import org.opensearch.index.mapper.Uid; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentReplicationTracker; +import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.remote.RemoteSegmentStats; @@ -389,6 +391,8 @@ Runnable getGlobalCheckpointSyncer() { private final MergedSegmentPublisher mergedSegmentPublisher; private final ReferencedSegmentsPublisher referencedSegmentsPublisher; private final Set pendingMergedSegmentCheckpoints = Sets.newConcurrentHashSet(); + private final MergedSegmentReplicationTracker mergedSegmentReplicationTracker; + @InternalApi public IndexShard( @@ -549,6 +553,7 @@ public boolean shouldCache(Query query) { this.clusterApplierService = clusterApplierService; this.mergedSegmentPublisher = mergedSegmentPublisher; this.referencedSegmentsPublisher = referencedSegmentsPublisher; + this.mergedSegmentReplicationTracker = new MergedSegmentReplicationTracker(shardId(), indexSettings); synchronized (this.refreshMutex) { if (shardLevelRefreshEnabled) { startRefreshTask(); @@ -1576,6 +1581,10 @@ public MergeStats mergeStats() { return mergeStats; } + public MergedSegmentWarmerStats mergedSegmentWarmerStats() { + return mergedSegmentReplicationTracker.stats(); + } + public SegmentsStats segmentStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments) { SegmentsStats segmentsStats = getEngine().segmentsStats(includeSegmentFileSizes, includeUnloadedSegments); segmentsStats.addBitsetMemoryInBytes(shardBitsetFilterCache.getMemorySizeInBytes()); @@ -2277,6 +2286,10 @@ public void resetToWriteableEngine() throws IOException, 
InterruptedException, T indexShardOperationPermits.blockOperations(30, TimeUnit.MINUTES, () -> { resetEngineToGlobalCheckpoint(); }); } + public MergedSegmentReplicationTracker mergedSegmentReplicationTracker() { + return mergedSegmentReplicationTracker; + } + /** * Wrapper for a non-closing reader * diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 667eb9edb65f7..69bd88ec38f89 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -126,6 +126,7 @@ import org.opensearch.index.mapper.IdFieldMapper; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.query.BaseQueryRewriteContext; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryRewriteContext; @@ -1400,6 +1401,7 @@ static class OldShardsStats implements IndexEventListener { final RefreshStats refreshStats = new RefreshStats(); final FlushStats flushStats = new FlushStats(); final RecoveryStats recoveryStats = new RecoveryStats(); + final MergedSegmentWarmerStats mergedSegmentWarmerStats = new MergedSegmentWarmerStats(); @Override public synchronized void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) { @@ -1412,6 +1414,7 @@ public synchronized void beforeIndexShardClosed(ShardId shardId, @Nullable Index refreshStats.addTotals(indexShard.refreshStats()); flushStats.addTotals(indexShard.flushStats()); recoveryStats.addTotals(indexShard.recoveryStats()); + mergedSegmentWarmerStats.addTotals(indexShard.mergedSegmentWarmerStats()); } } } diff --git a/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java b/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java index 
4c28c08d8061b..7ac773d99c203 100644 --- a/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java +++ b/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java @@ -52,6 +52,7 @@ import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.search.stats.SearchStats; @@ -280,6 +281,11 @@ public RecoveryStats getRecoveryStats() { return stats.getRecoveryStats(); } + @Nullable + public MergedSegmentWarmerStats getMergedSegmentWarmerStats() { + return stats.getMergedSegmentWarmer(); + } + @Override public void writeTo(StreamOutput out) throws IOException { stats.writeTo(out); diff --git a/server/src/main/java/org/opensearch/indices/replication/AbstractSegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/AbstractSegmentReplicationTarget.java index 5a21b10d04c06..5f9cdbb76d2d1 100644 --- a/server/src/main/java/org/opensearch/indices/replication/AbstractSegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/AbstractSegmentReplicationTarget.java @@ -282,6 +282,11 @@ private boolean validateLocalChecksum(StoreFileMetadata file) { } } + protected void updateMergedSegmentFileRecoveryBytes(String fileName, long bytesRecovered) { + indexShard.mergedSegmentReplicationTracker().addTotalBytesDownloaded(bytesRecovered); + updateFileRecoveryBytes(fileName, bytesRecovered); + } + /** * Updates the state to reflect recovery progress for the given file and * updates the last access time for the target. 
diff --git a/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java index ead4007a1b62a..7252d198f04d3 100644 --- a/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java @@ -55,7 +55,14 @@ protected void getFilesFromSource( List filesToFetch, StepListener getFilesListener ) { - source.getMergedSegmentFiles(getId(), checkpoint, filesToFetch, indexShard, this::updateFileRecoveryBytes, getFilesListener); + source.getMergedSegmentFiles( + getId(), + checkpoint, + filesToFetch, + indexShard, + this::updateMergedSegmentFileRecoveryBytes, + getFilesListener + ); } @Override diff --git a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java index 2aedf9534abe3..e538dc657cc63 100644 --- a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java @@ -180,13 +180,18 @@ public void getMergedSegmentFiles( CountDownLatch latch = new CountDownLatch(1); indexShard.getFileDownloader() - .downloadAsync(cancellableThreads, remoteDirectory, storeDirectory, toDownloadSegmentNames, ActionListener.wrap(r -> { - latch.countDown(); - notifyOnceListener.onResponse(new GetSegmentFilesResponse(filesToFetch)); - }, e -> { - latch.countDown(); - notifyOnceListener.onFailure(e); - })); + .downloadAsync(cancellableThreads, + remoteDirectory, + new ReplicationStatsDirectoryWrapper(storeDirectory, fileProgressTracker), + toDownloadSegmentNames, + ActionListener.wrap(r -> { + latch.countDown(); + notifyOnceListener.onResponse(new GetSegmentFilesResponse(filesToFetch)); + }, e -> { + 
latch.countDown(); + notifyOnceListener.onFailure(e); + }) + ); try { if (latch.await( indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout().millis(), diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java index da5560875db3f..8e54efdf1c3d2 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java @@ -83,8 +83,10 @@ public RemoteStorePublishMergedSegmentAction( protected void doReplicaOperation(RemoteStorePublishMergedSegmentRequest shardRequest, IndexShard replica) { RemoteStoreMergedSegmentCheckpoint checkpoint = shardRequest.getMergedSegment(); if (checkpoint.getShardId().equals(replica.shardId())) { + long startTime = System.currentTimeMillis(); replica.getRemoteDirectory().markMergedSegmentsPendingDownload(checkpoint.getLocalToRemoteSegmentFilenameMap()); replicationService.onNewMergedSegmentCheckpoint(checkpoint, replica); + replica.mergedSegmentReplicationTracker().addTotalDownloadTimeMillis(System.currentTimeMillis() - startTime); } else { logger.warn( () -> new ParameterizedMessage( @@ -114,6 +116,7 @@ public final void publish(IndexShard indexShard, MergedSegmentCheckpoint checkpo long elapsedTimeMillis = endTimeMillis - startTimeMillis; long timeoutMillis = indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout().millis(); long timeLeftMillis = Math.max(0, timeoutMillis - elapsedTimeMillis); + indexShard.mergedSegmentReplicationTracker().addTotalUploadTimeMillis(elapsedTimeMillis); if (timeLeftMillis > 0) { RemoteStoreMergedSegmentCheckpoint remoteStoreMergedSegmentCheckpoint = new RemoteStoreMergedSegmentCheckpoint( @@ -167,6 +170,7 @@ public void beforeUpload(String 
file) {} @Override public void onSuccess(String file) { localToRemoteStoreFilenames.put(file, indexShard.getRemoteDirectory().getExistingRemoteFilename(file)); + indexShard.mergedSegmentReplicationTracker().addTotalBytesUploaded(checkpoint.getMetadataMap().get(file).length()); } @Override diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java index 6765423ffa65e..2a9d07917230f 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java @@ -591,6 +591,55 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa ); table.addCell("pri.merges.total_time", "default:false;text-align:right;desc:time spent in merges"); + table.addCell( + "merged_segment_warmer.total_warm_invocations", + "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" + ); + table.addCell("pri.merged_segment_warmer.total_warm_invocations", "default:false;text-align:right;desc:total invocations of merged segment warmer"); + + table.addCell( + "merged_segment_warmer.total_warm_time_millis", + "alias:mswtwtm,mergedSegmentWarmerTotalWarmTimeMillis;default:false;text-align:right;desc:total time spent warming merged segments in millis" + ); + table.addCell("pri.merged_segment_warmer.total_warm_time_millis", "default:false;text-align:right;desc:total time spent warming merged segments in millis"); + + table.addCell( + "merged_segment_warmer.ongoing_warms", + "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:current merged segment warms in progress" + ); + table.addCell("pri.merged_segment_warmer.ongoing_warms", "default:false;text-align:right;desc:current merged segment warms in progress"); + + table.addCell( + "merged_segment_warmer.total_bytes_downloaded", + "alias:mswtbd,mergedSegmentWarmerTotalBytesDownloaded;default:false;text-align:right;desc:total bytes downloaded for merged segment warming" + ); + table.addCell("pri.merged_segment_warmer.total_bytes_downloaded", "default:false;text-align:right;desc:total bytes downloaded for merged segment warming"); + +
table.addCell( + "merged_segment_warmer.total_bytes_uploaded", + "alias:mswtbu,mergedSegmentWarmerTotalBytesUploaded;default:false;text-align:right;desc:total bytes uploaded for merged segment warming" + ); + table.addCell("pri.merged_segment_warmer.total_bytes_uploaded", "default:false;text-align:right;desc:total bytes uploaded for merged segment warming"); + + table.addCell( + "merged_segment_warmer.total_download_time_millis", + "alias:mswtdtm,mergedSegmentWarmerTotalDownloadTimeMillis;default:false;text-align:right;desc:total time spent downloading merged segments in millis" + ); + table.addCell("pri.merged_segment_warmer.total_download_time_millis", "default:false;text-align:right;desc:total time spent downloading merged segments in millis"); + + table.addCell( + "merged_segment_warmer.total_warm_failure_count", + "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:total failed merged segment warms" + ); + table.addCell("pri.merged_segment_warmer.total_warm_failure_count", "default:false;text-align:right;desc:total failed merged segment warms"); + + table.addCell( + "merged_segment_warmer.total_upload_time_millis", + "alias:mswtutm,mergedSegmentWarmerTotalUploadTimeMillis;default:false;text-align:right;desc:total time spent uploading merged segments in millis" + ); + table.addCell("pri.merged_segment_warmer.total_upload_time_millis", "default:false;text-align:right;desc:total time spent uploading merged segments in millis"); + + table.addCell("refresh.total", "sibling:pri;alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); table.addCell("pri.refresh.total", "default:false;text-align:right;desc:total refreshes"); @@ -994,6 +1043,56 @@ protected Table buildTable( table.addCell(totalStats.getMerge() == null ? null : totalStats.getMerge().getTotalTime()); table.addCell(primaryStats.getMerge() == null ? null : primaryStats.getMerge().getTotalTime()); + table.addCell( + totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalWarmInvocationsCount() + ); + table.addCell( + primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalWarmInvocationsCount() + ); + + table.addCell( + totalStats.getMergedSegmentWarmer() == null ?
null : totalStats.getMergedSegmentWarmer().getTotalWarmTimeMillis() + ); + table.addCell( + primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalWarmTimeMillis() + ); + + table.addCell(totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getOngoingWarms()); + table.addCell(primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getOngoingWarms()); + + table.addCell( + totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalBytesDownloaded() + ); + table.addCell( + primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalBytesDownloaded() + ); + + table.addCell(totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalBytesUploaded()); + table.addCell( + primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalBytesUploaded() + ); + + table.addCell( + totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalDownloadTimeMillis() + ); + table.addCell( + primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalDownloadTimeMillis() + ); + + table.addCell( + totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalWarmFailureCount() + ); + table.addCell( + primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalWarmFailureCount() + ); + + table.addCell( + totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalUploadTimeMillis() + ); + table.addCell( + primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalUploadTimeMillis() + ); + table.addCell(totalStats.getRefresh() == null ? 
null : totalStats.getRefresh().getTotal()); table.addCell(primaryStats.getRefresh() == null ? null : primaryStats.getRefresh().getTotal()); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java index 428ba17aae5a7..d303956db9e2d 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java @@ -57,6 +57,7 @@ import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.search.stats.SearchStats; import org.opensearch.index.shard.IndexingStats; @@ -274,6 +275,39 @@ protected Table getTableWithHeader(final RestRequest request) { table.addCell("merges.total_size", "alias:mts,mergesTotalSize;default:false;text-align:right;desc:size merged"); table.addCell("merges.total_time", "alias:mtt,mergesTotalTime;default:false;text-align:right;desc:time spent in merges"); + table.addCell( + "merged_segment_warmer.total_warm_invocations", + "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" + ); + table.addCell( + "merged_segment_warmer.total_warm_time_millis", + "alias:mswtwtm,mergedSegmentWarmerTotalWarmTimeMillis;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.ongoing_warms", + "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_bytes_downloaded", + "alias:mswtbd,mergedSegmentWarmerTotalBytesDownloaded;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_bytes_uploaded", + 
"alias:mswtbu,mergedSegmentWarmerTotalBytesUploaded;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_download_time_millis", + "alias:mswtdtm,mergedSegmentWarmerTotalDownloadTimeMillis;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_warm_failure_count", + "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_upload_time_millis", + "alias:mswtutm,mergedSegmentWarmerTotalUploadTimeMillis;default:false;text-align:right;desc:UPDATE" + ); + table.addCell("refresh.total", "alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); table.addCell("refresh.time", "alias:rti,refreshTime;default:false;text-align:right;desc:time spent in refreshes"); table.addCell("refresh.external_total", "alias:rto,refreshTotal;default:false;text-align:right;desc:total external refreshes"); @@ -548,6 +582,16 @@ Table buildTable( table.addCell(mergeStats == null ? null : mergeStats.getTotalSize()); table.addCell(mergeStats == null ? null : mergeStats.getTotalTime()); + MergedSegmentWarmerStats mergedSegmentWarmerStats = indicesStats == null ? null : indicesStats.getMergedSegmentWarmerStats(); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmInvocationsCount()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmTimeMillis()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getOngoingWarms()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalBytesDownloaded()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalBytesUploaded()); + table.addCell(mergedSegmentWarmerStats == null ? 
null : mergedSegmentWarmerStats.getTotalDownloadTimeMillis()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmFailureCount()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalUploadTimeMillis()); + RefreshStats refreshStats = indicesStats == null ? null : indicesStats.getRefresh(); table.addCell(refreshStats == null ? null : refreshStats.getTotal()); table.addCell(refreshStats == null ? null : refreshStats.getTotalTime()); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java index 51d11ce91c7d1..5649aca4cca25 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java @@ -54,6 +54,7 @@ import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.search.stats.SearchStats; import org.opensearch.index.seqno.SeqNoStats; @@ -214,6 +215,39 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa table.addCell("merges.total_size", "alias:mts,mergesTotalSize;default:false;text-align:right;desc:size merged"); table.addCell("merges.total_time", "alias:mtt,mergesTotalTime;default:false;text-align:right;desc:time spent in merges"); + table.addCell( + "merged_segment_warmer.total_warm_invocations", + "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" + ); + table.addCell( + "merged_segment_warmer.total_warm_time_millis", + "alias:mswtwtm,mergedSegmentWarmerTotalWarmTimeMillis;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + 
"merged_segment_warmer.ongoing_warms", + "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_bytes_downloaded", + "alias:mswtbd,mergedSegmentWarmerTotalBytesDownloaded;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_bytes_uploaded", + "alias:mswtbu,mergedSegmentWarmerTotalBytesUploaded;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_download_time_millis", + "alias:mswtdtm,mergedSegmentWarmerTotalDownloadTimeMillis;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_warm_failure_count", + "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:UPDATE" + ); + table.addCell( + "merged_segment_warmer.total_upload_time_millis", + "alias:mswtutm,mergedSegmentWarmerTotalUploadTimeMillis;default:false;text-align:right;desc:UPDATE" + ); + table.addCell("refresh.total", "alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); table.addCell("refresh.time", "alias:rti,refreshTime;default:false;text-align:right;desc:time spent in refreshes"); table.addCell("refresh.external_total", "alias:rto,refreshTotal;default:false;text-align:right;desc:total external refreshes"); @@ -450,6 +484,19 @@ Table buildTable( table.addCell(getOrNull(commonStats, CommonStats::getMerge, MergeStats::getTotalSize)); table.addCell(getOrNull(commonStats, CommonStats::getMerge, MergeStats::getTotalTime)); + table.addCell( + getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalWarmInvocationsCount) + ); + table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalWarmTimeMillis)); + table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getOngoingWarms)); + 
table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalBytesDownloaded)); + table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalBytesUploaded)); + table.addCell( + getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalDownloadTimeMillis) + ); + table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalWarmFailureCount)); + table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalUploadTimeMillis)); + table.addCell(getOrNull(commonStats, CommonStats::getRefresh, RefreshStats::getTotal)); table.addCell(getOrNull(commonStats, CommonStats::getRefresh, RefreshStats::getTotalTime)); table.addCell(getOrNull(commonStats, CommonStats::getRefresh, RefreshStats::getExternalTotal)); From 22ce92b497420d0843bef77c673042c500ba133e Mon Sep 17 00:00:00 2001 From: kh3ra Date: Mon, 11 Aug 2025 11:10:33 +0530 Subject: [PATCH 02/30] Adding unit tests Signed-off-by: kh3ra Signed-off-by: Aditya Khera --- .../MergedSegmentReplicationTrackerTests.java | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java diff --git a/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java b/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java new file mode 100644 index 0000000000000..54971ed597ad5 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java @@ -0,0 +1,87 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.merge; + +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.IndexSettings; +import org.opensearch.test.OpenSearchTestCase; + +import static org.opensearch.common.settings.Settings.builder; + +public class MergedSegmentReplicationTrackerTests extends OpenSearchTestCase { + + private MergedSegmentReplicationTracker tracker; + + @Override + public void setUp() throws Exception { + super.setUp(); + ShardId shardId = new ShardId("test", "uuid", 0); + IndexSettings indexSettings = new IndexSettings( + newIndexMeta("test", builder().build()), + builder().build() + ); + tracker = new MergedSegmentReplicationTracker(shardId, indexSettings); + } + + public void testInitialStats() { + MergedSegmentWarmerStats stats = tracker.stats(); + assertEquals(0, stats.getTotalWarmInvocationsCount()); + assertEquals(0, stats.getTotalWarmTimeMillis()); + assertEquals(0, stats.getTotalWarmFailureCount()); + assertEquals(0, stats.getTotalBytesUploaded()); + assertEquals(0, stats.getTotalBytesDownloaded()); + assertEquals(0, stats.getTotalUploadTimeMillis()); + assertEquals(0, stats.getTotalDownloadTimeMillis()); + assertEquals(0, stats.getOngoingWarms()); + } + + public void testIncrementCounters() { + tracker.incrementTotalWarmInvocationsCount(); + tracker.incrementTotalWarmFailureCount(); + tracker.incrementTotalRejectedWarms(); + + MergedSegmentWarmerStats stats = tracker.stats(); + assertEquals(1, stats.getTotalWarmInvocationsCount()); + assertEquals(1, stats.getTotalWarmFailureCount()); + } + + public void testOngoingWarms() { + tracker.incrementOngoingWarms(); + tracker.incrementOngoingWarms(); + assertEquals(2, tracker.stats().getOngoingWarms()); + + tracker.decrementOngoingWarms(); + assertEquals(1, tracker.stats().getOngoingWarms()); + } + + public void testAddTimeAndBytes() { + tracker.addTotalWarmTimeMillis(100); + tracker.addTotalUploadTimeMillis(200); + tracker.addTotalDownloadTimeMillis(300); + 
tracker.addTotalBytesUploaded(1024); + tracker.addTotalBytesDownloaded(2048); + + MergedSegmentWarmerStats stats = tracker.stats(); + assertEquals(100, stats.getTotalWarmTimeMillis()); + assertEquals(200, stats.getTotalUploadTimeMillis()); + assertEquals(300, stats.getTotalDownloadTimeMillis()); + assertEquals(1024, stats.getTotalBytesUploaded()); + assertEquals(2048, stats.getTotalBytesDownloaded()); + } + + public void testAccumulativeStats() { + tracker.addTotalWarmTimeMillis(100); + tracker.addTotalWarmTimeMillis(50); + assertEquals(150, tracker.stats().getTotalWarmTimeMillis()); + + tracker.addTotalBytesUploaded(1000); + tracker.addTotalBytesUploaded(500); + assertEquals(1500, tracker.stats().getTotalBytesUploaded()); + } +} \ No newline at end of file From fed329e82572561bdb87c81f8c9990165e5aac99 Mon Sep 17 00:00:00 2001 From: kh3ra Date: Mon, 11 Aug 2025 19:45:24 +0530 Subject: [PATCH 03/30] SpotlessApply Signed-off-by: kh3ra Signed-off-by: Aditya Khera --- .../org/opensearch/index/shard/IndexShard.java | 1 - .../replication/RemoteStoreReplicationSource.java | 5 +++-- .../rest/action/cat/RestIndicesAction.java | 1 - .../MergedSegmentReplicationTrackerTests.java | 15 ++++++--------- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 86f9d39c84309..ece00b4940063 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -393,7 +393,6 @@ Runnable getGlobalCheckpointSyncer() { private final Set pendingMergedSegmentCheckpoints = Sets.newConcurrentHashSet(); private final MergedSegmentReplicationTracker mergedSegmentReplicationTracker; - @InternalApi public IndexShard( final ShardRouting shardRouting, diff --git a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java 
b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java index e538dc657cc63..917f9ae77d207 100644 --- a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java @@ -174,13 +174,14 @@ public void getMergedSegmentFiles( assert checkpoint instanceof RemoteStoreMergedSegmentCheckpoint; final Directory storeDirectory = indexShard.store().directory(); - ActionListener notifyOnceListener = ActionListener.notifyOnce(listener); + ActionListener notifyOnceListener = ActionLisserver/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.javatener.notifyOnce(listener); List toDownloadSegmentNames = filesToFetch.stream().map(StoreFileMetadata::name).toList(); CountDownLatch latch = new CountDownLatch(1); indexShard.getFileDownloader() - .downloadAsync(cancellableThreads, + .downloadAsync( + cancellableThreads, remoteDirectory, new ReplicationStatsDirectoryWrapper(storeDirectory, fileProgressTracker), toDownloadSegmentNames, diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java index 2a9d07917230f..8fa24854900c5 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java @@ -639,7 +639,6 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa ); table.addCell("pri.merged_segment_warmer.total_upload_time_millis", "default:false;text-align:right;desc:UPDATE"); - table.addCell("refresh.total", "sibling:pri;alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); table.addCell("pri.refresh.total", "default:false;text-align:right;desc:total refreshes"); diff --git 
a/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java b/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java index 54971ed597ad5..51cb423b03662 100644 --- a/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java @@ -22,10 +22,7 @@ public class MergedSegmentReplicationTrackerTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); ShardId shardId = new ShardId("test", "uuid", 0); - IndexSettings indexSettings = new IndexSettings( - newIndexMeta("test", builder().build()), - builder().build() - ); + IndexSettings indexSettings = new IndexSettings(newIndexMeta("test", builder().build()), builder().build()); tracker = new MergedSegmentReplicationTracker(shardId, indexSettings); } @@ -45,7 +42,7 @@ public void testIncrementCounters() { tracker.incrementTotalWarmInvocationsCount(); tracker.incrementTotalWarmFailureCount(); tracker.incrementTotalRejectedWarms(); - + MergedSegmentWarmerStats stats = tracker.stats(); assertEquals(1, stats.getTotalWarmInvocationsCount()); assertEquals(1, stats.getTotalWarmFailureCount()); @@ -55,7 +52,7 @@ public void testOngoingWarms() { tracker.incrementOngoingWarms(); tracker.incrementOngoingWarms(); assertEquals(2, tracker.stats().getOngoingWarms()); - + tracker.decrementOngoingWarms(); assertEquals(1, tracker.stats().getOngoingWarms()); } @@ -66,7 +63,7 @@ public void testAddTimeAndBytes() { tracker.addTotalDownloadTimeMillis(300); tracker.addTotalBytesUploaded(1024); tracker.addTotalBytesDownloaded(2048); - + MergedSegmentWarmerStats stats = tracker.stats(); assertEquals(100, stats.getTotalWarmTimeMillis()); assertEquals(200, stats.getTotalUploadTimeMillis()); @@ -79,9 +76,9 @@ public void testAccumulativeStats() { tracker.addTotalWarmTimeMillis(100); tracker.addTotalWarmTimeMillis(50); 
assertEquals(150, tracker.stats().getTotalWarmTimeMillis()); - + tracker.addTotalBytesUploaded(1000); tracker.addTotalBytesUploaded(500); assertEquals(1500, tracker.stats().getTotalBytesUploaded()); } -} \ No newline at end of file +} From 33d0e1da4eba920101877dd2b01ca8e4fda5d543 Mon Sep 17 00:00:00 2001 From: kh3ra Date: Tue, 12 Aug 2025 11:18:23 +0530 Subject: [PATCH 04/30] Minor fix Signed-off-by: kh3ra Signed-off-by: Aditya Khera --- .../indices/replication/RemoteStoreReplicationSource.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java index 917f9ae77d207..b657fa9bcb4e8 100644 --- a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java @@ -174,7 +174,7 @@ public void getMergedSegmentFiles( assert checkpoint instanceof RemoteStoreMergedSegmentCheckpoint; final Directory storeDirectory = indexShard.store().directory(); - ActionListener notifyOnceListener = ActionLisserver/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.javatener.notifyOnce(listener); + ActionListener notifyOnceListener = ActionListener.notifyOnce(listener); List toDownloadSegmentNames = filesToFetch.stream().map(StoreFileMetadata::name).toList(); From 0b160500828877c80c941a993efe2b82fef983d4 Mon Sep 17 00:00:00 2001 From: kh3ra Date: Wed, 13 Aug 2025 16:46:50 +0530 Subject: [PATCH 05/30] minor fix Signed-off-by: kh3ra Signed-off-by: Aditya Khera --- .../index/merge/MergedSegmentWarmerStats.java | 28 ++----------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java 
index 16b5a23c62b0e..f5c98b82fa843 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. 
- */ - package org.opensearch.index.merge; import org.opensearch.common.annotation.PublicApi; @@ -151,8 +127,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(Fields.TOTAL_WARM_INVOCATIONS_COUNT, totalWarmInvocationsCount); builder.field(Fields.TOTAL_WARM_TIME_MILLIS, totalWarmTimeMillis); builder.field(Fields.TOTAL_WARM_FAILURE_COUNT, totalWarmFailureCount); - builder.humanReadableField(Fields.TOTAL_BYTES_UPLOADED, Fields.TOTAL_BYTES_UPLOADED, new ByteSizeValue(totalBytesUploaded)); - builder.humanReadableField(Fields.TOTAL_BYTES_DOWNLOADED, Fields.TOTAL_BYTES_DOWNLOADED, new ByteSizeValue(totalBytesDownloaded)); + builder.field(Fields.TOTAL_BYTES_UPLOADED, new ByteSizeValue(totalBytesUploaded)); + builder.field(Fields.TOTAL_BYTES_DOWNLOADED, new ByteSizeValue(totalBytesDownloaded)); builder.field(Fields.TOTAL_UPLOAD_TIME_MILLIS, totalUploadTimeMillis); builder.field(Fields.TOTAL_DOWNLOAD_TIME_MILLIS, totalDownloadTimeMillis); builder.field(Fields.ONGOING_WARMS, ongoingWarms); From a9ae8d8ff8bae698035a33ac5e1b92cb9d8126be Mon Sep 17 00:00:00 2001 From: kh3ra Date: Fri, 29 Aug 2025 14:34:44 +0530 Subject: [PATCH 06/30] addressing review comments - tests to follow Signed-off-by: Aditya Khera --- .../admin/indices/stats/CommonStats.java | 28 +--- .../admin/indices/stats/CommonStatsFlags.java | 3 +- .../org/opensearch/index/engine/Engine.java | 7 + .../opensearch/index/engine/EngineConfig.java | 16 ++ .../index/engine/EngineConfigFactory.java | 5 +- .../index/engine/InternalEngine.java | 11 +- .../index/engine/MergedSegmentWarmer.java | 19 +-- .../index/engine/NRTReplicationEngine.java | 9 ++ .../OpenSearchConcurrentMergeScheduler.java | 12 +- .../opensearch/index/merge/MergeStats.java | 36 ++++- ...java => MergedSegmentTransferTracker.java} | 4 +- .../index/merge/MergedSegmentWarmerStats.java | 89 +++++++---- .../opensearch/index/shard/IndexShard.java | 18 +-- .../opensearch/indices/IndicesService.java | 1 - 
.../opensearch/indices/NodeIndicesStats.java | 6 - .../AbstractSegmentReplicationTarget.java | 5 - .../MergedSegmentReplicationTarget.java | 5 + .../AbstractPublishCheckpointAction.java | 5 +- ...RemoteStorePublishMergedSegmentAction.java | 7 +- .../rest/action/cat/RestIndicesAction.java | 138 +++++++++--------- .../rest/action/cat/RestNodesAction.java | 42 +++--- .../rest/action/cat/RestShardsAction.java | 90 +++++++++--- ...=> MergedSegmentTransferTrackerTests.java} | 33 +++-- 23 files changed, 356 insertions(+), 233 deletions(-) rename server/src/main/java/org/opensearch/index/merge/{MergedSegmentReplicationTracker.java => MergedSegmentTransferTracker.java} (94%) rename server/src/test/java/org/opensearch/index/merge/{MergedSegmentReplicationTrackerTests.java => MergedSegmentTransferTrackerTests.java} (66%) diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStats.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStats.java index 1eb6a0a42009e..8bfeb13b253c3 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStats.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStats.java @@ -49,7 +49,6 @@ import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; import org.opensearch.index.merge.MergeStats; -import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.search.stats.SearchStats; @@ -93,9 +92,6 @@ public class CommonStats implements Writeable, ToXContentFragment { @Nullable public MergeStats merge; - @Nullable - public MergedSegmentWarmerStats mergedSegmentWarmerStats; - @Nullable public RefreshStats refresh; @@ -183,9 +179,6 @@ public CommonStats(CommonStatsFlags flags) { case Recovery: recoveryStats = new RecoveryStats(); break; - case MergedSegmentWarmer: - mergedSegmentWarmerStats = new 
MergedSegmentWarmerStats(); - break; default: throw new IllegalStateException("Unknown Flag: " + flag); } @@ -245,9 +238,6 @@ public CommonStats(IndicesQueryCache indicesQueryCache, IndexShard indexShard, C case Recovery: recoveryStats = indexShard.recoveryStats(); break; - case MergedSegmentWarmer: - mergedSegmentWarmerStats = indexShard.mergedSegmentWarmerStats(); - break; default: throw new IllegalStateException("Unknown Flag: " + flag); } @@ -274,7 +264,6 @@ public CommonStats(StreamInput in) throws IOException { translog = in.readOptionalWriteable(TranslogStats::new); requestCache = in.readOptionalWriteable(RequestCacheStats::new); recoveryStats = in.readOptionalWriteable(RecoveryStats::new); - mergedSegmentWarmerStats = in.readOptionalWriteable(MergedSegmentWarmerStats::new); } @Override @@ -295,7 +284,6 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(translog); out.writeOptionalWriteable(requestCache); out.writeOptionalWriteable(recoveryStats); - out.writeOptionalWriteable(mergedSegmentWarmerStats); } public void add(CommonStats stats) { @@ -428,14 +416,6 @@ public void add(CommonStats stats) { } else { recoveryStats.add(stats.getRecoveryStats()); } - if (mergedSegmentWarmerStats == null) { - if (stats.getMergedSegmentWarmer() != null) { - mergedSegmentWarmerStats = new MergedSegmentWarmerStats(); - mergedSegmentWarmerStats.add(stats.getMergedSegmentWarmer()); - } - } else { - mergedSegmentWarmerStats.add(stats.getMergedSegmentWarmer()); - } } @Nullable @@ -518,11 +498,6 @@ public RecoveryStats getRecoveryStats() { return recoveryStats; } - @Nullable - public MergedSegmentWarmerStats getMergedSegmentWarmer() { - return mergedSegmentWarmerStats; - } - /** * Utility method which computes total memory by adding * FieldData, PercolatorCache, Segments (index writer, version map) @@ -562,8 +537,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws segments, translog, requestCache, - 
recoveryStats, - mergedSegmentWarmerStats } + recoveryStats } ).filter(Objects::nonNull); for (ToXContent toXContent : ((Iterable) stream::iterator)) { toXContent.toXContent(builder, params); diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java index 30dbf481ed27d..03fb55323feec 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java @@ -332,8 +332,7 @@ public enum Flag { Translog("translog", 13), // 14 was previously used for Suggest RequestCache("request_cache", 15), - Recovery("recovery", 16), - MergedSegmentWarmer("merged_segment_warmer", 17); + Recovery("recovery", 16); private final String restName; private final int index; diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index ebe7791aede8f..f70ec75485393 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -88,6 +88,7 @@ import org.opensearch.index.mapper.SourceToParse; import org.opensearch.index.mapper.Uid; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.index.seqno.SeqNoStats; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.DocsStats; @@ -213,9 +214,15 @@ public GatedCloseable getSegmentInfosSnapshot() { } public MergeStats getMergeStats() { + logger.info(getClass().getSimpleName() + " | getMergeStats called"); + return new MergeStats(); } + public MergedSegmentTransferTracker getMergedSegmentTransferTracker() { + return engineConfig.getMergedSegmentTransferTracker(); + } + /** returns the history uuid for the engine */ public abstract String 
getHistoryUUID(); diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index cb4b6ec6161d0..00f6d7e670d61 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -57,6 +57,7 @@ import org.opensearch.index.codec.CodecSettings; import org.opensearch.index.mapper.DocumentMapperForType; import org.opensearch.index.mapper.ParsedDocument; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.index.seqno.RetentionLeases; import org.opensearch.index.store.Store; import org.opensearch.index.translog.InternalTranslogFactory; @@ -115,6 +116,7 @@ public final class EngineConfig { private final Comparator leafSorter; private final Supplier documentMapperForTypeSupplier; private final ClusterApplierService clusterApplierService; + private final MergedSegmentTransferTracker mergedSegmentTransferTracker; /** * A supplier of the outstanding retention leases. This is used during merged operations to determine which operations that have been @@ -306,6 +308,7 @@ private EngineConfig(Builder builder) { this.documentMapperForTypeSupplier = builder.documentMapperForTypeSupplier; this.indexReaderWarmer = builder.indexReaderWarmer; this.clusterApplierService = builder.clusterApplierService; + this.mergedSegmentTransferTracker = builder.mergedSegmentTransferTracker; } /** @@ -625,6 +628,13 @@ public ClusterApplierService getClusterApplierService() { return this.clusterApplierService; } + /** + * Returns the MergedSegmentTransferTracker instance. 
+ */ + public MergedSegmentTransferTracker getMergedSegmentTransferTracker() { + return this.mergedSegmentTransferTracker; + } + /** * Builder for EngineConfig class * @@ -662,6 +672,7 @@ public static class Builder { Comparator leafSorter; private IndexWriter.IndexReaderWarmer indexReaderWarmer; private ClusterApplierService clusterApplierService; + private MergedSegmentTransferTracker mergedSegmentTransferTracker; public Builder shardId(ShardId shardId) { this.shardId = shardId; @@ -813,6 +824,11 @@ public Builder clusterApplierService(ClusterApplierService clusterApplierService return this; } + public Builder mergedSegmentTransferTracker(MergedSegmentTransferTracker mergedSegmentTransferTracker) { + this.mergedSegmentTransferTracker = mergedSegmentTransferTracker; + return this; + } + public EngineConfig build() { return new EngineConfig(this); } diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java b/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java index 2240b6e76eaac..0bafb1bf788ed 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java @@ -29,6 +29,7 @@ import org.opensearch.index.codec.CodecServiceFactory; import org.opensearch.index.mapper.DocumentMapperForType; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.index.seqno.RetentionLeases; import org.opensearch.index.store.Store; import org.opensearch.index.translog.TranslogConfig; @@ -160,7 +161,8 @@ public EngineConfig newEngineConfig( Comparator leafSorter, Supplier documentMapperForTypeSupplier, IndexWriter.IndexReaderWarmer indexReaderWarmer, - ClusterApplierService clusterApplierService + ClusterApplierService clusterApplierService, + MergedSegmentTransferTracker mergedSegmentTransferTracker ) { CodecService codecServiceToUse = codecService; if 
(codecService == null && this.codecServiceFactory != null) { @@ -197,6 +199,7 @@ public EngineConfig newEngineConfig( .documentMapperForTypeSupplier(documentMapperForTypeSupplier) .indexReaderWarmer(indexReaderWarmer) .clusterApplierService(clusterApplierService) + .mergedSegmentTransferTracker(mergedSegmentTransferTracker) .build(); } diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index fcc81335d4363..e154c69fabf81 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -104,6 +104,7 @@ import org.opensearch.index.mapper.SourceFieldMapper; import org.opensearch.index.mapper.Uid; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.index.merge.OnGoingMerge; import org.opensearch.index.seqno.LocalCheckpointTracker; import org.opensearch.index.seqno.SeqNoStats; @@ -258,7 +259,11 @@ public TranslogManager translogManager() { boolean success = false; try { this.lastDeleteVersionPruneTimeMSec = engineConfig.getThreadPool().relativeTimeInMillis(); - mergeScheduler = scheduler = new EngineMergeScheduler(engineConfig.getShardId(), engineConfig.getIndexSettings()); + mergeScheduler = scheduler = new EngineMergeScheduler( + engineConfig.getShardId(), + engineConfig.getIndexSettings(), + getMergedSegmentTransferTracker() + ); throttle = new IndexThrottle(); try { store.trimUnsafeCommits(engineConfig.getTranslogConfig().getTranslogPath()); @@ -2475,8 +2480,8 @@ private final class EngineMergeScheduler extends OpenSearchConcurrentMergeSchedu private final AtomicInteger numMergesInFlight = new AtomicInteger(0); private final AtomicBoolean isThrottling = new AtomicBoolean(); - EngineMergeScheduler(ShardId shardId, IndexSettings indexSettings) { - super(shardId, indexSettings); + 
EngineMergeScheduler(ShardId shardId, IndexSettings indexSettings, MergedSegmentTransferTracker mergedSegmentTransferTracker) { + super(shardId, indexSettings, mergedSegmentTransferTracker); } @Override diff --git a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java index b8d669c46e2a0..a859b40716d12 100644 --- a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java +++ b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java @@ -16,7 +16,7 @@ import org.apache.lucene.index.SegmentReader; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.logging.Loggers; -import org.opensearch.index.merge.MergedSegmentReplicationTracker; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.transport.TransportService; @@ -34,7 +34,7 @@ public class MergedSegmentWarmer implements IndexWriter.IndexReaderWarmer { private final RecoverySettings recoverySettings; private final ClusterService clusterService; private final IndexShard indexShard; - private final MergedSegmentReplicationTracker mergedSegmentReplicationTracker; + private final MergedSegmentTransferTracker mergedSegmentTransferTracker; private final Logger logger; public MergedSegmentWarmer( @@ -47,17 +47,14 @@ public MergedSegmentWarmer( this.recoverySettings = recoverySettings; this.clusterService = clusterService; this.indexShard = indexShard; - this.mergedSegmentReplicationTracker = indexShard.mergedSegmentReplicationTracker(); + this.mergedSegmentTransferTracker = indexShard.mergedSegmentTransferTracker(); this.logger = Loggers.getLogger(getClass(), indexShard.shardId()); } @Override public void warm(LeafReader leafReader) throws IOException { - mergedSegmentReplicationTracker.incrementTotalWarmInvocationsCount(); - 
mergedSegmentReplicationTracker.incrementOngoingWarms(); - if (shouldWarm() == false) { - return; - } + mergedSegmentTransferTracker.incrementTotalWarmInvocationsCount(); + mergedSegmentTransferTracker.incrementOngoingWarms(); // IndexWriter.IndexReaderWarmer#warm is called by IndexWriter#mergeMiddle. The type of leafReader should be SegmentReader. assert leafReader instanceof SegmentReader; long startTime = System.currentTimeMillis(); @@ -81,10 +78,10 @@ public void warm(LeafReader leafReader) throws IOException { ); }); } catch (IOException e) { - mergedSegmentReplicationTracker.incrementTotalWarmFailureCount(); + mergedSegmentTransferTracker.incrementTotalWarmFailureCount(); } finally { - mergedSegmentReplicationTracker.addTotalWarmTimeMillis(elapsedTime); - mergedSegmentReplicationTracker.decrementOngoingWarms(); + mergedSegmentTransferTracker.addTotalWarmTimeMillis(elapsedTime); + mergedSegmentTransferTracker.decrementOngoingWarms(); } } diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 1fab651078cc4..f850778f633a9 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -21,6 +21,7 @@ import org.opensearch.common.util.concurrent.ReleasableLock; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.index.merge.MergeStats; import org.opensearch.index.seqno.LocalCheckpointTracker; import org.opensearch.index.seqno.SeqNoStats; import org.opensearch.index.seqno.SequenceNumbers; @@ -66,6 +67,7 @@ public class NRTReplicationEngine extends Engine { private final WriteOnlyTranslogManager translogManager; private final Lock flushLock = new ReentrantLock(); protected final ReplicaFileTracker replicaFileTracker; + private final MergeStats mergeStats; private volatile long 
lastReceivedPrimaryGen = SequenceNumbers.NO_OPS_PERFORMED; @@ -73,6 +75,7 @@ public class NRTReplicationEngine extends Engine { public NRTReplicationEngine(EngineConfig engineConfig) { super(engineConfig); + mergeStats = new MergeStats(); store.incRef(); NRTReplicationReaderManager readerManager = null; WriteOnlyTranslogManager translogManagerRef = null; @@ -499,6 +502,12 @@ public int fillSeqNoGaps(long primaryTerm) throws IOException { @Override public void maybePruneDeletes() {} + @Override + public MergeStats getMergeStats() { + this.mergeStats.add(engineConfig.getMergedSegmentTransferTracker().stats()); + return this.mergeStats; + } + @Override public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) {} diff --git a/server/src/main/java/org/opensearch/index/engine/OpenSearchConcurrentMergeScheduler.java b/server/src/main/java/org/opensearch/index/engine/OpenSearchConcurrentMergeScheduler.java index a9c46759b96f8..e79ca86daef04 100644 --- a/server/src/main/java/org/opensearch/index/engine/OpenSearchConcurrentMergeScheduler.java +++ b/server/src/main/java/org/opensearch/index/engine/OpenSearchConcurrentMergeScheduler.java @@ -47,6 +47,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.MergeSchedulerConfig; import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.index.merge.OnGoingMerge; import java.io.IOException; @@ -78,12 +79,18 @@ class OpenSearchConcurrentMergeScheduler extends ConcurrentMergeScheduler { private final Set onGoingMerges = ConcurrentCollections.newConcurrentSet(); private final Set readOnlyOnGoingMerges = Collections.unmodifiableSet(onGoingMerges); private final MergeSchedulerConfig config; + private final MergedSegmentTransferTracker mergedSegmentTransferTracker; - OpenSearchConcurrentMergeScheduler(ShardId shardId, IndexSettings indexSettings) { + OpenSearchConcurrentMergeScheduler( + ShardId shardId, + IndexSettings indexSettings, + 
MergedSegmentTransferTracker mergedSegmentTransferTracker + ) { this.config = indexSettings.getMergeSchedulerConfig(); this.shardId = shardId; this.indexSettings = indexSettings; this.logger = Loggers.getLogger(getClass(), shardId); + this.mergedSegmentTransferTracker = mergedSegmentTransferTracker; refreshConfig(); } @@ -211,7 +218,8 @@ MergeStats stats() { currentMergesSizeInBytes.count(), totalMergeStoppedTime.count(), totalMergeThrottledTime.count(), - config.isAutoThrottle() ? getIORateLimitMBPerSec() : Double.POSITIVE_INFINITY + config.isAutoThrottle() ? getIORateLimitMBPerSec() : Double.POSITIVE_INFINITY, + mergedSegmentTransferTracker.stats() ); return mergeStats; } diff --git a/server/src/main/java/org/opensearch/index/merge/MergeStats.java b/server/src/main/java/org/opensearch/index/merge/MergeStats.java index 7ecaed60735b4..7a6e0e940639f 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergeStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergeStats.java @@ -70,7 +70,11 @@ public class MergeStats implements Writeable, ToXContentFragment { private long unreferencedFileCleanUpsPerformed; - public MergeStats() {} + private final MergedSegmentWarmerStats warmerStats; + + public MergeStats() { + this.warmerStats = new MergedSegmentWarmerStats(); + } public MergeStats(StreamInput in) throws IOException { total = in.readVLong(); @@ -87,6 +91,11 @@ public MergeStats(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_11_0)) { unreferencedFileCleanUpsPerformed = in.readOptionalVLong(); } + if (in.getVersion().onOrAfter(Version.V_3_1_0)) { + this.warmerStats = new MergedSegmentWarmerStats(in); + } else { + this.warmerStats = null; + } } public void add( @@ -99,7 +108,8 @@ public void add( long currentSizeInBytes, long stoppedTimeMillis, long throttledTimeMillis, - double mbPerSecAutoThrottle + double mbPerSecAutoThrottle, + MergedSegmentWarmerStats mergedSegmentWarmerStats ) { this.total += totalMerges; 
this.totalTimeInMillis += totalMergeTime; @@ -116,6 +126,14 @@ public void add( } else { this.totalBytesPerSecAutoThrottle += bytesPerSecAutoThrottle; } + this.add(mergedSegmentWarmerStats); + } + + public void add(MergedSegmentWarmerStats warmerStats) { + if (this.getWarmerStats() == null) { + return; + } + this.getWarmerStats().add(warmerStats); } public void add(MergeStats mergeStats) { @@ -127,6 +145,9 @@ public void add(MergeStats mergeStats) { this.currentSizeInBytes += mergeStats.currentSizeInBytes; addTotals(mergeStats); + if (this.getWarmerStats() != null) { + this.getWarmerStats().add(mergeStats.getWarmerStats(), false); + } } public void addTotals(MergeStats mergeStats) { @@ -145,6 +166,9 @@ public void addTotals(MergeStats mergeStats) { } else { this.totalBytesPerSecAutoThrottle += mergeStats.totalBytesPerSecAutoThrottle; } + if (this.getWarmerStats() != null) { + this.getWarmerStats().addTotals(mergeStats.getWarmerStats()); + } } public void addUnreferencedFileCleanUpStats(long unreferencedFileCleanUpsPerformed) { @@ -239,6 +263,10 @@ public ByteSizeValue getCurrentSize() { return new ByteSizeValue(currentSizeInBytes); } + public MergedSegmentWarmerStats getWarmerStats() { + return warmerStats; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.MERGES); @@ -256,6 +284,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.field(Fields.TOTAL_THROTTLE_BYTES_PER_SEC_IN_BYTES, totalBytesPerSecAutoThrottle); builder.field(Fields.UNREFERENCED_FILE_CLEANUPS_PERFORMED, unreferencedFileCleanUpsPerformed); + getWarmerStats().toXContent(builder, params); builder.endObject(); return builder; } @@ -302,5 +331,8 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_11_0)) { out.writeOptionalVLong(unreferencedFileCleanUpsPerformed); } + if (out.getVersion().onOrAfter(Version.V_3_1_0)) { + 
getWarmerStats().writeTo(out); + } } } diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentReplicationTracker.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java similarity index 94% rename from server/src/main/java/org/opensearch/index/merge/MergedSegmentReplicationTracker.java rename to server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java index 2c33ed932a4c7..434864cc3e844 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentReplicationTracker.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java @@ -21,7 +21,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class MergedSegmentReplicationTracker extends AbstractIndexShardComponent { +public class MergedSegmentTransferTracker extends AbstractIndexShardComponent { private final CounterMetric totalWarmInvocationsCount = new CounterMetric(); private final CounterMetric totalWarmTimeMillis = new CounterMetric(); @@ -32,7 +32,7 @@ public class MergedSegmentReplicationTracker extends AbstractIndexShardComponent private final CounterMetric totalDownloadTimeMillis = new CounterMetric(); private final CounterMetric ongoingWarms = new CounterMetric(); - public MergedSegmentReplicationTracker(ShardId shardId, IndexSettings indexSettings) { + public MergedSegmentTransferTracker(ShardId shardId, IndexSettings indexSettings) { super(shardId, indexSettings); } diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java index f5c98b82fa843..a90fe8bb611dc 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java @@ -9,6 +9,7 @@ package org.opensearch.index.merge; import org.opensearch.common.annotation.PublicApi; +import 
org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; @@ -25,13 +26,29 @@ */ @PublicApi(since = "1.0.0") public class MergedSegmentWarmerStats implements Writeable, ToXContentFragment { + + // [PRIMARY SHARD] Number of times segment MergedSegmentWarmer.warm has been invoked private long totalWarmInvocationsCount; + + // [PRIMARY SHARD] Total time spent warming segments in milliseconds private long totalWarmTimeMillis; + + // [PRIMARY SHARD] Number of times segment warming has failed private long totalWarmFailureCount; - private long totalBytesUploaded; - private long totalBytesDownloaded; + + // [PRIMARY SHARD] Total bytes sent during segment warming + private long totalBytesSent; + + // [REPLICA SHARD] Total bytes received during segment warming + private long totalBytesReceived; + + // [PRIMARY SHARD] Total time spent uploading segments in milliseconds by a primary shard private long totalUploadTimeMillis; + + // [REPLICA SHARD] Total time spent downloading segments in milliseconds private long totalDownloadTimeMillis; + + // [PRIMARY SHARD] Current number of ongoing segment warming operations private long ongoingWarms; public MergedSegmentWarmerStats() {} @@ -40,8 +57,8 @@ public MergedSegmentWarmerStats(StreamInput in) throws IOException { totalWarmInvocationsCount = in.readVLong(); totalWarmTimeMillis = in.readVLong(); totalWarmFailureCount = in.readVLong(); - totalBytesUploaded = in.readVLong(); - totalBytesDownloaded = in.readVLong(); + totalBytesSent = in.readVLong(); + totalBytesReceived = in.readVLong(); totalUploadTimeMillis = in.readVLong(); totalDownloadTimeMillis = in.readVLong(); ongoingWarms = in.readVLong(); @@ -51,8 +68,8 @@ public synchronized void add( long totalWarmInvocationsCount, long totalWarmTimeMillis, long totalWarmFailureCount, - long totalBytesUploaded, - long totalBytesDownloaded, + 
long totalBytesSent, + long totalBytesReceived, long totalUploadTimeMillis, long totalDownloadTimeMillis, long ongoingWarms @@ -60,20 +77,26 @@ public synchronized void add( this.totalWarmInvocationsCount += totalWarmInvocationsCount; this.totalWarmTimeMillis += totalWarmTimeMillis; this.totalWarmFailureCount += totalWarmFailureCount; - this.totalBytesUploaded += totalBytesUploaded; - this.totalBytesDownloaded += totalBytesDownloaded; + this.totalBytesSent += totalBytesSent; + this.totalBytesReceived += totalBytesReceived; this.totalUploadTimeMillis += totalUploadTimeMillis; this.totalDownloadTimeMillis += totalDownloadTimeMillis; this.ongoingWarms += ongoingWarms; } public void add(MergedSegmentWarmerStats mergedSegmentWarmerStats) { + add(mergedSegmentWarmerStats, true); + } + + public void add(MergedSegmentWarmerStats mergedSegmentWarmerStats, boolean addTotals) { if (mergedSegmentWarmerStats == null) { return; } this.ongoingWarms += mergedSegmentWarmerStats.ongoingWarms; - addTotals(mergedSegmentWarmerStats); + if (addTotals) { + addTotals(mergedSegmentWarmerStats); + } } public synchronized void addTotals(MergedSegmentWarmerStats mergedSegmentWarmerStats) { @@ -83,8 +106,8 @@ public synchronized void addTotals(MergedSegmentWarmerStats mergedSegmentWarmerS this.totalWarmInvocationsCount += mergedSegmentWarmerStats.totalWarmInvocationsCount; this.totalWarmTimeMillis += mergedSegmentWarmerStats.totalWarmTimeMillis; this.totalWarmFailureCount += mergedSegmentWarmerStats.totalWarmFailureCount; - this.totalBytesUploaded += mergedSegmentWarmerStats.totalBytesUploaded; - this.totalBytesDownloaded += mergedSegmentWarmerStats.totalBytesDownloaded; + this.totalBytesSent += mergedSegmentWarmerStats.totalBytesSent; + this.totalBytesReceived += mergedSegmentWarmerStats.totalBytesReceived; this.totalUploadTimeMillis += mergedSegmentWarmerStats.totalUploadTimeMillis; this.totalDownloadTimeMillis += mergedSegmentWarmerStats.totalDownloadTimeMillis; } @@ -93,44 +116,44 @@ 
public long getTotalWarmInvocationsCount() { return this.totalWarmInvocationsCount; } - public long getTotalWarmTimeMillis() { - return this.totalWarmTimeMillis; + public TimeValue getTotalWarmTime() { + return new TimeValue(totalWarmTimeMillis); } public long getOngoingWarms() { return ongoingWarms; } - public long getTotalBytesDownloaded() { - return totalBytesDownloaded; + public ByteSizeValue getTotalReceivedSize() { + return new ByteSizeValue(totalBytesReceived); } - public long getTotalBytesUploaded() { - return totalBytesUploaded; + public ByteSizeValue getTotalSentSize() { + return new ByteSizeValue(totalBytesSent); } - public long getTotalDownloadTimeMillis() { - return totalDownloadTimeMillis; + public TimeValue getTotalDownloadTime() { + return new TimeValue(totalDownloadTimeMillis); } public long getTotalWarmFailureCount() { return totalWarmFailureCount; } - public long getTotalUploadTimeMillis() { - return totalUploadTimeMillis; + public TimeValue getTotalUploadTime() { + return new TimeValue(totalUploadTimeMillis); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.MERGED_SEGMENT_WARMER); builder.field(Fields.TOTAL_WARM_INVOCATIONS_COUNT, totalWarmInvocationsCount); - builder.field(Fields.TOTAL_WARM_TIME_MILLIS, totalWarmTimeMillis); + builder.humanReadableField(Fields.TOTAL_WARM_TIME_MILLIS, Fields.TOTAL_WARM_TIME, getTotalWarmTime()); builder.field(Fields.TOTAL_WARM_FAILURE_COUNT, totalWarmFailureCount); - builder.field(Fields.TOTAL_BYTES_UPLOADED, new ByteSizeValue(totalBytesUploaded)); - builder.field(Fields.TOTAL_BYTES_DOWNLOADED, new ByteSizeValue(totalBytesDownloaded)); - builder.field(Fields.TOTAL_UPLOAD_TIME_MILLIS, totalUploadTimeMillis); - builder.field(Fields.TOTAL_DOWNLOAD_TIME_MILLIS, totalDownloadTimeMillis); + builder.humanReadableField(Fields.TOTAL_BYTES_SENT, Fields.TOTAL_SENT_SIZE, getTotalSentSize()); + 
builder.humanReadableField(Fields.TOTAL_BYTES_RECEIVED, Fields.TOTAL_RECEIVED_SIZE, getTotalReceivedSize()); + builder.humanReadableField(Fields.TOTAL_UPLOAD_TIME_MILLIS, Fields.TOTAL_UPLOAD_TIME, totalUploadTimeMillis); + builder.humanReadableField(Fields.TOTAL_DOWNLOAD_TIME_MILLIS, Fields.TOTAL_DOWNLOAD_TIME, totalDownloadTimeMillis); builder.field(Fields.ONGOING_WARMS, ongoingWarms); builder.endObject(); return builder; @@ -146,11 +169,17 @@ static final class Fields { static final String TOTAL_WARM_INVOCATIONS_COUNT = "total_warm_invocations_count"; static final String TOTAL_WARM_TIME_MILLIS = "total_warm_time_millis"; static final String TOTAL_WARM_FAILURE_COUNT = "total_warm_failure_count"; - static final String TOTAL_BYTES_UPLOADED = "total_bytes_uploaded"; - static final String TOTAL_BYTES_DOWNLOADED = "total_bytes_downloaded"; + static final String TOTAL_BYTES_SENT = "total_bytes_sent"; + static final String TOTAL_BYTES_RECEIVED = "total_bytes_received"; static final String TOTAL_UPLOAD_TIME_MILLIS = "total_upload_time_millis"; static final String TOTAL_DOWNLOAD_TIME_MILLIS = "total_download_time_millis"; static final String ONGOING_WARMS = "ongoing_warms"; + + public static final String TOTAL_WARM_TIME = "total_warm_time"; + public static final String TOTAL_UPLOAD_TIME = "total_upload_time"; + public static final String TOTAL_DOWNLOAD_TIME = "total_download_time"; + public static final String TOTAL_SENT_SIZE = "total_sent_size"; + public static final String TOTAL_RECEIVED_SIZE = "total_received_size"; } @Override @@ -158,8 +187,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(totalWarmInvocationsCount); out.writeVLong(totalWarmTimeMillis); out.writeVLong(totalWarmFailureCount); - out.writeVLong(totalBytesUploaded); - out.writeVLong(totalBytesDownloaded); + out.writeVLong(totalBytesSent); + out.writeVLong(totalBytesReceived); out.writeVLong(totalUploadTimeMillis); out.writeVLong(totalDownloadTimeMillis); 
out.writeVLong(ongoingWarms); diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index ece00b4940063..48705a8b54465 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -157,8 +157,7 @@ import org.opensearch.index.mapper.SourceToParse; import org.opensearch.index.mapper.Uid; import org.opensearch.index.merge.MergeStats; -import org.opensearch.index.merge.MergedSegmentReplicationTracker; -import org.opensearch.index.merge.MergedSegmentWarmerStats; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.remote.RemoteSegmentStats; @@ -391,7 +390,7 @@ Runnable getGlobalCheckpointSyncer() { private final MergedSegmentPublisher mergedSegmentPublisher; private final ReferencedSegmentsPublisher referencedSegmentsPublisher; private final Set pendingMergedSegmentCheckpoints = Sets.newConcurrentHashSet(); - private final MergedSegmentReplicationTracker mergedSegmentReplicationTracker; + private final MergedSegmentTransferTracker mergedSegmentTransferTracker; @InternalApi public IndexShard( @@ -455,6 +454,7 @@ public IndexShard( indexSettings.isAssignedOnRemoteNode(), () -> getRemoteTranslogUploadBufferInterval(remoteStoreSettings::getClusterRemoteTranslogBufferInterval) ); + this.mergedSegmentTransferTracker = new MergedSegmentTransferTracker(shardId(), indexSettings); this.mapperService = mapperService; this.indexCache = indexCache; this.internalIndexingStats = new InternalIndexingStats(threadPool); @@ -552,7 +552,6 @@ public boolean shouldCache(Query query) { this.clusterApplierService = clusterApplierService; this.mergedSegmentPublisher = mergedSegmentPublisher; this.referencedSegmentsPublisher = referencedSegmentsPublisher; - 
this.mergedSegmentReplicationTracker = new MergedSegmentReplicationTracker(shardId(), indexSettings); synchronized (this.refreshMutex) { if (shardLevelRefreshEnabled) { startRefreshTask(); @@ -1580,10 +1579,6 @@ public MergeStats mergeStats() { return mergeStats; } - public MergedSegmentWarmerStats mergedSegmentWarmerStats() { - return mergedSegmentReplicationTracker.stats(); - } - public SegmentsStats segmentStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments) { SegmentsStats segmentsStats = getEngine().segmentsStats(includeSegmentFileSizes, includeUnloadedSegments); segmentsStats.addBitsetMemoryInBytes(shardBitsetFilterCache.getMemorySizeInBytes()); @@ -2285,8 +2280,8 @@ public void resetToWriteableEngine() throws IOException, InterruptedException, T indexShardOperationPermits.blockOperations(30, TimeUnit.MINUTES, () -> { resetEngineToGlobalCheckpoint(); }); } - public MergedSegmentReplicationTracker mergedSegmentReplicationTracker() { - return mergedSegmentReplicationTracker; + public MergedSegmentTransferTracker mergedSegmentTransferTracker() { + return mergedSegmentTransferTracker; } /** @@ -4334,7 +4329,8 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) thro // timeseries () -> docMapper(), mergedSegmentWarmerFactory.get(this), - clusterApplierService + clusterApplierService, + mergedSegmentTransferTracker ); } diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 69bd88ec38f89..3515020a2527d 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -1414,7 +1414,6 @@ public synchronized void beforeIndexShardClosed(ShardId shardId, @Nullable Index refreshStats.addTotals(indexShard.refreshStats()); flushStats.addTotals(indexShard.flushStats()); recoveryStats.addTotals(indexShard.recoveryStats()); - 
mergedSegmentWarmerStats.addTotals(indexShard.mergedSegmentWarmerStats()); } } } diff --git a/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java b/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java index 7ac773d99c203..4c28c08d8061b 100644 --- a/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java +++ b/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java @@ -52,7 +52,6 @@ import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; import org.opensearch.index.merge.MergeStats; -import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.search.stats.SearchStats; @@ -281,11 +280,6 @@ public RecoveryStats getRecoveryStats() { return stats.getRecoveryStats(); } - @Nullable - public MergedSegmentWarmerStats getMergedSegmentWarmerStats() { - return stats.getMergedSegmentWarmer(); - } - @Override public void writeTo(StreamOutput out) throws IOException { stats.writeTo(out); diff --git a/server/src/main/java/org/opensearch/indices/replication/AbstractSegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/AbstractSegmentReplicationTarget.java index 5f9cdbb76d2d1..5a21b10d04c06 100644 --- a/server/src/main/java/org/opensearch/indices/replication/AbstractSegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/AbstractSegmentReplicationTarget.java @@ -282,11 +282,6 @@ private boolean validateLocalChecksum(StoreFileMetadata file) { } } - protected void updateMergedSegmentFileRecoveryBytes(String fileName, long bytesRecovered) { - indexShard.mergedSegmentReplicationTracker().addTotalBytesDownloaded(bytesRecovered); - updateFileRecoveryBytes(fileName, bytesRecovered); - } - /** * Updates the state to reflect recovery progress for the given file and * updates the last access time for the target. 
diff --git a/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java index 7252d198f04d3..c087f420dcfe0 100644 --- a/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java @@ -76,4 +76,9 @@ protected void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse public MergedSegmentReplicationTarget retryCopy() { return new MergedSegmentReplicationTarget(indexShard, checkpoint, source, listener); } + + protected void updateMergedSegmentFileRecoveryBytes(String fileName, long bytesRecovered) { + indexShard.mergedSegmentTransferTracker().addTotalBytesDownloaded(bytesRecovered); + updateFileRecoveryBytes(fileName, bytesRecovered); + } } diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java index ddf7a1f61030c..be0232ad7f0e5 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java @@ -206,8 +206,11 @@ public void handleException(TransportException e) { ); if (waitForCompletion) { try { - latch.await(waitTimeout.seconds(), TimeUnit.SECONDS); + if (latch.await(waitTimeout.seconds(), TimeUnit.SECONDS) == false) { + indexShard.mergedSegmentTransferTracker().incrementTotalWarmFailureCount(); + } } catch (InterruptedException e) { + indexShard.mergedSegmentTransferTracker().incrementTotalWarmFailureCount(); logger.warn( () -> new ParameterizedMessage("Interrupted while waiting for publish checkpoint complete [{}]", checkpoint), e diff --git 
a/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java index 8e54efdf1c3d2..9ec577789af65 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java @@ -86,7 +86,7 @@ protected void doReplicaOperation(RemoteStorePublishMergedSegmentRequest shardRe long startTime = System.currentTimeMillis(); replica.getRemoteDirectory().markMergedSegmentsPendingDownload(checkpoint.getLocalToRemoteSegmentFilenameMap()); replicationService.onNewMergedSegmentCheckpoint(checkpoint, replica); - replica.mergedSegmentReplicationTracker().addTotalDownloadTimeMillis(System.currentTimeMillis() - startTime); + replica.mergedSegmentTransferTracker().addTotalDownloadTimeMillis(System.currentTimeMillis() - startTime); } else { logger.warn( () -> new ParameterizedMessage( @@ -116,7 +116,7 @@ public final void publish(IndexShard indexShard, MergedSegmentCheckpoint checkpo long elapsedTimeMillis = endTimeMillis - startTimeMillis; long timeoutMillis = indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout().millis(); long timeLeftMillis = Math.max(0, timeoutMillis - elapsedTimeMillis); - indexShard.mergedSegmentReplicationTracker().addTotalUploadTimeMillis(elapsedTimeMillis); + indexShard.mergedSegmentTransferTracker().addTotalUploadTimeMillis(elapsedTimeMillis); if (timeLeftMillis > 0) { RemoteStoreMergedSegmentCheckpoint remoteStoreMergedSegmentCheckpoint = new RemoteStoreMergedSegmentCheckpoint( @@ -132,6 +132,7 @@ public final void publish(IndexShard indexShard, MergedSegmentCheckpoint checkpo TimeValue.timeValueMillis(timeLeftMillis) ); } else { + indexShard.mergedSegmentTransferTracker().incrementTotalWarmFailureCount(); logger.warn( () 
-> new ParameterizedMessage( "Unable to confirm upload of merged segment {} to remote store. Timeout of {}ms exceeded. Skipping pre-copy.", @@ -170,7 +171,7 @@ public void beforeUpload(String file) {} @Override public void onSuccess(String file) { localToRemoteStoreFilenames.put(file, indexShard.getRemoteDirectory().getExistingRemoteFilename(file)); - indexShard.mergedSegmentReplicationTracker().addTotalBytesUploaded(checkpoint.getMetadataMap().get(file).length()); + indexShard.mergedSegmentTransferTracker().addTotalBytesUploaded(checkpoint.getMetadataMap().get(file).length()); } @Override diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java index 8fa24854900c5..377f22a645fd2 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java @@ -62,6 +62,7 @@ import org.opensearch.core.action.ActionResponse; import org.opensearch.core.common.Strings; import org.opensearch.index.IndexSettings; +import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.rest.RestRequest; import org.opensearch.rest.RestResponse; import org.opensearch.rest.action.RestResponseListener; @@ -592,52 +593,76 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa table.addCell("pri.merges.total_time", "default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merged_segment_warmer.total_warm_invocations", - "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_warm_invocations", + "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" + ); + table.addCell( + "pri.merges.merged_segment_warmer.total_warm_invocations", + 
"default:false;text-align:right;desc:total invocations of merged segment warmer" ); - table.addCell("pri.merged_segment_warmer.total_warm_invocations", "default:false;text-align:right;desc:UPDATE"); table.addCell( - "merged_segment_warmer.total_warm_time_millis", - "alias:mswtwtm,mergedSegmentWarmerTotalWarmTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_warm_time", + "alias:mswtwt,mergedSegmentWarmerTotalWarmTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" + ); + table.addCell( + "pri.merges.merged_segment_warmer.total_warm_time", + "default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); - table.addCell("pri.merged_segment_warmer.total_warm_time_millis", "default:false;text-align:right;desc:UPDATE"); table.addCell( - "merged_segment_warmer.ongoing_warms", - "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.ongoing_warms", + "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" + ); + table.addCell( + "pri.merges.merged_segment_warmer.ongoing_warms", + "default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); - table.addCell("pri.merged_segment_warmer.ongoing_warms", "default:false;text-align:right;desc:UPDATE"); table.addCell( - "merged_segment_warmer.total_bytes_downloaded", - "alias:mswtbd,mergedSegmentWarmerTotalBytesDownloaded;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_bytes_received", + "alias:mswtbr,mergedSegmentWarmerTotalBytesReceived;default:false;text-align:right;desc:total bytes received by a replica shard during the warm operation" + ); + table.addCell( + "pri.merges.merged_segment_warmer.total_bytes_received", + "default:false;text-align:right;desc:total bytes received by a replica 
shard during the warm operation" ); - table.addCell("pri.merged_segment_warmer.total_bytes_downloaded", "default:false;text-align:right;desc:UPDATE"); table.addCell( - "merged_segment_warmer.total_bytes_uploaded", - "alias:mswtbu,mergedSegmentWarmerTotalBytesUploaded;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_bytes_sent", + "alias:mswtbs,mergedSegmentWarmerTotalBytesSent;default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" + ); + table.addCell( + "pri.merges.merged_segment_warmer.total_bytes_sent", + "default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); - table.addCell("pri.merged_segment_warmer.total_bytes_uploaded", "default:false;text-align:right;desc:UPDATE"); table.addCell( - "merged_segment_warmer.total_download_time_millis", - "alias:mswtdtm,mergedSegmentWarmerTotalDownloadTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_download_time", + "alias:mswtdt,mergedSegmentWarmerTotalDownloadTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" + ); + table.addCell( + "pri.merges.merged_segment_warmer.total_download_time", + "default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); - table.addCell("pri.merged_segment_warmer.total_download_time_millis", "default:false;text-align:right;desc:UPDATE"); table.addCell( - "merged_segment_warmer.total_warm_failure_count", - "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_warm_failure_count", + "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" + ); + table.addCell( + "pri.merges.merged_segment_warmer.total_warm_failure_count", + 
"default:false;text-align:right;desc:total failures in merged segment warmer" ); - table.addCell("pri.merged_segment_warmer.total_warm_failure_count", "default:false;text-align:right;desc:UPDATE"); table.addCell( - "merged_segment_warmer.total_upload_time_millis", - "alias:mswtutm,mergedSegmentWarmerTotalUploadTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_upload_time", + "alias:mswtut,mergedSegmentWarmerTotalUploadTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" + ); + table.addCell( + "pri.merges.merged_segment_warmer.total_upload_time", + "default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); - table.addCell("pri.merged_segment_warmer.total_upload_time_millis", "default:false;text-align:right;desc:UPDATE"); table.addCell("refresh.total", "sibling:pri;alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); table.addCell("pri.refresh.total", "default:false;text-align:right;desc:total refreshes"); @@ -1042,55 +1067,36 @@ protected Table buildTable( table.addCell(totalStats.getMerge() == null ? null : totalStats.getMerge().getTotalTime()); table.addCell(primaryStats.getMerge() == null ? null : primaryStats.getMerge().getTotalTime()); - table.addCell( - totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalWarmInvocationsCount() - ); - table.addCell( - primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalWarmInvocationsCount() - ); + MergedSegmentWarmerStats mergedSegmentWarmerTotalStats = totalStats.getMerge() == null + ? null + : totalStats.getMerge().getWarmerStats(); + MergedSegmentWarmerStats mergedSegmentWarmerPrimaryStats = primaryStats.getMerge() == null + ? null + : primaryStats.getMerge().getWarmerStats(); - table.addCell( - totalStats.getMergedSegmentWarmer() == null ? 
null : totalStats.getMergedSegmentWarmer().getTotalWarmTimeMillis() - ); - table.addCell( - primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalWarmTimeMillis() - ); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalWarmInvocationsCount()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalWarmInvocationsCount()); - table.addCell(totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getOngoingWarms()); - table.addCell(primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getOngoingWarms()); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalWarmTime()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalWarmTime()); - table.addCell( - totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalBytesDownloaded() - ); - table.addCell( - primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalBytesDownloaded() - ); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getOngoingWarms()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getOngoingWarms()); - table.addCell(totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalBytesUploaded()); - table.addCell( - primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalBytesUploaded() - ); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalReceivedSize()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? 
null : mergedSegmentWarmerPrimaryStats.getTotalReceivedSize()); - table.addCell( - totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalDownloadTimeMillis() - ); - table.addCell( - primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalDownloadTimeMillis() - ); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalSentSize()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalSentSize()); - table.addCell( - totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalWarmFailureCount() - ); - table.addCell( - primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalWarmFailureCount() - ); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalDownloadTime()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalDownloadTime()); - table.addCell( - totalStats.getMergedSegmentWarmer() == null ? null : totalStats.getMergedSegmentWarmer().getTotalUploadTimeMillis() - ); - table.addCell( - primaryStats.getMergedSegmentWarmer() == null ? null : primaryStats.getMergedSegmentWarmer().getTotalUploadTimeMillis() - ); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalWarmFailureCount()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalWarmFailureCount()); + + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalUploadTime()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalUploadTime()); table.addCell(totalStats.getRefresh() == null ? 
null : totalStats.getRefresh().getTotal()); table.addCell(primaryStats.getRefresh() == null ? null : primaryStats.getRefresh().getTotal()); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java index d303956db9e2d..f63183423b9b4 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java @@ -276,36 +276,36 @@ protected Table getTableWithHeader(final RestRequest request) { table.addCell("merges.total_time", "alias:mtt,mergesTotalTime;default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merged_segment_warmer.total_warm_invocations", + "merges.merged_segment_warmer.total_warm_invocations", "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "merged_segment_warmer.total_warm_time_millis", - "alias:mswtwtm,mergedSegmentWarmerTotalWarmTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_warm_time", + "alias:mswtwt,mergedSegmentWarmerTotalWarmTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "merged_segment_warmer.ongoing_warms", - "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.ongoing_warms", + "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( - "merged_segment_warmer.total_bytes_downloaded", - "alias:mswtbd,mergedSegmentWarmerTotalBytesDownloaded;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_bytes_received", + "alias:mswtbr,mergedSegmentWarmerTotalBytesReceived;default:false;text-align:right;desc:total bytes received 
by a replica shard during the warm operation" ); table.addCell( - "merged_segment_warmer.total_bytes_uploaded", - "alias:mswtbu,mergedSegmentWarmerTotalBytesUploaded;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_bytes_sent", + "alias:mswtbs,mergedSegmentWarmerTotalBytesSent;default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); table.addCell( - "merged_segment_warmer.total_download_time_millis", - "alias:mswtdtm,mergedSegmentWarmerTotalDownloadTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_download_time", + "alias:mswtdt,mergedSegmentWarmerTotalDownloadTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "merged_segment_warmer.total_warm_failure_count", - "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_warm_failure_count", + "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "merged_segment_warmer.total_upload_time_millis", - "alias:mswtutm,mergedSegmentWarmerTotalUploadTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_upload_time", + "alias:mswtut,mergedSegmentWarmerTotalUploadTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); table.addCell("refresh.total", "alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); @@ -582,15 +582,15 @@ Table buildTable( table.addCell(mergeStats == null ? null : mergeStats.getTotalSize()); table.addCell(mergeStats == null ? null : mergeStats.getTotalTime()); - MergedSegmentWarmerStats mergedSegmentWarmerStats = indicesStats == null ? 
null : indicesStats.getMergedSegmentWarmerStats(); + MergedSegmentWarmerStats mergedSegmentWarmerStats = mergeStats == null ? null : mergeStats.getWarmerStats(); table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmInvocationsCount()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmTimeMillis()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmTime()); table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getOngoingWarms()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalBytesDownloaded()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalBytesUploaded()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalDownloadTimeMillis()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalReceivedSize()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalSentSize()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalDownloadTime()); table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmFailureCount()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalUploadTimeMillis()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalUploadTime()); RefreshStats refreshStats = indicesStats == null ? null : indicesStats.getRefresh(); table.addCell(refreshStats == null ? 
null : refreshStats.getTotal()); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java index 5649aca4cca25..532f24be4f62b 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java @@ -216,36 +216,36 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa table.addCell("merges.total_time", "alias:mtt,mergesTotalTime;default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merged_segment_warmer.total_warm_invocations", + "merges.merged_segment_warmer.total_warm_invocations", "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "merged_segment_warmer.total_warm_time_millis", - "alias:mswtwtm,mergedSegmentWarmerTotalWarmTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_warm_time", + "alias:mswtwt,mergedSegmentWarmerTotalWarmTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "merged_segment_warmer.ongoing_warms", - "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.ongoing_warms", + "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( - "merged_segment_warmer.total_bytes_downloaded", - "alias:mswtbd,mergedSegmentWarmerTotalBytesDownloaded;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_bytes_received", + "alias:mswtbr,mergedSegmentWarmerTotalBytesReceived;default:false;text-align:right;desc:total bytes received by a replica shard during the warm operation" ); table.addCell( - 
"merged_segment_warmer.total_bytes_uploaded", - "alias:mswtbu,mergedSegmentWarmerTotalBytesUploaded;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_bytes_sent", + "alias:mswtbs,mergedSegmentWarmerTotalBytesSent;default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); table.addCell( - "merged_segment_warmer.total_download_time_millis", - "alias:mswtdtm,mergedSegmentWarmerTotalDownloadTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_download_time", + "alias:mswtdt,mergedSegmentWarmerTotalDownloadTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "merged_segment_warmer.total_warm_failure_count", - "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_warm_failure_count", + "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "merged_segment_warmer.total_upload_time_millis", - "alias:mswtutm,mergedSegmentWarmerTotalUploadTimeMillis;default:false;text-align:right;desc:UPDATE" + "merges.merged_segment_warmer.total_upload_time", + "alias:mswtut,mergedSegmentWarmerTotalUploadTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); table.addCell("refresh.total", "alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); @@ -485,17 +485,61 @@ Table buildTable( table.addCell(getOrNull(commonStats, CommonStats::getMerge, MergeStats::getTotalTime)); table.addCell( - getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalWarmInvocationsCount) + getOrNull( + commonStats, + (c) -> c.getMerge() == null ? 
null : c.getMerge().getWarmerStats(), + MergedSegmentWarmerStats::getTotalWarmInvocationsCount + ) ); - table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalWarmTimeMillis)); - table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getOngoingWarms)); - table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalBytesDownloaded)); - table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalBytesUploaded)); table.addCell( - getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalDownloadTimeMillis) + getOrNull( + commonStats, + (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), + MergedSegmentWarmerStats::getTotalWarmTime + ) + ); + table.addCell( + getOrNull( + commonStats, + (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), + MergedSegmentWarmerStats::getOngoingWarms + ) + ); + table.addCell( + getOrNull( + commonStats, + (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), + MergedSegmentWarmerStats::getTotalReceivedSize + ) + ); + table.addCell( + getOrNull( + commonStats, + (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), + MergedSegmentWarmerStats::getTotalSentSize + ) + ); + table.addCell( + getOrNull( + commonStats, + (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), + MergedSegmentWarmerStats::getTotalDownloadTime + ) + ); + table.addCell( + getOrNull( + commonStats, + (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), + MergedSegmentWarmerStats::getTotalWarmFailureCount + ) + ); + table.addCell( + getOrNull( + commonStats, + (c) -> c.getMerge() == null ? 
null : c.getMerge().getWarmerStats(), + MergedSegmentWarmerStats::getTotalUploadTime + ) ); - table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalWarmFailureCount)); - table.addCell(getOrNull(commonStats, CommonStats::getMergedSegmentWarmer, MergedSegmentWarmerStats::getTotalUploadTimeMillis)); table.addCell(getOrNull(commonStats, CommonStats::getRefresh, RefreshStats::getTotal)); table.addCell(getOrNull(commonStats, CommonStats::getRefresh, RefreshStats::getTotalTime)); diff --git a/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java b/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java similarity index 66% rename from server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java rename to server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java index 51cb423b03662..ce272d071af03 100644 --- a/server/src/test/java/org/opensearch/index/merge/MergedSegmentReplicationTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java @@ -8,40 +8,41 @@ package org.opensearch.index.merge; +import org.opensearch.common.unit.TimeValue; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; import org.opensearch.test.OpenSearchTestCase; import static org.opensearch.common.settings.Settings.builder; +import static org.opensearch.index.IndexSettingsTests.newIndexMeta; -public class MergedSegmentReplicationTrackerTests extends OpenSearchTestCase { +public class MergedSegmentTransferTrackerTests extends OpenSearchTestCase { - private MergedSegmentReplicationTracker tracker; + private MergedSegmentTransferTracker tracker; @Override public void setUp() throws Exception { super.setUp(); ShardId shardId = new ShardId("test", "uuid", 0); IndexSettings indexSettings = new IndexSettings(newIndexMeta("test", 
builder().build()), builder().build()); - tracker = new MergedSegmentReplicationTracker(shardId, indexSettings); + tracker = new MergedSegmentTransferTracker(shardId, indexSettings); } public void testInitialStats() { MergedSegmentWarmerStats stats = tracker.stats(); assertEquals(0, stats.getTotalWarmInvocationsCount()); - assertEquals(0, stats.getTotalWarmTimeMillis()); + assertEquals(TimeValue.ZERO, stats.getTotalWarmTime()); assertEquals(0, stats.getTotalWarmFailureCount()); - assertEquals(0, stats.getTotalBytesUploaded()); - assertEquals(0, stats.getTotalBytesDownloaded()); - assertEquals(0, stats.getTotalUploadTimeMillis()); - assertEquals(0, stats.getTotalDownloadTimeMillis()); + assertEquals(0, stats.getTotalSentSize()); + assertEquals(0, stats.getTotalReceivedSize()); + assertEquals(TimeValue.ZERO, stats.getTotalUploadTime()); + assertEquals(TimeValue.ZERO, stats.getTotalDownloadTime()); assertEquals(0, stats.getOngoingWarms()); } public void testIncrementCounters() { tracker.incrementTotalWarmInvocationsCount(); tracker.incrementTotalWarmFailureCount(); - tracker.incrementTotalRejectedWarms(); MergedSegmentWarmerStats stats = tracker.stats(); assertEquals(1, stats.getTotalWarmInvocationsCount()); @@ -65,20 +66,20 @@ public void testAddTimeAndBytes() { tracker.addTotalBytesDownloaded(2048); MergedSegmentWarmerStats stats = tracker.stats(); - assertEquals(100, stats.getTotalWarmTimeMillis()); - assertEquals(200, stats.getTotalUploadTimeMillis()); - assertEquals(300, stats.getTotalDownloadTimeMillis()); - assertEquals(1024, stats.getTotalBytesUploaded()); - assertEquals(2048, stats.getTotalBytesDownloaded()); + assertEquals(new TimeValue(100), stats.getTotalWarmTime()); + assertEquals(new TimeValue(200), stats.getTotalUploadTime()); + assertEquals(new TimeValue(300), stats.getTotalDownloadTime()); + assertEquals(1024, stats.getTotalSentSize()); + assertEquals(2048, stats.getTotalReceivedSize()); } public void testAccumulativeStats() { 
tracker.addTotalWarmTimeMillis(100); tracker.addTotalWarmTimeMillis(50); - assertEquals(150, tracker.stats().getTotalWarmTimeMillis()); + assertEquals(new TimeValue(150), tracker.stats().getTotalWarmTime()); tracker.addTotalBytesUploaded(1000); tracker.addTotalBytesUploaded(500); - assertEquals(1500, tracker.stats().getTotalBytesUploaded()); + assertEquals(1500, tracker.stats().getTotalSentSize()); } } From a8f2d6c56018d4613e2745fa88cafea6e02566e8 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Thu, 4 Sep 2025 18:36:59 +0530 Subject: [PATCH 07/30] Addressing review comments + added ITs Signed-off-by: Aditya Khera --- .../core/action/ActionListener.java | 4 + .../org/opensearch/merge/MergeStatsIT.java | 392 ++++++++++++++++++ .../org/opensearch/index/engine/Engine.java | 2 - .../index/engine/MergedSegmentWarmer.java | 1 + .../opensearch/index/merge/MergeStats.java | 23 +- .../merge/MergedSegmentTransferTracker.java | 9 +- .../index/merge/MergedSegmentWarmerStats.java | 141 +++---- .../opensearch/index/shard/IndexShard.java | 2 +- .../opensearch/indices/IndicesService.java | 2 - .../AbstractPublishCheckpointAction.java | 13 +- .../checkpoint/PublishCheckpointAction.java | 2 +- .../PublishMergedSegmentAction.java | 3 +- .../PublishReferencedSegmentsAction.java | 3 +- ...RemoteStorePublishMergedSegmentAction.java | 11 +- .../rest/action/cat/RestIndicesAction.java | 60 +-- .../rest/action/cat/RestNodesAction.java | 36 +- .../rest/action/cat/RestShardsAction.java | 36 +- .../engine/EngineConfigFactoryTests.java | 2 + .../MergedSegmentTransferTrackerTests.java | 45 +- 19 files changed, 591 insertions(+), 196 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java diff --git a/libs/core/src/main/java/org/opensearch/core/action/ActionListener.java b/libs/core/src/main/java/org/opensearch/core/action/ActionListener.java index 4fd55898a2cb5..b396472423954 100644 --- 
a/libs/core/src/main/java/org/opensearch/core/action/ActionListener.java +++ b/libs/core/src/main/java/org/opensearch/core/action/ActionListener.java @@ -358,4 +358,8 @@ static void completeWith(ActionListener listener, CheckedSu throw ex; } } + + static ActionListener noop() { + return ActionListener.wrap(response -> {}, exception -> {}); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java new file mode 100644 index 0000000000000..e22539a3c3261 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java @@ -0,0 +1,392 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.merge; + +import org.opensearch.action.admin.cluster.node.stats.NodeStats; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.action.admin.indices.forcemerge.ForceMergeRequest; +import org.opensearch.action.admin.indices.stats.CommonStats; +import org.opensearch.action.admin.indices.stats.CommonStatsFlags; +import org.opensearch.action.admin.indices.stats.IndexStats; +import org.opensearch.action.admin.indices.stats.IndicesStatsRequest; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.action.admin.indices.stats.ShardStats; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import 
org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.merge.MergedSegmentWarmerStats; +import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; + +/* + * Integration tests asserting on MergeStats for remote store enabled domains. + */ +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class MergeStatsIT extends RemoteStoreBaseIntegTestCase { + + private static final String INDEX_NAME = "test-idx"; + + @Override + public Settings indexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put(ShardsLimitAllocationDecider.INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.getKey(), 5) + .build(); + } + + @Override + protected Settings featureFlagSettings() { + Settings.Builder featureSettings = Settings.builder(); + featureSettings.put(FeatureFlags.MERGED_SEGMENT_WARMER_EXPERIMENTAL_FLAG, true); + return featureSettings.build(); + } + + public void setup() { + internalCluster().startNodes(2); + } + + public void testNodesStats() throws ExecutionException, InterruptedException { + setup(); + String[] indices = setupIndices(3); + + ClusterState state = getClusterState(); + List nodes = state.nodes().getNodes().values().stream().map(DiscoveryNode::getName).toList(); + + // ensure merge is executed + for (String index : indices) { + client().admin().indices().forceMerge(new ForceMergeRequest(index).maxNumSegments(2)); + } + final NodesStatsRequest nodesStatsRequest = new NodesStatsRequest("data:true"); + nodesStatsRequest.indices(CommonStatsFlags.ALL); + for (String node : nodes) { + NodesStatsResponse response = client(node).admin().cluster().nodesStats(nodesStatsRequest).get(); + + // Shard 
stats + List<NodeStats> allNodesStats = response.getNodes(); + assertEquals(2, allNodesStats.size()); + + for (NodeStats nodeStats : allNodesStats) { + assertNotNull(nodeStats.getIndices()); + MergeStats mergeStats = nodeStats.getIndices().getMerge(); + assertNotNull(mergeStats); + assertMergeStats(mergeStats, StatsScope.AGGREGATED); + MergedSegmentWarmerStats mergedSegmentWarmerStats = mergeStats.getWarmerStats(); + assertNotNull(mergedSegmentWarmerStats); + assertMergedSegmentWarmerStats(mergedSegmentWarmerStats, StatsScope.AGGREGATED); + } + } + } + + public void testShardStats() throws ExecutionException, InterruptedException { + setup(); + + String[] indices = setupIndices(2); + + ClusterState state = getClusterState(); + List<String> nodes = state.nodes().getNodes().values().stream().map(DiscoveryNode::getName).toList(); + + // ensure merge is executed + for (String index : indices) { + client().admin().indices().forceMerge(new ForceMergeRequest(index).maxNumSegments(2)); + } + for (String node : nodes) { + IndicesStatsResponse response = client(node).admin().indices().stats(new IndicesStatsRequest()).get(); + + // Shard stats + ShardStats[] allShardStats = response.getShards(); + assertEquals(4, allShardStats.length); + + for (ShardStats shardStats : allShardStats) { + StatsScope type = shardStats.getShardRouting().primary() ?
StatsScope.PRIMARY_SHARD : StatsScope.REPLICA_SHARD; + CommonStats commonStats = shardStats.getStats(); + assertNotNull(commonStats); + MergeStats mergeStats = commonStats.getMerge(); + assertNotNull(mergeStats); + assertMergeStats(mergeStats, type); + MergedSegmentWarmerStats mergedSegmentWarmerStats = mergeStats.getWarmerStats(); + assertNotNull(mergedSegmentWarmerStats); + assertMergedSegmentWarmerStats(mergedSegmentWarmerStats, type); + } + } + } + + public void testIndicesStats() throws ExecutionException, InterruptedException { + setup(); + String[] indices = setupIndices(3); + + ClusterState state = getClusterState(); + List<String> nodes = state.nodes().getNodes().values().stream().map(DiscoveryNode::getName).toList(); + + // ensure merge is executed + for (String index : indices) { + client().admin().indices().forceMerge(new ForceMergeRequest(index).maxNumSegments(2)); + } + + for (String node : nodes) { + IndicesStatsResponse response = client(node).admin().indices().stats(new IndicesStatsRequest()).get(); + + // Index stats + Map<String, IndexStats> allIndicesStats = response.getIndices(); + assertEquals(3, allIndicesStats.size()); + for (String index : indices) { + IndexStats indexStats = allIndicesStats.get(index); + CommonStats totalStats = indexStats.getTotal(); + CommonStats priStats = indexStats.getPrimaries(); + assertNotNull(totalStats); + assertNotNull(priStats); + + MergeStats totalMergeStats = totalStats.getMerge(); + assertNotNull(totalMergeStats); + MergeStats priMergeStats = priStats.getMerge(); + assertNotNull(priMergeStats); + + assertMergeStats(priMergeStats, StatsScope.PRIMARY_SHARD); + assertMergeStats(totalMergeStats, StatsScope.AGGREGATED); + + MergedSegmentWarmerStats totalMergedSegmentWarmerStats = totalMergeStats.getWarmerStats(); + MergedSegmentWarmerStats priMergedSegmentWarmerStats = priMergeStats.getWarmerStats(); + + assertNotNull(totalMergedSegmentWarmerStats); + assertNotNull(priMergedSegmentWarmerStats); + +
assertMergedSegmentWarmerStats(priMergedSegmentWarmerStats, StatsScope.PRIMARY_SHARD); + assertMergedSegmentWarmerStats(totalMergedSegmentWarmerStats, StatsScope.AGGREGATED); + } + } + } + + private void assertMergeStats(MergeStats stats, StatsScope type) { + if (type == StatsScope.PRIMARY_SHARD) { + assertTrue("Current merges should be >= 0", stats.getCurrent() >= 0); + assertTrue("Current merge docs should be >= 0", stats.getCurrentNumDocs() >= 0); + assertTrue("Current merge size should be >= 0", stats.getCurrentSizeInBytes() >= 0); + assertTrue("Total merges should be >= 1", stats.getTotal() >= 1); + assertTrue("Total merge time should be >= 1ms", stats.getTotalTimeInMillis() >= 1); + assertTrue("Total merge time should be >= 1ms", stats.getTotalTime().getMillis() >= 1); + assertTrue("Total merged docs should be >= 1", stats.getTotalNumDocs() >= 1); + assertTrue("Total merged size should be >= 1 byte", stats.getTotalSizeInBytes() >= 1); + assertTrue("Total merged size should be >= 1 byte", stats.getTotalSize().getBytes() >= 1); + assertTrue("Total stopped time should be >= 0", stats.getTotalStoppedTimeInMillis() >= 0); + assertTrue("Total stopped time should be >= 0", stats.getTotalStoppedTime().getMillis() >= 0); + assertTrue("Total throttled time should be >= 0", stats.getTotalThrottledTime().getMillis() >= 0); + assertTrue("Total throttled time should be >= 0", stats.getTotalThrottledTimeInMillis() >= 0); + } else if (type == StatsScope.REPLICA_SHARD) { + assertEquals("Replica shard current merges should be 0", 0, stats.getCurrent()); + assertEquals("Replica shard current merge docs should be 0", 0, stats.getCurrentNumDocs()); + assertEquals("Replica shard current merge size should be 0", 0, stats.getCurrentSizeInBytes()); + assertEquals("Replica shard total merges should be 0", 0, stats.getTotal()); + assertEquals("Replica shard total merge time should be 0", 0, stats.getTotalTimeInMillis()); + assertEquals("Replica shard total merge time should be 0", 0, 
stats.getTotalTime().getMillis()); + assertEquals("Replica shard total merged docs should be 0", 0, stats.getTotalNumDocs()); + assertEquals("Replica shard total merged size should be 0", 0, stats.getTotalSizeInBytes()); + assertEquals("Replica shard total merged size should be 0", 0, stats.getTotalSize().getBytes()); + assertEquals("Replica shard total stopped time should be 0", 0, stats.getTotalStoppedTimeInMillis()); + assertEquals("Replica shard total stopped time should be 0", 0, stats.getTotalStoppedTime().getMillis()); + assertEquals("Replica shard total throttled time should be 0", 0, stats.getTotalThrottledTime().getMillis()); + assertEquals("Replica shard total throttled time should be 0", 0, stats.getTotalThrottledTimeInMillis()); + } else if (type == StatsScope.AGGREGATED) { + // the node might have both primaries and replicas, only primaries, or only replicas + boolean primaryShardStatsResult = false; + boolean replicaShardStatsResult = false; + + try { + assertMergeStats(stats, StatsScope.PRIMARY_SHARD); + primaryShardStatsResult = true; + } catch (AssertionError ignored) {} + + try { + assertMergeStats(stats, StatsScope.REPLICA_SHARD); + replicaShardStatsResult = true; + } catch (AssertionError ignored) {} + + assertTrue( + "Stats should match either primary or replica shard patterns or both.", + primaryShardStatsResult || replicaShardStatsResult + ); + } + } + + private void assertMergedSegmentWarmerStats(MergedSegmentWarmerStats stats, StatsScope type) { + if (type == StatsScope.PRIMARY_SHARD) { + assertTrue("Primary shard warm invocations should be >= 1", stats.getTotalInvocationsCount() >= 1); + assertTrue("Primary shard warm time should be >= 1ms", stats.getTotalTime().getMillis() >= 1); + assertTrue("Primary shard warm failures should be >= 0", stats.getTotalFailureCount() >= 0); + assertTrue("Primary shard sent size should be >= 0", stats.getTotalSentSize().getBytes() >= 0); + assertEquals("Primary shard received size should be 0", 0, 
stats.getTotalReceivedSize().getBytes()); + assertTrue("Primary shard send time should be >= 0", stats.getTotalSendTime().millis() >= 0); + assertEquals("Primary shard receive time should be 0", 0, stats.getTotalReceiveTime().millis()); + assertTrue("Primary shard ongoing warms should be >= 0", stats.getOngoingCount() >= 0); + } else if (type == StatsScope.REPLICA_SHARD) { + assertEquals("Replica shard warm invocations should be 0", 0, stats.getTotalInvocationsCount()); + assertEquals("Replica shard warm time should be 0", 0, stats.getTotalTime().getMillis()); + assertEquals("Replica shard warm failures should be 0", 0, stats.getTotalFailureCount()); + assertEquals("Replica shard sent size should be 0", 0, stats.getTotalSentSize().getBytes()); + assertTrue("Replica shard received size should be >= 1", stats.getTotalReceivedSize().getBytes() >= 1); + assertEquals("Replica shard send time should be 0", 0, stats.getTotalSendTime().millis()); + assertTrue("Replica shard receive time should be >= 1ms", stats.getTotalReceiveTime().millis() >= 1); + assertEquals("Replica shard ongoing warms should be 0", 0, stats.getOngoingCount()); + } else if (type == StatsScope.AGGREGATED) { + // the node might have both primaries and replicas, only primaries, or only replicas + + // would evaluate to true if the node only contains primary shards + boolean primaryShardStatsResult = false; + + // would evaluate to true if the node only contains replica shards + boolean replicaShardStatsResult = false; + + // would evaluate to true if the node contains a mix of primary and replica shards + boolean primaryAndReplicaShardsResult = stats.getOngoingCount() >= 0 + && stats.getTotalTime().getMillis() >= 1 + && stats.getTotalSendTime().getMillis() >= 1 + && stats.getTotalReceiveTime().getMillis() >= 1 + && stats.getTotalInvocationsCount() >= 1 + && stats.getTotalReceivedSize().getBytes() >= 1 + && stats.getTotalSentSize().getBytes() >= 1 + && stats.getTotalFailureCount() >= 0; + + if 
(primaryAndReplicaShardsResult) { + return; + } + + try { + assertMergedSegmentWarmerStats(stats, StatsScope.PRIMARY_SHARD); + primaryShardStatsResult = true; + } catch (AssertionError ignored) {} + + try { + assertMergedSegmentWarmerStats(stats, StatsScope.REPLICA_SHARD); + replicaShardStatsResult = true; // would be true if the node only contains replica shards + } catch (AssertionError ignored) {} + + assertTrue( + "Stats should match either primary or replica shard patterns or both.", + primaryShardStatsResult || replicaShardStatsResult + ); + } + } + + public void testReadWrite() throws IOException { + MergedSegmentWarmerStats mergedSegmentWarmerStats = new MergedSegmentWarmerStats(); + mergedSegmentWarmerStats.add( + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100) + ); + MergeStats mergeStats1 = new MergeStats(); + mergeStats1.add( + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomLongBetween(1, 100), + randomDoubleBetween(1, 100, true), + mergedSegmentWarmerStats + ); + + BytesStreamOutput outputStream = new BytesStreamOutput(); + mergeStats1.writeTo(outputStream); + + BytesReference bytes = outputStream.bytes(); + StreamInput inputStream = bytes.streamInput(); + + MergeStats mergeStats2 = new MergeStats(inputStream); + + assertEquals(mergeStats1.getTotalNumDocs(), mergeStats2.getTotalNumDocs()); + assertEquals(mergeStats1.getTotalSizeInBytes(), mergeStats2.getTotalSizeInBytes()); + assertEquals(mergeStats1.getTotal(), mergeStats2.getTotal()); + assertEquals(mergeStats1.getTotalTimeInMillis(), mergeStats2.getTotalTimeInMillis()); + assertEquals(mergeStats1.getTotalTime(),
mergeStats2.getTotalTime()); + assertEquals(mergeStats1.getCurrent(), mergeStats2.getCurrent()); + assertEquals(mergeStats1.getCurrentSize(), mergeStats2.getCurrentSize()); + assertEquals(mergeStats1.getCurrentNumDocs(), mergeStats2.getCurrentNumDocs()); + assertEquals(mergeStats1.getCurrentSizeInBytes(), mergeStats2.getCurrentSizeInBytes()); + assertEquals(mergeStats1.getTotalStoppedTimeInMillis(), mergeStats2.getTotalStoppedTimeInMillis()); + assertEquals(mergeStats1.getTotalStoppedTime(), mergeStats2.getTotalStoppedTime()); + assertEquals(mergeStats1.getTotalThrottledTimeInMillis(), mergeStats2.getTotalThrottledTimeInMillis()); + assertEquals(mergeStats1.getTotalThrottledTime(), mergeStats2.getTotalThrottledTime()); + assertEquals(mergeStats1.getWarmerStats().getTotalFailureCount(), mergeStats2.getWarmerStats().getTotalFailureCount()); + assertEquals(mergeStats1.getWarmerStats().getTotalInvocationsCount(), mergeStats2.getWarmerStats().getTotalInvocationsCount()); + assertEquals(mergeStats1.getWarmerStats().getTotalTime().getMillis(), mergeStats2.getWarmerStats().getTotalTime().getMillis()); + assertEquals( + mergeStats1.getWarmerStats().getTotalSentSize().getBytes(), + mergeStats2.getWarmerStats().getTotalSentSize().getBytes() + ); + assertEquals( + mergeStats1.getWarmerStats().getTotalReceivedSize().getBytes(), + mergeStats2.getWarmerStats().getTotalReceivedSize().getBytes() + ); + assertEquals( + mergeStats1.getWarmerStats().getTotalSendTime().getMillis(), + mergeStats2.getWarmerStats().getTotalSendTime().getMillis() + ); + assertEquals( + mergeStats1.getWarmerStats().getTotalReceiveTime().getMillis(), + mergeStats2.getWarmerStats().getTotalReceiveTime().getMillis() + ); + assertEquals(mergeStats1.getWarmerStats().getOngoingCount(), mergeStats2.getWarmerStats().getOngoingCount()); + + } + + private void indexDocs(String... 
indexNames) { + for (String indexName : indexNames) { + for (int i = 0; i < randomIntBetween(25, 30); i++) { + if (randomBoolean()) { + flush(indexName); + } else { + refresh(indexName); + } + int numberOfOperations = randomIntBetween(25, 30); + indexBulk(indexName, numberOfOperations); + } + } + } + + private String[] setupIndices(int count) { + if (count <= 0) { + return new String[0]; + } + String[] indices = new String[count]; + for (int i = 0; i < count; i++) { + indices[i] = INDEX_NAME + i; + } + createIndex(indices); + ensureGreen(indices); + for (String index : indices) { + indexDocs(index); + } + return indices; + } + + private enum StatsScope { + PRIMARY_SHARD, + REPLICA_SHARD, + AGGREGATED + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index f70ec75485393..8020d4469a274 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -214,8 +214,6 @@ public GatedCloseable getSegmentInfosSnapshot() { } public MergeStats getMergeStats() { - logger.info(getClass().getSimpleName() + " | getMergeStats called"); - return new MergeStats(); } diff --git a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java index a859b40716d12..a5fbeae83ce57 100644 --- a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java +++ b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java @@ -57,6 +57,7 @@ public void warm(LeafReader leafReader) throws IOException { mergedSegmentTransferTracker.incrementOngoingWarms(); // IndexWriter.IndexReaderWarmer#warm is called by IndexWriter#mergeMiddle. The type of leafReader should be SegmentReader. 
assert leafReader instanceof SegmentReader; + assert indexShard.indexSettings().isSegRepLocalEnabled() || indexShard.indexSettings().isRemoteStoreEnabled(); long startTime = System.currentTimeMillis(); long elapsedTime = 0; try { diff --git a/server/src/main/java/org/opensearch/index/merge/MergeStats.java b/server/src/main/java/org/opensearch/index/merge/MergeStats.java index 7a6e0e940639f..eed4dc0ef44dd 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergeStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergeStats.java @@ -91,10 +91,10 @@ public MergeStats(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_11_0)) { unreferencedFileCleanUpsPerformed = in.readOptionalVLong(); } - if (in.getVersion().onOrAfter(Version.V_3_1_0)) { + if (in.getVersion().onOrAfter(Version.V_3_2_0)) { this.warmerStats = new MergedSegmentWarmerStats(in); } else { - this.warmerStats = null; + this.warmerStats = new MergedSegmentWarmerStats(); } } @@ -130,10 +130,11 @@ public void add( } public void add(MergedSegmentWarmerStats warmerStats) { - if (this.getWarmerStats() == null) { + if (this.warmerStats == null) { return; } - this.getWarmerStats().add(warmerStats); + this.warmerStats.add(warmerStats); + this.warmerStats.addTotals(warmerStats); } public void add(MergeStats mergeStats) { @@ -144,10 +145,8 @@ public void add(MergeStats mergeStats) { this.currentNumDocs += mergeStats.currentNumDocs; this.currentSizeInBytes += mergeStats.currentSizeInBytes; + this.warmerStats.add(mergeStats.warmerStats); addTotals(mergeStats); - if (this.getWarmerStats() != null) { - this.getWarmerStats().add(mergeStats.getWarmerStats(), false); - } } public void addTotals(MergeStats mergeStats) { @@ -166,9 +165,7 @@ public void addTotals(MergeStats mergeStats) { } else { this.totalBytesPerSecAutoThrottle += mergeStats.totalBytesPerSecAutoThrottle; } - if (this.getWarmerStats() != null) { - this.getWarmerStats().addTotals(mergeStats.getWarmerStats()); - } 
+ this.warmerStats.addTotals(mergeStats.warmerStats); } public void addUnreferencedFileCleanUpStats(long unreferencedFileCleanUpsPerformed) { @@ -284,7 +281,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.field(Fields.TOTAL_THROTTLE_BYTES_PER_SEC_IN_BYTES, totalBytesPerSecAutoThrottle); builder.field(Fields.UNREFERENCED_FILE_CLEANUPS_PERFORMED, unreferencedFileCleanUpsPerformed); - getWarmerStats().toXContent(builder, params); + this.warmerStats.toXContent(builder, params); builder.endObject(); return builder; } @@ -331,8 +328,8 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_11_0)) { out.writeOptionalVLong(unreferencedFileCleanUpsPerformed); } - if (out.getVersion().onOrAfter(Version.V_3_1_0)) { - getWarmerStats().writeTo(out); + if (out.getVersion().onOrAfter(Version.V_3_2_0)) { + this.warmerStats.writeTo(out); } } } diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java index 434864cc3e844..b2751d168fcee 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java @@ -10,9 +10,6 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.metrics.CounterMetric; -import org.opensearch.core.index.shard.ShardId; -import org.opensearch.index.IndexSettings; -import org.opensearch.index.shard.AbstractIndexShardComponent; /** * A component that tracks stats related to merged segment replication operations. 
@@ -21,7 +18,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class MergedSegmentTransferTracker extends AbstractIndexShardComponent { +public class MergedSegmentTransferTracker { private final CounterMetric totalWarmInvocationsCount = new CounterMetric(); private final CounterMetric totalWarmTimeMillis = new CounterMetric(); @@ -32,10 +29,6 @@ public class MergedSegmentTransferTracker extends AbstractIndexShardComponent { private final CounterMetric totalDownloadTimeMillis = new CounterMetric(); private final CounterMetric ongoingWarms = new CounterMetric(); - public MergedSegmentTransferTracker(ShardId shardId, IndexSettings indexSettings) { - super(shardId, indexSettings); - } - public void incrementTotalWarmInvocationsCount() { totalWarmInvocationsCount.inc(); } diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java index a90fe8bb611dc..10757eb991d7b 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java @@ -20,7 +20,7 @@ import java.io.IOException; /** - * Stores stats about a merge process + * Stores stats about a merged segment warmer process * * @opensearch.api */ @@ -28,13 +28,13 @@ public class MergedSegmentWarmerStats implements Writeable, ToXContentFragment { // [PRIMARY SHARD] Number of times segment MergedSegmentWarmer.warm has been invoked - private long totalWarmInvocationsCount; + private long totalInvocationsCount; // [PRIMARY SHARD] Total time spent warming segments in milliseconds - private long totalWarmTimeMillis; + private long totalTimeMillis; // [PRIMARY SHARD] Number of times segment warming has failed - private long totalWarmFailureCount; + private long totalFailureCount; // [PRIMARY SHARD] Total bytes sent during segment warming private long totalBytesSent; @@ -42,86 +42,75 @@ public class 
MergedSegmentWarmerStats implements Writeable, ToXContentFragment { // [REPLICA SHARD] Total bytes received during segment warming private long totalBytesReceived; - // [PRIMARY SHARD] Total time spent uploading segments in milliseconds by a primary shard - private long totalUploadTimeMillis; + // [PRIMARY SHARD] Total time spent sending segments in milliseconds by a primary shard + private long totalSendTimeMillis; - // [REPLICA SHARD] Total time spent downloading segments in milliseconds - private long totalDownloadTimeMillis; + // [REPLICA SHARD] Total time spent receiving segments in milliseconds + private long totalReceiveTimeMillis; // [PRIMARY SHARD] Current number of ongoing segment warming operations - private long ongoingWarms; + private long ongoingCount; public MergedSegmentWarmerStats() {} public MergedSegmentWarmerStats(StreamInput in) throws IOException { - totalWarmInvocationsCount = in.readVLong(); - totalWarmTimeMillis = in.readVLong(); - totalWarmFailureCount = in.readVLong(); + totalInvocationsCount = in.readVLong(); + totalTimeMillis = in.readVLong(); + totalFailureCount = in.readVLong(); totalBytesSent = in.readVLong(); totalBytesReceived = in.readVLong(); - totalUploadTimeMillis = in.readVLong(); - totalDownloadTimeMillis = in.readVLong(); - ongoingWarms = in.readVLong(); + totalSendTimeMillis = in.readVLong(); + totalReceiveTimeMillis = in.readVLong(); + ongoingCount = in.readVLong(); } public synchronized void add( - long totalWarmInvocationsCount, - long totalWarmTimeMillis, - long totalWarmFailureCount, + long totalInvocationsCount, + long totalTimeMillis, + long totalFailureCount, long totalBytesSent, long totalBytesReceived, - long totalUploadTimeMillis, - long totalDownloadTimeMillis, - long ongoingWarms + long totalSendTimeMillis, + long totalReceiveTimeMillis, + long ongoingCount ) { - this.totalWarmInvocationsCount += totalWarmInvocationsCount; - this.totalWarmTimeMillis += totalWarmTimeMillis; - this.totalWarmFailureCount += 
totalWarmFailureCount; + this.totalInvocationsCount += totalInvocationsCount; + this.totalTimeMillis += totalTimeMillis; + this.totalFailureCount += totalFailureCount; this.totalBytesSent += totalBytesSent; this.totalBytesReceived += totalBytesReceived; - this.totalUploadTimeMillis += totalUploadTimeMillis; - this.totalDownloadTimeMillis += totalDownloadTimeMillis; - this.ongoingWarms += ongoingWarms; + this.totalSendTimeMillis += totalSendTimeMillis; + this.totalReceiveTimeMillis += totalReceiveTimeMillis; + this.ongoingCount += ongoingCount; } public void add(MergedSegmentWarmerStats mergedSegmentWarmerStats) { - add(mergedSegmentWarmerStats, true); - } - - public void add(MergedSegmentWarmerStats mergedSegmentWarmerStats, boolean addTotals) { - if (mergedSegmentWarmerStats == null) { - return; - } - this.ongoingWarms += mergedSegmentWarmerStats.ongoingWarms; - - if (addTotals) { - addTotals(mergedSegmentWarmerStats); - } + this.ongoingCount += mergedSegmentWarmerStats.ongoingCount; } public synchronized void addTotals(MergedSegmentWarmerStats mergedSegmentWarmerStats) { if (mergedSegmentWarmerStats == null) { return; } - this.totalWarmInvocationsCount += mergedSegmentWarmerStats.totalWarmInvocationsCount; - this.totalWarmTimeMillis += mergedSegmentWarmerStats.totalWarmTimeMillis; - this.totalWarmFailureCount += mergedSegmentWarmerStats.totalWarmFailureCount; + this.totalInvocationsCount += mergedSegmentWarmerStats.totalInvocationsCount; + this.totalTimeMillis += mergedSegmentWarmerStats.totalTimeMillis; + this.totalFailureCount += mergedSegmentWarmerStats.totalFailureCount; this.totalBytesSent += mergedSegmentWarmerStats.totalBytesSent; this.totalBytesReceived += mergedSegmentWarmerStats.totalBytesReceived; - this.totalUploadTimeMillis += mergedSegmentWarmerStats.totalUploadTimeMillis; - this.totalDownloadTimeMillis += mergedSegmentWarmerStats.totalDownloadTimeMillis; + this.totalSendTimeMillis += mergedSegmentWarmerStats.totalSendTimeMillis; + 
this.totalReceiveTimeMillis += mergedSegmentWarmerStats.totalReceiveTimeMillis; } - public long getTotalWarmInvocationsCount() { - return this.totalWarmInvocationsCount; + public long getTotalInvocationsCount() { + return this.totalInvocationsCount; } - public TimeValue getTotalWarmTime() { - return new TimeValue(totalWarmTimeMillis); + public TimeValue getTotalTime() { + return new TimeValue(totalTimeMillis); } - public long getOngoingWarms() { - return ongoingWarms; + public long getOngoingCount() { + return ongoingCount; } public ByteSizeValue getTotalReceivedSize() { @@ -132,29 +121,29 @@ public ByteSizeValue getTotalSentSize() { return new ByteSizeValue(totalBytesSent); } - public TimeValue getTotalDownloadTime() { - return new TimeValue(totalDownloadTimeMillis); + public TimeValue getTotalReceiveTime() { + return new TimeValue(totalReceiveTimeMillis); } - public long getTotalWarmFailureCount() { - return totalWarmFailureCount; + public long getTotalFailureCount() { + return totalFailureCount; } - public TimeValue getTotalUploadTime() { - return new TimeValue(totalUploadTimeMillis); + public TimeValue getTotalSendTime() { + return new TimeValue(totalSendTimeMillis); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.MERGED_SEGMENT_WARMER); - builder.field(Fields.TOTAL_WARM_INVOCATIONS_COUNT, totalWarmInvocationsCount); - builder.humanReadableField(Fields.TOTAL_WARM_TIME_MILLIS, Fields.TOTAL_WARM_TIME, getTotalWarmTime()); - builder.field(Fields.TOTAL_WARM_FAILURE_COUNT, totalWarmFailureCount); + builder.field(Fields.WARM_INVOCATIONS_COUNT, totalInvocationsCount); + builder.humanReadableField(Fields.TOTAL_TIME_MILLIS, Fields.TOTAL_TIME, getTotalTime()); + builder.field(Fields.TOTAL_FAILURE_COUNT, totalFailureCount); builder.humanReadableField(Fields.TOTAL_BYTES_SENT, Fields.TOTAL_SENT_SIZE, getTotalSentSize()); builder.humanReadableField(Fields.TOTAL_BYTES_RECEIVED, 
Fields.TOTAL_RECEIVED_SIZE, getTotalReceivedSize()); - builder.humanReadableField(Fields.TOTAL_UPLOAD_TIME_MILLIS, Fields.TOTAL_UPLOAD_TIME, totalUploadTimeMillis); - builder.humanReadableField(Fields.TOTAL_DOWNLOAD_TIME_MILLIS, Fields.TOTAL_DOWNLOAD_TIME, totalDownloadTimeMillis); - builder.field(Fields.ONGOING_WARMS, ongoingWarms); + builder.humanReadableField(Fields.TOTAL_SEND_TIME_MILLIS, Fields.TOTAL_SEND_TIME, getTotalSendTime()); + builder.humanReadableField(Fields.TOTAL_RECEIVE_TIME_MILLIS, Fields.TOTAL_RECEIVE_TIME, getTotalReceiveTime()); + builder.field(Fields.ONGOING_WARMS, ongoingCount); builder.endObject(); return builder; } @@ -166,31 +155,31 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws */ static final class Fields { static final String MERGED_SEGMENT_WARMER = "merged_segment_warmer"; - static final String TOTAL_WARM_INVOCATIONS_COUNT = "total_warm_invocations_count"; - static final String TOTAL_WARM_TIME_MILLIS = "total_warm_time_millis"; - static final String TOTAL_WARM_FAILURE_COUNT = "total_warm_failure_count"; + static final String WARM_INVOCATIONS_COUNT = "total_invocations_count"; + static final String TOTAL_TIME_MILLIS = "total_time_millis"; + static final String TOTAL_FAILURE_COUNT = "total_failure_count"; static final String TOTAL_BYTES_SENT = "total_bytes_sent"; static final String TOTAL_BYTES_RECEIVED = "total_bytes_received"; - static final String TOTAL_UPLOAD_TIME_MILLIS = "total_upload_time_millis"; - static final String TOTAL_DOWNLOAD_TIME_MILLIS = "total_download_time_millis"; + static final String TOTAL_SEND_TIME_MILLIS = "total_send_time_millis"; + static final String TOTAL_RECEIVE_TIME_MILLIS = "total_receive_time_millis"; static final String ONGOING_WARMS = "ongoing_warms"; - public static final String TOTAL_WARM_TIME = "total_warm_time"; - public static final String TOTAL_UPLOAD_TIME = "total_upload_time"; - public static final String TOTAL_DOWNLOAD_TIME = "total_download_time"; + public 
static final String TOTAL_TIME = "total_time"; + public static final String TOTAL_SEND_TIME = "total_send_time"; + public static final String TOTAL_RECEIVE_TIME = "total_receive_time"; public static final String TOTAL_SENT_SIZE = "total_sent_size"; public static final String TOTAL_RECEIVED_SIZE = "total_received_size"; } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeVLong(totalWarmInvocationsCount); - out.writeVLong(totalWarmTimeMillis); - out.writeVLong(totalWarmFailureCount); + out.writeVLong(totalInvocationsCount); + out.writeVLong(totalTimeMillis); + out.writeVLong(totalFailureCount); out.writeVLong(totalBytesSent); out.writeVLong(totalBytesReceived); - out.writeVLong(totalUploadTimeMillis); - out.writeVLong(totalDownloadTimeMillis); - out.writeVLong(ongoingWarms); + out.writeVLong(totalSendTimeMillis); + out.writeVLong(totalReceiveTimeMillis); + out.writeVLong(ongoingCount); } } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 48705a8b54465..906cf079179b5 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -454,7 +454,7 @@ public IndexShard( indexSettings.isAssignedOnRemoteNode(), () -> getRemoteTranslogUploadBufferInterval(remoteStoreSettings::getClusterRemoteTranslogBufferInterval) ); - this.mergedSegmentTransferTracker = new MergedSegmentTransferTracker(shardId(), indexSettings); + this.mergedSegmentTransferTracker = new MergedSegmentTransferTracker(); this.mapperService = mapperService; this.indexCache = indexCache; this.internalIndexingStats = new InternalIndexingStats(threadPool); diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 3515020a2527d..667eb9edb65f7 100644 --- 
a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -126,7 +126,6 @@ import org.opensearch.index.mapper.IdFieldMapper; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.merge.MergeStats; -import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.index.query.BaseQueryRewriteContext; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryRewriteContext; @@ -1401,7 +1400,6 @@ static class OldShardsStats implements IndexEventListener { final RefreshStats refreshStats = new RefreshStats(); final FlushStats flushStats = new FlushStats(); final RecoveryStats recoveryStats = new RecoveryStats(); - final MergedSegmentWarmerStats mergedSegmentWarmerStats = new MergedSegmentWarmerStats(); @Override public synchronized void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) { diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java index be0232ad7f0e5..1133e59b87551 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java @@ -44,6 +44,7 @@ import java.util.Objects; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; /** * Abstract base class for publish checkpoint. 
@@ -112,7 +113,8 @@ final void doPublish( TransportRequest request, String action, boolean waitForCompletion, - TimeValue waitTimeout + TimeValue waitTimeout, + ActionListener listener ) { String primaryAllocationId = indexShard.routingEntry().allocationId().getId(); long primaryTerm = indexShard.getPendingPrimaryTerm(); @@ -207,16 +209,21 @@ public void handleException(TransportException e) { if (waitForCompletion) { try { if (latch.await(waitTimeout.seconds(), TimeUnit.SECONDS) == false) { - indexShard.mergedSegmentTransferTracker().incrementTotalWarmFailureCount(); + listener.onFailure( + new TimeoutException("Timed out waiting for publish checkpoint to complete. Checkpoint: " + checkpoint) + ); } } catch (InterruptedException e) { - indexShard.mergedSegmentTransferTracker().incrementTotalWarmFailureCount(); + listener.onFailure(e); logger.warn( () -> new ParameterizedMessage("Interrupted while waiting for publish checkpoint complete [{}]", checkpoint), e ); } } + listener.onResponse(null); + } catch (Exception e) { + listener.onFailure(e); } } diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java index 722abceb29a30..0aa054600d7c7 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java @@ -98,7 +98,7 @@ public ReplicationMode getReplicationMode(IndexShard indexShard) { * Publish checkpoint request to shard */ final void publish(IndexShard indexShard, ReplicationCheckpoint checkpoint) { - doPublish(indexShard, checkpoint, new PublishCheckpointRequest(checkpoint), TASK_ACTION_NAME, false, null); + doPublish(indexShard, checkpoint, new PublishCheckpointRequest(checkpoint), TASK_ACTION_NAME, false, null, ActionListener.noop()); } @Override diff --git 
a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishMergedSegmentAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishMergedSegmentAction.java index 73eb456fb8915..45d282da78c9b 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishMergedSegmentAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishMergedSegmentAction.java @@ -84,7 +84,8 @@ final public void publish(IndexShard indexShard, MergedSegmentCheckpoint checkpo new PublishMergedSegmentRequest(checkpoint), TASK_ACTION_NAME, true, - indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout() + indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout(), + ActionListener.noop() ); } diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishReferencedSegmentsAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishReferencedSegmentsAction.java index e9b2e6e411e7f..d268791ae9fe6 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishReferencedSegmentsAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishReferencedSegmentsAction.java @@ -79,7 +79,8 @@ final void publish(IndexShard indexShard, ReferencedSegmentsCheckpoint checkpoin new PublishReferencedSegmentsRequest(checkpoint), TASK_ACTION_NAME, false, - indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout() + indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout(), + ActionListener.noop() ); } diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java index 9ec577789af65..1b3fc549b642c 100644 --- 
a/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java @@ -129,7 +129,16 @@ public final void publish(IndexShard indexShard, MergedSegmentCheckpoint checkpo new RemoteStorePublishMergedSegmentRequest(remoteStoreMergedSegmentCheckpoint), "segrep_remote_publish_merged_segment", true, - TimeValue.timeValueMillis(timeLeftMillis) + TimeValue.timeValueMillis(timeLeftMillis), + new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + indexShard.mergedSegmentTransferTracker().incrementTotalWarmFailureCount(); + } + } ); } else { indexShard.mergedSegmentTransferTracker().incrementTotalWarmFailureCount(); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java index 377f22a645fd2..1c094827dca91 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java @@ -593,29 +593,29 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa table.addCell("pri.merges.total_time", "default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merges.merged_segment_warmer.total_warm_invocations", - "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" + "merges.merged_segment_warmer.total_invocations", + "alias:mswti,mergedSegmentWarmerTotalInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "pri.merges.merged_segment_warmer.total_warm_invocations", + "pri.merges.merged_segment_warmer.total_invocations", "default:false;text-align:right;desc:total 
invocations of merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_warm_time", - "alias:mswtwt,mergedSegmentWarmerTotalWarmTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" + "merges.merged_segment_warmer.total_time", + "alias:mswtt,mergedSegmentWarmerTotalTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "pri.merges.merged_segment_warmer.total_warm_time", + "pri.merges.merged_segment_warmer.total_time", "default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "merges.merged_segment_warmer.ongoing_warms", - "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" + "merges.merged_segment_warmer.ongoing_count", + "alias:mswoc,mergedSegmentWarmerOngoingCount;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( - "pri.merges.merged_segment_warmer.ongoing_warms", + "pri.merges.merged_segment_warmer.ongoing_count", "default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); @@ -638,29 +638,29 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa ); table.addCell( - "merges.merged_segment_warmer.total_download_time", - "alias:mswtdt,mergedSegmentWarmerTotalDownloadTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" + "merges.merged_segment_warmer.total_receive_time", + "alias:mswtrt,mergedSegmentWarmerTotalReceiveTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "pri.merges.merged_segment_warmer.total_download_time", + "pri.merges.merged_segment_warmer.total_receive_time", "default:false;text-align:right;desc:total 
wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "merges.merged_segment_warmer.total_warm_failure_count", - "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" + "merges.merged_segment_warmer.total_failure_count", + "alias:mswtfc,mergedSegmentWarmerTotalFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "pri.merges.merged_segment_warmer.total_warm_failure_count", + "pri.merges.merged_segment_warmer.total_failure_count", "default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_upload_time", - "alias:mswtut,mergedSegmentWarmerTotalUploadTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" + "merges.merged_segment_warmer.total_send_time", + "alias:mswtst,mergedSegmentWarmerTotalSendTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); table.addCell( - "pri.merges.merged_segment_warmer.total_upload_time", + "pri.merges.merged_segment_warmer.total_send_time", "default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); @@ -1074,14 +1074,14 @@ protected Table buildTable( ? null : primaryStats.getMerge().getWarmerStats(); - table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalWarmInvocationsCount()); - table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalWarmInvocationsCount()); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalInvocationsCount()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? 
null : mergedSegmentWarmerPrimaryStats.getTotalInvocationsCount()); - table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalWarmTime()); - table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalWarmTime()); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalTime()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalTime()); - table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getOngoingWarms()); - table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getOngoingWarms()); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getOngoingCount()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getOngoingCount()); table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalReceivedSize()); table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalReceivedSize()); @@ -1089,14 +1089,14 @@ protected Table buildTable( table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalSentSize()); table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalSentSize()); - table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalDownloadTime()); - table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalDownloadTime()); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalReceiveTime()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? 
null : mergedSegmentWarmerPrimaryStats.getTotalReceiveTime()); - table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalWarmFailureCount()); - table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalWarmFailureCount()); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalFailureCount()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalFailureCount()); - table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalUploadTime()); - table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalUploadTime()); + table.addCell(mergedSegmentWarmerTotalStats == null ? null : mergedSegmentWarmerTotalStats.getTotalSendTime()); + table.addCell(mergedSegmentWarmerPrimaryStats == null ? null : mergedSegmentWarmerPrimaryStats.getTotalSendTime()); table.addCell(totalStats.getRefresh() == null ? null : totalStats.getRefresh().getTotal()); table.addCell(primaryStats.getRefresh() == null ? 
null : primaryStats.getRefresh().getTotal()); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java index f63183423b9b4..98f92ced66764 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java @@ -276,16 +276,16 @@ protected Table getTableWithHeader(final RestRequest request) { table.addCell("merges.total_time", "alias:mtt,mergesTotalTime;default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merges.merged_segment_warmer.total_warm_invocations", - "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" + "merges.merged_segment_warmer.total_invocations", + "alias:mswti,mergedSegmentWarmerTotalInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_warm_time", - "alias:mswtwt,mergedSegmentWarmerTotalWarmTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" + "merges.merged_segment_warmer.total_time", + "alias:mswtt,mergedSegmentWarmerTotalTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "merges.merged_segment_warmer.ongoing_warms", - "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" + "merges.merged_segment_warmer.ongoing_count", + "alias:mswoc,mergedSegmentWarmerOngoingCount;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( "merges.merged_segment_warmer.total_bytes_received", @@ -296,16 +296,16 @@ protected Table getTableWithHeader(final RestRequest request) { 
"alias:mswtbs,mergedSegmentWarmerTotalBytesSent;default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); table.addCell( - "merges.merged_segment_warmer.total_download_time", - "alias:mswtdt,mergedSegmentWarmerTotalDownloadTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" + "merges.merged_segment_warmer.total_receive_time", + "alias:mswtrt,mergedSegmentWarmerTotalReceiveTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "merges.merged_segment_warmer.total_warm_failure_count", - "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" + "merges.merged_segment_warmer.total_failure_count", + "alias:mswtfc,mergedSegmentWarmerTotalFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_upload_time", - "alias:mswtut,mergedSegmentWarmerTotalUploadTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" + "merges.merged_segment_warmer.total_send_time", + "alias:mswtst,mergedSegmentWarmerTotalSendTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); table.addCell("refresh.total", "alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); @@ -583,14 +583,14 @@ Table buildTable( table.addCell(mergeStats == null ? null : mergeStats.getTotalTime()); MergedSegmentWarmerStats mergedSegmentWarmerStats = mergeStats == null ? null : mergeStats.getWarmerStats(); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmInvocationsCount()); - table.addCell(mergedSegmentWarmerStats == null ? 
null : mergedSegmentWarmerStats.getTotalWarmTime()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getOngoingWarms()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalInvocationsCount()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalTime()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getOngoingCount()); table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalReceivedSize()); table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalSentSize()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalDownloadTime()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalWarmFailureCount()); - table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalUploadTime()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalReceiveTime()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalFailureCount()); + table.addCell(mergedSegmentWarmerStats == null ? null : mergedSegmentWarmerStats.getTotalSendTime()); RefreshStats refreshStats = indicesStats == null ? null : indicesStats.getRefresh(); table.addCell(refreshStats == null ? 
null : refreshStats.getTotal()); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java index 532f24be4f62b..58a10e5e81d02 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java @@ -216,16 +216,16 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa table.addCell("merges.total_time", "alias:mtt,mergesTotalTime;default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merges.merged_segment_warmer.total_warm_invocations", - "alias:mswtwi,mergedSegmentWarmerTotalWarmInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" + "merges.merged_segment_warmer.total_invocations", + "alias:mswti,mergedSegmentWarmerTotalInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_warm_time", - "alias:mswtwt,mergedSegmentWarmerTotalWarmTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" + "merges.merged_segment_warmer.total_time", + "alias:mswtt,mergedSegmentWarmerTotalTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "merges.merged_segment_warmer.ongoing_warms", - "alias:mswow,mergedSegmentWarmerOngoingWarms;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" + "merges.merged_segment_warmer.ongoing_count", + "alias:mswoc,mergedSegmentWarmerOngoingCount;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( "merges.merged_segment_warmer.total_bytes_received", @@ -236,16 +236,16 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa 
"alias:mswtbs,mergedSegmentWarmerTotalBytesSent;default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); table.addCell( - "merges.merged_segment_warmer.total_download_time", - "alias:mswtdt,mergedSegmentWarmerTotalDownloadTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" + "merges.merged_segment_warmer.total_receive_time", + "alias:mswtrt,mergedSegmentWarmerTotalReceiveTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "merges.merged_segment_warmer.total_warm_failure_count", - "alias:mswtwfc,mergedSegmentWarmerTotalWarmFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" + "merges.merged_segment_warmer.total_failure_count", + "alias:mswtfc,mergedSegmentWarmerTotalFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_upload_time", - "alias:mswtut,mergedSegmentWarmerTotalUploadTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" + "merges.merged_segment_warmer.total_send_time", + "alias:mswtst,mergedSegmentWarmerTotalSendTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); table.addCell("refresh.total", "alias:rto,refreshTotal;default:false;text-align:right;desc:total refreshes"); @@ -488,21 +488,21 @@ Table buildTable( getOrNull( commonStats, (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), - MergedSegmentWarmerStats::getTotalWarmInvocationsCount + MergedSegmentWarmerStats::getTotalInvocationsCount ) ); table.addCell( getOrNull( commonStats, (c) -> c.getMerge() == null ? 
null : c.getMerge().getWarmerStats(), - MergedSegmentWarmerStats::getTotalWarmTime + MergedSegmentWarmerStats::getTotalTime ) ); table.addCell( getOrNull( commonStats, (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), - MergedSegmentWarmerStats::getOngoingWarms + MergedSegmentWarmerStats::getOngoingCount ) ); table.addCell( @@ -523,21 +523,21 @@ Table buildTable( getOrNull( commonStats, (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), - MergedSegmentWarmerStats::getTotalDownloadTime + MergedSegmentWarmerStats::getTotalReceiveTime ) ); table.addCell( getOrNull( commonStats, (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), - MergedSegmentWarmerStats::getTotalWarmFailureCount + MergedSegmentWarmerStats::getTotalFailureCount ) ); table.addCell( getOrNull( commonStats, (c) -> c.getMerge() == null ? null : c.getMerge().getWarmerStats(), - MergedSegmentWarmerStats::getTotalUploadTime + MergedSegmentWarmerStats::getTotalSendTime ) ); diff --git a/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java b/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java index cf184b29a14ff..ee77e6086f80c 100644 --- a/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java +++ b/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java @@ -73,6 +73,7 @@ public void testCreateEngineConfigFromFactory() { null, null, null, + null, null ); @@ -156,6 +157,7 @@ public void testCreateCodecServiceFromFactory() { null, null, null, + null, null ); assertNotNull(config.getCodec()); diff --git a/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java index ce272d071af03..722cf43106997 100644 --- a/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java +++ 
b/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java @@ -9,6 +9,7 @@ package org.opensearch.index.merge; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; import org.opensearch.test.OpenSearchTestCase; @@ -25,19 +26,21 @@ public void setUp() throws Exception { super.setUp(); ShardId shardId = new ShardId("test", "uuid", 0); IndexSettings indexSettings = new IndexSettings(newIndexMeta("test", builder().build()), builder().build()); - tracker = new MergedSegmentTransferTracker(shardId, indexSettings); + tracker = new MergedSegmentTransferTracker(); } public void testInitialStats() { MergedSegmentWarmerStats stats = tracker.stats(); - assertEquals(0, stats.getTotalWarmInvocationsCount()); - assertEquals(TimeValue.ZERO, stats.getTotalWarmTime()); - assertEquals(0, stats.getTotalWarmFailureCount()); - assertEquals(0, stats.getTotalSentSize()); - assertEquals(0, stats.getTotalReceivedSize()); - assertEquals(TimeValue.ZERO, stats.getTotalUploadTime()); - assertEquals(TimeValue.ZERO, stats.getTotalDownloadTime()); - assertEquals(0, stats.getOngoingWarms()); + assertEquals(0, stats.getTotalInvocationsCount()); + assertEquals(TimeValue.ZERO, stats.getTotalTime()); + assertEquals(0, stats.getTotalFailureCount()); + assertEquals(ByteSizeValue.class, stats.getTotalSentSize().getClass()); + assertEquals(0, stats.getTotalSentSize().getBytes()); + assertEquals(ByteSizeValue.class, stats.getTotalReceivedSize().getClass()); + assertEquals(0, stats.getTotalReceivedSize().getBytes()); + assertEquals(TimeValue.ZERO, stats.getTotalSendTime()); + assertEquals(TimeValue.ZERO, stats.getTotalReceiveTime()); + assertEquals(0, stats.getOngoingCount()); } public void testIncrementCounters() { @@ -45,17 +48,17 @@ public void testIncrementCounters() { tracker.incrementTotalWarmFailureCount(); MergedSegmentWarmerStats 
stats = tracker.stats(); - assertEquals(1, stats.getTotalWarmInvocationsCount()); - assertEquals(1, stats.getTotalWarmFailureCount()); + assertEquals(1, stats.getTotalInvocationsCount()); + assertEquals(1, stats.getTotalFailureCount()); } public void testOngoingWarms() { tracker.incrementOngoingWarms(); tracker.incrementOngoingWarms(); - assertEquals(2, tracker.stats().getOngoingWarms()); + assertEquals(2, tracker.stats().getOngoingCount()); tracker.decrementOngoingWarms(); - assertEquals(1, tracker.stats().getOngoingWarms()); + assertEquals(1, tracker.stats().getOngoingCount()); } public void testAddTimeAndBytes() { @@ -66,20 +69,20 @@ public void testAddTimeAndBytes() { tracker.addTotalBytesDownloaded(2048); MergedSegmentWarmerStats stats = tracker.stats(); - assertEquals(new TimeValue(100), stats.getTotalWarmTime()); - assertEquals(new TimeValue(200), stats.getTotalUploadTime()); - assertEquals(new TimeValue(300), stats.getTotalDownloadTime()); - assertEquals(1024, stats.getTotalSentSize()); - assertEquals(2048, stats.getTotalReceivedSize()); + assertEquals(new TimeValue(100), stats.getTotalTime()); + assertEquals(new TimeValue(200), stats.getTotalSendTime()); + assertEquals(new TimeValue(300), stats.getTotalReceiveTime()); + assertEquals(1024, stats.getTotalSentSize().getBytes()); + assertEquals(2048, stats.getTotalReceivedSize().getBytes()); } - public void testAccumulativeStats() { + public void testCumulativeStats() { tracker.addTotalWarmTimeMillis(100); tracker.addTotalWarmTimeMillis(50); - assertEquals(new TimeValue(150), tracker.stats().getTotalWarmTime()); + assertEquals(new TimeValue(150), tracker.stats().getTotalTime()); tracker.addTotalBytesUploaded(1000); tracker.addTotalBytesUploaded(500); - assertEquals(1500, tracker.stats().getTotalSentSize()); + assertEquals(1500, tracker.stats().getTotalSentSize().getBytes()); } } From 6b83220accb0325e9f69c541eb07a85bf2366a25 Mon Sep 17 00:00:00 2001 From: kh3ra Date: Fri, 29 Aug 2025 14:34:44 +0530 Subject: 
[PATCH 08/30] addressing review comments - tests to follow Signed-off-by: Aditya Khera --- server/src/main/java/org/opensearch/index/engine/Engine.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index 8020d4469a274..f70ec75485393 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -214,6 +214,8 @@ public GatedCloseable getSegmentInfosSnapshot() { } public MergeStats getMergeStats() { + logger.info(getClass().getSimpleName() + " | getMergeStats called"); + return new MergeStats(); } From a2c429a73bc2024fa443b5132de25db724e93eed Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Fri, 5 Sep 2025 14:25:06 +0530 Subject: [PATCH 09/30] Changelog + minor bug fix Signed-off-by: Aditya Khera --- CHANGELOG.md | 1 + .../org/opensearch/index/merge/MergedSegmentWarmerStats.java | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3cf0e78220f4a..1d14573c62236 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Return full error for GRPC error response ([#19568](https://github.com/opensearch-project/OpenSearch/pull/19568)) - Add pluggable gRPC interceptors with explicit ordering([#19005](https://github.com/opensearch-project/OpenSearch/pull/19005)) +- Add metrics for the merged segment warmer feature ([#18929](https://github.com/opensearch-project/OpenSearch/pull/18929)) ### Changed - Faster `terms` query creation for `keyword` field with index and docValues enabled ([#19350](https://github.com/opensearch-project/OpenSearch/pull/19350)) - Refactor to move prepareIndex and prepareDelete methods to Engine class ([#19551](https://github.com/opensearch-project/OpenSearch/pull/19551)) diff --git 
a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java index 10757eb991d7b..c8e1b8f0ac9c5 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java @@ -143,7 +143,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.humanReadableField(Fields.TOTAL_BYTES_RECEIVED, Fields.TOTAL_RECEIVED_SIZE, getTotalReceivedSize()); builder.humanReadableField(Fields.TOTAL_SEND_TIME_MILLIS, Fields.TOTAL_SEND_TIME, getTotalSendTime()); builder.humanReadableField(Fields.TOTAL_RECEIVE_TIME_MILLIS, Fields.TOTAL_RECEIVE_TIME, getTotalReceiveTime()); - builder.field(Fields.ONGOING_WARMS, ongoingCount); + builder.field(Fields.ONGOING_COUNT, ongoingCount); builder.endObject(); return builder; } @@ -162,7 +162,7 @@ static final class Fields { static final String TOTAL_BYTES_RECEIVED = "total_bytes_received"; static final String TOTAL_SEND_TIME_MILLIS = "total_send_time_millis"; static final String TOTAL_RECEIVE_TIME_MILLIS = "total_receive_time_millis"; - static final String ONGOING_WARMS = "ongoing_warms"; + static final String ONGOING_COUNT = "ongoing_count"; public static final String TOTAL_TIME = "total_time"; public static final String TOTAL_SEND_TIME = "total_send_time"; From 39ebcd889cef917bdddcb6066ee5a9d4f1cbc813 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Sun, 7 Sep 2025 17:37:55 +0530 Subject: [PATCH 10/30] Addressing review comments + added UTs Signed-off-by: Aditya Khera --- .../core/action/ActionListener.java | 2 +- .../org/opensearch/merge/MergeStatsIT.java | 100 ++------- .../org/opensearch/index/engine/Engine.java | 2 - .../index/engine/NRTReplicationEngine.java | 7 +- .../opensearch/index/merge/MergeStats.java | 4 +- .../checkpoint/PublishCheckpointAction.java | 2 +- .../PublishMergedSegmentAction.java | 2 +- 
.../PublishReferencedSegmentsAction.java | 2 +- .../index/merge/MergeStatsTests.java | 189 ++++++++++++++++++ .../merge/MergedSegmentWarmerStatsTests.java | 156 +++++++++++++++ 10 files changed, 373 insertions(+), 93 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java create mode 100644 server/src/test/java/org/opensearch/index/merge/MergedSegmentWarmerStatsTests.java diff --git a/libs/core/src/main/java/org/opensearch/core/action/ActionListener.java b/libs/core/src/main/java/org/opensearch/core/action/ActionListener.java index b396472423954..b93f841e3bda4 100644 --- a/libs/core/src/main/java/org/opensearch/core/action/ActionListener.java +++ b/libs/core/src/main/java/org/opensearch/core/action/ActionListener.java @@ -359,7 +359,7 @@ static void completeWith(ActionListener listener, CheckedSu } } - static ActionListener noop() { + static ActionListener noOp() { return ActionListener.wrap(response -> {}, exception -> {}); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java index e22539a3c3261..a551781788cbe 100644 --- a/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java @@ -18,23 +18,20 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsRequest; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.action.admin.indices.stats.ShardStats; +import org.opensearch.action.search.SearchRequest; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; -import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; -import 
org.opensearch.core.common.bytes.BytesReference; -import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.index.merge.MergeStats; import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase; -import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; /* * Integration tests asserting on MergeStats for remote store enabled domains. @@ -59,11 +56,11 @@ protected Settings featureFlagSettings() { return featureSettings.build(); } - public void setup() { + private void setup() { internalCluster().startNodes(2); } - public void testNodesStats() throws ExecutionException, InterruptedException { + public void testNodesStats() throws Exception { setup(); String[] indices = setupIndices(3); @@ -95,7 +92,7 @@ public void testNodesStats() throws ExecutionException, InterruptedException { } } - public void testShardStats() throws ExecutionException, InterruptedException { + public void testShardStats() throws Exception { setup(); String[] indices = setupIndices(2); @@ -128,7 +125,7 @@ public void testShardStats() throws ExecutionException, InterruptedException { } } - public void testIndicesStats() throws ExecutionException, InterruptedException { + public void testIndicesStats() throws Exception { setup(); String[] indices = setupIndices(3); @@ -283,77 +280,6 @@ private void assertMergedSegmentWarmerStats(MergedSegmentWarmerStats stats, Stat } } - public void testReadWrite() throws IOException { - MergedSegmentWarmerStats mergedSegmentWarmerStats = new MergedSegmentWarmerStats(); - mergedSegmentWarmerStats.add( - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100) - ); 
- MergeStats mergeStats1 = new MergeStats(); - mergeStats1.add( - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomLongBetween(1, 100), - randomDoubleBetween(1, 100, true), - mergedSegmentWarmerStats - ); - - BytesStreamOutput outputStream = new BytesStreamOutput(); - mergeStats1.writeTo(outputStream); - - BytesReference bytes = outputStream.bytes(); - StreamInput inputStream = bytes.streamInput(); - - MergeStats mergeStats2 = new MergeStats(inputStream); - - assertEquals(mergeStats1.getTotalNumDocs(), mergeStats2.getTotalNumDocs()); - assertEquals(mergeStats1.getTotalSizeInBytes(), mergeStats2.getTotalSizeInBytes()); - assertEquals(mergeStats1.getTotal(), mergeStats2.getTotal()); - assertEquals(mergeStats1.getTotalTimeInMillis(), mergeStats2.getTotalTimeInMillis()); - assertEquals(mergeStats1.getTotalTime(), mergeStats2.getTotalTime()); - assertEquals(mergeStats1.getCurrent(), mergeStats2.getCurrent()); - assertEquals(mergeStats1.getCurrentSize(), mergeStats2.getCurrentSize()); - assertEquals(mergeStats1.getCurrentNumDocs(), mergeStats2.getCurrentNumDocs()); - assertEquals(mergeStats1.getCurrentSizeInBytes(), mergeStats2.getCurrentSizeInBytes()); - assertEquals(mergeStats1.getTotalStoppedTimeInMillis(), mergeStats2.getTotalStoppedTimeInMillis()); - assertEquals(mergeStats1.getTotalStoppedTime(), mergeStats2.getTotalStoppedTime()); - assertEquals(mergeStats1.getTotalThrottledTimeInMillis(), mergeStats2.getTotalThrottledTimeInMillis()); - assertEquals(mergeStats1.getTotalThrottledTime(), mergeStats2.getTotalThrottledTime()); - assertEquals(mergeStats1.getWarmerStats().getTotalFailureCount(), mergeStats2.getWarmerStats().getTotalFailureCount()); - assertEquals(mergeStats1.getWarmerStats().getTotalInvocationsCount(), mergeStats2.getWarmerStats().getTotalInvocationsCount()); - 
assertEquals(mergeStats1.getWarmerStats().getTotalTime().getMillis(), mergeStats2.getWarmerStats().getTotalTime().getMillis()); - assertEquals( - mergeStats1.getWarmerStats().getTotalSentSize().getBytes(), - mergeStats2.getWarmerStats().getTotalSentSize().getBytes() - ); - assertEquals( - mergeStats1.getWarmerStats().getTotalReceivedSize().getBytes(), - mergeStats2.getWarmerStats().getTotalReceivedSize().getBytes() - ); - assertEquals( - mergeStats1.getWarmerStats().getTotalSendTime().getMillis(), - mergeStats2.getWarmerStats().getTotalSendTime().getMillis() - ); - assertEquals( - mergeStats1.getWarmerStats().getTotalReceiveTime().getMillis(), - mergeStats2.getWarmerStats().getTotalReceiveTime().getMillis() - ); - assertEquals(mergeStats1.getWarmerStats().getOngoingCount(), mergeStats2.getWarmerStats().getOngoingCount()); - - } - private void indexDocs(String... indexNames) { for (String indexName : indexNames) { for (int i = 0; i < randomIntBetween(25, 30); i++) { @@ -368,7 +294,7 @@ private void indexDocs(String... indexNames) { } } - private String[] setupIndices(int count) { + private String[] setupIndices(int count) throws Exception { if (count <= 0) { return new String[0]; } @@ -381,9 +307,21 @@ private String[] setupIndices(int count) { for (String index : indices) { indexDocs(index); } + waitForDocsOnReplicas(indices); return indices; } + private void waitForDocsOnReplicas(String... 
indices) throws Exception { + for (String index : indices) { + SearchRequest searchRequest = new SearchRequest(index); + searchRequest.preference("_replica"); + assertBusy(() -> { + long totalDocs = client().search(searchRequest).actionGet().getHits().getTotalHits().value(); + assertTrue("Docs should be searchable on replicas", totalDocs > 0); + }, 10, TimeUnit.SECONDS); + } + } + private enum StatsScope { PRIMARY_SHARD, REPLICA_SHARD, diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index f70ec75485393..8020d4469a274 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -214,8 +214,6 @@ public GatedCloseable getSegmentInfosSnapshot() { } public MergeStats getMergeStats() { - logger.info(getClass().getSimpleName() + " | getMergeStats called"); - return new MergeStats(); } diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index f850778f633a9..64cc076d97af5 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -67,7 +67,6 @@ public class NRTReplicationEngine extends Engine { private final WriteOnlyTranslogManager translogManager; private final Lock flushLock = new ReentrantLock(); protected final ReplicaFileTracker replicaFileTracker; - private final MergeStats mergeStats; private volatile long lastReceivedPrimaryGen = SequenceNumbers.NO_OPS_PERFORMED; @@ -75,7 +74,6 @@ public class NRTReplicationEngine extends Engine { public NRTReplicationEngine(EngineConfig engineConfig) { super(engineConfig); - mergeStats = new MergeStats(); store.incRef(); NRTReplicationReaderManager readerManager = null; WriteOnlyTranslogManager translogManagerRef = null; @@ -504,8 +502,9 @@ 
public void maybePruneDeletes() {} @Override public MergeStats getMergeStats() { - this.mergeStats.add(engineConfig.getMergedSegmentTransferTracker().stats()); - return this.mergeStats; + MergeStats mergeStats = new MergeStats(); + mergeStats.add(engineConfig.getMergedSegmentTransferTracker().stats()); + return mergeStats; } @Override diff --git a/server/src/main/java/org/opensearch/index/merge/MergeStats.java b/server/src/main/java/org/opensearch/index/merge/MergeStats.java index eed4dc0ef44dd..f767c2fe7d136 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergeStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergeStats.java @@ -91,7 +91,7 @@ public MergeStats(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_11_0)) { unreferencedFileCleanUpsPerformed = in.readOptionalVLong(); } - if (in.getVersion().onOrAfter(Version.V_3_2_0)) { + if (in.getVersion().onOrAfter(Version.CURRENT)) { this.warmerStats = new MergedSegmentWarmerStats(in); } else { this.warmerStats = new MergedSegmentWarmerStats(); @@ -328,7 +328,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_11_0)) { out.writeOptionalVLong(unreferencedFileCleanUpsPerformed); } - if (out.getVersion().onOrAfter(Version.V_3_2_0)) { + if (out.getVersion().onOrAfter(Version.CURRENT)) { this.warmerStats.writeTo(out); } } diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java index 0aa054600d7c7..ba9ebfe4ad4b8 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishCheckpointAction.java @@ -98,7 +98,7 @@ public ReplicationMode getReplicationMode(IndexShard indexShard) { * Publish checkpoint request to shard */ final void 
publish(IndexShard indexShard, ReplicationCheckpoint checkpoint) { - doPublish(indexShard, checkpoint, new PublishCheckpointRequest(checkpoint), TASK_ACTION_NAME, false, null, ActionListener.noop()); + doPublish(indexShard, checkpoint, new PublishCheckpointRequest(checkpoint), TASK_ACTION_NAME, false, null, ActionListener.noOp()); } @Override diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishMergedSegmentAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishMergedSegmentAction.java index 45d282da78c9b..e397e75cdc75d 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishMergedSegmentAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishMergedSegmentAction.java @@ -85,7 +85,7 @@ final public void publish(IndexShard indexShard, MergedSegmentCheckpoint checkpo TASK_ACTION_NAME, true, indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout(), - ActionListener.noop() + ActionListener.noOp() ); } diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishReferencedSegmentsAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishReferencedSegmentsAction.java index d268791ae9fe6..353bd16635ee6 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishReferencedSegmentsAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/PublishReferencedSegmentsAction.java @@ -80,7 +80,7 @@ final void publish(IndexShard indexShard, ReferencedSegmentsCheckpoint checkpoin TASK_ACTION_NAME, false, indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout(), - ActionListener.noop() + ActionListener.noOp() ); } diff --git a/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java b/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java new file mode 100644 index 0000000000000..b42009dc8dab2 
--- /dev/null +++ b/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java @@ -0,0 +1,189 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.merge; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class MergeStatsTests extends OpenSearchTestCase { + + public void testDefaultConstructor() { + MergeStats stats = new MergeStats(); + assertEquals(0, stats.getTotal()); + assertEquals(0, stats.getTotalTimeInMillis()); + assertEquals(0, stats.getTotalNumDocs()); + assertEquals(0, stats.getTotalSizeInBytes()); + assertEquals(0, stats.getCurrent()); + assertEquals(0, stats.getCurrentNumDocs()); + assertEquals(0, stats.getCurrentSizeInBytes()); + assertEquals(0, stats.getTotalStoppedTimeInMillis()); + assertEquals(0, stats.getTotalThrottledTimeInMillis()); + assertEquals(0, stats.getUnreferencedFileCleanUpsPerformed()); + assertNotNull(stats.getWarmerStats()); + } + + public void testAdd() { + MergeStats stats = new MergeStats(); + MergedSegmentWarmerStats warmerStats = new MergedSegmentWarmerStats(); + warmerStats.add(1, 10, 0, 100, 200, 5, 15, 0); + + stats.add(5, 100, 50, 1024, 2, 25, 512, 10, 20, 1.5, warmerStats); + + assertEquals(5, stats.getTotal()); + assertEquals(100, stats.getTotalTimeInMillis()); + assertEquals(50, stats.getTotalNumDocs()); + assertEquals(1024, stats.getTotalSizeInBytes()); + assertEquals(2, stats.getCurrent()); + 
assertEquals(25, stats.getCurrentNumDocs()); + assertEquals(512, stats.getCurrentSizeInBytes()); + assertEquals(10, stats.getTotalStoppedTimeInMillis()); + assertEquals(20, stats.getTotalThrottledTimeInMillis()); + } + + public void testAddMergeStats() { + MergeStats stats1 = new MergeStats(); + MergeStats stats2 = new MergeStats(); + + MergedSegmentWarmerStats warmerStats = new MergedSegmentWarmerStats(); + warmerStats.add(1, 10, 0, 100, 200, 5, 15, 0); + + stats1.add(5, 100, 50, 1024, 2, 25, 512, 10, 20, 1.5, warmerStats); + stats2.add(3, 50, 30, 512, 1, 15, 256, 5, 10, 1.0, warmerStats); + + stats1.add(stats2); + + assertEquals(8, stats1.getTotal()); + assertEquals(3, stats1.getCurrent()); + assertEquals(40, stats1.getCurrentNumDocs()); + assertEquals(768, stats1.getCurrentSizeInBytes()); + } + + public void testAddTotals() { + MergeStats stats1 = new MergeStats(); + MergeStats stats2 = new MergeStats(); + + MergedSegmentWarmerStats warmerStats = new MergedSegmentWarmerStats(); + warmerStats.add(1, 10, 0, 100, 200, 5, 15, 0); + + stats1.add(5, 100, 50, 1024, 2, 25, 512, 10, 20, 1.5, warmerStats); + stats2.add(3, 50, 30, 512, 1, 15, 256, 5, 10, 1.0, warmerStats); + + stats1.addTotals(stats2); + + assertEquals(8, stats1.getTotal()); + assertEquals(150, stats1.getTotalTimeInMillis()); + assertEquals(80, stats1.getTotalNumDocs()); + assertEquals(1536, stats1.getTotalSizeInBytes()); + assertEquals(15, stats1.getTotalStoppedTimeInMillis()); + assertEquals(30, stats1.getTotalThrottledTimeInMillis()); + } + + public void testAddWithNull() { + MergeStats stats = new MergeStats(); + stats.add((MergeStats) null); + stats.addTotals(null); + + assertEquals(0, stats.getTotal()); + assertEquals(0, stats.getCurrent()); + } + + public void testUnreferencedFileCleanUpStats() { + MergeStats stats = new MergeStats(); + stats.addUnreferencedFileCleanUpStats(5); + assertEquals(5, stats.getUnreferencedFileCleanUpsPerformed()); + + stats.addUnreferencedFileCleanUpStats(3); + 
assertEquals(8, stats.getUnreferencedFileCleanUpsPerformed()); + } + + public void testGetters() { + MergeStats stats = new MergeStats(); + MergedSegmentWarmerStats warmerStats = new MergedSegmentWarmerStats(); + warmerStats.add(1, 10, 0, 100, 200, 5, 15, 0); + + stats.add(5, 100, 50, 1024, 2, 25, 512, 10, 20, 1.5, warmerStats); + + assertEquals(new TimeValue(100), stats.getTotalTime()); + assertEquals(new TimeValue(10), stats.getTotalStoppedTime()); + assertEquals(new TimeValue(20), stats.getTotalThrottledTime()); + assertEquals(new ByteSizeValue(1024), stats.getTotalSize()); + assertEquals(new ByteSizeValue(512), stats.getCurrentSize()); + assertTrue(stats.getTotalBytesPerSecAutoThrottle() > 0); + } + + public void testAutoThrottleMaxValue() { + MergeStats stats1 = new MergeStats(); + MergeStats stats2 = new MergeStats(); + + MergedSegmentWarmerStats warmerStats = new MergedSegmentWarmerStats(); + + stats1.add(1, 10, 5, 100, 0, 0, 0, 0, 0, Double.MAX_VALUE, warmerStats); + stats2.add(1, 10, 5, 100, 0, 0, 0, 0, 0, 1.0, warmerStats); + + stats1.addTotals(stats2); + assertEquals(Long.MAX_VALUE, stats1.getTotalBytesPerSecAutoThrottle()); + } + + public void testSerialization() throws IOException { + MergeStats original = new MergeStats(); + MergedSegmentWarmerStats warmerStats = new MergedSegmentWarmerStats(); + warmerStats.add(1, 10, 0, 100, 200, 5, 15, 0); + + original.add(5, 100, 50, 1024, 2, 25, 512, 10, 20, 1.5, warmerStats); + original.addUnreferencedFileCleanUpStats(3); + + BytesStreamOutput out = new BytesStreamOutput(); + original.writeTo(out); + + BytesReference bytes = out.bytes(); + StreamInput in = bytes.streamInput(); + MergeStats deserialized = new MergeStats(in); + + assertEquals(original.getTotal(), deserialized.getTotal()); + assertEquals(original.getTotalTimeInMillis(), deserialized.getTotalTimeInMillis()); + assertEquals(original.getTotalNumDocs(), deserialized.getTotalNumDocs()); + assertEquals(original.getTotalSizeInBytes(), 
deserialized.getTotalSizeInBytes()); + assertEquals(original.getCurrent(), deserialized.getCurrent()); + assertEquals(original.getCurrentNumDocs(), deserialized.getCurrentNumDocs()); + assertEquals(original.getCurrentSizeInBytes(), deserialized.getCurrentSizeInBytes()); + assertEquals(original.getTotalStoppedTimeInMillis(), deserialized.getTotalStoppedTimeInMillis()); + assertEquals(original.getTotalThrottledTimeInMillis(), deserialized.getTotalThrottledTimeInMillis()); + assertEquals(original.getTotalBytesPerSecAutoThrottle(), deserialized.getTotalBytesPerSecAutoThrottle()); + } + + public void testToXContent() throws IOException { + MergeStats stats = new MergeStats(); + MergedSegmentWarmerStats warmerStats = new MergedSegmentWarmerStats(); + warmerStats.add(1, 10, 0, 100, 200, 5, 15, 0); + + stats.add(5, 100, 50, 1024, 2, 25, 512, 10, 20, 1.5, warmerStats); + + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + stats.toXContent(builder, null); + builder.endObject(); + + String json = builder.toString(); + assertTrue(json.contains("merges")); + assertTrue(json.contains("current")); + assertTrue(json.contains("total")); + assertTrue(json.contains("total_time_in_millis")); + assertTrue(json.contains("total_docs")); + assertTrue(json.contains("total_size_in_bytes")); + assertTrue(json.contains("merged_segment_warmer")); + } +} diff --git a/server/src/test/java/org/opensearch/index/merge/MergedSegmentWarmerStatsTests.java b/server/src/test/java/org/opensearch/index/merge/MergedSegmentWarmerStatsTests.java new file mode 100644 index 0000000000000..caedd8b3d24d3 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/merge/MergedSegmentWarmerStatsTests.java @@ -0,0 +1,156 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.merge; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class MergedSegmentWarmerStatsTests extends OpenSearchTestCase { + + public void testDefaultConstructor() { + MergedSegmentWarmerStats stats = new MergedSegmentWarmerStats(); + assertEquals(0, stats.getTotalInvocationsCount()); + assertEquals(0, stats.getTotalTime().getMillis()); + assertEquals(0, stats.getTotalFailureCount()); + assertEquals(0, stats.getTotalSentSize().getBytes()); + assertEquals(0, stats.getTotalReceivedSize().getBytes()); + assertEquals(0, stats.getTotalSendTime().millis()); + assertEquals(0, stats.getTotalReceiveTime().millis()); + assertEquals(0, stats.getOngoingCount()); + } + + public void testAdd() { + MergedSegmentWarmerStats stats = new MergedSegmentWarmerStats(); + stats.add(5, 100, 2, 1024, 2048, 50, 75, 3); + + assertEquals(5, stats.getTotalInvocationsCount()); + assertEquals(100, stats.getTotalTime().getMillis()); + assertEquals(2, stats.getTotalFailureCount()); + assertEquals(1024, stats.getTotalSentSize().getBytes()); + assertEquals(2048, stats.getTotalReceivedSize().getBytes()); + assertEquals(50, stats.getTotalSendTime().millis()); + assertEquals(75, stats.getTotalReceiveTime().millis()); + assertEquals(3, stats.getOngoingCount()); + } + + public void testAddMultiple() { + MergedSegmentWarmerStats stats = new MergedSegmentWarmerStats(); + stats.add(5, 100, 2, 1024, 2048, 50, 75, 3); + stats.add(3, 50, 1, 512, 1024, 25, 30, 1); + + assertEquals(8, stats.getTotalInvocationsCount()); + assertEquals(150, 
stats.getTotalTime().getMillis()); + assertEquals(3, stats.getTotalFailureCount()); + assertEquals(1536, stats.getTotalSentSize().getBytes()); + assertEquals(3072, stats.getTotalReceivedSize().getBytes()); + assertEquals(75, stats.getTotalSendTime().millis()); + assertEquals(105, stats.getTotalReceiveTime().millis()); + assertEquals(4, stats.getOngoingCount()); + } + + public void testAddStats() { + MergedSegmentWarmerStats stats1 = new MergedSegmentWarmerStats(); + stats1.add(5, 100, 2, 1024, 2048, 50, 75, 3); + + MergedSegmentWarmerStats stats2 = new MergedSegmentWarmerStats(); + stats2.add(3, 50, 1, 512, 1024, 25, 30, 1); + + stats1.add(stats2); + assertEquals(4, stats1.getOngoingCount()); + } + + public void testAddTotals() { + MergedSegmentWarmerStats stats1 = new MergedSegmentWarmerStats(); + stats1.add(5, 100, 2, 1024, 2048, 50, 75, 3); + + MergedSegmentWarmerStats stats2 = new MergedSegmentWarmerStats(); + stats2.add(3, 50, 1, 512, 1024, 25, 30, 1); + + stats1.addTotals(stats2); + assertEquals(8, stats1.getTotalInvocationsCount()); + assertEquals(150, stats1.getTotalTime().getMillis()); + assertEquals(3, stats1.getTotalFailureCount()); + assertEquals(1536, stats1.getTotalSentSize().getBytes()); + assertEquals(3072, stats1.getTotalReceivedSize().getBytes()); + assertEquals(75, stats1.getTotalSendTime().millis()); + assertEquals(105, stats1.getTotalReceiveTime().millis()); + } + + public void testAddTotalsWithNull() { + MergedSegmentWarmerStats stats = new MergedSegmentWarmerStats(); + stats.add(5, 100, 2, 1024, 2048, 50, 75, 3); + + stats.addTotals(null); + assertEquals(5, stats.getTotalInvocationsCount()); + } + + public void testSerialization() throws IOException { + MergedSegmentWarmerStats original = new MergedSegmentWarmerStats(); + original.add(5, 100, 2, 1024, 2048, 50, 75, 3); + + BytesStreamOutput out = new BytesStreamOutput(); + original.writeTo(out); + + BytesReference bytes = out.bytes(); + StreamInput in = bytes.streamInput(); + 
MergedSegmentWarmerStats deserialized = new MergedSegmentWarmerStats(in); + + assertEquals(original.getTotalInvocationsCount(), deserialized.getTotalInvocationsCount()); + assertEquals(original.getTotalTime().getMillis(), deserialized.getTotalTime().getMillis()); + assertEquals(original.getTotalFailureCount(), deserialized.getTotalFailureCount()); + assertEquals(original.getTotalSentSize().getBytes(), deserialized.getTotalSentSize().getBytes()); + assertEquals(original.getTotalReceivedSize().getBytes(), deserialized.getTotalReceivedSize().getBytes()); + assertEquals(original.getTotalSendTime().millis(), deserialized.getTotalSendTime().millis()); + assertEquals(original.getTotalReceiveTime().millis(), deserialized.getTotalReceiveTime().millis()); + assertEquals(original.getOngoingCount(), deserialized.getOngoingCount()); + } + + public void testToXContent() throws IOException { + MergedSegmentWarmerStats stats = new MergedSegmentWarmerStats(); + stats.add(5, 100, 2, 1024, 2048, 50, 75, 3); + + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + stats.toXContent(builder, null); + builder.endObject(); + + String json = builder.toString(); + assertTrue(json.contains("merged_segment_warmer")); + assertTrue(json.contains("total_invocations_count")); + assertTrue(json.contains("total_time_millis")); + assertTrue(json.contains("total_failure_count")); + assertTrue(json.contains("total_bytes_sent")); + assertTrue(json.contains("total_bytes_received")); + assertTrue(json.contains("total_send_time_millis")); + assertTrue(json.contains("total_receive_time_millis")); + assertTrue(json.contains("ongoing_count")); + } + + public void testGetters() { + MergedSegmentWarmerStats stats = new MergedSegmentWarmerStats(); + stats.add(5, 100, 2, 1024, 2048, 50, 75, 3); + + assertEquals(5, stats.getTotalInvocationsCount()); + assertEquals(new TimeValue(100), stats.getTotalTime()); + assertEquals(2, stats.getTotalFailureCount()); + assertEquals(new 
ByteSizeValue(1024), stats.getTotalSentSize()); + assertEquals(new ByteSizeValue(2048), stats.getTotalReceivedSize()); + assertEquals(new TimeValue(50), stats.getTotalSendTime()); + assertEquals(new TimeValue(75), stats.getTotalReceiveTime()); + assertEquals(3, stats.getOngoingCount()); + } +} From cc69807cf0b75c4f8f08fb1d5d15aa9b81be8d7b Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Sun, 7 Sep 2025 20:53:31 +0530 Subject: [PATCH 11/30] Fixing tests Signed-off-by: Aditya Khera --- .../test/cat.shards/10_basic.yml | 170 ++++-------------- ...eStorePublishMergedSegmentActionTests.java | 8 + .../action/cat/RestShardsActionTests.java | 8 +- 3 files changed, 51 insertions(+), 135 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index ae94245cb65d3..7412e4a8f645c 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -1,13 +1,13 @@ "Help": - skip: - version: " - 3.2.99" - reason: search query failure stats is added in 3.3.0 + version: " - 3.1.99" + reason: star-tree search stats is only added in 3.2.0 features: node_selector - do: cat.shards: help: true node_selector: - version: "3.3.0 - " + version: "3.2.0 - " - match: $body: | @@ -54,6 +54,14 @@ merges.total_docs .+ \n merges.total_size .+ \n merges.total_time .+ \n + merges.merged_segment_warmer.total_invocations .+ \n + merges.merged_segment_warmer.total_time .+ \n + merges.merged_segment_warmer.ongoing_count .+ \n + merges.merged_segment_warmer.total_bytes_received .+ \n + merges.merged_segment_warmer.total_bytes_sent .+ \n + merges.merged_segment_warmer.total_receive_time .+ \n + merges.merged_segment_warmer.total_failure_count .+ \n + merges.merged_segment_warmer.total_send_time .+ \n refresh.total .+ \n refresh.time .+ \n refresh.external_total .+ \n @@ -66,7 
+74,6 @@ search.query_current .+ \n search.query_time .+ \n search.query_total .+ \n - search.query_failed .+ \n search.concurrent_query_current .+ \n search.concurrent_query_time .+ \n search.concurrent_query_total .+ \n @@ -74,7 +81,6 @@ search.startree_query_current .+ \n search.startree_query_time .+ \n search.startree_query_total .+ \n - search.startree_query_failed .+ \n search.scroll_current .+ \n search.scroll_time .+ \n search.scroll_total .+ \n @@ -98,16 +104,16 @@ docs.deleted .+ \n $/ --- -"Help from 3.2.0 to 3.2.99": +"Help from 2.14.0 to 3.0.99": - skip: - version: " - 3.1.99, 3.3.0 - " - reason: star-tree search stats is only added in 3.2.0 + version: " - 2.13.99, 3.2.0 - " + reason: search idle reactivate count total is only added in 3.0.0 features: node_selector - do: cat.shards: help: true node_selector: - version: "3.2.0 - 3.2.99" + version: "2.14.0 - 3.1.99" - match: $body: | @@ -170,9 +176,6 @@ search.concurrent_query_time .+ \n search.concurrent_query_total .+ \n search.concurrent_avg_slice_count .+ \n - search.startree_query_current .+ \n - search.startree_query_time .+ \n - search.startree_query_total .+ \n search.scroll_current .+ \n search.scroll_time .+ \n search.scroll_total .+ \n @@ -196,16 +199,16 @@ docs.deleted .+ \n $/ --- -"Help from 2.14.0 to 3.0.99": +"Help from 2.12.0 to 2.13.99": - skip: - version: " - 2.13.99, 3.2.0 - " - reason: search idle reactivate count total is only added in 3.0.0 + version: " - 2.11.99 , 2.14.0 - " + reason: deleted docs and concurrent search are added in 2.12.0 features: node_selector - do: cat.shards: help: true node_selector: - version: "2.14.0 - 3.1.99" + version: "2.12.0 - 2.13.99" - match: $body: | @@ -274,7 +277,6 @@ search.point_in_time_current .+ \n search.point_in_time_time .+ \n search.point_in_time_total .+ \n - search.search_idle_reactivate_count_total .+ \n segments.count .+ \n segments.memory .+ \n segments.index_writer_memory .+ \n @@ -291,16 +293,16 @@ docs.deleted .+ \n $/ --- -"Help 
from 2.12.0 to 2.13.99": +"Help from 2.4.0 to 2.11.0": - skip: - version: " - 2.11.99 , 2.14.0 - " - reason: deleted docs and concurrent search are added in 2.12.0 + version: " - 2.3.99 , 2.12.0 - " + reason: point in time stats were added in 2.4.0 features: node_selector - do: cat.shards: help: true node_selector: - version: "2.12.0 - 2.13.99" + version: "2.4.0 - 2.11.99" - match: $body: | @@ -359,10 +361,6 @@ search.query_current .+ \n search.query_time .+ \n search.query_total .+ \n - search.concurrent_query_current .+ \n - search.concurrent_query_time .+ \n - search.concurrent_query_total .+ \n - search.concurrent_avg_slice_count .+ \n search.scroll_current .+ \n search.scroll_time .+ \n search.scroll_total .+ \n @@ -382,102 +380,12 @@ warmer.total_time .+ \n path.data .+ \n path.state .+ \n - docs.deleted .+ \n $/ --- -"Help from 2.4.0 to 2.11.0": - - skip: - version: " - 2.3.99 , 2.12.0 - " - reason: point in time stats were added in 2.4.0 - features: node_selector - - do: - cat.shards: - help: true - node_selector: - version: "2.4.0 - 2.11.99" - - - match: - $body: | - /^ index .+ \n - shard .+ \n - prirep .+ \n - state .+ \n - docs .+ \n - store .+ \n - ip .+ \n - id .+ \n - node .+ \n - sync_id .+ \n - unassigned.reason .+ \n - unassigned.at .+ \n - unassigned.for .+ \n - unassigned.details .+ \n - recoverysource.type .+ \n - completion.size .+ \n - fielddata.memory_size .+ \n - fielddata.evictions .+ \n - query_cache.memory_size .+ \n - query_cache.evictions .+ \n - flush.total .+ \n - flush.total_time .+ \n - get.current .+ \n - get.time .+ \n - get.total .+ \n - get.exists_time .+ \n - get.exists_total .+ \n - get.missing_time .+ \n - get.missing_total .+ \n - indexing.delete_current .+ \n - indexing.delete_time .+ \n - indexing.delete_total .+ \n - indexing.index_current .+ \n - indexing.index_time .+ \n - indexing.index_total .+ \n - indexing.index_failed .+ \n - merges.current .+ \n - merges.current_docs .+ \n - merges.current_size .+ \n - 
merges.total .+ \n - merges.total_docs .+ \n - merges.total_size .+ \n - merges.total_time .+ \n - refresh.total .+ \n - refresh.time .+ \n - refresh.external_total .+ \n - refresh.external_time .+ \n - refresh.listeners .+ \n - search.fetch_current .+ \n - search.fetch_time .+ \n - search.fetch_total .+ \n - search.open_contexts .+ \n - search.query_current .+ \n - search.query_time .+ \n - search.query_total .+ \n - search.scroll_current .+ \n - search.scroll_time .+ \n - search.scroll_total .+ \n - search.point_in_time_current .+ \n - search.point_in_time_time .+ \n - search.point_in_time_total .+ \n - segments.count .+ \n - segments.memory .+ \n - segments.index_writer_memory .+ \n - segments.version_map_memory .+ \n - segments.fixed_bitset_memory .+ \n - seq_no.max .+ \n - seq_no.local_checkpoint .+ \n - seq_no.global_checkpoint .+ \n - warmer.current .+ \n - warmer.total .+ \n - warmer.total_time .+ \n - path.data .+ \n - path.state .+ \n - $/ ---- "Help before - 2.4.0": - skip: version: "2.4.0 - " - reason: point in time stats were added in 2.4.0 + reason: point in time stats were added in 2.4.0 features: node_selector - do: cat.shards: @@ -563,11 +471,11 @@ "Test cat shards output": - do: - cat.shards: {} + cat.shards: { } - match: $body: | - /^$/ + /^$/ - do: indices.create: index: index1 @@ -576,11 +484,11 @@ number_of_shards: "5" number_of_replicas: "1" - do: - cat.shards: {} + cat.shards: { } - match: $body: | - /^(index1 \s+ \d \s+ (p|r) \s+ ((STARTED|INITIALIZING|RELOCATING) \s+ (\d \s+ (\d+|\d+[.]\d+)(kb|b) \s+)? \d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} \s+ .+|UNASSIGNED \s+) \n?){10}$/ + /^(index1 \s+ \d \s+ (p|r) \s+ ((STARTED|INITIALIZING|RELOCATING) \s+ (\d \s+ (\d+|\d+[.]\d+)(kb|b) \s+)? 
\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} \s+ .+|UNASSIGNED \s+) \n?){10}$/ - do: indices.create: @@ -591,17 +499,17 @@ number_of_replicas: "0" - do: - cat.shards: {} + cat.shards: { } - match: $body: | - /^(index(1|2) \s+ \d \s+ (p|r) \s+ ((STARTED|INITIALIZING|RELOCATING) \s+ (\d \s+ (\d+|\d+[.]\d+)(kb|b) \s+)? \d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} \s+ .+|UNASSIGNED \s+) \n?){15}$/ + /^(index(1|2) \s+ \d \s+ (p|r) \s+ ((STARTED|INITIALIZING|RELOCATING) \s+ (\d \s+ (\d+|\d+[.]\d+)(kb|b) \s+)? \d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} \s+ .+|UNASSIGNED \s+) \n?){15}$/ - do: cat.shards: index: index2 - match: $body: | - /^(index2 \s+ \d \s+ (p|r) \s+ ((STARTED|INITIALIZING|RELOCATING) \s+ (\d \s+ (\d+|\d+[.]\d+)(kb|b) \s+)? \d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} \s+ .+|UNASSIGNED \s+) \n?){5}$/ + /^(index2 \s+ \d \s+ (p|r) \s+ ((STARTED|INITIALIZING|RELOCATING) \s+ (\d \s+ (\d+|\d+[.]\d+)(kb|b) \s+)? \d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} \s+ .+|UNASSIGNED \s+) \n?){5}$/ --- "Test cat shards using wildcards": @@ -638,7 +546,7 @@ - match: $body: | - /^(foo \n?)$/ + /^(foo \n?)$/ - do: cat.shards: @@ -648,7 +556,7 @@ - match: $body: | - /^(ba(r|z) \n?){2}$/ + /^(ba(r|z) \n?){2}$/ --- "Test cat shards sort": @@ -676,12 +584,12 @@ - do: cat.shards: - h: [index, docs] - s: [docs] + h: [ index, docs ] + s: [ docs ] -# don't use the store here it's cached and might be stale + # don't use the store here it's cached and might be stale - match: $body: | - /^ foo \s+ 0\n - bar \s+ 1\n - $/ + /^ foo \s+ 0\n + bar \s+ 1\n + $/ diff --git a/server/src/test/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentActionTests.java b/server/src/test/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentActionTests.java index 338e1a44e7713..8ab597cde87fb 100644 --- a/server/src/test/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentActionTests.java +++ 
b/server/src/test/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentActionTests.java @@ -29,6 +29,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexService; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.store.RemoteDirectory; import org.opensearch.index.store.RemoteSegmentStoreDirectory; @@ -115,6 +116,7 @@ public void testPublishMergedSegment() { final int id = randomIntBetween(0, 4); final IndexShard indexShard = mock(IndexShard.class); + when(indexShard.mergedSegmentTransferTracker()).thenReturn(new MergedSegmentTransferTracker()); when(indexService.getShard(id)).thenReturn(indexShard); final ShardId shardId = new ShardId(index, id); @@ -177,6 +179,7 @@ public void testPublishMergedSegmentWithNoTimeLeftAfterUpload() { final int id = randomIntBetween(0, 4); final IndexShard indexShard = mock(IndexShard.class); + when(indexShard.mergedSegmentTransferTracker()).thenReturn(new MergedSegmentTransferTracker()); when(indexService.getShard(id)).thenReturn(indexShard); final ShardId shardId = new ShardId(index, id); @@ -238,6 +241,7 @@ public void testPublishMergedSegmentActionOnPrimary() throws InterruptedExceptio final int id = randomIntBetween(0, 4); final IndexShard indexShard = mock(IndexShard.class); + when(indexShard.mergedSegmentTransferTracker()).thenReturn(new MergedSegmentTransferTracker()); when(indexService.getShard(id)).thenReturn(indexShard); final ShardId shardId = new ShardId(index, id); @@ -279,6 +283,7 @@ public void testPublishMergedSegmentActionOnReplica() throws IOException { when(indicesService.indexServiceSafe(index)).thenReturn(indexService); final int id = randomIntBetween(0, 4); final IndexShard indexShard = mock(IndexShard.class); + when(indexShard.mergedSegmentTransferTracker()).thenReturn(new MergedSegmentTransferTracker()); 
when(indexService.getShard(id)).thenReturn(indexShard); final ShardId shardId = new ShardId(index, id); final RemoteSegmentStoreDirectory remoteDirectory = new RemoteSegmentStoreDirectory( @@ -346,6 +351,7 @@ public void testPublishMergedSegmentActionOnReplicaWithMismatchedShardId() throw when(indicesService.indexServiceSafe(index)).thenReturn(indexService); final int id = randomIntBetween(0, 4); final IndexShard indexShard = mock(IndexShard.class); + when(indexShard.mergedSegmentTransferTracker()).thenReturn(new MergedSegmentTransferTracker()); when(indexService.getShard(id)).thenReturn(indexShard); final ShardId shardId = new ShardId(index, id); final RemoteSegmentStoreDirectory remoteDirectory = new RemoteSegmentStoreDirectory( @@ -425,6 +431,7 @@ public void testPublishMergedSegmentActionOnDocrepReplicaDuringMigration() throw when(indicesService.indexServiceSafe(index)).thenReturn(indexService); final int id = randomIntBetween(0, 4); final IndexShard indexShard = mock(IndexShard.class); + when(indexShard.mergedSegmentTransferTracker()).thenReturn(new MergedSegmentTransferTracker()); when(indexService.getShard(id)).thenReturn(indexShard); final ShardId shardId = new ShardId(index, id); @@ -461,6 +468,7 @@ public void testPublishMergedSegmentActionOnDocrepReplicaDuringMigration() throw public void testGetReplicationModeWithRemoteTranslog() { final RemoteStorePublishMergedSegmentAction action = createAction(); final IndexShard indexShard = mock(IndexShard.class); + when(indexShard.mergedSegmentTransferTracker()).thenReturn(new MergedSegmentTransferTracker()); when(indexShard.indexSettings()).thenReturn(createIndexSettings(true)); assertEquals(ReplicationMode.FULL_REPLICATION, action.getReplicationMode(indexShard)); } diff --git a/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java b/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java index 8bb4660d15155..95a9a943e4dbf 100644 --- 
a/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java +++ b/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java @@ -158,7 +158,7 @@ private void assertTable(Table table) { assertThat(headers.get(6).value, equalTo("ip")); assertThat(headers.get(7).value, equalTo("id")); assertThat(headers.get(8).value, equalTo("node")); - assertThat(headers.get(84).value, equalTo("docs.deleted")); + assertThat(headers.get(90).value, equalTo("docs.deleted")); final List> rows = table.getRows(); assertThat(rows.size(), equalTo(shardRoutings.size())); @@ -174,9 +174,9 @@ private void assertTable(Table table) { assertThat(row.get(4).value, equalTo(shardStats.getStats().getDocs().getCount())); assertThat(row.get(6).value, equalTo(localNode.getHostAddress())); assertThat(row.get(7).value, equalTo(localNode.getId())); - assertThat(row.get(82).value, equalTo(shardStats.getDataPath())); - assertThat(row.get(83).value, equalTo(shardStats.getStatePath())); - assertThat(row.get(84).value, equalTo(shardStats.getStats().getDocs().getDeleted())); + assertThat(row.get(88).value, equalTo(shardStats.getDataPath())); + assertThat(row.get(89).value, equalTo(shardStats.getStatePath())); + assertThat(row.get(90).value, equalTo(shardStats.getStats().getDocs().getDeleted())); } } } From 4091c983deb47a240c461013cb03b77d425c04b3 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Sun, 7 Sep 2025 21:02:56 +0530 Subject: [PATCH 12/30] Restored breaking publicAPIs Signed-off-by: Aditya Khera --- .../opensearch/index/merge/MergeStats.java | 24 +++++++++++++++++++ .../index/merge/MergedSegmentWarmerStats.java | 4 ++-- .../index/merge/MergeStatsTests.java | 15 ++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/merge/MergeStats.java b/server/src/main/java/org/opensearch/index/merge/MergeStats.java index f767c2fe7d136..302a8f31039c5 100644 --- 
a/server/src/main/java/org/opensearch/index/merge/MergeStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergeStats.java @@ -98,6 +98,30 @@ public MergeStats(StreamInput in) throws IOException { } } + public void add( + long totalMerges, + long totalMergeTime, + long totalNumDocs, + long totalSizeInBytes, + long currentMerges, + long currentNumDocs, + long currentSizeInBytes, + long stoppedTimeMillis, + long throttledTimeMillis, + double mbPerSecAutoThrottle) { + add(totalMerges, + totalMergeTime, + totalNumDocs, + totalSizeInBytes, + currentMerges, + currentNumDocs, + currentSizeInBytes, + stoppedTimeMillis, + throttledTimeMillis, + mbPerSecAutoThrottle, + new MergedSegmentWarmerStats()); + } + public void add( long totalMerges, long totalMergeTime, diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java index c8e1b8f0ac9c5..cf63b479076da 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java @@ -8,7 +8,7 @@ package org.opensearch.index.merge; -import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -24,7 +24,7 @@ * * @opensearch.api */ -@PublicApi(since = "1.0.0") +@ExperimentalApi public class MergedSegmentWarmerStats implements Writeable, ToXContentFragment { // [PRIMARY SHARD] Number of times segment MergedSegmentWarmer.warm has been invoked diff --git a/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java b/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java index b42009dc8dab2..9288ad2760e24 100644 --- 
a/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java +++ b/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java @@ -54,6 +54,21 @@ public void testAdd() { assertEquals(20, stats.getTotalThrottledTimeInMillis()); } + public void testAddWithoutMergedSegmentWarmer() { + MergeStats stats = new MergeStats(); + stats.add(5, 100, 50, 1024, 2, 25, 512, 10, 20, 1.5); + + assertEquals(5, stats.getTotal()); + assertEquals(100, stats.getTotalTimeInMillis()); + assertEquals(50, stats.getTotalNumDocs()); + assertEquals(1024, stats.getTotalSizeInBytes()); + assertEquals(2, stats.getCurrent()); + assertEquals(25, stats.getCurrentNumDocs()); + assertEquals(512, stats.getCurrentSizeInBytes()); + assertEquals(10, stats.getTotalStoppedTimeInMillis()); + assertEquals(20, stats.getTotalThrottledTimeInMillis()); + } + public void testAddMergeStats() { MergeStats stats1 = new MergeStats(); MergeStats stats2 = new MergeStats(); From 2bafefdc0e80d1d7b2899dc8681e9e4bce004ed2 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Sun, 7 Sep 2025 21:23:57 +0530 Subject: [PATCH 13/30] spotlessApply Signed-off-by: Aditya Khera --- .../main/java/org/opensearch/index/merge/MergeStats.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/merge/MergeStats.java b/server/src/main/java/org/opensearch/index/merge/MergeStats.java index 302a8f31039c5..89ebc12f7c044 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergeStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergeStats.java @@ -108,8 +108,10 @@ public void add( long currentSizeInBytes, long stoppedTimeMillis, long throttledTimeMillis, - double mbPerSecAutoThrottle) { - add(totalMerges, + double mbPerSecAutoThrottle + ) { + add( + totalMerges, totalMergeTime, totalNumDocs, totalSizeInBytes, @@ -119,7 +121,8 @@ public void add( stoppedTimeMillis, throttledTimeMillis, mbPerSecAutoThrottle, - new MergedSegmentWarmerStats()); + 
new MergedSegmentWarmerStats() + ); } public void add( From e9cd37f4e8cece33939fad88ba25b6736a56db51 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Mon, 8 Sep 2025 11:08:09 +0530 Subject: [PATCH 14/30] Empty commit Signed-off-by: Aditya Khera From d2f2dc2cd03fd5fdf93a9a4a8994a5730c92eee5 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Mon, 8 Sep 2025 15:14:45 +0530 Subject: [PATCH 15/30] Empty commit to trigger build Signed-off-by: Aditya Khera From d37c9d54eb3a88fefd8aa4432b541bd5deb6709b Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Mon, 8 Sep 2025 16:32:56 +0530 Subject: [PATCH 16/30] Test changes Signed-off-by: Aditya Khera --- .../rest-api-spec/test/cat.shards/10_basic.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index 7412e4a8f645c..b8869ffaacf33 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -1,7 +1,7 @@ "Help": - skip: version: " - 3.1.99" - reason: star-tree search stats is only added in 3.2.0 + reason: star-tree search stats is only added in 3.2.0 features: node_selector - do: cat.shards: @@ -202,7 +202,7 @@ "Help from 2.12.0 to 2.13.99": - skip: version: " - 2.11.99 , 2.14.0 - " - reason: deleted docs and concurrent search are added in 2.12.0 + reason: deleted docs and concurrent search are added in 2.12.0 features: node_selector - do: cat.shards: @@ -296,7 +296,7 @@ "Help from 2.4.0 to 2.11.0": - skip: version: " - 2.3.99 , 2.12.0 - " - reason: point in time stats were added in 2.4.0 + reason: point in time stats were added in 2.4.0 features: node_selector - do: cat.shards: @@ -385,7 +385,7 @@ "Help before - 2.4.0": - skip: version: "2.4.0 - " - reason: point in time stats were added in 2.4.0 + reason: point in time stats 
were added in 2.4.0 features: node_selector - do: cat.shards: @@ -471,7 +471,7 @@ "Test cat shards output": - do: - cat.shards: { } + cat.shards: {} - match: $body: | @@ -484,7 +484,7 @@ number_of_shards: "5" number_of_replicas: "1" - do: - cat.shards: { } + cat.shards: {} - match: $body: | @@ -499,7 +499,7 @@ number_of_replicas: "0" - do: - cat.shards: { } + cat.shards: {} - match: $body: | /^(index(1|2) \s+ \d \s+ (p|r) \s+ ((STARTED|INITIALIZING|RELOCATING) \s+ (\d \s+ (\d+|\d+[.]\d+)(kb|b) \s+)? \d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} \s+ .+|UNASSIGNED \s+) \n?){15}$/ @@ -584,8 +584,8 @@ - do: cat.shards: - h: [ index, docs ] - s: [ docs ] + h: [index, docs] + s: [docs] # don't use the store here it's cached and might be stale - match: From 9e5924b1e3090447b4f9a503dc48821d07ccc173 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Mon, 8 Sep 2025 11:08:09 +0530 Subject: [PATCH 17/30] Empty commit Signed-off-by: Aditya Khera From 3db69366e48e7c15f26dfd238dda38410baa2169 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Mon, 8 Sep 2025 15:14:45 +0530 Subject: [PATCH 18/30] Empty commit to trigger build Signed-off-by: Aditya Khera From 055fd6b897f4703ebbd6f7b0b9b1d821f291670f Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Tue, 9 Sep 2025 18:35:21 +0530 Subject: [PATCH 19/30] Fixing tests Signed-off-by: Aditya Khera --- CHANGELOG.md | 1 + .../test/cat.shards/10_basic.yml | 104 +++++++++++++++++- .../opensearch/index/merge/MergeStats.java | 4 +- 3 files changed, 104 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d14573c62236..a57338a3581a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add pluggable gRPC interceptors with explicit ordering([#19005](https://github.com/opensearch-project/OpenSearch/pull/19005)) - Add metrics for the merged segment warmer feature ([#18929](https://github.com/opensearch-project/OpenSearch/pull/18929)) + ### Changed 
- Faster `terms` query creation for `keyword` field with index and docValues enabled ([#19350](https://github.com/opensearch-project/OpenSearch/pull/19350)) - Refactor to move prepareIndex and prepareDelete methods to Engine class ([#19551](https://github.com/opensearch-project/OpenSearch/pull/19551)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index b8869ffaacf33..80d847c411914 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -1,13 +1,13 @@ "Help": - skip: - version: " - 3.1.99" - reason: star-tree search stats is only added in 3.2.0 + version: " - 3.2.99" + reason: merged segment warmer stats were added in 3.3.0 features: node_selector - do: cat.shards: help: true node_selector: - version: "3.2.0 - " + version: "3.3.0 - " - match: $body: | @@ -104,6 +104,104 @@ docs.deleted .+ \n $/ --- +"Help from 3.2.0 to 3.2.99": + - skip: + version: " - 3.1.99, 3.3.0-" + reason: star-tree search stats is only added in 3.2.0 + features: node_selector + - do: + cat.shards: + help: true + node_selector: + version: "3.2.0 - 3.2.99" + + - match: + $body: | + /^ index .+ \n + shard .+ \n + prirep .+ \n + state .+ \n + docs .+ \n + store .+ \n + ip .+ \n + id .+ \n + node .+ \n + sync_id .+ \n + unassigned.reason .+ \n + unassigned.at .+ \n + unassigned.for .+ \n + unassigned.details .+ \n + recoverysource.type .+ \n + completion.size .+ \n + fielddata.memory_size .+ \n + fielddata.evictions .+ \n + query_cache.memory_size .+ \n + query_cache.evictions .+ \n + flush.total .+ \n + flush.total_time .+ \n + get.current .+ \n + get.time .+ \n + get.total .+ \n + get.exists_time .+ \n + get.exists_total .+ \n + get.missing_time .+ \n + get.missing_total .+ \n + indexing.delete_current .+ \n + indexing.delete_time .+ \n + indexing.delete_total 
.+ \n + indexing.index_current .+ \n + indexing.index_time .+ \n + indexing.index_total .+ \n + indexing.index_failed .+ \n + merges.current .+ \n + merges.current_docs .+ \n + merges.current_size .+ \n + merges.total .+ \n + merges.total_docs .+ \n + merges.total_size .+ \n + merges.total_time .+ \n + refresh.total .+ \n + refresh.time .+ \n + refresh.external_total .+ \n + refresh.external_time .+ \n + refresh.listeners .+ \n + search.fetch_current .+ \n + search.fetch_time .+ \n + search.fetch_total .+ \n + search.open_contexts .+ \n + search.query_current .+ \n + search.query_time .+ \n + search.query_total .+ \n + search.concurrent_query_current .+ \n + search.concurrent_query_time .+ \n + search.concurrent_query_total .+ \n + search.concurrent_avg_slice_count .+ \n + search.startree_query_current .+ \n + search.startree_query_time .+ \n + search.startree_query_total .+ \n + search.scroll_current .+ \n + search.scroll_time .+ \n + search.scroll_total .+ \n + search.point_in_time_current .+ \n + search.point_in_time_time .+ \n + search.point_in_time_total .+ \n + search.search_idle_reactivate_count_total .+ \n + segments.count .+ \n + segments.memory .+ \n + segments.index_writer_memory .+ \n + segments.version_map_memory .+ \n + segments.fixed_bitset_memory .+ \n + seq_no.max .+ \n + seq_no.local_checkpoint .+ \n + seq_no.global_checkpoint .+ \n + warmer.current .+ \n + warmer.total .+ \n + warmer.total_time .+ \n + path.data .+ \n + path.state .+ \n + docs.deleted .+ \n + $/ +--- "Help from 2.14.0 to 3.0.99": - skip: version: " - 2.13.99, 3.2.0 - " diff --git a/server/src/main/java/org/opensearch/index/merge/MergeStats.java b/server/src/main/java/org/opensearch/index/merge/MergeStats.java index 89ebc12f7c044..690260fd0cef9 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergeStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergeStats.java @@ -91,7 +91,7 @@ public MergeStats(StreamInput in) throws IOException { if 
(in.getVersion().onOrAfter(Version.V_2_11_0)) { unreferencedFileCleanUpsPerformed = in.readOptionalVLong(); } - if (in.getVersion().onOrAfter(Version.CURRENT)) { + if (in.getVersion().onOrAfter(Version.V_3_3_0)) { this.warmerStats = new MergedSegmentWarmerStats(in); } else { this.warmerStats = new MergedSegmentWarmerStats(); @@ -355,7 +355,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_11_0)) { out.writeOptionalVLong(unreferencedFileCleanUpsPerformed); } - if (out.getVersion().onOrAfter(Version.CURRENT)) { + if (out.getVersion().onOrAfter(Version.V_3_3_0)) { this.warmerStats.writeTo(out); } } From a11a1273e183f16e53608da3b75a2b719b00cf14 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Wed, 10 Sep 2025 11:50:13 +0530 Subject: [PATCH 20/30] fixes to merged segment warmer + tests Signed-off-by: Aditya Khera --- .../index/engine/MergedSegmentWarmer.java | 4 ++++ .../RemoteStoreReplicationSourceTests.java | 21 ++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java index a5fbeae83ce57..b2d77f4b1ce3a 100644 --- a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java +++ b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java @@ -53,6 +53,10 @@ public MergedSegmentWarmer( @Override public void warm(LeafReader leafReader) throws IOException { + if (shouldWarm() == false) { + return; + } + mergedSegmentTransferTracker.incrementTotalWarmInvocationsCount(); mergedSegmentTransferTracker.incrementOngoingWarms(); // IndexWriter.IndexReaderWarmer#warm is called by IndexWriter#mergeMiddle. The type of leafReader should be SegmentReader. 
diff --git a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java index ca91bbef52296..b3c6d42325f81 100644 --- a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java @@ -229,7 +229,7 @@ public void testGetMergedSegmentFiles() throws IOException, ExecutionException, GetSegmentFilesResponse response = res.get(); assertEquals(response.files.size(), filesToFetch.size()); assertTrue(response.files.containsAll(filesToFetch)); - closeShards(replicaShard); + closeShardWithRetry(replicaShard); } public void testGetMergedSegmentFilesDownloadTimeout() throws IOException, ExecutionException, InterruptedException { @@ -293,7 +293,7 @@ public void onFailure(Exception e) { observedException.getMessage() != null && observedException.getMessage().equals("Timed out waiting for merged segments download from remote store") ); - closeShards(replicaShard); + closeShardWithRetry(replicaShard); } public void testGetMergedSegmentFilesFailure() throws IOException, ExecutionException, InterruptedException { @@ -337,7 +337,8 @@ public void onFailure(Exception e) { mergedSegmentCheckpoint, filesToFetch, replicaShard, - (fileName, bytesRecovered) -> {}, + (fileName, bytesRecovered) -> { + }, listener ); latch.await(); @@ -356,4 +357,18 @@ private void buildIndexShardBehavior(IndexShard mockShard, IndexShard indexShard FilterDirectory remoteStoreFilterDirectory = new TestFilterDirectory(new TestFilterDirectory(remoteSegmentStoreDirectory)); when(remoteStore.directory()).thenReturn(remoteStoreFilterDirectory); } + + private void closeShardWithRetry(IndexShard shard) { + try { + assertBusy(() -> { + try { + closeShards(shard); + } catch (RuntimeException e) { + throw new AssertionError("Failed to close shard", e); + } + 
}); + } catch (Exception e) { + logger.warn("Unable to close shard " + shard.shardId() + ". Exception: " + e); + } + } } From 28a61cc991abb1cd9f023df973d6e5ecddcc2fc8 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Wed, 10 Sep 2025 11:59:57 +0530 Subject: [PATCH 21/30] spotlessApply Signed-off-by: Aditya Khera --- .../indices/replication/RemoteStoreReplicationSourceTests.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java index b3c6d42325f81..bf6d2d9e65882 100644 --- a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java @@ -337,8 +337,7 @@ public void onFailure(Exception e) { mergedSegmentCheckpoint, filesToFetch, replicaShard, - (fileName, bytesRecovered) -> { - }, + (fileName, bytesRecovered) -> {}, listener ); latch.await(); From c13f652bb8e5c86159eb650f1dcb7873381cae96 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Wed, 10 Sep 2025 15:01:55 +0530 Subject: [PATCH 22/30] Fixing ITs after rebase Signed-off-by: Aditya Khera --- .../org/opensearch/merge/MergeStatsIT.java | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java index a551781788cbe..8a70947f7640d 100644 --- a/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java @@ -26,9 +26,11 @@ import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.merge.MergeStats; import org.opensearch.index.merge.MergedSegmentWarmerStats; +import 
org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -41,6 +43,14 @@ public class MergeStatsIT extends RemoteStoreBaseIntegTestCase { private static final String INDEX_NAME = "test-idx"; + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(RecoverySettings.INDICES_MERGED_SEGMENT_REPLICATION_WARMER_ENABLED_SETTING.getKey(), true) + .build(); + } + @Override public Settings indexSettings() { return Settings.builder() @@ -203,19 +213,23 @@ private void assertMergeStats(MergeStats stats, StatsScope type) { // the node might have both primaries and replicas, only primaries, or only replicas boolean primaryShardStatsResult = false; boolean replicaShardStatsResult = false; - + List errors = new ArrayList<>(); try { assertMergeStats(stats, StatsScope.PRIMARY_SHARD); primaryShardStatsResult = true; - } catch (AssertionError ignored) {} + } catch (AssertionError error) { + errors.add(error); + } try { assertMergeStats(stats, StatsScope.REPLICA_SHARD); replicaShardStatsResult = true; - } catch (AssertionError ignored) {} + } catch (AssertionError error) { + errors.add(error); + } assertTrue( - "Stats should match either primary or replica shard patterns or both.", + "Stats should match either primary or replica shard patterns or both. 
Errors: " + errors, primaryShardStatsResult || replicaShardStatsResult ); } @@ -263,18 +277,24 @@ private void assertMergedSegmentWarmerStats(MergedSegmentWarmerStats stats, Stat return; } + List errors = new ArrayList<>(); + try { assertMergedSegmentWarmerStats(stats, StatsScope.PRIMARY_SHARD); - primaryShardStatsResult = true; - } catch (AssertionError ignored) {} + primaryShardStatsResult = true; // would be true if the node contains only primary shard + } catch (AssertionError error) { + errors.add(error); + } try { assertMergedSegmentWarmerStats(stats, StatsScope.REPLICA_SHARD); replicaShardStatsResult = true; // would be true if the node only contains replica shards - } catch (AssertionError ignored) {} + } catch (AssertionError error) { + errors.add(error); + } assertTrue( - "Stats should match either primary or replica shard or patterns both.", + "Stats should match either primary or replica shard or patterns both. Errors: " + errors, primaryShardStatsResult || replicaShardStatsResult ); } From 4d8d2b72887dac0792e0be1da81900aa7ecfc7a8 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Wed, 10 Sep 2025 18:05:48 +0530 Subject: [PATCH 23/30] Empty commit Signed-off-by: Aditya Khera From bf689236e157becf1d4d75e6298765e5f210d94d Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Tue, 14 Oct 2025 11:44:46 +0530 Subject: [PATCH 24/30] Addressing review comments Signed-off-by: Aditya Khera --- .../test/cat.shards/10_basic.yml | 14 +- .../org/opensearch/merge/MergeStatsIT.java | 147 +++++++++--------- .../index/engine/MergedSegmentWarmer.java | 3 +- .../opensearch/index/merge/MergeStats.java | 4 +- .../merge/MergedSegmentTransferTracker.java | 32 ++-- .../index/merge/MergedSegmentWarmerStats.java | 4 +- .../MergedSegmentReplicationTarget.java | 2 +- .../AbstractPublishCheckpointAction.java | 9 +- ...RemoteStorePublishMergedSegmentAction.java | 6 +- .../rest/action/cat/RestIndicesAction.java | 32 ++-- .../rest/action/cat/RestNodesAction.java | 16 +- 
.../rest/action/cat/RestShardsAction.java | 16 +- .../index/merge/MergeStatsTests.java | 44 +++++- .../MergedSegmentTransferTrackerTests.java | 19 +-- 14 files changed, 190 insertions(+), 158 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index 80d847c411914..68ecc8f2e21ae 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -1,7 +1,7 @@ "Help": - skip: version: " - 3.2.99" - reason: merged segment warmer stats were added in 3.3.0 + reason: search query failure stats is added in 3.3.0 features: node_selector - do: cat.shards: @@ -54,14 +54,6 @@ merges.total_docs .+ \n merges.total_size .+ \n merges.total_time .+ \n - merges.merged_segment_warmer.total_invocations .+ \n - merges.merged_segment_warmer.total_time .+ \n - merges.merged_segment_warmer.ongoing_count .+ \n - merges.merged_segment_warmer.total_bytes_received .+ \n - merges.merged_segment_warmer.total_bytes_sent .+ \n - merges.merged_segment_warmer.total_receive_time .+ \n - merges.merged_segment_warmer.total_failure_count .+ \n - merges.merged_segment_warmer.total_send_time .+ \n refresh.total .+ \n refresh.time .+ \n refresh.external_total .+ \n @@ -74,6 +66,7 @@ search.query_current .+ \n search.query_time .+ \n search.query_total .+ \n + search.query_failed .+ \n search.concurrent_query_current .+ \n search.concurrent_query_time .+ \n search.concurrent_query_total .+ \n @@ -81,6 +74,7 @@ search.startree_query_current .+ \n search.startree_query_time .+ \n search.startree_query_total .+ \n + search.startree_query_failed .+ \n search.scroll_current .+ \n search.scroll_time .+ \n search.scroll_total .+ \n @@ -106,7 +100,7 @@ --- "Help from 3.2.0 to 3.2.99": - skip: - version: " - 3.1.99, 3.3.0-" + version: " - 3.1.99, 3.3.0 - " reason: star-tree search 
stats is only added in 3.2.0 features: node_selector - do: diff --git a/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java index 8a70947f7640d..5346b1c6fc1b8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/merge/MergeStatsIT.java @@ -20,17 +20,20 @@ import org.opensearch.action.admin.indices.stats.ShardStats; import org.opensearch.action.search.SearchRequest; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.index.merge.MergeStats; import org.opensearch.index.merge.MergedSegmentWarmerStats; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase; -import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -55,7 +58,9 @@ protected Settings nodeSettings(int nodeOrdinal) { public Settings indexSettings() { return Settings.builder() .put(super.indexSettings()) - .put(ShardsLimitAllocationDecider.INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.getKey(), 5) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2) + .put(ShardsLimitAllocationDecider.INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.getKey(), 1) + .put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), 2) .build(); } @@ -72,7 +77,7 @@ private void setup() { public void testNodesStats() throws Exception { setup(); - String[] indices = 
setupIndices(3); + String[] indices = setupIndices(1); ClusterState state = getClusterState(); List nodes = state.nodes().getNodes().values().stream().map(DiscoveryNode::getName).toList(); @@ -89,7 +94,6 @@ public void testNodesStats() throws Exception { // Shard stats List allNodesStats = response.getNodes(); assertEquals(2, allNodesStats.size()); - for (NodeStats nodeStats : allNodesStats) { assertNotNull(nodeStats.getIndices()); MergeStats mergeStats = nodeStats.getIndices().getMerge(); @@ -99,13 +103,24 @@ public void testNodesStats() throws Exception { assertNotNull(mergedSegmentWarmerStats); assertMergedSegmentWarmerStats(mergedSegmentWarmerStats, StatsScope.AGGREGATED); } + + assertEquals( + "Expected sent size by node 2 to be equal to recieved size by node 1.", + allNodesStats.get(0).getIndices().getMerge().getWarmerStats().getTotalReceivedSize(), + allNodesStats.get(1).getIndices().getMerge().getWarmerStats().getTotalSentSize() + ); + assertEquals( + "Expected sent size by node 1 to be equal to recieved size by node 2.", + allNodesStats.get(0).getIndices().getMerge().getWarmerStats().getTotalSentSize(), + allNodesStats.get(1).getIndices().getMerge().getWarmerStats().getTotalReceivedSize() + ); } } public void testShardStats() throws Exception { setup(); - String[] indices = setupIndices(2); + String[] indices = setupIndices(1); ClusterState state = getClusterState(); List nodes = state.nodes().getNodes().values().stream().map(DiscoveryNode::getName).toList(); @@ -114,6 +129,8 @@ public void testShardStats() throws Exception { for (String index : indices) { client().admin().indices().forceMerge(new ForceMergeRequest(index).maxNumSegments(2)); } + Map> shardsSentAndReceivedSize = new HashMap<>(); + for (String node : nodes) { IndicesStatsResponse response = client(node).admin().indices().stats(new IndicesStatsRequest()).get(); @@ -131,13 +148,34 @@ public void testShardStats() throws Exception { MergedSegmentWarmerStats mergedSegmentWarmerStats = 
mergeStats.getWarmerStats(); assertNotNull(mergedSegmentWarmerStats); assertMergedSegmentWarmerStats(mergedSegmentWarmerStats, type); + + String primaryOrReplica = type.equals(StatsScope.PRIMARY_SHARD) ? "[P]" : "[R]"; + shardsSentAndReceivedSize.put(shardStats.getShardRouting().shardId() + primaryOrReplica, new HashMap<>() { + { + put("RECEIVED", mergedSegmentWarmerStats.getTotalReceivedSize()); + put("SENT", mergedSegmentWarmerStats.getTotalSentSize()); + } + }); } } + + for (int shard = 0; shard <= 1; shard++) { + assertEquals( + "Expected sent size by primary shard to be equal to recieved size by replica shard.", + shardsSentAndReceivedSize.get("[" + indices[0] + "][" + shard + "][R]").get("RECEIVED"), + shardsSentAndReceivedSize.get("[" + indices[0] + "][" + shard + "][P]").get("SENT") + ); + assertEquals( + "Expected sent size by replica shard to be equal to recieved size by primary shard.", + shardsSentAndReceivedSize.get("[" + indices[0] + "][" + shard + "][R]").get("SENT"), + shardsSentAndReceivedSize.get("[" + indices[0] + "][" + shard + "][P]").get("RECEIVED") + ); + } } public void testIndicesStats() throws Exception { setup(); - String[] indices = setupIndices(3); + String[] indices = setupIndices(1); ClusterState state = getClusterState(); List nodes = state.nodes().getNodes().values().stream().map(DiscoveryNode::getName).toList(); @@ -152,7 +190,7 @@ public void testIndicesStats() throws Exception { // Shard stats Map allIndicesStats = response.getIndices(); - assertEquals(3, allIndicesStats.size()); + assertEquals(1, allIndicesStats.size()); for (String index : indices) { IndexStats indexStats = allIndicesStats.get(index); CommonStats totalStats = indexStats.getTotal(); @@ -181,7 +219,7 @@ public void testIndicesStats() throws Exception { } private void assertMergeStats(MergeStats stats, StatsScope type) { - if (type == StatsScope.PRIMARY_SHARD) { + if (Arrays.asList(StatsScope.PRIMARY_SHARD, StatsScope.AGGREGATED).contains(type)) { 
assertTrue("Current merges should be >= 0", stats.getCurrent() >= 0); assertTrue("Current merge docs should be >= 0", stats.getCurrentNumDocs() >= 0); assertTrue("Current merge size should be >= 0", stats.getCurrentSizeInBytes() >= 0); @@ -209,29 +247,6 @@ private void assertMergeStats(MergeStats stats, StatsScope type) { assertEquals("Replica shard total stopped time should be 0", 0, stats.getTotalStoppedTime().getMillis()); assertEquals("Replica shard total throttled time should be 0", 0, stats.getTotalThrottledTime().getMillis()); assertEquals("Replica shard total throttled time should be 0", 0, stats.getTotalThrottledTimeInMillis()); - } else if (type == StatsScope.AGGREGATED) { - // the node might have both primaries and replicas, only primaries, or only replicas - boolean primaryShardStatsResult = false; - boolean replicaShardStatsResult = false; - List errors = new ArrayList<>(); - try { - assertMergeStats(stats, StatsScope.PRIMARY_SHARD); - primaryShardStatsResult = true; - } catch (AssertionError error) { - errors.add(error); - } - - try { - assertMergeStats(stats, StatsScope.REPLICA_SHARD); - replicaShardStatsResult = true; - } catch (AssertionError error) { - errors.add(error); - } - - assertTrue( - "Stats should match either primary or replica shard patterns or both. 
Errors: " + errors, - primaryShardStatsResult || replicaShardStatsResult - ); } } @@ -239,7 +254,7 @@ private void assertMergedSegmentWarmerStats(MergedSegmentWarmerStats stats, Stat if (type == StatsScope.PRIMARY_SHARD) { assertTrue("Primary shard warm invocations should be >= 1", stats.getTotalInvocationsCount() >= 1); assertTrue("Primary shard warm time should be >= 1ms", stats.getTotalTime().getMillis() >= 1); - assertTrue("Primary shard warm failures should be >= 0", stats.getTotalFailureCount() >= 0); + assertEquals("Primary shard warm failures should be == 0", 0, stats.getTotalFailureCount()); assertTrue("Primary shard sent size should be >= 0", stats.getTotalSentSize().getBytes() >= 0); assertEquals("Primary shard received size should be 0", 0, stats.getTotalReceivedSize().getBytes()); assertTrue("Primary shard send time should be >= 0", stats.getTotalSendTime().millis() >= 0); @@ -255,47 +270,35 @@ private void assertMergedSegmentWarmerStats(MergedSegmentWarmerStats stats, Stat assertTrue("Replica shard receive time should be >= 1ms", stats.getTotalReceiveTime().millis() >= 1); assertEquals("Replica shard ongoing warms should be 0", 0, stats.getOngoingCount()); } else if (type == StatsScope.AGGREGATED) { - // the node might have both primaries and replicas, only primaries, or only replicas - - // would evaluate to true if the node only contains primary shards - boolean primaryShardStatsResult = false; - - // would evaluate to true if the node only contains replica shards - boolean replicaShardStatsResult = false; - - // would evaluate to true if the node contains a mix of primary and replica shards - boolean primaryAndReplicaShardsResult = stats.getOngoingCount() >= 0 - && stats.getTotalTime().getMillis() >= 1 - && stats.getTotalSendTime().getMillis() >= 1 - && stats.getTotalReceiveTime().getMillis() >= 1 - && stats.getTotalInvocationsCount() >= 1 - && stats.getTotalReceivedSize().getBytes() >= 1 - && stats.getTotalSentSize().getBytes() >= 1 - && 
stats.getTotalFailureCount() >= 0; - - if (primaryAndReplicaShardsResult = true) { - return; - } - - List errors = new ArrayList<>(); - - try { - assertMergedSegmentWarmerStats(stats, StatsScope.PRIMARY_SHARD); - primaryShardStatsResult = true; // would be true if the node contains only primary shard - } catch (AssertionError error) { - errors.add(error); - } - - try { - assertMergedSegmentWarmerStats(stats, StatsScope.REPLICA_SHARD); - replicaShardStatsResult = true; // would be true if the node only contains replica shards - } catch (AssertionError error) { - errors.add(error); - } - + assertTrue("Expected warmerStats.getOngoingCount >= 0, found " + stats.getOngoingCount(), stats.getOngoingCount() >= 0); + assertTrue( + "Expected warmerStats.getTotalTime >= 1, found " + stats.getTotalTime().millis(), + stats.getTotalTime().getMillis() >= 1 + ); assertTrue( - "Stats should match either primary or replica shard or patterns both. Errors: " + errors, - primaryShardStatsResult || replicaShardStatsResult + "Expected warmerStats.getTotalSendTime >= 1, found " + stats.getTotalSendTime().getMillis(), + stats.getTotalSendTime().getMillis() >= 1 + ); + assertTrue( + "Expected warmerStats.getTotalReceiveTime >= 1, found " + stats.getTotalReceiveTime().getMillis(), + stats.getTotalReceiveTime().getMillis() >= 1 + ); + assertTrue( + "Expected warmerStats.getTotalInvocationsCount >= 1, found " + stats.getTotalInvocationsCount(), + stats.getTotalInvocationsCount() >= 1 + ); + assertTrue( + "Expected warmerStats.getTotalReceivedSize >= 1, found " + stats.getTotalReceivedSize().getBytes(), + stats.getTotalReceivedSize().getBytes() >= 1 + ); + assertTrue( + "Expected warmerStats.getTotalSentSize >= 1, found " + stats.getTotalSentSize().getBytes(), + stats.getTotalSentSize().getBytes() >= 1 + ); + assertEquals( + "Expected warmerStats.getTotalFailureCount == 0, found " + stats.getTotalFailureCount(), + 0, + stats.getTotalFailureCount() ); } } diff --git 
a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java index b2d77f4b1ce3a..e1246582b02f7 100644 --- a/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java +++ b/server/src/main/java/org/opensearch/index/engine/MergedSegmentWarmer.java @@ -82,7 +82,8 @@ public void warm(LeafReader leafReader) throws IOException { finalElapsedTime ); }); - } catch (IOException e) { + } catch (Throwable t) { + logger.warn(() -> new ParameterizedMessage("Failed to warm segment. Continuing. {}", leafReader), t); mergedSegmentTransferTracker.incrementTotalWarmFailureCount(); } finally { mergedSegmentTransferTracker.addTotalWarmTimeMillis(elapsedTime); diff --git a/server/src/main/java/org/opensearch/index/merge/MergeStats.java b/server/src/main/java/org/opensearch/index/merge/MergeStats.java index 690260fd0cef9..89ebc12f7c044 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergeStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergeStats.java @@ -91,7 +91,7 @@ public MergeStats(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_11_0)) { unreferencedFileCleanUpsPerformed = in.readOptionalVLong(); } - if (in.getVersion().onOrAfter(Version.V_3_3_0)) { + if (in.getVersion().onOrAfter(Version.CURRENT)) { this.warmerStats = new MergedSegmentWarmerStats(in); } else { this.warmerStats = new MergedSegmentWarmerStats(); @@ -355,7 +355,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_11_0)) { out.writeOptionalVLong(unreferencedFileCleanUpsPerformed); } - if (out.getVersion().onOrAfter(Version.V_3_3_0)) { + if (out.getVersion().onOrAfter(Version.CURRENT)) { this.warmerStats.writeTo(out); } } diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java 
index b2751d168fcee..8ed92dc036adc 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentTransferTracker.java @@ -23,10 +23,10 @@ public class MergedSegmentTransferTracker { private final CounterMetric totalWarmInvocationsCount = new CounterMetric(); private final CounterMetric totalWarmTimeMillis = new CounterMetric(); private final CounterMetric totalWarmFailureCount = new CounterMetric(); - private final CounterMetric totalBytesUploaded = new CounterMetric(); - private final CounterMetric totalBytesDownloaded = new CounterMetric(); - private final CounterMetric totalUploadTimeMillis = new CounterMetric(); - private final CounterMetric totalDownloadTimeMillis = new CounterMetric(); + private final CounterMetric totalBytesSent = new CounterMetric(); + private final CounterMetric totalBytesReceived = new CounterMetric(); + private final CounterMetric totalSendTimeMillis = new CounterMetric(); + private final CounterMetric totalReceiveTimeMillis = new CounterMetric(); private final CounterMetric ongoingWarms = new CounterMetric(); public void incrementTotalWarmInvocationsCount() { @@ -49,20 +49,20 @@ public void addTotalWarmTimeMillis(long time) { totalWarmTimeMillis.inc(time); } - public void addTotalUploadTimeMillis(long time) { - totalUploadTimeMillis.inc(time); + public void addTotalSendTimeMillis(long time) { + totalSendTimeMillis.inc(time); } - public void addTotalDownloadTimeMillis(long time) { - totalDownloadTimeMillis.inc(time); + public void addTotalReceiveTimeMillis(long time) { + totalReceiveTimeMillis.inc(time); } - public void addTotalBytesUploaded(long bytes) { - totalBytesUploaded.inc(bytes); + public void addTotalBytesSent(long bytes) { + totalBytesSent.inc(bytes); } - public void addTotalBytesDownloaded(long bytes) { - totalBytesDownloaded.inc(bytes); + public void addTotalBytesReceived(long bytes) { + totalBytesReceived.inc(bytes); } public 
MergedSegmentWarmerStats stats() { @@ -71,10 +71,10 @@ public MergedSegmentWarmerStats stats() { totalWarmInvocationsCount.count(), totalWarmTimeMillis.count(), totalWarmFailureCount.count(), - totalBytesUploaded.count(), - totalBytesDownloaded.count(), - totalUploadTimeMillis.count(), - totalDownloadTimeMillis.count(), + totalBytesSent.count(), + totalBytesReceived.count(), + totalSendTimeMillis.count(), + totalReceiveTimeMillis.count(), ongoingWarms.count() ); return stats; diff --git a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java index cf63b479076da..cf407fe5572ef 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergedSegmentWarmerStats.java @@ -135,7 +135,7 @@ public TimeValue getTotalSendTime() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(Fields.MERGED_SEGMENT_WARMER); + builder.startObject(Fields.WARMER); builder.field(Fields.WARM_INVOCATIONS_COUNT, totalInvocationsCount); builder.humanReadableField(Fields.TOTAL_TIME_MILLIS, Fields.TOTAL_TIME, getTotalTime()); builder.field(Fields.TOTAL_FAILURE_COUNT, totalFailureCount); @@ -154,7 +154,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws * @opensearch.internal */ static final class Fields { - static final String MERGED_SEGMENT_WARMER = "merged_segment_warmer"; + static final String WARMER = "warmer"; static final String WARM_INVOCATIONS_COUNT = "total_invocations_count"; static final String TOTAL_TIME_MILLIS = "total_time_millis"; static final String TOTAL_FAILURE_COUNT = "total_failure_count"; diff --git a/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java 
index c087f420dcfe0..a4353828c89cf 100644 --- a/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/MergedSegmentReplicationTarget.java @@ -78,7 +78,7 @@ public MergedSegmentReplicationTarget retryCopy() { } protected void updateMergedSegmentFileRecoveryBytes(String fileName, long bytesRecovered) { - indexShard.mergedSegmentTransferTracker().addTotalBytesDownloaded(bytesRecovered); + indexShard.mergedSegmentTransferTracker().addTotalBytesReceived(bytesRecovered); updateFileRecoveryBytes(fileName, bytesRecovered); } } diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java index 1133e59b87551..42d29d370929a 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/AbstractPublishCheckpointAction.java @@ -116,6 +116,7 @@ final void doPublish( TimeValue waitTimeout, ActionListener listener ) { + ActionListener notifyOnceListener = ActionListener.notifyOnce(listener); String primaryAllocationId = indexShard.routingEntry().allocationId().getId(); long primaryTerm = indexShard.getPendingPrimaryTerm(); final ThreadContext threadContext = threadPool.getThreadContext(); @@ -209,21 +210,21 @@ public void handleException(TransportException e) { if (waitForCompletion) { try { if (latch.await(waitTimeout.seconds(), TimeUnit.SECONDS) == false) { - listener.onFailure( + notifyOnceListener.onFailure( new TimeoutException("Timed out waiting for publish checkpoint to complete. 
Checkpoint: " + checkpoint) ); } } catch (InterruptedException e) { - listener.onFailure(e); + notifyOnceListener.onFailure(e); logger.warn( () -> new ParameterizedMessage("Interrupted while waiting for publish checkpoint complete [{}]", checkpoint), e ); } } - listener.onResponse(null); + notifyOnceListener.onResponse(null); } catch (Exception e) { - listener.onFailure(e); + notifyOnceListener.onFailure(e); } } diff --git a/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java b/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java index 1b3fc549b642c..23a4ca0c9ece1 100644 --- a/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java +++ b/server/src/main/java/org/opensearch/indices/replication/checkpoint/RemoteStorePublishMergedSegmentAction.java @@ -86,7 +86,7 @@ protected void doReplicaOperation(RemoteStorePublishMergedSegmentRequest shardRe long startTime = System.currentTimeMillis(); replica.getRemoteDirectory().markMergedSegmentsPendingDownload(checkpoint.getLocalToRemoteSegmentFilenameMap()); replicationService.onNewMergedSegmentCheckpoint(checkpoint, replica); - replica.mergedSegmentTransferTracker().addTotalDownloadTimeMillis(System.currentTimeMillis() - startTime); + replica.mergedSegmentTransferTracker().addTotalReceiveTimeMillis(System.currentTimeMillis() - startTime); } else { logger.warn( () -> new ParameterizedMessage( @@ -116,7 +116,7 @@ public final void publish(IndexShard indexShard, MergedSegmentCheckpoint checkpo long elapsedTimeMillis = endTimeMillis - startTimeMillis; long timeoutMillis = indexShard.getRecoverySettings().getMergedSegmentReplicationTimeout().millis(); long timeLeftMillis = Math.max(0, timeoutMillis - elapsedTimeMillis); - indexShard.mergedSegmentTransferTracker().addTotalUploadTimeMillis(elapsedTimeMillis); + 
indexShard.mergedSegmentTransferTracker().addTotalSendTimeMillis(elapsedTimeMillis); if (timeLeftMillis > 0) { RemoteStoreMergedSegmentCheckpoint remoteStoreMergedSegmentCheckpoint = new RemoteStoreMergedSegmentCheckpoint( @@ -180,7 +180,7 @@ public void beforeUpload(String file) {} @Override public void onSuccess(String file) { localToRemoteStoreFilenames.put(file, indexShard.getRemoteDirectory().getExistingRemoteFilename(file)); - indexShard.mergedSegmentTransferTracker().addTotalBytesUploaded(checkpoint.getMetadataMap().get(file).length()); + indexShard.mergedSegmentTransferTracker().addTotalBytesSent(checkpoint.getMetadataMap().get(file).length()); } @Override diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java index 1c094827dca91..215ab36a79cdc 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestIndicesAction.java @@ -593,74 +593,74 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa table.addCell("pri.merges.total_time", "default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merges.merged_segment_warmer.total_invocations", + "merges.warmer.total_invocations", "alias:mswti,mergedSegmentWarmerTotalInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "pri.merges.merged_segment_warmer.total_invocations", + "pri.merges.warmer.total_invocations", "default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_time", + "merges.warmer.total_time", "alias:mswtt,mergedSegmentWarmerTotalTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "pri.merges.merged_segment_warmer.total_time", + "pri.merges.warmer.total_time", 
"default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "merges.merged_segment_warmer.ongoing_count", + "merges.warmer.ongoing_count", "alias:mswoc,mergedSegmentWarmerOngoingCount;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( - "pri.merges.merged_segment_warmer.ongoing_count", + "pri.merges.warmer.ongoing_count", "default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( - "merges.merged_segment_warmer.total_bytes_received", + "merges.warmer.total_bytes_received", "alias:mswtbr,mergedSegmentWarmerTotalBytesReceived;default:false;text-align:right;desc:total bytes received by a replica shard during the warm operation" ); table.addCell( - "pri.merges.merged_segment_warmer.total_bytes_received", + "pri.merges.warmer.total_bytes_received", "default:false;text-align:right;desc:total bytes received by a replica shard during the warm operation" ); table.addCell( - "merges.merged_segment_warmer.total_bytes_sent", + "merges.warmer.total_bytes_sent", "alias:mswtbs,mergedSegmentWarmerTotalBytesSent;default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); table.addCell( - "pri.merges.merged_segment_warmer.total_bytes_sent", + "pri.merges.warmer.total_bytes_sent", "default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); table.addCell( - "merges.merged_segment_warmer.total_receive_time", + "merges.warmer.total_receive_time", "alias:mswtrt,mergedSegmentWarmerTotalReceiveTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "pri.merges.merged_segment_warmer.total_receive_time", + "pri.merges.warmer.total_receive_time", "default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); 
table.addCell( - "merges.merged_segment_warmer.total_failure_count", + "merges.warmer.total_failure_count", "alias:mswtfc,mergedSegmentWarmerTotalFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "pri.merges.merged_segment_warmer.total_failure_count", + "pri.merges.warmer.total_failure_count", "default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_send_time", + "merges.warmer.total_send_time", "alias:mswtst,mergedSegmentWarmerTotalSendTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); table.addCell( - "pri.merges.merged_segment_warmer.total_send_time", + "pri.merges.warmer.total_send_time", "default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java index 98f92ced66764..a3fbff262957f 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java @@ -276,35 +276,35 @@ protected Table getTableWithHeader(final RestRequest request) { table.addCell("merges.total_time", "alias:mtt,mergesTotalTime;default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merges.merged_segment_warmer.total_invocations", + "merges.warmer.total_invocations", "alias:mswti,mergedSegmentWarmerTotalInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_time", + "merges.warmer.total_time", "alias:mswtt,mergedSegmentWarmerTotalTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - 
"merges.merged_segment_warmer.ongoing_count", + "merges.warmer.ongoing_count", "alias:mswoc,mergedSegmentWarmerOngoingCount;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( - "merges.merged_segment_warmer.total_bytes_received", + "merges.warmer.total_bytes_received", "alias:mswtbr,mergedSegmentWarmerTotalBytesReceived;default:false;text-align:right;desc:total bytes received by a replica shard during the warm operation" ); table.addCell( - "merges.merged_segment_warmer.total_bytes_sent", + "merges.warmer.total_bytes_sent", "alias:mswtbs,mergedSegmentWarmerTotalBytesSent;default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); table.addCell( - "merges.merged_segment_warmer.total_receive_time", + "merges.warmer.total_receive_time", "alias:mswtrt,mergedSegmentWarmerTotalReceiveTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "merges.merged_segment_warmer.total_failure_count", + "merges.warmer.total_failure_count", "alias:mswtfc,mergedSegmentWarmerTotalFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_send_time", + "merges.warmer.total_send_time", "alias:mswtst,mergedSegmentWarmerTotalSendTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java index 58a10e5e81d02..7a58c51d10cc9 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java @@ -216,35 +216,35 @@ protected Table getTableWithHeader(final RestRequest request, final PageToken pa 
table.addCell("merges.total_time", "alias:mtt,mergesTotalTime;default:false;text-align:right;desc:time spent in merges"); table.addCell( - "merges.merged_segment_warmer.total_invocations", + "merges.warmer.total_invocations", "alias:mswti,mergedSegmentWarmerTotalInvocations;default:false;text-align:right;desc:total invocations of merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_time", + "merges.warmer.total_time", "alias:mswtt,mergedSegmentWarmerTotalTime;default:false;text-align:right;desc:total wallclock time spent in the warming operation" ); table.addCell( - "merges.merged_segment_warmer.ongoing_count", + "merges.warmer.ongoing_count", "alias:mswoc,mergedSegmentWarmerOngoingCount;default:false;text-align:right;desc:point-in-time metric for number of in-progress warm operations" ); table.addCell( - "merges.merged_segment_warmer.total_bytes_received", + "merges.warmer.total_bytes_received", "alias:mswtbr,mergedSegmentWarmerTotalBytesReceived;default:false;text-align:right;desc:total bytes received by a replica shard during the warm operation" ); table.addCell( - "merges.merged_segment_warmer.total_bytes_sent", + "merges.warmer.total_bytes_sent", "alias:mswtbs,mergedSegmentWarmerTotalBytesSent;default:false;text-align:right;desc:total bytes sent by a primary shard during the warm operation" ); table.addCell( - "merges.merged_segment_warmer.total_receive_time", + "merges.warmer.total_receive_time", "alias:mswtrt,mergedSegmentWarmerTotalReceiveTime;default:false;text-align:right;desc:total wallclock time spent receiving merged segments by a replica shard" ); table.addCell( - "merges.merged_segment_warmer.total_failure_count", + "merges.warmer.total_failure_count", "alias:mswtfc,mergedSegmentWarmerTotalFailureCount;default:false;text-align:right;desc:total failures in merged segment warmer" ); table.addCell( - "merges.merged_segment_warmer.total_send_time", + "merges.warmer.total_send_time", 
"alias:mswtst,mergedSegmentWarmerTotalSendTime;default:false;text-align:right;desc:total wallclock time spent sending merged segments by a primary shard" ); diff --git a/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java b/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java index 9288ad2760e24..aca196267724d 100644 --- a/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java +++ b/server/src/test/java/org/opensearch/index/merge/MergeStatsTests.java @@ -52,6 +52,15 @@ public void testAdd() { assertEquals(512, stats.getCurrentSizeInBytes()); assertEquals(10, stats.getTotalStoppedTimeInMillis()); assertEquals(20, stats.getTotalThrottledTimeInMillis()); + + assertEquals(1, stats.getWarmerStats().getTotalInvocationsCount()); + assertEquals(10, stats.getWarmerStats().getTotalTime().getMillis()); + assertEquals(0, stats.getWarmerStats().getTotalFailureCount()); + assertEquals(new ByteSizeValue(100), stats.getWarmerStats().getTotalSentSize()); + assertEquals(new ByteSizeValue(200), stats.getWarmerStats().getTotalReceivedSize()); + assertEquals(0, stats.getWarmerStats().getOngoingCount()); + assertEquals(5, stats.getWarmerStats().getTotalSendTime().getMillis()); + assertEquals(15, stats.getWarmerStats().getTotalReceiveTime().getMillis()); } public void testAddWithoutMergedSegmentWarmer() { @@ -67,6 +76,16 @@ public void testAddWithoutMergedSegmentWarmer() { assertEquals(512, stats.getCurrentSizeInBytes()); assertEquals(10, stats.getTotalStoppedTimeInMillis()); assertEquals(20, stats.getTotalThrottledTimeInMillis()); + + assertNotNull(stats.getWarmerStats()); + assertEquals(0, stats.getWarmerStats().getTotalInvocationsCount()); + assertEquals(0, stats.getWarmerStats().getTotalTime().getMillis()); + assertEquals(0, stats.getWarmerStats().getTotalFailureCount()); + assertEquals(new ByteSizeValue(0), stats.getWarmerStats().getTotalSentSize()); + assertEquals(new ByteSizeValue(0), stats.getWarmerStats().getTotalReceivedSize()); + 
assertEquals(0, stats.getWarmerStats().getOngoingCount()); + assertEquals(0, stats.getWarmerStats().getTotalSendTime().getMillis()); + assertEquals(0, stats.getWarmerStats().getTotalReceiveTime().getMillis()); } public void testAddMergeStats() { @@ -85,6 +104,15 @@ public void testAddMergeStats() { assertEquals(3, stats1.getCurrent()); assertEquals(40, stats1.getCurrentNumDocs()); assertEquals(768, stats1.getCurrentSizeInBytes()); + + assertEquals(2, stats1.getWarmerStats().getTotalInvocationsCount()); + assertEquals(20, stats1.getWarmerStats().getTotalTime().getMillis()); + assertEquals(0, stats1.getWarmerStats().getTotalFailureCount()); + assertEquals(new ByteSizeValue(200), stats1.getWarmerStats().getTotalSentSize()); + assertEquals(new ByteSizeValue(400), stats1.getWarmerStats().getTotalReceivedSize()); + assertEquals(0, stats1.getWarmerStats().getOngoingCount()); + assertEquals(10, stats1.getWarmerStats().getTotalSendTime().getMillis()); + assertEquals(30, stats1.getWarmerStats().getTotalReceiveTime().getMillis()); } public void testAddTotals() { @@ -92,7 +120,7 @@ public void testAddTotals() { MergeStats stats2 = new MergeStats(); MergedSegmentWarmerStats warmerStats = new MergedSegmentWarmerStats(); - warmerStats.add(1, 10, 0, 100, 200, 5, 15, 0); + warmerStats.add(1, 10, 0, 100, 200, 5, 15, 7); stats1.add(5, 100, 50, 1024, 2, 25, 512, 10, 20, 1.5, warmerStats); stats2.add(3, 50, 30, 512, 1, 15, 256, 5, 10, 1.0, warmerStats); @@ -100,11 +128,23 @@ public void testAddTotals() { stats1.addTotals(stats2); assertEquals(8, stats1.getTotal()); + assertEquals(2, stats1.getCurrent()); // not expected to get added with addTotals + assertEquals(25, stats1.getCurrentNumDocs()); // not expected to get added with addTotals + assertEquals(512, stats1.getCurrentSizeInBytes()); // not expected to get added with addTotals assertEquals(150, stats1.getTotalTimeInMillis()); assertEquals(80, stats1.getTotalNumDocs()); assertEquals(1536, stats1.getTotalSizeInBytes()); 
assertEquals(15, stats1.getTotalStoppedTimeInMillis()); assertEquals(30, stats1.getTotalThrottledTimeInMillis()); + + assertEquals(2, stats1.getWarmerStats().getTotalInvocationsCount()); + assertEquals(20, stats1.getWarmerStats().getTotalTime().getMillis()); + assertEquals(0, stats1.getWarmerStats().getTotalFailureCount()); + assertEquals(new ByteSizeValue(200), stats1.getWarmerStats().getTotalSentSize()); + assertEquals(new ByteSizeValue(400), stats1.getWarmerStats().getTotalReceivedSize()); + assertEquals(7, stats1.getWarmerStats().getOngoingCount()); // not expected to get added with addTotals + assertEquals(10, stats1.getWarmerStats().getTotalSendTime().getMillis()); + assertEquals(30, stats1.getWarmerStats().getTotalReceiveTime().getMillis()); } public void testAddWithNull() { @@ -199,6 +239,6 @@ public void testToXContent() throws IOException { assertTrue(json.contains("total_time_in_millis")); assertTrue(json.contains("total_docs")); assertTrue(json.contains("total_size_in_bytes")); - assertTrue(json.contains("merged_segment_warmer")); + assertTrue(json.contains("warmer")); } } diff --git a/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java index 722cf43106997..26e895c9b05a5 100644 --- a/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/merge/MergedSegmentTransferTrackerTests.java @@ -10,13 +10,8 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.unit.ByteSizeValue; -import org.opensearch.core.index.shard.ShardId; -import org.opensearch.index.IndexSettings; import org.opensearch.test.OpenSearchTestCase; -import static org.opensearch.common.settings.Settings.builder; -import static org.opensearch.index.IndexSettingsTests.newIndexMeta; - public class MergedSegmentTransferTrackerTests extends OpenSearchTestCase { private 
MergedSegmentTransferTracker tracker; @@ -24,8 +19,6 @@ public class MergedSegmentTransferTrackerTests extends OpenSearchTestCase { @Override public void setUp() throws Exception { super.setUp(); - ShardId shardId = new ShardId("test", "uuid", 0); - IndexSettings indexSettings = new IndexSettings(newIndexMeta("test", builder().build()), builder().build()); tracker = new MergedSegmentTransferTracker(); } @@ -63,10 +56,10 @@ public void testOngoingWarms() { public void testAddTimeAndBytes() { tracker.addTotalWarmTimeMillis(100); - tracker.addTotalUploadTimeMillis(200); - tracker.addTotalDownloadTimeMillis(300); - tracker.addTotalBytesUploaded(1024); - tracker.addTotalBytesDownloaded(2048); + tracker.addTotalSendTimeMillis(200); + tracker.addTotalReceiveTimeMillis(300); + tracker.addTotalBytesSent(1024); + tracker.addTotalBytesReceived(2048); MergedSegmentWarmerStats stats = tracker.stats(); assertEquals(new TimeValue(100), stats.getTotalTime()); @@ -81,8 +74,8 @@ public void testCumulativeStats() { tracker.addTotalWarmTimeMillis(50); assertEquals(new TimeValue(150), tracker.stats().getTotalTime()); - tracker.addTotalBytesUploaded(1000); - tracker.addTotalBytesUploaded(500); + tracker.addTotalBytesSent(1000); + tracker.addTotalBytesSent(500); assertEquals(1500, tracker.stats().getTotalSentSize().getBytes()); } } From a142adb1807b108f69b0630191bca8934bf67509 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Tue, 14 Oct 2025 12:45:02 +0530 Subject: [PATCH 25/30] rebased with main Signed-off-by: Aditya Khera --- .../test/cat.shards/10_basic.yml | 110 +++++++++++++++++- .../opensearch/index/merge/MergeStats.java | 4 +- .../action/cat/RestShardsActionTests.java | 8 +- 3 files changed, 115 insertions(+), 7 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index 68ecc8f2e21ae..8a313fbc8d83a 100644 --- 
a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -1,3 +1,111 @@ +"Help": + - skip: + version: " - 3.3.99" + reason: search query failure stats is added in 3.3.0 + features: node_selector + - do: + cat.shards: + help: true + node_selector: + version: "3.4.0 - " + + - match: + $body: | + /^ index .+ \n + shard .+ \n + prirep .+ \n + state .+ \n + docs .+ \n + store .+ \n + ip .+ \n + id .+ \n + node .+ \n + sync_id .+ \n + unassigned.reason .+ \n + unassigned.at .+ \n + unassigned.for .+ \n + unassigned.details .+ \n + recoverysource.type .+ \n + completion.size .+ \n + fielddata.memory_size .+ \n + fielddata.evictions .+ \n + query_cache.memory_size .+ \n + query_cache.evictions .+ \n + flush.total .+ \n + flush.total_time .+ \n + get.current .+ \n + get.time .+ \n + get.total .+ \n + get.exists_time .+ \n + get.exists_total .+ \n + get.missing_time .+ \n + get.missing_total .+ \n + indexing.delete_current .+ \n + indexing.delete_time .+ \n + indexing.delete_total .+ \n + indexing.index_current .+ \n + indexing.index_time .+ \n + indexing.index_total .+ \n + indexing.index_failed .+ \n + merges.current .+ \n + merges.current_docs .+ \n + merges.current_size .+ \n + merges.total .+ \n + merges.total_docs .+ \n + merges.total_size .+ \n + merges.total_time .+ \n + merges.warmer.total_invocations .+ \n + merges.warmer.total_time .+ \n + merges.warmer.ongoing_count .+ \n + merges.warmer.total_bytes_received .+ \n + merges.warmer.total_bytes_sent .+ \n + merges.warmer.total_receive_time .+ \n + merges.warmer.total_failure_count .+ \n + merges.warmer.total_send_time .+ \n + refresh.total .+ \n + refresh.time .+ \n + refresh.external_total .+ \n + refresh.external_time .+ \n + refresh.listeners .+ \n + search.fetch_current .+ \n + search.fetch_time .+ \n + search.fetch_total .+ \n + search.open_contexts .+ \n + search.query_current .+ \n + search.query_time .+ 
\n + search.query_total .+ \n + search.query_failed .+ \n + search.concurrent_query_current .+ \n + search.concurrent_query_time .+ \n + search.concurrent_query_total .+ \n + search.concurrent_avg_slice_count .+ \n + search.startree_query_current .+ \n + search.startree_query_time .+ \n + search.startree_query_total .+ \n + search.startree_query_failed .+ \n + search.scroll_current .+ \n + search.scroll_time .+ \n + search.scroll_total .+ \n + search.point_in_time_current .+ \n + search.point_in_time_time .+ \n + search.point_in_time_total .+ \n + search.search_idle_reactivate_count_total .+ \n + segments.count .+ \n + segments.memory .+ \n + segments.index_writer_memory .+ \n + segments.version_map_memory .+ \n + segments.fixed_bitset_memory .+ \n + seq_no.max .+ \n + seq_no.local_checkpoint .+ \n + seq_no.global_checkpoint .+ \n + warmer.current .+ \n + warmer.total .+ \n + warmer.total_time .+ \n + path.data .+ \n + path.state .+ \n + docs.deleted .+ \n + $/ +--- "Help": - skip: version: " - 3.2.99" @@ -7,7 +115,7 @@ cat.shards: help: true node_selector: - version: "3.3.0 - " + version: "3.3.0 - 3.3.99" - match: $body: | diff --git a/server/src/main/java/org/opensearch/index/merge/MergeStats.java b/server/src/main/java/org/opensearch/index/merge/MergeStats.java index 89ebc12f7c044..a96e82a5ba6ea 100644 --- a/server/src/main/java/org/opensearch/index/merge/MergeStats.java +++ b/server/src/main/java/org/opensearch/index/merge/MergeStats.java @@ -91,7 +91,7 @@ public MergeStats(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_11_0)) { unreferencedFileCleanUpsPerformed = in.readOptionalVLong(); } - if (in.getVersion().onOrAfter(Version.CURRENT)) { + if (in.getVersion().onOrAfter(Version.V_3_4_0)) { this.warmerStats = new MergedSegmentWarmerStats(in); } else { this.warmerStats = new MergedSegmentWarmerStats(); @@ -355,7 +355,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_11_0)) 
{ out.writeOptionalVLong(unreferencedFileCleanUpsPerformed); } - if (out.getVersion().onOrAfter(Version.CURRENT)) { + if (out.getVersion().onOrAfter(Version.V_3_4_0)) { this.warmerStats.writeTo(out); } } diff --git a/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java b/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java index 95a9a943e4dbf..708e0c7d3da77 100644 --- a/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java +++ b/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java @@ -158,7 +158,7 @@ private void assertTable(Table table) { assertThat(headers.get(6).value, equalTo("ip")); assertThat(headers.get(7).value, equalTo("id")); assertThat(headers.get(8).value, equalTo("node")); - assertThat(headers.get(90).value, equalTo("docs.deleted")); + assertThat(headers.get(92).value, equalTo("docs.deleted")); final List> rows = table.getRows(); assertThat(rows.size(), equalTo(shardRoutings.size())); @@ -174,9 +174,9 @@ private void assertTable(Table table) { assertThat(row.get(4).value, equalTo(shardStats.getStats().getDocs().getCount())); assertThat(row.get(6).value, equalTo(localNode.getHostAddress())); assertThat(row.get(7).value, equalTo(localNode.getId())); - assertThat(row.get(88).value, equalTo(shardStats.getDataPath())); - assertThat(row.get(89).value, equalTo(shardStats.getStatePath())); - assertThat(row.get(90).value, equalTo(shardStats.getStats().getDocs().getDeleted())); + assertThat(row.get(90).value, equalTo(shardStats.getDataPath())); + assertThat(row.get(91).value, equalTo(shardStats.getStatePath())); + assertThat(row.get(92).value, equalTo(shardStats.getStats().getDocs().getDeleted())); } } } From a68d57b9ba9753d2890cc91cc77f2c3c5e9df002 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Tue, 14 Oct 2025 13:59:11 +0530 Subject: [PATCH 26/30] Fixing tests Signed-off-by: Aditya Khera --- .../index/engine/MergeRateLimitingTests.java | 11 ++++++----- 1 
file changed, 6 insertions(+), 5 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/engine/MergeRateLimitingTests.java b/server/src/test/java/org/opensearch/index/engine/MergeRateLimitingTests.java index 752c21c5e108c..bab9607b21f39 100644 --- a/server/src/test/java/org/opensearch/index/engine/MergeRateLimitingTests.java +++ b/server/src/test/java/org/opensearch/index/engine/MergeRateLimitingTests.java @@ -19,6 +19,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; +import org.opensearch.index.merge.MergedSegmentTransferTracker; import org.opensearch.test.OpenSearchTestCase; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; @@ -77,7 +78,7 @@ public void testSettingPrecedence() { IndexSettings indexSettings = new IndexSettings(newIndexMeta("test_index", indexBuilder.build()), nodeSettings); ShardId shardId = new ShardId("test_index", "test_uuid", 0); - OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings); + OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); // Should use cluster-level setting assertThat(scheduler.getForceMergeMBPerSec(), equalTo(75.0)); @@ -85,7 +86,7 @@ public void testSettingPrecedence() { // Test with both index and cluster-level settings - index should take precedence indexBuilder.put(MAX_FORCE_MERGE_MB_PER_SEC_SETTING.getKey(), "25.0"); indexSettings = new IndexSettings(newIndexMeta("test_index", indexBuilder.build()), nodeSettings); - scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings); + scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); // Should use index-level setting assertThat(scheduler.getForceMergeMBPerSec(), equalTo(25.0)); @@ -104,7 +105,7 @@ public void 
testDisabledRateLimiting() { IndexSettings indexSettings = new IndexSettings(newIndexMeta("test_index", builder.build()), Settings.EMPTY); ShardId shardId = new ShardId("test_index", "test_uuid", 0); - OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings); + OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); // Should have no rate limiting assertThat(scheduler.getForceMergeMBPerSec(), equalTo(Double.POSITIVE_INFINITY)); @@ -130,7 +131,7 @@ public void testDynamicRateLimitUpdates() throws Exception { IndexSettings indexSettings = new IndexSettings(newIndexMeta("test_index", builder.build()), Settings.EMPTY); ShardId shardId = new ShardId("test_index", "test_uuid", 0); - OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings); + OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); assertThat(scheduler.getForceMergeMBPerSec(), equalTo(10.0)); // Update to a different rate limit @@ -190,7 +191,7 @@ public void testFallbackToClusterSettingWhenIndexSettingRemoved() throws Excepti IndexSettings indexSettings = new IndexSettings(newIndexMeta("test_index", builder.build()), nodeSettings); ShardId shardId = new ShardId("test_index", "test_uuid", 0); - OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings); + OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); // Should initially use index-level setting assertThat(scheduler.getForceMergeMBPerSec(), equalTo(25.0)); From d6be469fd7aef61c4bd124e4ad87fe7ef18ec404 Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Tue, 14 Oct 2025 14:17:38 +0530 Subject: [PATCH 27/30] spotlessApply 
Signed-off-by: Aditya Khera --- .../index/engine/MergeRateLimitingTests.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/engine/MergeRateLimitingTests.java b/server/src/test/java/org/opensearch/index/engine/MergeRateLimitingTests.java index bab9607b21f39..bc535475e5ecd 100644 --- a/server/src/test/java/org/opensearch/index/engine/MergeRateLimitingTests.java +++ b/server/src/test/java/org/opensearch/index/engine/MergeRateLimitingTests.java @@ -78,7 +78,11 @@ public void testSettingPrecedence() { IndexSettings indexSettings = new IndexSettings(newIndexMeta("test_index", indexBuilder.build()), nodeSettings); ShardId shardId = new ShardId("test_index", "test_uuid", 0); - OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); + OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler( + shardId, + indexSettings, + new MergedSegmentTransferTracker() + ); // Should use cluster-level setting assertThat(scheduler.getForceMergeMBPerSec(), equalTo(75.0)); @@ -105,7 +109,11 @@ public void testDisabledRateLimiting() { IndexSettings indexSettings = new IndexSettings(newIndexMeta("test_index", builder.build()), Settings.EMPTY); ShardId shardId = new ShardId("test_index", "test_uuid", 0); - OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); + OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler( + shardId, + indexSettings, + new MergedSegmentTransferTracker() + ); // Should have no rate limiting assertThat(scheduler.getForceMergeMBPerSec(), equalTo(Double.POSITIVE_INFINITY)); @@ -131,7 +139,11 @@ public void testDynamicRateLimitUpdates() throws Exception { IndexSettings indexSettings = new IndexSettings(newIndexMeta("test_index", builder.build()), 
Settings.EMPTY); ShardId shardId = new ShardId("test_index", "test_uuid", 0); - OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); + OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler( + shardId, + indexSettings, + new MergedSegmentTransferTracker() + ); assertThat(scheduler.getForceMergeMBPerSec(), equalTo(10.0)); // Update to a different rate limit @@ -191,7 +203,11 @@ public void testFallbackToClusterSettingWhenIndexSettingRemoved() throws Excepti IndexSettings indexSettings = new IndexSettings(newIndexMeta("test_index", builder.build()), nodeSettings); ShardId shardId = new ShardId("test_index", "test_uuid", 0); - OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler(shardId, indexSettings, new MergedSegmentTransferTracker()); + OpenSearchConcurrentMergeScheduler scheduler = new OpenSearchConcurrentMergeScheduler( + shardId, + indexSettings, + new MergedSegmentTransferTracker() + ); // Should initially use index-level setting assertThat(scheduler.getForceMergeMBPerSec(), equalTo(25.0)); From a6c9a97177b877d6e3a4fe751d238d25bfccb0fc Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Tue, 14 Oct 2025 15:55:25 +0530 Subject: [PATCH 28/30] YAML test fix Signed-off-by: Aditya Khera --- .../main/resources/rest-api-spec/test/cat.shards/10_basic.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index 8a313fbc8d83a..7baa55ec462ed 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -1,7 +1,7 @@ "Help": - skip: version: " - 3.3.99" - reason: search query failure stats is added in 3.3.0 + reason: merges.warmer stats added 
in 3.4.0 features: node_selector - do: cat.shards: From ba2cec59e99220249fc7933ffacf37f05f1ffffc Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Tue, 14 Oct 2025 16:59:04 +0530 Subject: [PATCH 29/30] yaml test fix Signed-off-by: Aditya Khera --- .../main/resources/rest-api-spec/test/cat.shards/10_basic.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index 7baa55ec462ed..ec7a679fc2807 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -106,7 +106,7 @@ docs.deleted .+ \n $/ --- -"Help": +"Help from 3.3.0 to 3.3.99": - skip: version: " - 3.2.99" reason: search query failure stats is added in 3.3.0 From 1739f475eec7080b06ba7721167cc6d68aa836cc Mon Sep 17 00:00:00 2001 From: Aditya Khera Date: Tue, 14 Oct 2025 17:57:11 +0530 Subject: [PATCH 30/30] test fixes Signed-off-by: Aditya Khera --- .../main/resources/rest-api-spec/test/cat.shards/10_basic.yml | 2 +- .../opensearch/index/merge/MergedSegmentWarmerStatsTests.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index ec7a679fc2807..b69516fd5abc6 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -108,7 +108,7 @@ --- "Help from 3.3.0 to 3.3.99": - skip: - version: " - 3.2.99" + version: " - 3.2.99, 3.4.0 - " reason: search query failure stats is added in 3.3.0 features: node_selector - do: diff --git a/server/src/test/java/org/opensearch/index/merge/MergedSegmentWarmerStatsTests.java 
b/server/src/test/java/org/opensearch/index/merge/MergedSegmentWarmerStatsTests.java index caedd8b3d24d3..199b7b6ff7cb1 100644 --- a/server/src/test/java/org/opensearch/index/merge/MergedSegmentWarmerStatsTests.java +++ b/server/src/test/java/org/opensearch/index/merge/MergedSegmentWarmerStatsTests.java @@ -129,7 +129,7 @@ public void testToXContent() throws IOException { builder.endObject(); String json = builder.toString(); - assertTrue(json.contains("merged_segment_warmer")); + assertTrue(json.contains("warmer")); assertTrue(json.contains("total_invocations_count")); assertTrue(json.contains("total_time_millis")); assertTrue(json.contains("total_failure_count"));