diff --git a/CHANGELOG.md b/CHANGELOG.md index 648cd3d0963e6..b2d596ec3bcb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Create generic DocRequest to better categorize ActionRequests ([#18269](https://github.com/opensearch-project/OpenSearch/pull/18269))) - Change implementation for `percentiles` aggregation for latency improvement [#18124](https://github.com/opensearch-project/OpenSearch/pull/18124) +- Add segments codec check in UpgradeRequest to select segments for upgrade ([#17491](https://github.com/opensearch-project/OpenSearch/pull/17491)) + ### Dependencies - Update Apache Lucene from 10.1.0 to 10.2.1 ([#17961](https://github.com/opensearch-project/OpenSearch/pull/17961)) - Bump `com.google.code.gson:gson` from 2.12.1 to 2.13.1 ([#17923](https://github.com/opensearch-project/OpenSearch/pull/17923), [#18266](https://github.com/opensearch-project/OpenSearch/pull/18266)) diff --git a/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90StoredFieldsFormatComparator.java b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90StoredFieldsFormatComparator.java new file mode 100644 index 0000000000000..b4f63a9e9c489 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90StoredFieldsFormatComparator.java @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.codecs.lucene90; + +/** + * This class is a custom comparator for Lucene90StoredFieldsFormat + */ +public final class Lucene90StoredFieldsFormatComparator { + private Lucene90StoredFieldsFormatComparator() {} + + public static boolean equal(Lucene90StoredFieldsFormat one, Lucene90StoredFieldsFormat two) { + return one.mode == two.mode; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index 62bfd27516964..456c3f66f8514 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -2372,7 +2372,7 @@ private IndexWriterConfig getIndexWriterConfig() { } iwc.setCheckPendingFlushUpdate(config().getIndexSettings().isCheckPendingFlushEnabled()); - iwc.setMergePolicy(new OpenSearchMergePolicy(mergePolicy)); + iwc.setMergePolicy(new OpenSearchMergePolicy(mergePolicy, engineConfig.getCodec())); iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac()); iwc.setCodec(engineConfig.getCodec()); diff --git a/server/src/main/java/org/opensearch/index/shard/OpenSearchMergePolicy.java b/server/src/main/java/org/opensearch/index/shard/OpenSearchMergePolicy.java index 258a283c63476..b44a338557383 100644 --- a/server/src/main/java/org/opensearch/index/shard/OpenSearchMergePolicy.java +++ b/server/src/main/java/org/opensearch/index/shard/OpenSearchMergePolicy.java @@ -34,6 +34,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat; +import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormatComparator; import org.apache.lucene.index.FilterMergePolicy; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MergePolicy; @@ -71,9 +74,12 @@ public final class OpenSearchMergePolicy extends FilterMergePolicy { private static final int MAX_CONCURRENT_UPGRADE_MERGES = 5; + private final Codec actualCodec; + /** @param delegate the merge policy to wrap */ - public OpenSearchMergePolicy(MergePolicy delegate) { + public OpenSearchMergePolicy(MergePolicy delegate, Codec codec) { super(delegate); + this.actualCodec = codec; } /** return the wrapped merge policy */ @@ -81,22 +87,43 @@ public MergePolicy getDelegate() { return in; } - private boolean shouldUpgrade(SegmentCommitInfo info) { + boolean shouldUpgrade(SegmentCommitInfo info) { org.apache.lucene.util.Version old = info.info.getVersion(); org.apache.lucene.util.Version cur = Version.CURRENT.luceneVersion; // Something seriously wrong if this trips: - assert old.major <= cur.major; + assert cur.major >= old.major; if (cur.major > old.major) { // Always upgrade segment if Lucene's major version is too old return true; } - if (upgradeOnlyAncientSegments == false && cur.minor > old.minor) { - // If it's only a minor version difference, and we are not upgrading only ancient segments, - // also upgrade: + + if (upgradeOnlyAncientSegments) { + // Skip other checks, because we already check major versions and we are upgrading only ancient segments + return false; + } + + if (cur.minor > old.minor) { + return true; + } + + if (!info.info.getCodec().getName().equals(actualCodec.getName())) { + // If difference in Codec we should upgrade segment return true; } + + if (info.info.getAttributes() != null) { + if (info.info.getCodec().storedFieldsFormat() instanceof Lucene90StoredFieldsFormat + && actualCodec.storedFieldsFormat() instanceof Lucene90StoredFieldsFormat) { + var current = (Lucene90StoredFieldsFormat) info.info.getCodec().storedFieldsFormat(); + var target = (Lucene90StoredFieldsFormat) actualCodec.storedFieldsFormat(); + if (!Lucene90StoredFieldsFormatComparator.equal(current, target)) { + return true; + } + } + } + // Version matches, or segment is not ancient and we are only upgrading ancient segments: return false; } @@ -115,8 +142,7 @@ public MergeSpecification findForcedMerges( if (shouldUpgrade(info)) { - // TODO: Use IndexUpgradeMergePolicy instead. We should be comparing codecs, - // for now we just assume every minor upgrade has a new format. + // TODO: Use IndexUpgradeMergePolicy instead logger.debug("Adding segment {} to be upgraded", info.info.name); spec.add(new OneMerge(Collections.singletonList(info))); } diff --git a/server/src/test/java/org/opensearch/index/shard/OpenSearchMergePolicyTests.java b/server/src/test/java/org/opensearch/index/shard/OpenSearchMergePolicyTests.java new file mode 100644 index 0000000000000..d47d9345b3662 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/shard/OpenSearchMergePolicyTests.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.shard; + +import org.apache.lucene.backward_codecs.lucene100.Lucene100Codec; +import org.apache.lucene.backward_codecs.lucene912.Lucene912Codec; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat; +import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormatComparator; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Version; +import org.opensearch.index.codec.composite.composite101.Composite101Codec; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Map; + +import org.mockito.Mockito; + +import static org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat.Mode.BEST_COMPRESSION; +import static org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat.Mode.BEST_SPEED; + +/** + * Very simple checks for {@link OpenSearchMergePolicy} + */ +public class OpenSearchMergePolicyTests extends OpenSearchTestCase { + public void testShouldUpgrade() { + var mockMergePolicy = Mockito.mock(MergePolicy.class); + var openSearchMergePolicy = new OpenSearchMergePolicy(mockMergePolicy, new Lucene101Codec()); + assertFalse(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LATEST, new Lucene101Codec()))); + assertTrue(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LUCENE_10_2_0, new Lucene100Codec()))); + assertTrue( + openSearchMergePolicy.shouldUpgrade( + createSegmentCommitInfo(Version.LATEST, new Lucene101Codec(Lucene101Codec.Mode.BEST_COMPRESSION)) + ) + ); + assertTrue(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LUCENE_9_12_2, new Lucene912Codec()))); + assertTrue(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LATEST, new Composite101Codec()))); + + openSearchMergePolicy.setUpgradeInProgress(true, true); + assertTrue(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LUCENE_9_12_2, new Lucene912Codec()))); + assertFalse(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LUCENE_10_0_0, new Lucene100Codec()))); + } + + /** + * Very simple equal checks for {@link Lucene90StoredFieldsFormatComparator} + */ + public void testLucene90StoredFieldsFormatComparatorEqual() { + assertTrue(Lucene90StoredFieldsFormatComparator.equal(new Lucene90StoredFieldsFormat(), new Lucene90StoredFieldsFormat())); + assertTrue( + Lucene90StoredFieldsFormatComparator.equal(new Lucene90StoredFieldsFormat(), new Lucene90StoredFieldsFormat(BEST_SPEED)) + ); + assertTrue( + Lucene90StoredFieldsFormatComparator.equal( + new Lucene90StoredFieldsFormat(BEST_SPEED), + new Lucene90StoredFieldsFormat(BEST_SPEED) + ) + ); + assertTrue( + Lucene90StoredFieldsFormatComparator.equal( + new Lucene90StoredFieldsFormat(BEST_COMPRESSION), + new Lucene90StoredFieldsFormat(BEST_COMPRESSION) + ) + ); + assertFalse( + Lucene90StoredFieldsFormatComparator.equal(new Lucene90StoredFieldsFormat(), new Lucene90StoredFieldsFormat(BEST_COMPRESSION)) + ); + assertFalse( + Lucene90StoredFieldsFormatComparator.equal( + new Lucene90StoredFieldsFormat(BEST_SPEED), + new Lucene90StoredFieldsFormat(BEST_COMPRESSION) + ) + ); + assertFalse( + Lucene90StoredFieldsFormatComparator.equal( + new Lucene90StoredFieldsFormat(BEST_COMPRESSION), + new Lucene90StoredFieldsFormat(BEST_SPEED) + ) + ); + } + + private SegmentCommitInfo createSegmentCommitInfo(Version version, Codec codec) { + var id = new byte[16]; + var mockDir = Mockito.mock(Directory.class); + var segmentInfo = new SegmentInfo( + mockDir, + version, + version, + "", + Integer.MAX_VALUE, + false, + false, + codec, + Map.of(), + id, + Map.of(), + null + ); + return new SegmentCommitInfo(segmentInfo, 0, 0, 0, 0, 0, id); + } +}