Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Create generic DocRequest to better categorize ActionRequests ([#18269](https://github.com/opensearch-project/OpenSearch/pull/18269)))
- Change implementation for `percentiles` aggregation for latency improvement [#18124](https://github.com/opensearch-project/OpenSearch/pull/18124)

- Add segments codec check in UpgradeRequest to select segments for upgrade ([#17491](https://github.com/opensearch-project/OpenSearch/pull/17491))

### Dependencies
- Update Apache Lucene from 10.1.0 to 10.2.1 ([#17961](https://github.com/opensearch-project/OpenSearch/pull/17961))
- Bump `com.google.code.gson:gson` from 2.12.1 to 2.13.1 ([#17923](https://github.com/opensearch-project/OpenSearch/pull/17923), [#18266](https://github.com/opensearch-project/OpenSearch/pull/18266))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.codecs.lucene90;

/**
* This class is a custom comparator for Lucene90StoredFieldsFormat
*/
public final class Lucene90StoredFieldsFormatComparator {
private Lucene90StoredFieldsFormatComparator() {}

public static boolean equal(Lucene90StoredFieldsFormat one, Lucene90StoredFieldsFormat two) {
return one.mode == two.mode;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2372,7 +2372,7 @@ private IndexWriterConfig getIndexWriterConfig() {
}

iwc.setCheckPendingFlushUpdate(config().getIndexSettings().isCheckPendingFlushEnabled());
iwc.setMergePolicy(new OpenSearchMergePolicy(mergePolicy));
iwc.setMergePolicy(new OpenSearchMergePolicy(mergePolicy, engineConfig.getCodec()));
iwc.setSimilarity(engineConfig.getSimilarity());
iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
iwc.setCodec(engineConfig.getCodec());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormatComparator;
import org.apache.lucene.index.FilterMergePolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;
Expand Down Expand Up @@ -71,32 +74,56 @@ public final class OpenSearchMergePolicy extends FilterMergePolicy {

private static final int MAX_CONCURRENT_UPGRADE_MERGES = 5;

private final Codec actualCodec;

/** @param delegate the merge policy to wrap */
public OpenSearchMergePolicy(MergePolicy delegate) {
public OpenSearchMergePolicy(MergePolicy delegate, Codec codec) {
super(delegate);
this.actualCodec = codec;
}

/** return the wrapped merge policy */
public MergePolicy getDelegate() {
return in;
}

private boolean shouldUpgrade(SegmentCommitInfo info) {
boolean shouldUpgrade(SegmentCommitInfo info) {
org.apache.lucene.util.Version old = info.info.getVersion();
org.apache.lucene.util.Version cur = Version.CURRENT.luceneVersion;

// Something seriously wrong if this trips:
assert old.major <= cur.major;
assert cur.major >= old.major;

if (cur.major > old.major) {
// Always upgrade segment if Lucene's major version is too old
return true;
}
if (upgradeOnlyAncientSegments == false && cur.minor > old.minor) {
// If it's only a minor version difference, and we are not upgrading only ancient segments,
// also upgrade:

if (upgradeOnlyAncientSegments) {
// Skip other checks, because we already check major versions and we are upgrading only ancient segments
return false;
}

if (cur.minor > old.minor) {
return true;
}

if (!info.info.getCodec().getName().equals(actualCodec.getName())) {
// If difference in Codec we should upgrade segment
return true;
}

if (info.info.getAttributes() != null) {
if (info.info.getCodec().storedFieldsFormat() instanceof Lucene90StoredFieldsFormat
&& actualCodec.storedFieldsFormat() instanceof Lucene90StoredFieldsFormat) {
var current = (Lucene90StoredFieldsFormat) info.info.getCodec().storedFieldsFormat();
var target = (Lucene90StoredFieldsFormat) actualCodec.storedFieldsFormat();
if (!Lucene90StoredFieldsFormatComparator.equal(current, target)) {
return true;
}
}
}

// Version matches, or segment is not ancient and we are only upgrading ancient segments:
return false;
}
Expand All @@ -115,8 +142,7 @@ public MergeSpecification findForcedMerges(

if (shouldUpgrade(info)) {

// TODO: Use IndexUpgradeMergePolicy instead. We should be comparing codecs,
// for now we just assume every minor upgrade has a new format.
// TODO: Use IndexUpgradeMergePolicy instead
logger.debug("Adding segment {} to be upgraded", info.info.name);
spec.add(new OneMerge(Collections.singletonList(info)));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.shard;

import org.apache.lucene.backward_codecs.lucene100.Lucene100Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormatComparator;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.opensearch.index.codec.composite.composite101.Composite101Codec;
import org.opensearch.test.OpenSearchTestCase;

import java.util.Map;

import org.mockito.Mockito;

import static org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat.Mode.BEST_COMPRESSION;
import static org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat.Mode.BEST_SPEED;

/**
* Very simple checks for {@link OpenSearchMergePolicy}
*/
public class OpenSearchMergePolicyTests extends OpenSearchTestCase {
public void testShouldUpgrade() {
var mockMergePolicy = Mockito.mock(MergePolicy.class);
var openSearchMergePolicy = new OpenSearchMergePolicy(mockMergePolicy, new Lucene101Codec());
assertFalse(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LATEST, new Lucene101Codec())));
assertTrue(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LUCENE_10_2_0, new Lucene100Codec())));
assertTrue(
openSearchMergePolicy.shouldUpgrade(
createSegmentCommitInfo(Version.LATEST, new Lucene101Codec(Lucene101Codec.Mode.BEST_COMPRESSION))
)
);
assertTrue(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LUCENE_9_12_2, new Lucene912Codec())));
assertTrue(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LATEST, new Composite101Codec())));

openSearchMergePolicy.setUpgradeInProgress(true, true);
assertTrue(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LUCENE_9_12_2, new Lucene912Codec())));
assertFalse(openSearchMergePolicy.shouldUpgrade(createSegmentCommitInfo(Version.LUCENE_10_0_0, new Lucene100Codec())));
}

/**
* Very simple equal checks for {@link Lucene90StoredFieldsFormatComparator}
*/
public void testLucene90StoredFieldsFormatComparatorEqual() {
assertTrue(Lucene90StoredFieldsFormatComparator.equal(new Lucene90StoredFieldsFormat(), new Lucene90StoredFieldsFormat()));
assertTrue(
Lucene90StoredFieldsFormatComparator.equal(new Lucene90StoredFieldsFormat(), new Lucene90StoredFieldsFormat(BEST_SPEED))
);
assertTrue(
Lucene90StoredFieldsFormatComparator.equal(
new Lucene90StoredFieldsFormat(BEST_SPEED),
new Lucene90StoredFieldsFormat(BEST_SPEED)
)
);
assertTrue(
Lucene90StoredFieldsFormatComparator.equal(
new Lucene90StoredFieldsFormat(BEST_COMPRESSION),
new Lucene90StoredFieldsFormat(BEST_COMPRESSION)
)
);
assertFalse(
Lucene90StoredFieldsFormatComparator.equal(new Lucene90StoredFieldsFormat(), new Lucene90StoredFieldsFormat(BEST_COMPRESSION))
);
assertFalse(
Lucene90StoredFieldsFormatComparator.equal(
new Lucene90StoredFieldsFormat(BEST_SPEED),
new Lucene90StoredFieldsFormat(BEST_COMPRESSION)
)
);
assertFalse(
Lucene90StoredFieldsFormatComparator.equal(
new Lucene90StoredFieldsFormat(BEST_COMPRESSION),
new Lucene90StoredFieldsFormat(BEST_SPEED)
)
);
}

private SegmentCommitInfo createSegmentCommitInfo(Version version, Codec codec) {
var id = new byte[16];
var mockDir = Mockito.mock(Directory.class);
var segmentInfo = new SegmentInfo(
mockDir,
version,
version,
"",
Integer.MAX_VALUE,
false,
false,
codec,
Map.of(),
id,
Map.of(),
null
);
return new SegmentCommitInfo(segmentInfo, 0, 0, 0, 0, 0, id);
}
}
Loading