diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4c855a7cb465..f77b5b41e341 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -76,7 +76,8 @@ New Features Improvements --------------------- -* LUCENE-10078: Merge-on-refresh is now enabled by default. (Adrien Grand) +* LUCENE-10078: Merge on full flush is now enabled by default with a timeout of + 500ms. (Adrien Grand) * LUCENE-10585: Facet module code cleanup (copy/paste scrubbing, simplification and some very minor optimization tweaks). (Greg Miller) diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java index 8a5e3901c405..6eeb61cde736 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java @@ -124,4 +124,9 @@ public int numDeletesToMerge( public MergePolicy unwrap() { return in; } + + @Override + protected long maxFullFlushMergeSize() { + return in.maxFullFlushMergeSize(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index 82085478d41e..a1f7ea760154 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -460,7 +460,7 @@ public IndexWriterConfig setCommitOnClose(boolean commitOnClose) { *
Note: Which segments would get merged depends on the implementation of {@link * MergePolicy#findFullFlushMerges(MergeTrigger, SegmentInfos, MergePolicy.MergeContext)} * - *
Note: Set to 0 to disable merge-on-refresh. + *
Note: Set to 0 to disable merging on full flush. */ public IndexWriterConfig setMaxFullFlushMergeWaitMillis(long maxFullFlushMergeWaitMillis) { this.maxFullFlushMergeWaitMillis = maxFullFlushMergeWaitMillis; diff --git a/lucene/core/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java index fc62c4b0ecde..25f5d20a059c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java @@ -97,10 +97,10 @@ public double getMaxMergeMBForForcedMerge() { /** * Sets the minimum size for the lowest level segments. Any segments below this size are - * considered to be on the same level (even if they vary drastically in size) and will be merged - * whenever there are mergeFactor of them. This effectively truncates the "long tail" of small - * segments that would otherwise be created into a single level. If you set this too large, it - * could greatly increase the merging cost during indexing (if you flush many small segments). + * candidates for full-flush merges and merged more aggressively. This effectively reduces the + * chances of getting a "long tail" of small segments that would otherwise accumulate in the + * index. If you set this too large, it could greatly increase the merging cost during indexing + * (if you flush many small segments). 
*/ public void setMinMergeMB(double mb) { minMergeSize = (long) (mb * 1024 * 1024); diff --git a/lucene/core/src/java/org/apache/lucene/index/LogDocMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/LogDocMergePolicy.java index 6836dffcf9dc..a742ed28e12b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LogDocMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/LogDocMergePolicy.java @@ -44,10 +44,10 @@ protected long size(SegmentCommitInfo info, MergeContext mergeContext) throws IO /** * Sets the minimum size for the lowest level segments. Any segments below this size are - * considered to be on the same level (even if they vary drastically in size) and will be merged - * whenever there are mergeFactor of them. This effectively truncates the "long tail" of small - * segments that would otherwise be created into a single level. If you set this too large, it - * could greatly increase the merging cost during indexing (if you flush many small segments). + * candidates for full-flush merges and merged more aggressively. This effectively reduces the + * chances of getting a "long tail" of small segments that would otherwise accumulate in the + * index. If you set this too large, it could greatly increase the merging cost during indexing + * (if you flush many small segments). */ public void setMinMergeDocs(int minMergeDocs) { minMergeSize = minMergeDocs; diff --git a/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java index 4284a4682329..c38adc2ea439 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java @@ -68,8 +68,8 @@ public abstract class LogMergePolicy extends MergePolicy { protected int mergeFactor = DEFAULT_MERGE_FACTOR; /** - * Any segments whose size is smaller than this value will be rounded up to this value. 
This - * ensures that tiny segments are aggressively merged. + * Any segments whose size is smaller than this value will be candidates for full-flush merges and + * merged more aggressively. */ protected long minMergeSize; @@ -184,6 +184,11 @@ protected boolean isMerged( && (numToMerge != 1 || !segmentIsOriginal || isMerged(infos, mergeInfo, mergeContext)); } + @Override + protected long maxFullFlushMergeSize() { + return minMergeSize; + } + /** * Returns the merges necessary to merge the index, taking the max merge size or max merge docs * into consideration. This method attempts to respect the {@code maxNumSegments} parameter, diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index 518bce666cac..b84958ab6092 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -697,7 +697,7 @@ protected long size(SegmentCommitInfo info, MergeContext mergeContext) throws IO * implementation of {@link #findFullFlushMerges}. */ protected long maxFullFlushMergeSize() { - return Long.MAX_VALUE; + return 0L; } /** Asserts that the delCount for this SegmentCommitInfo is valid */ diff --git a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java index 4d91ce62c138..de394365dbe3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java @@ -201,6 +201,11 @@ public double getFloorSegmentMB() { return floorSegmentBytes / (1024 * 1024.); } + @Override + protected long maxFullFlushMergeSize() { + return floorSegmentBytes; + } + /** * When forceMergeDeletes is called, we only merge away a segment if its delete percentage is over * this threshold. Default is 10%. 
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java index 462343f6bcf7..c71bf35098f0 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java @@ -815,7 +815,8 @@ public void testNPEAfterInvalidReindex2() throws Exception { /** test reopening backwards from a non-NRT reader (with document deletes) */ public void testNRTMdeletes() throws Exception { Directory dir = newDirectory(); - IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriterConfig iwc = + new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE); SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); iwc.setIndexDeletionPolicy(snapshotter); @@ -865,7 +866,8 @@ public void testNRTMdeletes() throws Exception { /** test reopening backwards from an NRT reader (with document deletes) */ public void testNRTMdeletes2() throws Exception { Directory dir = newDirectory(); - IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())) + .setMergePolicy(NoMergePolicy.INSTANCE); SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); iwc.setIndexDeletionPolicy(snapshotter); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestLogMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestLogMergePolicy.java index 2b472d25f047..ce09c1575035 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestLogMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestLogMergePolicy.java @@ -57,10 +57,7 @@ public void testFullFlushMerges() throws IOException { LogMergePolicy mp = mergePolicy(); - for (int i = 0; i < 2 * 
mp.getMergeFactor() + 3; ++i) { - if (i < mp.getMergeFactor()) { - assertNull(mp.findFullFlushMerges(MergeTrigger.FULL_FLUSH, segmentInfos, mergeContext)); - } + for (int i = 0; i < mp.getMergeFactor(); ++i) { segmentInfos.add( makeSegmentCommitInfo( "_" + segNameGenerator.getAndIncrement(), @@ -76,11 +73,6 @@ public void testFullFlushMerges() throws IOException { segmentInfos = applyMerge(segmentInfos, merge, "_" + segNameGenerator.getAndIncrement(), stats); } - assertEquals(5, segmentInfos.size()); - assertEquals(mp.getMergeFactor(), segmentInfos.info(0).info.maxDoc()); - assertEquals(mp.getMergeFactor(), segmentInfos.info(1).info.maxDoc()); - assertEquals(1, segmentInfos.info(2).info.maxDoc()); - assertEquals(1, segmentInfos.info(3).info.maxDoc()); - assertEquals(1, segmentInfos.info(4).info.maxDoc()); + assertEquals(1, segmentInfos.size()); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java index 06cdcf893ac8..0523d9db21bb 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java @@ -922,13 +922,13 @@ public void testSimulateUpdates() throws IOException { public void testFullFlushMerges() throws IOException { AtomicLong segNameGenerator = new AtomicLong(); + IOStats stats = new IOStats(); MergeContext mergeContext = new MockMergeContext(SegmentCommitInfo::getDelCount); SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major); - TieredMergePolicy mp = mergePolicy(); + TieredMergePolicy mp = new TieredMergePolicy(); - for (int i = 0; i < mp.getSegmentsPerTier(); ++i) { - assertNull(mp.findFullFlushMerges(MergeTrigger.FULL_FLUSH, segmentInfos, mergeContext)); + for (int i = 0; i < 11; ++i) { segmentInfos.add( makeSegmentCommitInfo( "_" + segNameGenerator.getAndIncrement(), @@ -937,6 +937,13 @@ public void testFullFlushMerges() throws 
IOException { Double.MIN_VALUE, IndexWriter.SOURCE_FLUSH)); } - assertNotNull(mp.findFullFlushMerges(MergeTrigger.FULL_FLUSH, segmentInfos, mergeContext)); + MergeSpecification spec = + mp.findFullFlushMerges(MergeTrigger.FULL_FLUSH, segmentInfos, mergeContext); + assertNotNull(spec); + for (OneMerge merge : spec.merges) { + segmentInfos = + applyMerge(segmentInfos, merge, "_" + segNameGenerator.getAndIncrement(), stats); + } + assertEquals(2, segmentInfos.size()); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java b/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java index 8abbfca8d218..e6dd9f047747 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java @@ -105,7 +105,10 @@ public void run() { // LUCENE-7570 public void testDeadlockStalledMerges() throws Exception { Directory dir = newDirectory(); - IndexWriterConfig iwc = new IndexWriterConfig(); + IndexWriterConfig iwc = + new IndexWriterConfig() + // nocommit: deadlock if you don't disable merge-on-full-flush + .setMaxFullFlushMergeWaitMillis(0); // so we merge every 2 segments: LogMergePolicy mp = new LogDocMergePolicy();