Skip to content

Commit

Permalink
Allow LogMergePolicy to merge more than mergeFactor segments together when the merge is below the min merge size.
Browse files Browse the repository at this point in the history

This is essentially porting apache#266 to `LogMergePolicy`. By allowing more than
`mergeFactor` segments to be merged together for small merges, the merge policy
gets a lower write amplification and indexes have fewer small segments.
  • Loading branch information
jpountz committed Jan 23, 2025
1 parent 8487718 commit 1684e06
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 1 deletion.
22 changes: 22 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,28 @@ public MergeSpecification findMerges(
mergeDocs += segmentDocs;
}

if (end - start >= mergeFactor && mergeSize < minMergeSize && anyMerging == false) {
// If the merge has mergeFactor segments but is still smaller than the min merged segment
// size, keep packing candidate segments.
while (end < 1 + upto) {
final SegmentInfoAndLevel segLevel = levels.get(end);
final SegmentCommitInfo info = segLevel.info;
if (mergingSegments.contains(info)) {
anyMerging = true;
break;
}
long segmentSize = size(info, mergeContext);
long segmentDocs = sizeDocs(info, mergeContext);
if (mergeSize + segmentSize > minMergeSize || mergeDocs + segmentDocs > maxMergeDocs) {
break;
}

mergeSize += segmentSize;
mergeDocs += segmentDocs;
end++;
}
}

if (anyMerging || end - start <= 1) {
// skip: there is an ongoing merge at the current level or the computed merge has a single
// segment and this merge policy doesn't do singleton merges
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,13 @@ protected void assertSegmentInfos(MergePolicy policy, SegmentInfos infos) throws
/**
 * Checks the segment count of every merge in {@code merge}.
 *
 * <p>A merge may contain more than {@code getMergeFactor()} segments only when the summed size
 * of its segments, as computed by {@code lmp.size}, is strictly below {@code lmp.minMergeSize}.
 * Any other merge must contain at most {@code getMergeFactor()} segments.
 *
 * @param policy the policy under test; it is cast to {@link LogMergePolicy}, so any other type
 *     fails with a {@link ClassCastException}
 * @param merge the specification whose {@code merges} are checked
 * @throws IOException declared by the overridden method and by the size computation
 */
@Override
protected void assertMerge(MergePolicy policy, MergeSpecification merge) throws IOException {
  LogMergePolicy lmp = (LogMergePolicy) policy;
  // NOTE(review): presumably SegmentCommitInfo::getDelCount supplies the per-segment delete
  // count that size() consults -- confirm against MockMergeContext.
  MergeContext mockMergeContext = new MockMergeContext(SegmentCommitInfo::getDelCount);
  for (OneMerge oneMerge : merge.merges) {
    long mergeSize = 0;
    for (SegmentCommitInfo info : oneMerge.segments) {
      mergeSize += lmp.size(info, mockMergeContext);
    }
    // An unconditional "segments.size() <= mergeFactor" check must not precede this one: it
    // would reject the small merges that are allowed to exceed mergeFactor.
    assertTrue(mergeSize < lmp.minMergeSize || oneMerge.segments.size() <= lmp.getMergeFactor());
  }
}

Expand Down

0 comments on commit 1684e06

Please sign in to comment.