Use LogByteSizeMergePolicy instead of TieredMergePolicy for time-based data. #92684
Changelog entry (new file):

```diff
@@ -0,0 +1,6 @@
+pr: 92684
+summary: Use `LogByteSizeMergePolicy` instead of `TieredMergePolicy` for time-based
+  data
+area: Engine
+type: enhancement
+issues: []
```
org/elasticsearch/index/MergePolicyConfig.java:

```diff
@@ -9,6 +9,7 @@
 package org.elasticsearch.index;
 
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.TieredMergePolicy;
```
```diff
@@ -101,9 +102,11 @@
  */
 public final class MergePolicyConfig {
-    private final TieredMergePolicy mergePolicy = new TieredMergePolicy();
+    private final TieredMergePolicy tieredMergePolicy = new TieredMergePolicy();
+    private final LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy();
     private final Logger logger;
     private final boolean mergesEnabled;
+    private volatile Type mergePolicyType;
 
     public static final double DEFAULT_EXPUNGE_DELETES_ALLOWED = 10d;
     public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = new ByteSizeValue(2, ByteSizeUnit.MB);
```
```diff
@@ -120,6 +123,45 @@ public final class MergePolicyConfig {
         Property.IndexScope
     );
 
+    public enum Type {
+        UNSET {
+            @Override
+            MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) {
+                if (isTimeSeriesIndex) {
+                    // TieredMergePolicy is better than LogByteSizeMergePolicy at computing cheaper merges, but it does so by allowing
+                    // itself to merge non-adjacent segments. An important property we get when only merging adjacent segments and data gets
+                    // indexed in order is that segments have non-overlapping time ranges. This means that a range query on the time field
+                    // will only partially match 2 segments at most, and other segments will either fully match or not match at all.
+                    return config.logByteSizeMergePolicy;
+                } else {
+                    return config.tieredMergePolicy;
+                }
+            }
+        },
+        TIERED {
+            @Override
+            MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) {
+                return config.tieredMergePolicy;
+            }
+        },
+        LOG_BYTE_SIZE {
+            @Override
+            MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) {
+                return config.logByteSizeMergePolicy;
+            }
+        };
+
+        abstract MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeries);
+    }
+
+    public static final Setting<Type> INDEX_MERGE_POLICY_TYPE_SETTING = Setting.enumSetting(
+        Type.class,
+        "index.merge.policy.type",
+        Type.UNSET,
+        Property.Dynamic,
+        Property.IndexScope
+    );
+
     public static final Setting<Double> INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED_SETTING = Setting.doubleSetting(
         "index.merge.policy.expunge_deletes_allowed",
         DEFAULT_EXPUNGE_DELETES_ALLOWED,
```
```diff
@@ -174,6 +216,7 @@ public final class MergePolicyConfig {
 
     MergePolicyConfig(Logger logger, IndexSettings indexSettings) {
         this.logger = logger;
+        Type mergePolicyType = indexSettings.getValue(INDEX_MERGE_POLICY_TYPE_SETTING);
         double forceMergeDeletesPctAllowed = indexSettings.getValue(INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED_SETTING); // percentage
         ByteSizeValue floorSegment = indexSettings.getValue(INDEX_MERGE_POLICY_FLOOR_SEGMENT_SETTING);
         int maxMergeAtOnce = indexSettings.getValue(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_SETTING);
```
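The constructor now reads the new setting first. For illustration, a hedged sketch of how `index.merge.policy.type` could be supplied via the `Settings` builder that backs `IndexSettings` (the index-creation plumbing is outside this diff, and the lower-case value assumes the usual case-insensitive enum parsing of `Setting.enumSetting`):

```java
import org.elasticsearch.common.settings.Settings;

public class MergePolicyTypeSettingSketch {
    public static void main(String[] args) {
        // Force LogByteSizeMergePolicy regardless of whether the index is
        // time-based; omitting the setting leaves the default Type.UNSET,
        // which selects the policy based on whether the index is time-based.
        Settings indexSettings = Settings.builder()
            .put("index.merge.policy.type", "log_byte_size")
            .build();
        System.out.println(indexSettings.get("index.merge.policy.type"));
    }
}
```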
```diff
@@ -190,15 +233,16 @@ public final class MergePolicyConfig {
             );
         }
         maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
-        indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING).configure(mergePolicy);
-        mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
-        mergePolicy.setFloorSegmentMB(floorSegment.getMbFrac());
-        mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
-        mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac());
-        mergePolicy.setSegmentsPerTier(segmentsPerTier);
-        mergePolicy.setDeletesPctAllowed(deletesPctAllowed);
+        setMergePolicyType(mergePolicyType);
+        setCompoundFormatThreshold(indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING));
+        setExpungeDeletesAllowed(forceMergeDeletesPctAllowed);
+        setFloorSegmentSetting(floorSegment);
+        setMaxMergesAtOnce(maxMergeAtOnce);
+        setMaxMergedSegment(maxMergedSegment);
+        setSegmentsPerTier(segmentsPerTier);
+        setDeletesPctAllowed(deletesPctAllowed);
         logger.trace(
-            "using [tiered] merge mergePolicy with expunge_deletes_allowed[{}], floor_segment[{}],"
+            "using merge policy with expunge_deletes_allowed[{}], floor_segment[{}],"
                 + " max_merge_at_once[{}], max_merged_segment[{}], segments_per_tier[{}],"
                 + " deletes_pct_allowed[{}]",
             forceMergeDeletesPctAllowed,
```
```diff
@@ -210,32 +254,48 @@
         );
     }
 
-    void setSegmentsPerTier(Double segmentsPerTier) {
-        mergePolicy.setSegmentsPerTier(segmentsPerTier);
+    void setMergePolicyType(Type type) {
+        this.mergePolicyType = type;
+    }
+
+    void setSegmentsPerTier(double segmentsPerTier) {
+        tieredMergePolicy.setSegmentsPerTier(segmentsPerTier);
+        logByteSizeMergePolicy.setMergeFactor((int) segmentsPerTier);
     }
 
     void setMaxMergedSegment(ByteSizeValue maxMergedSegment) {
-        mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac());
+        tieredMergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac());
+        // Note: max merge MB has different semantics on LogByteSizeMergePolicy: it's the maximum size for a segment to be considered for a
+        // merge, i.e. max input segment size, while for TieredMergePolicy, it's the max output segment size. Also LogByteSizeMergePolicy
+        // doesn't try to pack as many segments together as necessary to get as close as possible to the max merged segment size. To
+        // account for that, we divide the max segment size by 2, and in practice, the maximum segment size in an index will be somewhere in
+        // [maxMergedSegment / 2, maxMergedSegment * 5] (assuming a merge factor of 10).
+        logByteSizeMergePolicy.setMaxMergeMB(maxMergedSegment.getMbFrac() / 2);
     }
 
-    void setMaxMergesAtOnce(Integer maxMergeAtOnce) {
-        mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
+    void setMaxMergesAtOnce(int maxMergeAtOnce) {
+        tieredMergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
+        // LogByteSizeMergePolicy ignores this parameter, it always merges "segments per tier" segments at once.
     }
 
     void setFloorSegmentSetting(ByteSizeValue floorSegementSetting) {
-        mergePolicy.setFloorSegmentMB(floorSegementSetting.getMbFrac());
+        tieredMergePolicy.setFloorSegmentMB(floorSegementSetting.getMbFrac());
+        logByteSizeMergePolicy.setMinMergeMB(floorSegementSetting.getMbFrac());
     }
 
     void setExpungeDeletesAllowed(Double value) {
-        mergePolicy.setForceMergeDeletesPctAllowed(value);
+        tieredMergePolicy.setForceMergeDeletesPctAllowed(value);
+        // LogByteSizeMergePolicy doesn't have a similar configuration option
     }
 
     void setCompoundFormatThreshold(CompoundFileThreshold compoundFileThreshold) {
-        compoundFileThreshold.configure(mergePolicy);
+        compoundFileThreshold.configure(tieredMergePolicy);
+        compoundFileThreshold.configure(logByteSizeMergePolicy);
     }
 
     void setDeletesPctAllowed(Double deletesPctAllowed) {
-        mergePolicy.setDeletesPctAllowed(deletesPctAllowed);
+        tieredMergePolicy.setDeletesPctAllowed(deletesPctAllowed);
+        // LogByteSizeMergePolicy doesn't have a similar configuration option
     }
 
     private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier) {
```

Review thread on the `setMaxMergedSegment` note:

Review comment: I think this opens us up for degenerate cases in both ends, either now stopping at 2.5GB or going up to 25GB. I imagine this being provoked by different input styles (either document size, frequency etc), i.e., some data streams work well (hitting around 5GB) whereas others may suffer from worse search or indexing (due to the larger merges) performance? I wonder if we could adapt the log merge policy to be closer to the tiered merge policy here. I.e., if it can merge together 2 adjacent segments but not 3, then merge the 2?

Reply: Thanks for your question, it made me check the actual …
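The reviewer's 2.5GB/25GB figures follow from the halving in `setMaxMergedSegment`. A back-of-the-envelope sketch of that arithmetic (illustrative only; the 5GB target matches the review thread, the merge factor of 10 is `LogByteSizeMergePolicy`'s default, and the class and variable names are invented):

```java
public class MaxMergedSegmentBounds {
    public static void main(String[] args) {
        double maxMergedSegmentMB = 5 * 1024.0; // assumed 5GB target segment size
        int mergeFactor = 10;                   // LogByteSizeMergePolicy default

        // The config passes half the target as the max *input* segment size.
        double maxMergeMB = maxMergedSegmentMB / 2;

        // Degenerate low end: a segment just over maxMergeMB never merges again.
        double lowerBoundMB = maxMergeMB;
        // Degenerate high end: mergeFactor segments just under maxMergeMB merge into one.
        double upperBoundMB = maxMergeMB * mergeFactor;

        System.out.printf("max segment size lands roughly in [%.1f GB, %.1f GB]%n",
            lowerBoundMB / 1024, upperBoundMB / 1024); // [2.5 GB, 25.0 GB]
    }
}
```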
```diff
@@ -258,8 +318,11 @@ private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier)
     }
 
     @SuppressForbidden(reason = "we always use an appropriate merge scheduler alongside this policy so NoMergePolicy#INSTANCE is ok")
-    MergePolicy getMergePolicy() {
-        return mergesEnabled ? mergePolicy : NoMergePolicy.INSTANCE;
+    MergePolicy getMergePolicy(boolean isTimeSeriesIndex) {
+        if (mergesEnabled == false) {
+            return NoMergePolicy.INSTANCE;
+        }
+        return mergePolicyType.getMergePolicy(this, isTimeSeriesIndex);
     }
 
     private static CompoundFileThreshold parseCompoundFormat(String noCFSRatio) {
```
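To see the new entry point in context: a hypothetical, self-contained sketch of how the selected policy reaches Lucene (the real wiring goes through `MergePolicyConfig` and the engine layer, neither reproduced here; `pickPolicy` is an invented stand-in for `getMergePolicy(boolean)`):

```java
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;

public class MergePolicySelectionSketch {
    // Invented stand-in for MergePolicyConfig.getMergePolicy(boolean).
    static MergePolicy pickPolicy(boolean isTimeSeriesIndex) {
        if (isTimeSeriesIndex) {
            // Adjacent-only merges keep segment time ranges non-overlapping
            // when documents are indexed roughly in timestamp order.
            return new LogByteSizeMergePolicy();
        }
        return new TieredMergePolicy();
    }

    public static void main(String[] args) {
        IndexWriterConfig iwc = new IndexWriterConfig(); // default analyzer
        iwc.setMergePolicy(pickPolicy(true));
        System.out.println(iwc.getMergePolicy().getClass().getSimpleName());
    }
}
```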
Review thread on the policy trade-off wording:

Review comment: This is formulated as if it in practice could be (substantially) more expensive to use the log merge policy, perhaps we can elaborate here on why this is unlikely to be the case for data streams?

Reply: Agreed, I pushed an update that makes it sound like a better trade-off.