From 59275bb42d969208cd1d01c7eb813980be56bc78 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 4 Jan 2023 17:20:25 +0100 Subject: [PATCH 01/12] Use LogByteSizeMergePolicy instead of TieredMergePolicy for time-based data. `TieredMergePolicy` is better than `LogByteSizeMergePolicy` at computing cheaper merges, but it does so by allowing itself to merge non-adjacent segments. An important property we get when only merging adjacent segments and data gets indexed in time order is that segments have non-overlapping time ranges. This means that a range query on the time field will only partially match 2 segments at most, and other segments will either fully match or not match at all. This also means that for segments that partially match a range query, the matching docs will likely be clustered together in a sequential range of doc IDs, ie. there will be good data locality. --- .../elasticsearch/index/IndexSettings.java | 4 +- .../index/MergePolicyConfig.java | 65 ++++-- .../elasticsearch/index/shard/IndexShard.java | 2 +- .../index/MergePolicyConfigTests.java | 195 ++++++++++++------ 4 files changed, 180 insertions(+), 86 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 1149a6400808b..245e26c9d38b2 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -1210,8 +1210,8 @@ public long getGcDeletesInMillis() { /** * Returns the merge policy that should be used for this index. 
*/ - public MergePolicy getMergePolicy() { - return mergePolicyConfig.getMergePolicy(); + public MergePolicy getMergePolicy(boolean isTimeseriesIndex) { + return mergePolicyConfig.getMergePolicy(isTimeseriesIndex); } public T getValue(Setting setting) { diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 4aebbfab5883f..85ce55f520580 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -9,6 +9,7 @@ package org.elasticsearch.index; import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.TieredMergePolicy; @@ -101,7 +102,8 @@ */ public final class MergePolicyConfig { - private final TieredMergePolicy mergePolicy = new TieredMergePolicy(); + private final TieredMergePolicy tieredMergePolicy = new TieredMergePolicy(); + private final LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy(); private final Logger logger; private final boolean mergesEnabled; @@ -190,15 +192,15 @@ public final class MergePolicyConfig { ); } maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier); - indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING).configure(mergePolicy); - mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed); - mergePolicy.setFloorSegmentMB(floorSegment.getMbFrac()); - mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce); - mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac()); - mergePolicy.setSegmentsPerTier(segmentsPerTier); - mergePolicy.setDeletesPctAllowed(deletesPctAllowed); + setCompoundFormatThreshold(indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING)); + setExpungeDeletesAllowed(forceMergeDeletesPctAllowed); + 
setFloorSegmentSetting(floorSegment); + setMaxMergesAtOnce(maxMergeAtOnce); + setMaxMergedSegment(maxMergedSegment); + setSegmentsPerTier(segmentsPerTier); + setDeletesPctAllowed(deletesPctAllowed); logger.trace( - "using [tiered] merge mergePolicy with expunge_deletes_allowed[{}], floor_segment[{}]," + "using merge policy with expunge_deletes_allowed[{}], floor_segment[{}]," + " max_merge_at_once[{}], max_merged_segment[{}], segments_per_tier[{}]," + " deletes_pct_allowed[{}]", forceMergeDeletesPctAllowed, @@ -210,32 +212,44 @@ public final class MergePolicyConfig { ); } - void setSegmentsPerTier(Double segmentsPerTier) { - mergePolicy.setSegmentsPerTier(segmentsPerTier); + void setSegmentsPerTier(double segmentsPerTier) { + tieredMergePolicy.setSegmentsPerTier(segmentsPerTier); + logByteSizeMergePolicy.setMergeFactor((int) segmentsPerTier); } void setMaxMergedSegment(ByteSizeValue maxMergedSegment) { - mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac()); + tieredMergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac()); + // Note: max merge MB has different semantics on LogByteSizeMergePolicy: it's the maximum size for a segment to be considered for a + // merge, ie. max input segment size, while for TieredMergePolicy, it's the max output segment size. Also LogByteSizeMergePolicy + // doesn't try to pack as many segments together as necessary to get as close as possible to the max merged segment size. To + // account for that, we divide the max segment size by 2, and in practice, the maximum segment size in an index will be somewhere in + // [maxMergedSegment / 2, maxMergedSegment * 5] (assuming a merge factor of 10). 
+ logByteSizeMergePolicy.setMaxMergeMB(maxMergedSegment.getMbFrac() / 2); } - void setMaxMergesAtOnce(Integer maxMergeAtOnce) { - mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce); + void setMaxMergesAtOnce(int maxMergeAtOnce) { + tieredMergePolicy.setMaxMergeAtOnce(maxMergeAtOnce); + // LogByteSizeMergePolicy ignores this parameter, it always merges "segments per tier" segments at once. } void setFloorSegmentSetting(ByteSizeValue floorSegementSetting) { - mergePolicy.setFloorSegmentMB(floorSegementSetting.getMbFrac()); + tieredMergePolicy.setFloorSegmentMB(floorSegementSetting.getMbFrac()); + logByteSizeMergePolicy.setMinMergeMB(floorSegementSetting.getMbFrac()); } void setExpungeDeletesAllowed(Double value) { - mergePolicy.setForceMergeDeletesPctAllowed(value); + tieredMergePolicy.setForceMergeDeletesPctAllowed(value); + // LogByteSizeMergePolicy doesn't have a similar configuration option } void setCompoundFormatThreshold(CompoundFileThreshold compoundFileThreshold) { - compoundFileThreshold.configure(mergePolicy); + compoundFileThreshold.configure(tieredMergePolicy); + compoundFileThreshold.configure(logByteSizeMergePolicy); } void setDeletesPctAllowed(Double deletesPctAllowed) { - mergePolicy.setDeletesPctAllowed(deletesPctAllowed); + tieredMergePolicy.setDeletesPctAllowed(deletesPctAllowed); + // LogByteSizeMergePolicy doesn't have a similar configuration option } private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerTier) { @@ -258,8 +272,19 @@ private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerT } @SuppressForbidden(reason = "we always use an appropriate merge scheduler alongside this policy so NoMergePolic#INSTANCE is ok") - MergePolicy getMergePolicy() { - return mergesEnabled ? 
mergePolicy : NoMergePolicy.INSTANCE; + MergePolicy getMergePolicy(boolean isTimeSeriesIndex) { + if (mergesEnabled == false) { + return NoMergePolicy.INSTANCE; + } + if (isTimeSeriesIndex) { + // TieredMergePolicy is better than LogByteSizeMergePolicy at computing cheaper merges, but it does so by allowing itself to + // merge non-adjacent segments. An important property we get when only merging adjacent segments and data gets indexed in order + // is that segments have non-overlapping time ranges. This means that a range query on the time field will only partially match + // 2 segments at most, and other segments will either fully match or not match at all. + return logByteSizeMergePolicy; + } else { + return tieredMergePolicy; + } } private static CompoundFileThreshold parseCompoundFormat(String noCFSRatio) { diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 90c7aa17f1568..e34cee929272e 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -3255,7 +3255,7 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { indexSettings, warmer, store, - indexSettings.getMergePolicy(), + indexSettings.getMergePolicy(isTimeseriesIndex), buildIndexAnalyzer(mapperService), similarityService.similarity(mapperService == null ? 
null : mapperService::fieldType), codecService, diff --git a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java index 318f9cdcb0e2b..0c4a12fc58887 100644 --- a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java +++ b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java @@ -12,6 +12,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.SegmentCommitInfo; @@ -52,7 +53,7 @@ public void testCompoundFileSettings() throws IOException { } private void assertCompoundThreshold(Settings settings, double noCFSRatio, ByteSizeValue maxCFSSize) { - MergePolicy mp = new MergePolicyConfig(logger, indexSettings(settings)).getMergePolicy(); + MergePolicy mp = new MergePolicyConfig(logger, indexSettings(settings)).getMergePolicy(randomBoolean()); assertThat(mp.getNoCFSRatio(), equalTo(noCFSRatio)); assertThat(mp.getMaxCFSSegmentSizeMB(), equalTo(maxCFSSize.getMbFrac())); } @@ -66,37 +67,52 @@ public void testNoMerges() { logger, indexSettings(Settings.builder().put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).build()) ); - assertTrue(mp.getMergePolicy() instanceof NoMergePolicy); + assertTrue(mp.getMergePolicy(randomBoolean()) instanceof NoMergePolicy); } public void testUpdateSettings() throws IOException { IndexSettings indexSettings = indexSettings(Settings.EMPTY); - assertThat(indexSettings.getMergePolicy().getNoCFSRatio(), equalTo(1.0)); - assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(1024d)); + assertThat(indexSettings.getMergePolicy(randomBoolean()).getNoCFSRatio(), equalTo(1.0)); + 
assertThat(indexSettings.getMergePolicy(randomBoolean()).getMaxCFSSegmentSizeMB(), equalTo(1024d)); indexSettings = indexSettings(build(0.9)); - assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(0.9)); - assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac())); + assertThat((indexSettings.getMergePolicy(randomBoolean())).getNoCFSRatio(), equalTo(0.9)); + assertThat( + indexSettings.getMergePolicy(randomBoolean()).getMaxCFSSegmentSizeMB(), + equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()) + ); indexSettings.updateIndexMetadata(newIndexMeta("index", build(0.1))); - assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(0.1)); - assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac())); + assertThat((indexSettings.getMergePolicy(randomBoolean())).getNoCFSRatio(), equalTo(0.1)); + assertThat( + indexSettings.getMergePolicy(randomBoolean()).getMaxCFSSegmentSizeMB(), + equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()) + ); indexSettings.updateIndexMetadata(newIndexMeta("index", build(0.0))); - assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(0.0)); - assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac())); + assertThat((indexSettings.getMergePolicy(randomBoolean())).getNoCFSRatio(), equalTo(0.0)); + assertThat( + indexSettings.getMergePolicy(randomBoolean()).getMaxCFSSegmentSizeMB(), + equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()) + ); indexSettings.updateIndexMetadata(newIndexMeta("index", build("true"))); - assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(1.0)); - assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac())); + 
assertThat((indexSettings.getMergePolicy(randomBoolean())).getNoCFSRatio(), equalTo(1.0)); + assertThat( + indexSettings.getMergePolicy(randomBoolean()).getMaxCFSSegmentSizeMB(), + equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()) + ); indexSettings.updateIndexMetadata(newIndexMeta("index", build("false"))); - assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(0.0)); - assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac())); + assertThat((indexSettings.getMergePolicy(randomBoolean())).getNoCFSRatio(), equalTo(0.0)); + assertThat( + indexSettings.getMergePolicy(randomBoolean()).getMaxCFSSegmentSizeMB(), + equalTo(ByteSizeValue.ofBytes(Long.MAX_VALUE).getMbFrac()) + ); indexSettings.updateIndexMetadata(newIndexMeta("index", build("100mb"))); - assertThat((indexSettings.getMergePolicy()).getNoCFSRatio(), equalTo(1.0)); - assertThat(indexSettings.getMergePolicy().getMaxCFSSegmentSizeMB(), equalTo(100d)); + assertThat((indexSettings.getMergePolicy(randomBoolean())).getNoCFSRatio(), equalTo(1.0)); + assertThat(indexSettings.getMergePolicy(randomBoolean()).getMaxCFSSegmentSizeMB(), equalTo(100d)); } public void testTieredMergePolicySettingsUpdate() throws IOException { IndexSettings indexSettings = indexSettings(Settings.EMPTY); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getForceMergeDeletesPctAllowed(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getForceMergeDeletesPctAllowed(), MergePolicyConfig.DEFAULT_EXPUNGE_DELETES_ALLOWED, 0.0d ); @@ -113,13 +129,18 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ) ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getForceMergeDeletesPctAllowed(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getForceMergeDeletesPctAllowed(), MergePolicyConfig.DEFAULT_EXPUNGE_DELETES_ALLOWED + 1.0d, 0.0d ); assertEquals( - ((TieredMergePolicy) 
indexSettings.getMergePolicy()).getFloorSegmentMB(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getFloorSegmentMB(), + MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMbFrac(), + 0 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMinMergeMB(), MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMbFrac(), 0 ); @@ -135,12 +156,20 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ) ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getFloorSegmentMB(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getFloorSegmentMB(), + new ByteSizeValue(MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMb() + 1, ByteSizeUnit.MB).getMbFrac(), + 0.001 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMinMergeMB(), new ByteSizeValue(MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMb() + 1, ByteSizeUnit.MB).getMbFrac(), 0.001 ); - assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnce(), MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE); + assertEquals( + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getMaxMergeAtOnce(), + MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE + ); indexSettings.updateIndexMetadata( newIndexMeta( "index", @@ -153,12 +182,17 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ) ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnce(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getMaxMergeAtOnce(), MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE - 1 ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergedSegmentMB(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getMaxMergedSegmentMB(), + MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac(), + 0.0001 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), 
MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac(), 0.0001 ); @@ -174,13 +208,23 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ) ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergedSegmentMB(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getMaxMergedSegmentMB(), + ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), + 0.0001 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), 0.0001 ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getSegmentsPerTier(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getSegmentsPerTier(), + MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, + 0 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, 0 ); @@ -196,13 +240,18 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ) ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getSegmentsPerTier(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getSegmentsPerTier(), + MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER + 1, + 0 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER + 1, 0 ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getDeletesPctAllowed(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getDeletesPctAllowed(), MergePolicyConfig.DEFAULT_DELETES_PCT_ALLOWED, 0 ); @@ -212,7 +261,7 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING.getKey(), 22).build() ) ); - assertEquals(((TieredMergePolicy) 
indexSettings.getMergePolicy()).getDeletesPctAllowed(), 22, 0); + assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy(false)).getDeletesPctAllowed(), 22, 0); IllegalArgumentException exc = expectThrows( IllegalArgumentException.class, @@ -227,28 +276,46 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { assertThat(cause.getMessage(), containsString("must be <= 50.0")); indexSettings.updateIndexMetadata(newIndexMeta("index", Settings.EMPTY)); // see if defaults are restored assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getForceMergeDeletesPctAllowed(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getForceMergeDeletesPctAllowed(), MergePolicyConfig.DEFAULT_EXPUNGE_DELETES_ALLOWED, 0.0d ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getFloorSegmentMB(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getFloorSegmentMB(), + new ByteSizeValue(MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMb(), ByteSizeUnit.MB).getMbFrac(), + 0.00 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMinMergeMB(), new ByteSizeValue(MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMb(), ByteSizeUnit.MB).getMbFrac(), 0.00 ); - assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnce(), MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergedSegmentMB(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getMaxMergeAtOnce(), + MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE + ); + assertEquals( + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getMaxMergedSegmentMB(), + ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), + 0.0001 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), 
ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), 0.0001 ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getSegmentsPerTier(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getSegmentsPerTier(), + MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, + 0 + ); + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, 0 ); assertEquals( - ((TieredMergePolicy) indexSettings.getMergePolicy()).getDeletesPctAllowed(), + ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getDeletesPctAllowed(), MergePolicyConfig.DEFAULT_DELETES_PCT_ALLOWED, 0 ); @@ -275,37 +342,39 @@ private Settings build(ByteSizeValue value) { } public void testCompoundFileConfiguredByByteSize() throws IOException { - try (Directory dir = newDirectory()) { - // index.compound_format: 1gb, the merge will use a compound file - MergePolicy mp = new MergePolicyConfig(logger, indexSettings(Settings.EMPTY)).getMergePolicy(); - try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setMergePolicy(mp))) { - w.addDocument(new Document()); - w.flush(); - w.addDocument(new Document()); - w.forceMerge(1); + for (boolean isTimeSeriesIndex : new boolean[] { false, true }) { + try (Directory dir = newDirectory()) { + // index.compound_format: 1gb, the merge will use a compound file + MergePolicy mp = new MergePolicyConfig(logger, indexSettings(Settings.EMPTY)).getMergePolicy(isTimeSeriesIndex); + try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setMergePolicy(mp))) { + w.addDocument(new Document()); + w.flush(); + w.addDocument(new Document()); + w.forceMerge(1); + } + try (DirectoryReader reader = DirectoryReader.open(dir)) { + LeafReader leaf = getOnlyLeafReader(reader); + SegmentCommitInfo sci = ((SegmentReader) leaf).getSegmentInfo(); + assertEquals(IndexWriter.SOURCE_MERGE, 
sci.info.getDiagnostics().get(IndexWriter.SOURCE)); + assertTrue(sci.info.getUseCompoundFile()); + } } - try (DirectoryReader reader = DirectoryReader.open(dir)) { - LeafReader leaf = getOnlyLeafReader(reader); - SegmentCommitInfo sci = ((SegmentReader) leaf).getSegmentInfo(); - assertEquals(IndexWriter.SOURCE_MERGE, sci.info.getDiagnostics().get(IndexWriter.SOURCE)); - assertTrue(sci.info.getUseCompoundFile()); - } - } - // index.compound_format: 1b, the merge will not use a compound file - try (Directory dir = newDirectory()) { - MergePolicy mp = new MergePolicyConfig(logger, indexSettings(build("1b"))).getMergePolicy(); - try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setMergePolicy(mp))) { - w.addDocument(new Document()); - w.flush(); - w.addDocument(new Document()); - w.forceMerge(1); - } - try (DirectoryReader reader = DirectoryReader.open(dir)) { - LeafReader leaf = getOnlyLeafReader(reader); - SegmentCommitInfo sci = ((SegmentReader) leaf).getSegmentInfo(); - assertEquals(IndexWriter.SOURCE_MERGE, sci.info.getDiagnostics().get(IndexWriter.SOURCE)); - assertFalse(sci.info.getUseCompoundFile()); + // index.compound_format: 1b, the merge will not use a compound file + try (Directory dir = newDirectory()) { + MergePolicy mp = new MergePolicyConfig(logger, indexSettings(build("1b"))).getMergePolicy(isTimeSeriesIndex); + try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setMergePolicy(mp))) { + w.addDocument(new Document()); + w.flush(); + w.addDocument(new Document()); + w.forceMerge(1); + } + try (DirectoryReader reader = DirectoryReader.open(dir)) { + LeafReader leaf = getOnlyLeafReader(reader); + SegmentCommitInfo sci = ((SegmentReader) leaf).getSegmentInfo(); + assertEquals(IndexWriter.SOURCE_MERGE, sci.info.getDiagnostics().get(IndexWriter.SOURCE)); + assertFalse(sci.info.getUseCompoundFile()); + } } } } From 81c1bca97b05f48cf10e00bf23e3d78ff8776850 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 4 Jan 2023 
18:05:36 +0100 Subject: [PATCH 02/12] Update docs/changelog/92684.yaml --- docs/changelog/92684.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/92684.yaml diff --git a/docs/changelog/92684.yaml b/docs/changelog/92684.yaml new file mode 100644 index 0000000000000..18896ce2a9c27 --- /dev/null +++ b/docs/changelog/92684.yaml @@ -0,0 +1,6 @@ +pr: 92684 +summary: Use `LogByteSizeMergePolicy` instead of `TieredMergePolicy` for time-based + data +area: Engine +type: enhancement +issues: [] From a657bda6f8cd87127a5ea6d472782972d944e990 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 4 Jan 2023 18:42:55 +0100 Subject: [PATCH 03/12] Add escape hatch. --- .../common/settings/IndexScopedSettings.java | 1 + .../elasticsearch/index/IndexSettings.java | 1 + .../index/MergePolicyConfig.java | 56 ++++++++++++++++--- .../index/MergePolicyConfigTests.java | 18 +++++- 4 files changed, 64 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 0743569359142..fe3be63d1a9d1 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -89,6 +89,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexingSlowLog.INDEX_INDEXING_SLOWLOG_REFORMAT_SETTING, IndexingSlowLog.INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG_SETTING, MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, + MergePolicyConfig.INDEX_MERGE_POLICY_TYPE_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_FLOOR_SEGMENT_SETTING, diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java 
b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 245e26c9d38b2..3394219a26e23 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -778,6 +778,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setCompoundFormatThreshold ); + scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_MERGE_POLICY_TYPE_SETTING, mergePolicyConfig::setMergePolicyType); scopedSettings.addSettingsUpdateConsumer( MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING, mergePolicyConfig::setDeletesPctAllowed diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 85ce55f520580..42512a22c1d98 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -106,6 +106,7 @@ public final class MergePolicyConfig { private final LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy(); private final Logger logger; private final boolean mergesEnabled; + private volatile Type mergePolicyType; public static final double DEFAULT_EXPUNGE_DELETES_ALLOWED = 10d; public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = new ByteSizeValue(2, ByteSizeUnit.MB); @@ -122,6 +123,45 @@ public final class MergePolicyConfig { Property.IndexScope ); + public static enum Type { + UNSET { + @Override + MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) { + if (isTimeSeriesIndex) { + // TieredMergePolicy is better than LogByteSizeMergePolicy at computing cheaper merges, but it does so by allowing + // itself to merge non-adjacent segments. 
An important property we get when only merging adjacent segments and data gets + // indexed in order is that segments have non-overlapping time ranges. This means that a range query on the time field + // will only partially match 2 segments at most, and other segments will either fully match or not match at all. + return config.logByteSizeMergePolicy; + } else { + return config.tieredMergePolicy; + } + } + }, + TIERED { + @Override + MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) { + return config.tieredMergePolicy; + } + }, + LOG_BYTE_SIZE { + @Override + MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) { + return config.logByteSizeMergePolicy; + } + }; + + abstract MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeries); + } + + public static final Setting INDEX_MERGE_POLICY_TYPE_SETTING = Setting.enumSetting( + Type.class, + "index.merge.policy.type", + Type.UNSET, + Property.Dynamic, + Property.IndexScope + ); + public static final Setting INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED_SETTING = Setting.doubleSetting( "index.merge.policy.expunge_deletes_allowed", DEFAULT_EXPUNGE_DELETES_ALLOWED, @@ -176,6 +216,7 @@ public final class MergePolicyConfig { MergePolicyConfig(Logger logger, IndexSettings indexSettings) { this.logger = logger; + Type mergePolicyType = indexSettings.getValue(INDEX_MERGE_POLICY_TYPE_SETTING); double forceMergeDeletesPctAllowed = indexSettings.getValue(INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED_SETTING); // percentage ByteSizeValue floorSegment = indexSettings.getValue(INDEX_MERGE_POLICY_FLOOR_SEGMENT_SETTING); int maxMergeAtOnce = indexSettings.getValue(INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_SETTING); @@ -192,6 +233,7 @@ public final class MergePolicyConfig { ); } maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier); + setMergePolicyType(mergePolicyType); 
setCompoundFormatThreshold(indexSettings.getValue(INDEX_COMPOUND_FORMAT_SETTING)); setExpungeDeletesAllowed(forceMergeDeletesPctAllowed); setFloorSegmentSetting(floorSegment); @@ -212,6 +254,10 @@ public final class MergePolicyConfig { ); } + void setMergePolicyType(Type type) { + this.mergePolicyType = type; + } + void setSegmentsPerTier(double segmentsPerTier) { tieredMergePolicy.setSegmentsPerTier(segmentsPerTier); logByteSizeMergePolicy.setMergeFactor((int) segmentsPerTier); @@ -276,15 +322,7 @@ MergePolicy getMergePolicy(boolean isTimeSeriesIndex) { if (mergesEnabled == false) { return NoMergePolicy.INSTANCE; } - if (isTimeSeriesIndex) { - // TieredMergePolicy is better than LogByteSizeMergePolicy at computing cheaper merges, but it does so by allowing itself to - // merge non-adjacent segments. An important property we get when only merging adjacent segments and data gets indexed in order - // is that segments have non-overlapping time ranges. This means that a range query on the time field will only partially match - // 2 segments at most, and other segments will either fully match or not match at all. 
- return logByteSizeMergePolicy; - } else { - return tieredMergePolicy; - } + return mergePolicyType.getMergePolicy(this, isTimeSeriesIndex); } private static CompoundFileThreshold parseCompoundFormat(String noCFSRatio) { diff --git a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java index 0c4a12fc58887..96bd77fce1361 100644 --- a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java +++ b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESTestCase; +import org.hamcrest.Matchers; import java.io.IOException; @@ -107,6 +108,17 @@ public void testUpdateSettings() throws IOException { indexSettings.updateIndexMetadata(newIndexMeta("index", build("100mb"))); assertThat((indexSettings.getMergePolicy(randomBoolean())).getNoCFSRatio(), equalTo(1.0)); assertThat(indexSettings.getMergePolicy(randomBoolean()).getMaxCFSSegmentSizeMB(), equalTo(100d)); + indexSettings.updateIndexMetadata( + newIndexMeta("index", Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_TYPE_SETTING.getKey(), "tiered").build()) + ); + assertThat(indexSettings.getMergePolicy(randomBoolean()), Matchers.instanceOf(TieredMergePolicy.class)); + indexSettings.updateIndexMetadata( + newIndexMeta( + "index", + Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_TYPE_SETTING.getKey(), "log_byte_size").build() + ) + ); + assertThat(indexSettings.getMergePolicy(randomBoolean()), Matchers.instanceOf(LogByteSizeMergePolicy.class)); } public void testTieredMergePolicySettingsUpdate() throws IOException { @@ -193,7 +205,7 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - 
MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac(), + MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac() / 2, 0.0001 ); indexSettings.updateIndexMetadata( @@ -214,7 +226,7 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), + ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac() / 2, 0.0001 ); @@ -301,7 +313,7 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), + ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac() / 2, 0.0001 ); assertEquals( From 447782bd5e3f4438deed47c3de4dc7c5575d4d2e Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 4 Jan 2023 19:04:14 +0100 Subject: [PATCH 04/12] checkstyle --- .../main/java/org/elasticsearch/index/MergePolicyConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 42512a22c1d98..0a2a0e45d96ce 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -123,7 +123,7 @@ public final class MergePolicyConfig { Property.IndexScope ); - public static enum Type { + public enum Type { UNSET { @Override MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) { From 2e2db7841cde9feaa17f150851af076116b666aa Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 5 Jan 2023 14:03:10 +0100 Subject: [PATCH 05/12] feedback --- 
.../elasticsearch/index/IndexSettings.java | 4 +- .../index/MergePolicyConfig.java | 39 +++++++++++-------- .../elasticsearch/index/shard/IndexShard.java | 6 +-- 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 3394219a26e23..0a70a666c586e 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -1211,8 +1211,8 @@ public long getGcDeletesInMillis() { /** * Returns the merge policy that should be used for this index. */ - public MergePolicy getMergePolicy(boolean isTimeseriesIndex) { - return mergePolicyConfig.getMergePolicy(isTimeseriesIndex); + public MergePolicy getMergePolicy(boolean isTimeBasedIndex) { + return mergePolicyConfig.getMergePolicy(isTimeBasedIndex); } public T getValue(Setting setting) { diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 0a2a0e45d96ce..d87cc65533e5c 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -126,12 +126,24 @@ public final class MergePolicyConfig { public enum Type { UNSET { @Override - MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) { - if (isTimeSeriesIndex) { - // TieredMergePolicy is better than LogByteSizeMergePolicy at computing cheaper merges, but it does so by allowing - // itself to merge non-adjacent segments. An important property we get when only merging adjacent segments and data gets - // indexed in order is that segments have non-overlapping time ranges. This means that a range query on the time field - // will only partially match 2 segments at most, and other segments will either fully match or not match at all. 
+ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { + if (isTimeBasedIndex) { + // With time-based data, it's important that the merge policy only merges adjacent segments, so that segments end up + // with non-overlapping time ranges if data gets indexed in order. This makes queries more efficient, as range filters + // on the timestamp are more likely to either fully match a segment or not match it at all, which Lucene handles more + // efficiently than a partially matching segment. This also plays nicely with the fact that recent data is more heavily + // queried than older data, so some segments are more likely to not get touched at all by queries if they don't + // intersect with the query's range. + + // The downside of only doing adjacent merges is that it may result in slightly less efficient merging if there is a lot + // of variance in the size of flushes. Allowing merges of non-adjacent segments also makes it possible to reclaim + // deletes a bit more efficiently by merging together segments that have the most deletes, even though they might not be + // adjacent. But overall, the benefits of only doing adjacent merging exceed the downsides for time-based data. + + // LogByteSizeMergePolicy is similar to TieredMergePolicy, as it also tries to organize segments into tiers of + // exponential sizes. The main difference is that it never merges non-adjacent segments, which is an interesting + // property for time-based data as described above. 
+ return config.logByteSizeMergePolicy; } else { return config.tieredMergePolicy; @@ -140,13 +152,13 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) }, TIERED { @Override - MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) { + MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { return config.tieredMergePolicy; } }, LOG_BYTE_SIZE { @Override - MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeSeriesIndex) { + MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { return config.logByteSizeMergePolicy; } }; @@ -265,12 +277,7 @@ void setSegmentsPerTier(double segmentsPerTier) { void setMaxMergedSegment(ByteSizeValue maxMergedSegment) { tieredMergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac()); - // Note: max merge MB has different semantics on LogByteSizeMergePolicy: it's the maximum size for a segment to be considered for a - // merge, ie. max input segment size, while for TieredMergePolicy, it's the max output segment size. Also LogByteSizeMergePolicy - // doesn't try to pack as many segments together as necessary to get as close as possible to the max merged segment size. To - // account for that, we divide the max segment size by 2, and in practice, the maximum segment size in an index will be somewhere in - // [maxMergedSegment / 2, maxMergedSegment * 5] (assuming a merge factor of 10). 
- logByteSizeMergePolicy.setMaxMergeMB(maxMergedSegment.getMbFrac() / 2); + logByteSizeMergePolicy.setMaxMergeMB(maxMergedSegment.getMbFrac()); } void setMaxMergesAtOnce(int maxMergeAtOnce) { @@ -318,11 +325,11 @@ private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerT } @SuppressForbidden(reason = "we always use an appropriate merge scheduler alongside this policy so NoMergePolic#INSTANCE is ok") - MergePolicy getMergePolicy(boolean isTimeSeriesIndex) { + MergePolicy getMergePolicy(boolean isTimeBasedIndex) { if (mergesEnabled == false) { return NoMergePolicy.INSTANCE; } - return mergePolicyType.getMergePolicy(this, isTimeSeriesIndex); + return mergePolicyType.getMergePolicy(this, isTimeBasedIndex); } private static CompoundFileThreshold parseCompoundFormat(String noCFSRatio) { diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index e34cee929272e..7e71923e5ce2c 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -3248,14 +3248,14 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { this.warmer.warm(reader); } }; - final boolean isTimeseriesIndex = mapperService == null ? false : mapperService.mappingLookup().hasTimestampField(); + final boolean isTimeBasedIndex = mapperService == null ? false : mapperService.mappingLookup().hasTimestampField(); return new EngineConfig( shardId, threadPool, indexSettings, warmer, store, - indexSettings.getMergePolicy(isTimeseriesIndex), + indexSettings.getMergePolicy(isTimeBasedIndex), buildIndexAnalyzer(mapperService), similarityService.similarity(mapperService == null ? 
null : mapperService::fieldType), codecService, @@ -3272,7 +3272,7 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { replicationTracker::getRetentionLeases, this::getOperationPrimaryTerm, snapshotCommitSupplier, - isTimeseriesIndex ? TIMESERIES_LEAF_READERS_SORTER : null, + isTimeBasedIndex ? TIMESERIES_LEAF_READERS_SORTER : null, relativeTimeInNanosSupplier, indexCommitListener, recoveryState != null && recoveryState.getPrimary() From b986142d13e00456a1685d8eeb6d252b35b21d80 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 5 Jan 2023 14:28:23 +0100 Subject: [PATCH 06/12] fix tests --- .../org/elasticsearch/index/MergePolicyConfigTests.java | 6 +++--- .../xpack/ccr/action/TransportResumeFollowAction.java | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java index 96bd77fce1361..5dc84aff05203 100644 --- a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java +++ b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java @@ -205,7 +205,7 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac() / 2, + MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac(), 0.0001 ); indexSettings.updateIndexMetadata( @@ -226,7 +226,7 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac() / 2, + ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), 0.0001 ); @@ -313,7 +313,7 @@ public void testTieredMergePolicySettingsUpdate() throws 
IOException { ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac() / 2, + ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), 0.0001 ); assertEquals( diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java index 0486381f52d72..c59473c670a1b 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java @@ -498,6 +498,7 @@ static String[] extractLeaderShardHistoryUUIDs(Map ccrIndexMetad IndexingSlowLog.INDEX_INDEXING_SLOWLOG_REFORMAT_SETTING, IndexingSlowLog.INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG_SETTING, MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, + MergePolicyConfig.INDEX_MERGE_POLICY_TYPE_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_SEGMENTS_PER_TIER_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING, From ae7b9ef755cddbd1fa30007d6a3d9fad3f0d9e0c Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 13 Jan 2023 10:26:42 +0100 Subject: [PATCH 07/12] Adjust semantics of segments per tier. 
--- .../main/java/org/elasticsearch/index/MergePolicyConfig.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index d87cc65533e5c..9673d8bd9ac09 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -272,7 +272,9 @@ void setMergePolicyType(Type type) { void setSegmentsPerTier(double segmentsPerTier) { tieredMergePolicy.setSegmentsPerTier(segmentsPerTier); - logByteSizeMergePolicy.setMergeFactor((int) segmentsPerTier); + // LogByteSizeMergePolicy merges segments as soon as it finds `mergeFactor` segments on the same tier. So if you want to allow + // `segmentsPerTier` segments on each tier, you need a merge factor equal to `1 + segmentsPerTier`. + logByteSizeMergePolicy.setMergeFactor(1 + (int) segmentsPerTier); } void setMaxMergedSegment(ByteSizeValue maxMergedSegment) { From 501943f08125df3b7aee2d5de4ac43216e52ce45 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 13 Jan 2023 13:22:51 +0100 Subject: [PATCH 08/12] Fix tests. 
--- .../org/elasticsearch/index/MergePolicyConfigTests.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java index 5dc84aff05203..d8cc9039569bd 100644 --- a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java +++ b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java @@ -236,7 +236,7 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { 0 ); assertEquals( - ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor() - 1, MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, 0 ); @@ -257,7 +257,7 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { 0 ); assertEquals( - ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor() - 1, MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER + 1, 0 ); @@ -322,8 +322,8 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { 0 ); assertEquals( - ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), - MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor() - 1, + MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER , 0 ); assertEquals( From 1757ddbd9883b510473836201d05b8a76b2a9318 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 23 Jan 2023 14:35:28 +0100 Subject: [PATCH 09/12] Separate configuration for merge_factor. 
--- .../common/settings/IndexScopedSettings.java | 1 + .../elasticsearch/index/IndexSettings.java | 4 +++ .../index/MergePolicyConfig.java | 28 ++++++++++++++++--- .../index/MergePolicyConfigTests.java | 28 +++++++++++++------ 4 files changed, 48 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index fe3be63d1a9d1..5d160c8ad34c1 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -97,6 +97,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_EXPLICIT_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_SEGMENTS_PER_TIER_SETTING, + MergePolicyConfig.INDEX_MERGE_POLICY_MERGE_FACTOR_SETTING, IndexSortConfig.INDEX_SORT_FIELD_SETTING, IndexSortConfig.INDEX_SORT_ORDER_SETTING, IndexSortConfig.INDEX_SORT_MISSING_SETTING, diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 0a70a666c586e..34b54bc55b5cd 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -804,6 +804,10 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti MergePolicyConfig.INDEX_MERGE_POLICY_SEGMENTS_PER_TIER_SETTING, mergePolicyConfig::setSegmentsPerTier ); + scopedSettings.addSettingsUpdateConsumer( + MergePolicyConfig.INDEX_MERGE_POLICY_MERGE_FACTOR_SETTING, + mergePolicyConfig::setMergeFactor + ); scopedSettings.addSettingsUpdateConsumer( MergeSchedulerConfig.MAX_THREAD_COUNT_SETTING, diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java 
b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 9673d8bd9ac09..36464069c4aba 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -113,6 +113,14 @@ public final class MergePolicyConfig { public static final int DEFAULT_MAX_MERGE_AT_ONCE = 10; public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = new ByteSizeValue(5, ByteSizeUnit.GB); public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d; + /** + * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 20. This default value differs from the Lucene default of 10 in + * order to account for the fact that Elasticsearch uses {@link LogByteSizeMergePolicy} for time-based data, where it usually makes + * sense to merge data less aggressively, and because {@link LogByteSizeMergePolicy} merges segments more aggressively than + * {@link TieredMergePolicy} for the same number of segments per tier / merge factor because {@link TieredMergePolicy} makes decisions + * at the whole index level, while {@link LogByteSizeMergePolicy} makes decisions on a per-tier basis. 
+ */ + public static final int DEFAULT_MERGE_FACTOR = 20; public static final double DEFAULT_DELETES_PCT_ALLOWED = 33.0d; private static final String INDEX_COMPOUND_FORMAT_SETTING_KEY = "index.compound_format"; public static final Setting INDEX_COMPOUND_FORMAT_SETTING = new Setting<>( @@ -215,6 +223,13 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { Property.Dynamic, Property.IndexScope ); + public static final Setting INDEX_MERGE_POLICY_MERGE_FACTOR_SETTING = Setting.intSetting( + "index.merge.policy.merge_factor", + 20, + 2, + Property.Dynamic, + Property.IndexScope + ); public static final Setting INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING = Setting.doubleSetting( "index.merge.policy.deletes_pct_allowed", DEFAULT_DELETES_PCT_ALLOWED, @@ -236,6 +251,7 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { // won't they end up with many segments? ByteSizeValue maxMergedSegment = indexSettings.getValue(INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT_SETTING); double segmentsPerTier = indexSettings.getValue(INDEX_MERGE_POLICY_SEGMENTS_PER_TIER_SETTING); + int mergeFactor = indexSettings.getValue(INDEX_MERGE_POLICY_MERGE_FACTOR_SETTING); double deletesPctAllowed = indexSettings.getValue(INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING); this.mergesEnabled = indexSettings.getSettings().getAsBoolean(INDEX_MERGE_ENABLED, true); if (mergesEnabled == false) { @@ -252,6 +268,7 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { setMaxMergesAtOnce(maxMergeAtOnce); setMaxMergedSegment(maxMergedSegment); setSegmentsPerTier(segmentsPerTier); + setMergeFactor(mergeFactor); setDeletesPctAllowed(deletesPctAllowed); logger.trace( "using merge policy with expunge_deletes_allowed[{}], floor_segment[{}]," @@ -272,9 +289,12 @@ void setMergePolicyType(Type type) { void setSegmentsPerTier(double segmentsPerTier) { tieredMergePolicy.setSegmentsPerTier(segmentsPerTier); - // LogByteSizeMergePolicy 
merges segments as soon as it finds `mergeFactor` segments on the same tier. So if you want to allow - // `segmentsPerTier` segments on each tier, you need a merge factor equal to `1 + segmentsPerTier`. - logByteSizeMergePolicy.setMergeFactor(1 + (int) segmentsPerTier); + // LogByteSizeMergePolicy ignores this parameter, it always tries to have between 1 and merge_factor - 1 segments per tier. + } + + void setMergeFactor(int mergeFactor) { + // TieredMergePolicy ignores this setting, it configures a number of segments per tier instead, which has different semantics. + logByteSizeMergePolicy.setMergeFactor(mergeFactor); } void setMaxMergedSegment(ByteSizeValue maxMergedSegment) { @@ -284,7 +304,7 @@ void setMaxMergedSegment(ByteSizeValue maxMergedSegment) { void setMaxMergesAtOnce(int maxMergeAtOnce) { tieredMergePolicy.setMaxMergeAtOnce(maxMergeAtOnce); - // LogByteSizeMergePolicy ignores this parameter, it always merges "segments per tier" segments at once. + // LogByteSizeMergePolicy ignores this parameter, it always merges merge_factor segments at once. 
} void setFloorSegmentSetting(ByteSizeValue floorSegementSetting) { diff --git a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java index d8cc9039569bd..fefab0abec0a9 100644 --- a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java +++ b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java @@ -235,11 +235,6 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, 0 ); - assertEquals( - ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor() - 1, - MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, - 0 - ); indexSettings.updateIndexMetadata( newIndexMeta( "index", @@ -256,9 +251,24 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER + 1, 0 ); + assertEquals( - ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor() - 1, - MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER + 1, + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), + MergePolicyConfig.DEFAULT_MERGE_FACTOR, + 0 + ); + indexSettings.updateIndexMetadata( + newIndexMeta( + "index", + Settings.builder() + .put(MergePolicyConfig.INDEX_MERGE_POLICY_MERGE_FACTOR_SETTING.getKey(), MergePolicyConfig.DEFAULT_MERGE_FACTOR + 1) + .build() + ) + ); + + assertEquals( + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), + MergePolicyConfig.DEFAULT_MERGE_FACTOR + 1, 0 ); @@ -322,8 +332,8 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { 0 ); assertEquals( - ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor() - 1, - MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER , + ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMergeFactor(), + MergePolicyConfig.DEFAULT_MERGE_FACTOR, 0 ); assertEquals( From 
a112f4bbf051e228093959b203b1bc957470610c Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 24 Jan 2023 08:57:29 +0100 Subject: [PATCH 10/12] Decrease merge factor to 16. --- .../java/org/elasticsearch/index/MergePolicyConfig.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 36464069c4aba..181b9c7bc5404 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -114,13 +114,13 @@ public final class MergePolicyConfig { public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = new ByteSizeValue(5, ByteSizeUnit.GB); public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d; /** - * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 20. This default value differs from the Lucene default of 10 in + * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 16. This default value differs from the Lucene default of 10 in * order to account for the fact that Elasticsearch uses {@link LogByteSizeMergePolicy} for time-based data, where it usually makes * sense to merge data less aggressively, and because {@link LogByteSizeMergePolicy} merges segments more aggressively than * {@link TieredMergePolicy} for the same number of segments per tier / merge factor because {@link TieredMergePolicy} makes decisions * at the whole index level, while {@link LogByteSizeMergePolicy} makes decisions on a per-tier basis. 
*/ - public static final int DEFAULT_MERGE_FACTOR = 20; + public static final int DEFAULT_MERGE_FACTOR = 16; public static final double DEFAULT_DELETES_PCT_ALLOWED = 33.0d; private static final String INDEX_COMPOUND_FORMAT_SETTING_KEY = "index.compound_format"; public static final Setting INDEX_COMPOUND_FORMAT_SETTING = new Setting<>( @@ -225,7 +225,7 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { ); public static final Setting INDEX_MERGE_POLICY_MERGE_FACTOR_SETTING = Setting.intSetting( "index.merge.policy.merge_factor", - 20, + DEFAULT_MERGE_FACTOR, 2, Property.Dynamic, Property.IndexScope From 28afd5c68c1fad8455cc8abf0834d65e0f2c7b89 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 24 Jan 2023 10:13:19 +0100 Subject: [PATCH 11/12] Fix CCR tests. --- .../xpack/ccr/action/TransportResumeFollowAction.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java index c59473c670a1b..bda6f29cd3ec9 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java @@ -501,6 +501,7 @@ static String[] extractLeaderShardHistoryUUIDs(Map ccrIndexMetad MergePolicyConfig.INDEX_MERGE_POLICY_TYPE_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_SEGMENTS_PER_TIER_SETTING, + MergePolicyConfig.INDEX_MERGE_POLICY_MERGE_FACTOR_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED_SETTING, MergePolicyConfig.INDEX_MERGE_POLICY_FLOOR_SEGMENT_SETTING, From 7cd99d604fda016fcc17e120d1694648fcc1e8f1 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 2 Feb 2023 
11:17:18 +0100 Subject: [PATCH 12/12] Remove the max merged segment size on time-based data. --- .../index/MergePolicyConfig.java | 19 ++++++++++++++++--- .../index/MergePolicyConfigTests.java | 15 ++++++--------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java index 99defa9207cde..4a8d5ed905cff 100644 --- a/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java +++ b/server/src/main/java/org/elasticsearch/index/MergePolicyConfig.java @@ -112,6 +112,12 @@ public final class MergePolicyConfig { public static final ByteSizeValue DEFAULT_FLOOR_SEGMENT = new ByteSizeValue(2, ByteSizeUnit.MB); public static final int DEFAULT_MAX_MERGE_AT_ONCE = 10; public static final ByteSizeValue DEFAULT_MAX_MERGED_SEGMENT = new ByteSizeValue(5, ByteSizeUnit.GB); + /** + * Time-based data generally gets rolled over, so there is not much value in enforcing a maximum segment size, which has the side effect + * of merging fewer segments together than the merge factor, which in turn increases write amplification. So we set an arbitrarily high + * roof that serves as a protection that we expect to never hit. + */ + public static final ByteSizeValue DEFAULT_MAX_TIME_BASED_MERGED_SEGMENT = new ByteSizeValue(100, ByteSizeUnit.GB); public static final double DEFAULT_SEGMENTS_PER_TIER = 10.0d; /** * A default value for {@link LogByteSizeMergePolicy}'s merge factor: 16. This default value differs from the Lucene default of 10 in @@ -212,7 +218,8 @@ MergePolicy getMergePolicy(MergePolicyConfig config, boolean isTimeBasedIndex) { ); public static final Setting INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT_SETTING = Setting.byteSizeSetting( "index.merge.policy.max_merged_segment", - DEFAULT_MAX_MERGED_SEGMENT, + // We're not using DEFAULT_MAX_MERGED_SEGMENT here as we want different defaults for time-based data vs.
non-time based + new ByteSizeValue(0, ByteSizeUnit.BYTES), Property.Dynamic, Property.IndexScope ); @@ -298,8 +305,14 @@ void setMergeFactor(int mergeFactor) { } void setMaxMergedSegment(ByteSizeValue maxMergedSegment) { - tieredMergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac()); - logByteSizeMergePolicy.setMaxMergeMB(maxMergedSegment.getMbFrac()); + // We use 0 as a placeholder for "unset". + if (maxMergedSegment.getBytes() == 0) { + tieredMergePolicy.setMaxMergedSegmentMB(DEFAULT_MAX_MERGED_SEGMENT.getMbFrac()); + logByteSizeMergePolicy.setMaxMergeMB(DEFAULT_MAX_TIME_BASED_MERGED_SEGMENT.getMbFrac()); + } else { + tieredMergePolicy.setMaxMergedSegmentMB(maxMergedSegment.getMbFrac()); + logByteSizeMergePolicy.setMaxMergeMB(maxMergedSegment.getMbFrac()); + } } void setMaxMergesAtOnce(int maxMergeAtOnce) { diff --git a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java index fefab0abec0a9..8619454bc111e 100644 --- a/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java +++ b/server/src/test/java/org/elasticsearch/index/MergePolicyConfigTests.java @@ -205,28 +205,25 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac(), + MergePolicyConfig.DEFAULT_MAX_TIME_BASED_MERGED_SEGMENT.getMbFrac(), 0.0001 ); indexSettings.updateIndexMetadata( newIndexMeta( "index", Settings.builder() - .put( - MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT_SETTING.getKey(), - ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1) - ) + .put(MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT_SETTING.getKey(), ByteSizeValue.ofGb(8)) .build() ) ); assertEquals( ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getMaxMergedSegmentMB(), - 
ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), + ByteSizeValue.ofGb(8).getMbFrac(), 0.0001 ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), + ByteSizeValue.ofGb(8).getMbFrac(), 0.0001 ); @@ -318,12 +315,12 @@ public void testTieredMergePolicySettingsUpdate() throws IOException { ); assertEquals( ((TieredMergePolicy) indexSettings.getMergePolicy(false)).getMaxMergedSegmentMB(), - ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), + MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac(), 0.0001 ); assertEquals( ((LogByteSizeMergePolicy) indexSettings.getMergePolicy(true)).getMaxMergeMB(), - ByteSizeValue.ofBytes(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), + MergePolicyConfig.DEFAULT_MAX_TIME_BASED_MERGED_SEGMENT.getMbFrac(), 0.0001 ); assertEquals(