diff --git a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 5bef7bee4f10b..f707ecc1fe65c 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -27,6 +27,7 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.document.LatLonDocValuesField; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FilterLeafReader; @@ -96,6 +97,8 @@ public class Lucene { assert annotation == null : "DocValuesFormat " + LATEST_DOC_VALUES_FORMAT + " is deprecated" ; } + public static final String SOFT_DELETE_FIELD = "__soft_delete"; + public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer()); public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer()); @@ -829,4 +832,11 @@ public int length() { } }; } + + /** + * Returns a numeric doc-values field that can be used to soft-delete documents. 
+ */ + public static NumericDocValuesField newSoftDeleteField() { + return new NumericDocValuesField(SOFT_DELETE_FIELD, 1); + } } diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index c916f33febb22..0e7f73aacfa16 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -130,6 +130,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.MAX_REGEX_LENGTH_SETTING, ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING, IndexSettings.INDEX_GC_DELETES_SETTING, + IndexSettings.INDEX_SOFT_DELETES_SETTING, IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING, UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING, diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index e3c82b8abd39f..68b97fa220d5a 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -238,6 +238,12 @@ public final class IndexSettings { public static final Setting INDEX_GC_DELETES_SETTING = Setting.timeSetting("index.gc_deletes", DEFAULT_GC_DELETES, new TimeValue(-1, TimeUnit.MILLISECONDS), Property.Dynamic, Property.IndexScope); + + /** + * Specifies if the index should use soft-delete instead of hard-delete for update/delete operations. + */ + public static final Setting INDEX_SOFT_DELETES_SETTING = Setting.boolSetting("index.soft_deletes", true, Property.IndexScope); + /** * The maximum number of refresh listeners allows on this shard. 
*/ @@ -282,6 +288,7 @@ public final class IndexSettings { private final IndexSortConfig indexSortConfig; private final IndexScopedSettings scopedSettings; private long gcDeletesInMillis = DEFAULT_GC_DELETES.millis(); + private final boolean softDeleteEnabled; private volatile boolean warmerEnabled; private volatile int maxResultWindow; private volatile int maxInnerResultWindow; @@ -393,6 +400,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti generationThresholdSize = scopedSettings.get(INDEX_TRANSLOG_GENERATION_THRESHOLD_SIZE_SETTING); mergeSchedulerConfig = new MergeSchedulerConfig(this); gcDeletesInMillis = scopedSettings.get(INDEX_GC_DELETES_SETTING).getMillis(); + softDeleteEnabled = version.onOrAfter(Version.V_7_0_0_alpha1) && scopedSettings.get(INDEX_SOFT_DELETES_SETTING); warmerEnabled = scopedSettings.get(INDEX_WARMER_ENABLED_SETTING); maxResultWindow = scopedSettings.get(MAX_RESULT_WINDOW_SETTING); maxInnerResultWindow = scopedSettings.get(MAX_INNER_RESULT_WINDOW_SETTING); @@ -839,4 +847,11 @@ public boolean isExplicitRefresh() { * Returns the time that an index shard becomes search idle unless it's accessed in between */ public TimeValue getSearchIdleAfter() { return searchIdleAfter; } + + /** + * Returns true if soft-delete is enabled. 
+ */ + public boolean isSoftDeleteEnabled() { + return softDeleteEnabled; + } } diff --git a/server/src/main/java/org/elasticsearch/index/engine/CommitStats.java b/server/src/main/java/org/elasticsearch/index/engine/CommitStats.java index 21025046b8c57..8fbbe3a9deaa9 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/CommitStats.java +++ b/server/src/main/java/org/elasticsearch/index/engine/CommitStats.java @@ -39,13 +39,13 @@ public final class CommitStats implements Streamable, ToXContentFragment { private String id; // lucene commit id in base 64; private int numDocs; - public CommitStats(SegmentInfos segmentInfos) { + public CommitStats(SegmentInfos segmentInfos, int numDocs) { // clone the map to protect against concurrent changes userData = MapBuilder.newMapBuilder().putAll(segmentInfos.getUserData()).immutableMap(); // lucene calls the current generation, last generation. generation = segmentInfos.getLastGeneration(); id = Base64.getEncoder().encodeToString(segmentInfos.getId()); - numDocs = Lucene.getNumDocs(segmentInfos); + this.numDocs = numDocs; } private CommitStats() { diff --git a/server/src/main/java/org/elasticsearch/index/engine/Engine.java b/server/src/main/java/org/elasticsearch/index/engine/Engine.java index b6c71e06c93df..f1427ad41fa91 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -536,7 +536,9 @@ protected final void ensureOpen() { /** get commits stats for the last commit */ public CommitStats commitStats() { - return new CommitStats(getLastCommittedSegmentInfos()); + try (Engine.Searcher searcher = acquireSearcher("commit_stats", Engine.SearcherScope.INTERNAL)) { + return new CommitStats(getLastCommittedSegmentInfos(), searcher.reader().numDocs()); + } } /** diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index 
f70c87c1024c4..e6f68e02e2153 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -21,6 +21,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexReader; @@ -67,7 +68,6 @@ import org.elasticsearch.index.seqno.LocalCheckpointTracker; import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.shard.ElasticsearchMergePolicy; -import org.elasticsearch.index.shard.IndexingStats; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.translog.Translog; import org.elasticsearch.index.translog.TranslogConfig; @@ -138,6 +138,8 @@ public class InternalEngine extends Engine { private final CounterMetric numDocDeletes = new CounterMetric(); private final CounterMetric numDocAppends = new CounterMetric(); private final CounterMetric numDocUpdates = new CounterMetric(); + private final NumericDocValuesField softDeleteField = Lucene.newSoftDeleteField(); + private final boolean softDeleteEnabled; /** * How many bytes we are currently moving to disk, via either IndexWriter.flush or refresh. 
IndexingMemoryController polls this @@ -161,6 +163,7 @@ public InternalEngine(EngineConfig engineConfig) { if (engineConfig.isAutoGeneratedIDsOptimizationEnabled() == false) { maxUnsafeAutoIdTimestamp.set(Long.MAX_VALUE); } + this.softDeleteEnabled = engineConfig.getIndexSettings().isSoftDeleteEnabled(); final TranslogDeletionPolicy translogDeletionPolicy = new TranslogDeletionPolicy( engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(), engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis() @@ -768,6 +771,7 @@ public IndexResult index(Index index) throws IOException { } else if (plan.indexIntoLucene) { indexResult = indexIntoLucene(index, plan); } else { + // TODO: We need to index stale documents to have a full history in Lucene. indexResult = new IndexResult( plan.versionForIndexing, plan.seqNoForIndexing, plan.currentNotFoundOrDeleted); } @@ -1066,10 +1070,18 @@ private boolean assertDocDoesNotExist(final Index index, final boolean allowDele } private void updateDocs(final Term uid, final List docs, final IndexWriter indexWriter) throws IOException { - if (docs.size() > 1) { - indexWriter.updateDocuments(uid, docs); + if (softDeleteEnabled) { + if (docs.size() > 1) { + indexWriter.softUpdateDocuments(uid, docs, softDeleteField); + } else { + indexWriter.softUpdateDocument(uid, docs.get(0), softDeleteField); + } } else { - indexWriter.updateDocument(uid, docs.get(0)); + if (docs.size() > 1) { + indexWriter.updateDocuments(uid, docs); + } else { + indexWriter.updateDocument(uid, docs.get(0)); + } } numDocUpdates.inc(docs.size()); } @@ -1927,11 +1939,14 @@ private IndexWriterConfig getIndexWriterConfig() { } iwc.setInfoStream(verbose ? 
InfoStream.getDefault() : new LoggerInfoStream(logger)); iwc.setMergeScheduler(mergeScheduler); - MergePolicy mergePolicy = config().getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges - mergePolicy = new ElasticsearchMergePolicy(mergePolicy); - iwc.setMergePolicy(mergePolicy); + MergePolicy mergePolicy = config().getMergePolicy(); + if (softDeleteEnabled) { + // TODO: soft-delete retention policy + iwc.setSoftDeletesField(Lucene.SOFT_DELETE_FIELD); + } + iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy)); iwc.setSimilarity(engineConfig.getSimilarity()); iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac()); iwc.setCodec(engineConfig.getCodec()); diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index 83fded4a1f18b..78b2119d79a36 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -1007,7 +1007,6 @@ public RecoveryDiff recoveryDiff(MetadataSnapshot recoveryTargetSnapshot) { } final String segmentId = IndexFileNames.parseSegmentName(meta.name()); final String extension = IndexFileNames.getExtension(meta.name()); - assert FIELD_INFOS_FILE_EXTENSION.equals(extension) == false || IndexFileNames.stripExtension(IndexFileNames.stripSegmentName(meta.name())).isEmpty() : "FieldInfos are generational but updateable DV are not supported in elasticsearch"; if (IndexFileNames.SEGMENTS.equals(segmentId) || DEL_FILE_EXTENSION.equals(extension) || LIV_FILE_EXTENSION.equals(extension)) { // only treat del files as per-commit files fnm files are generational but only for upgradable DV perCommitStoreFiles.add(meta); diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index 3a101e16a0689..5630b28c1ca15 
100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -119,6 +119,7 @@ import org.elasticsearch.index.translog.Translog; import org.elasticsearch.index.translog.TranslogConfig; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.test.IndexSettingsModule; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; @@ -2711,6 +2712,12 @@ private void maybeThrowFailure() throws IOException { } } + @Override + public long softUpdateDocument(Term term, Iterable doc, Field... softDeletes) throws IOException { + maybeThrowFailure(); + return super.softUpdateDocument(term, doc, softDeletes); + } + @Override public long deleteDocuments(Term... terms) throws IOException { maybeThrowFailure();