From aa030ff5548f16a284dcd3818a567aee467f4190 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sat, 5 May 2018 18:49:46 -0400 Subject: [PATCH 1/5] Make _id and _version terms optional in a segment Previously only index and delete operations are indexed into Lucene, therefore every segment should have both _id and _version terms as these operations contains both terms. However, this is no longer guaranteed after noop is also indexed into Lucene. A segment which contains only no-ops does not have either _id or _version. This change makes _id and _version terms optional in PerThreadIDVersionAndSeqNoLookup. Relates #30226 --- .../uid/PerThreadIDVersionAndSeqNoLookup.java | 20 +++++---- .../index/engine/InternalEngineTests.java | 45 +++++++++++++++++++ 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java index 38fcdfe5f1b62..aa5efc8f9bc09 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java @@ -66,15 +66,19 @@ final class PerThreadIDVersionAndSeqNoLookup { */ PerThreadIDVersionAndSeqNoLookup(LeafReader reader, String uidField) throws IOException { this.uidField = uidField; - Terms terms = reader.terms(uidField); + final Terms terms = reader.terms(uidField); if (terms == null) { - throw new IllegalArgumentException("reader misses the [" + uidField + "] field"); - } - termsEnum = terms.iterator(); - if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) { - throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field"); + // Uid is not found if a segment contains only NoOps; in this case, version should not exist either. + if (reader.getNumericDocValues(VersionFieldMapper.NAME) != null) { + throw new IllegalArgumentException("Reader misses [" + uidField + "] but has [" + VersionFieldMapper.NAME + "]"); + } + termsEnum = null; + } else { + if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) { + throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field"); + } + termsEnum = terms.iterator(); } - Object readerKey = null; assert (readerKey = reader.getCoreCacheHelper().getKey()) != null; this.readerKey = readerKey; @@ -111,7 +115,7 @@ public DocIdAndVersion lookupVersion(BytesRef id, LeafReaderContext context) * {@link DocIdSetIterator#NO_MORE_DOCS} is returned if not found * */ private int getDocID(BytesRef id, Bits liveDocs) throws IOException { - if (termsEnum.seekExact(id)) { + if (termsEnum != null && termsEnum.seekExact(id)) { int docID = DocIdSetIterator.NO_MORE_DOCS; // there may be more than one matching docID, in the case of nested docs, so we want the last one: docsEnum = termsEnum.postings(docsEnum, 0); diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index 350f952a1a59f..b762208dd3526 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -3781,6 +3781,51 @@ protected long doGenerateSeqNoForOperation(Operation operation) { } } + public void testSegmentContainsOnlyNoOps() throws Exception { + Engine.NoOpResult noOpResult = engine.noOp(new Engine.NoOp(1, primaryTerm.get(), REPLICA, randomNonNegativeLong(), "test")); + assertThat(noOpResult.getFailure(), nullValue()); + engine.refresh("test"); + Engine.DeleteResult deleteResult = engine.delete(replicaDeleteForDoc("id", 1, 2, randomNonNegativeLong())); + assertThat(deleteResult.getFailure(), nullValue()); + engine.refresh("test"); + } + + public void testRandomOperations() throws Exception { + int numOps = between(10, 100); + for (int i = 0; i < numOps; i++) { + String id = Integer.toString(randomIntBetween(1, 10)); + ParsedDocument doc = createParsedDoc(id, null); + Engine.Operation.TYPE type = randomFrom(Engine.Operation.TYPE.values()); + switch (type) { + case INDEX: + Engine.IndexResult index = engine.index(replicaIndexForDoc(doc, between(1, 100), i, randomBoolean())); + assertThat(index.getFailure(), nullValue()); + break; + case DELETE: + Engine.DeleteResult delete = engine.delete(replicaDeleteForDoc(doc.id(), between(1, 100), i, randomNonNegativeLong())); + assertThat(delete.getFailure(), nullValue()); + break; + case NO_OP: + Engine.NoOpResult noOp = engine.noOp(new Engine.NoOp(i, primaryTerm.get(), REPLICA, randomNonNegativeLong(), "")); + assertThat(noOp.getFailure(), nullValue()); + break; + default: + throw new IllegalStateException("Invalid op [" + type + "]"); + } + if (randomBoolean()) { + engine.refresh("test"); + } + if (randomBoolean()) { + engine.flush(); + } + if (randomBoolean()) { + engine.forceMerge(randomBoolean(), between(1, 10), randomBoolean(), false, false); + } + } + List operations = readAllOperationsInLucene(engine, createMapperService("test")); + assertThat(operations, hasSize(numOps)); + } + public void testMinGenerationForSeqNo() throws IOException, BrokenBarrierException, InterruptedException { engine.close(); final int numberOfTriplets = randomIntBetween(1, 32); From a08e791181b1f8a2b8419d47b43dff29cd854c91 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sun, 6 May 2018 15:17:06 -0400 Subject: [PATCH 2/5] strictly check in the lookup --- .../lucene/uid/PerThreadIDVersionAndSeqNoLookup.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java index aa5efc8f9bc09..efb58ea16b540 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndSeqNo; import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion; import org.elasticsearch.index.mapper.SeqNoFieldMapper; @@ -68,9 +69,13 @@ final class PerThreadIDVersionAndSeqNoLookup { this.uidField = uidField; final Terms terms = reader.terms(uidField); if (terms == null) { - // Uid is not found if a segment contains only NoOps; in this case, version should not exist either. - if (reader.getNumericDocValues(VersionFieldMapper.NAME) != null) { - throw new IllegalArgumentException("Reader misses [" + uidField + "] but has [" + VersionFieldMapper.NAME + "]"); + // If a segment contains only no-ops, it won't have _uid and _version but has both _soft_deletes and _tombstone fields. + final NumericDocValues versionDV = reader.getNumericDocValues(VersionFieldMapper.NAME); + final NumericDocValues softDeletesDV = reader.getNumericDocValues(Lucene.SOFT_DELETE_FIELD); + final NumericDocValues tombstoneDV = reader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME); + if (versionDV != null || softDeletesDV == null || tombstoneDV == null) { + throw new IllegalArgumentException("reader does not have _uid terms but not a no-op segment; " + + "_version [" + versionDV + "], _soft_deletes [" + softDeletesDV + "], _tombstone [" + tombstoneDV + "]"); } termsEnum = null; } else { @@ -115,6 +120,7 @@ public DocIdAndVersion lookupVersion(BytesRef id, LeafReaderContext context) * {@link DocIdSetIterator#NO_MORE_DOCS} is returned if not found * */ private int getDocID(BytesRef id, Bits liveDocs) throws IOException { + // termsEnum can possibly be null here if this leaf contains only no-ops. if (termsEnum != null && termsEnum.seekExact(id)) { int docID = DocIdSetIterator.NO_MORE_DOCS; // there may be more than one matching docID, in the case of nested docs, so we want the last one: From bc0acafc51c726ec92878aea06a2e48e8247eacd Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sun, 6 May 2018 15:24:14 -0400 Subject: [PATCH 3/5] add comment for tests --- .../index/engine/InternalEngineTests.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index b762208dd3526..b623b61f19692 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -3781,8 +3781,12 @@ protected long doGenerateSeqNoForOperation(Operation operation) { } } + /** + * Verifies that a segment containing only no-ops can be used to look up _version and _seqno. + */ public void testSegmentContainsOnlyNoOps() throws Exception { - Engine.NoOpResult noOpResult = engine.noOp(new Engine.NoOp(1, primaryTerm.get(), REPLICA, randomNonNegativeLong(), "test")); + Engine.NoOpResult noOpResult = engine.noOp(new Engine.NoOp(1, primaryTerm.get(), + randomFrom(Engine.Operation.Origin.values()), randomNonNegativeLong(), "test")); assertThat(noOpResult.getFailure(), nullValue()); engine.refresh("test"); Engine.DeleteResult deleteResult = engine.delete(replicaDeleteForDoc("id", 1, 2, randomNonNegativeLong())); @@ -3790,6 +3794,11 @@ public void testSegmentContainsOnlyNoOps() throws Exception { engine.refresh("test"); } + /** + * A simple test to check that random combination of operations can coexist in segments and be lookup. + * This is needed as some fields in Lucene may not exist if a segment misses operation types and this code is to check for that. + * For example, a segment containing only no-ops does not have neither _uid or _version. + */ public void testRandomOperations() throws Exception { int numOps = between(10, 100); for (int i = 0; i < numOps; i++) { @@ -3806,7 +3815,8 @@ public void testRandomOperations() throws Exception { assertThat(delete.getFailure(), nullValue()); break; case NO_OP: - Engine.NoOpResult noOp = engine.noOp(new Engine.NoOp(i, primaryTerm.get(), REPLICA, randomNonNegativeLong(), "")); + Engine.NoOpResult noOp = engine.noOp(new Engine.NoOp(i, primaryTerm.get(), + randomFrom(Engine.Operation.Origin.values()), randomNonNegativeLong(), "")); assertThat(noOp.getFailure(), nullValue()); break; default: From 2367bc8c8fb2ef090a1a262c8fe0ddd71817f160 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 7 May 2018 17:11:41 -0400 Subject: [PATCH 4/5] Add dummy _version value --- .../uid/PerThreadIDVersionAndSeqNoLookup.java | 13 ++++++------- .../elasticsearch/index/engine/InternalEngine.java | 3 +++ .../elasticsearch/index/mapper/DocumentMapper.java | 4 ++-- .../elasticsearch/index/shard/IndexShardTests.java | 4 ++-- .../elasticsearch/index/engine/EngineTestCase.java | 7 +++++-- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java index efb58ea16b540..28be480ad741c 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java @@ -69,21 +69,20 @@ final class PerThreadIDVersionAndSeqNoLookup { this.uidField = uidField; final Terms terms = reader.terms(uidField); if (terms == null) { - // If a segment contains only no-ops, it won't have _uid and _version but has both _soft_deletes and _tombstone fields. - final NumericDocValues versionDV = reader.getNumericDocValues(VersionFieldMapper.NAME); + // If a segment contains only no-ops, it does not have _uid but has both _soft_deletes and _tombstone fields. final NumericDocValues softDeletesDV = reader.getNumericDocValues(Lucene.SOFT_DELETE_FIELD); final NumericDocValues tombstoneDV = reader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME); - if (versionDV != null || softDeletesDV == null || tombstoneDV == null) { + if (softDeletesDV == null || tombstoneDV == null) { throw new IllegalArgumentException("reader does not have _uid terms but not a no-op segment; " + - "_version [" + versionDV + "], _soft_deletes [" + softDeletesDV + "], _tombstone [" + tombstoneDV + "]"); + "_soft_deletes [" + softDeletesDV + "], _tombstone [" + tombstoneDV + "]"); } termsEnum = null; } else { - if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) { - throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field"); - } termsEnum = terms.iterator(); } + if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) { + throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field; _uid terms [" + terms + "]"); + } Object readerKey = null; assert (readerKey = reader.getCoreCacheHelper().getKey()) != null; this.readerKey = readerKey; diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index e1f8eb91f88ef..fc0e23bfdab43 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -1347,6 +1347,9 @@ private NoOpResult innerNoOp(final NoOp noOp) throws IOException { try { final ParsedDocument tombstone = engineConfig.getTombstoneDocSupplier().newNoopTombstoneDoc(); tombstone.updateSeqID(noOp.seqNo(), noOp.primaryTerm()); + // A noop tombstone does not require a _version but it's added to have a fully dense docvalues for the version field. + // 1L is selected to optimize the compression because it might probably be the most common value in version field. + tombstone.version().setLongValue(1L); assert tombstone.docs().size() == 1 : "Tombstone should have a single doc [" + tombstone + "]"; final ParseContext.Document doc = tombstone.docs().get(0); assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index d02ab5f077713..df02041ecc446 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -178,8 +178,8 @@ public DocumentMapper(MapperService mapperService, Mapping mapping) { TypeFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME); this.deleteTombstoneMetadataFieldMappers = Stream.of(mapping.metadataMappers) .filter(field -> deleteTombstoneMetadataFields.contains(field.name())).toArray(MetadataFieldMapper[]::new); - final Collection noopTombstoneMetadataFields = - Arrays.asList(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME); + final Collection noopTombstoneMetadataFields = Arrays.asList( + VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME); this.noopTombstoneMetadataFieldMappers = Stream.of(mapping.metadataMappers) .filter(field -> noopTombstoneMetadataFields.contains(field.name())).toArray(MetadataFieldMapper[]::new); } diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index dcb6569ba0031..7bc4096741073 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -3113,8 +3113,8 @@ public void testSupplyTombstoneDoc() throws Exception { assertThat(noopTombstone.docs(), hasSize(1)); ParseContext.Document noopDoc = noopTombstone.docs().get(0); assertThat(noopDoc.getFields().stream().map(IndexableField::name).collect(Collectors.toList()), - containsInAnyOrder(SeqNoFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, - SeqNoFieldMapper.TOMBSTONE_NAME)); + containsInAnyOrder(VersionFieldMapper.NAME, SeqNoFieldMapper.TOMBSTONE_NAME, + SeqNoFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME)); assertThat(noopDoc.getField(SeqNoFieldMapper.TOMBSTONE_NAME).numericValue().longValue(), equalTo(1L)); closeShards(shard); diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index 22e533d11140d..fcf8854a9bc56 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -315,7 +315,9 @@ public ParsedDocument newNoopTombstoneDoc() { doc.add(seqID.primaryTerm); seqID.tombstoneField.setLongValue(1); doc.add(seqID.tombstoneField); - return new ParsedDocument(null, seqID, null, null, null, Collections.singletonList(doc), null, XContentType.JSON, null); + Field versionField = new NumericDocValuesField(VersionFieldMapper.NAME, 0); + doc.add(versionField); + return new ParsedDocument(versionField, seqID, null, null, null, Collections.singletonList(doc), null, XContentType.JSON, null); } }; } @@ -734,6 +736,7 @@ private static Translog.Operation readOperationInLucene(IndexSearcher searcher, final int segmentDocID = docID - leaves.get(leafIndex).docBase; final long seqNo = readNumericDV(leaves.get(leafIndex), SeqNoFieldMapper.NAME, segmentDocID); final long primaryTerm = readNumericDV(leaves.get(leafIndex), SeqNoFieldMapper.PRIMARY_TERM_NAME, segmentDocID); + final long version = readNumericDV(leaves.get(leafIndex), VersionFieldMapper.NAME, segmentDocID); final FieldsVisitor fields = new FieldsVisitor(true); searcher.doc(docID, fields); fields.postProcess(mapper); @@ -743,11 +746,11 @@ private static Translog.Operation readOperationInLucene(IndexSearcher searcher, op = new Translog.NoOp(seqNo, primaryTerm, ""); assert readNumericDV(leaves.get(leafIndex), Lucene.SOFT_DELETE_FIELD, segmentDocID) == 1 : "Noop operation but soft_deletes field is not set"; + assert version == 1 : "Noop tombstone should have version 1L; actual version [" + version + "]"; } else { final String id = fields.uid().id(); final String type = fields.uid().type(); final Term uid = new Term(IdFieldMapper.NAME, Uid.encodeId(id)); - final long version = readNumericDV(leaves.get(leafIndex), VersionFieldMapper.NAME, segmentDocID); if (isTombstone) { op = new Translog.Delete(type, id, uid, seqNo, primaryTerm, version, VersionType.INTERNAL); assert readNumericDV(leaves.get(leafIndex), Lucene.SOFT_DELETE_FIELD, segmentDocID) == 1 From 95fe0221b0b41debd4da72543550b05f6178ed58 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 7 May 2018 18:20:40 -0400 Subject: [PATCH 5/5] stylecheck --- .../java/org/elasticsearch/index/engine/EngineTestCase.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index fcf8854a9bc56..2ce31ac24e173 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -317,7 +317,8 @@ public ParsedDocument newNoopTombstoneDoc() { doc.add(seqID.tombstoneField); Field versionField = new NumericDocValuesField(VersionFieldMapper.NAME, 0); doc.add(versionField); - return new ParsedDocument(versionField, seqID, null, null, null, Collections.singletonList(doc), null, XContentType.JSON, null); + return new ParsedDocument(versionField, seqID, null, null, null, + Collections.singletonList(doc), null, XContentType.JSON, null); } }; }