From ce3cc24e2bafa75682185e78f07d0f1fcfd26ffb Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 14 Sep 2023 09:14:14 +0200 Subject: [PATCH 01/11] wip --- .../index/codec/PerFieldMapperCodec.java | 3 +- .../codec/tsdb/ES87TSDBDocValuesConsumer.java | 339 ++++++++++- .../codec/tsdb/ES87TSDBDocValuesEncoder.java | 38 ++ .../codec/tsdb/ES87TSDBDocValuesFormat.java | 8 + .../codec/tsdb/ES87TSDBDocValuesProducer.java | 550 +++++++++++++++++- .../tsdb/ES87TSDBDocValuesEncoderTests.java | 55 ++ .../tsdb/ES87TSDBDocValuesFormatTests.java | 242 +------- 7 files changed, 968 insertions(+), 267 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index d30a91d2ae4d0..08c258f768707 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -110,8 +110,7 @@ public DocValuesFormat getDocValuesFormatForField(String field) { boolean useTSDBDocValuesFormat(final String field) { return mapperService.getIndexSettings().isES87TSDBCodecEnabled() && isTimeSeriesModeIndex() - && isNotSpecialField(field) - && (isCounterOrGaugeMetricType(field) || isTimestampField(field)); + && isNotSpecialField(field); } private boolean isTimeSeriesModeIndex() { diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java index 17bdcbbfb0739..21ff613e9c595 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java @@ -16,25 +16,42 @@ import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; +import 
org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexOutput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.compress.LZ4; import org.apache.lucene.util.packed.DirectMonotonicWriter; import org.elasticsearch.core.IOUtils; import java.io.IOException; import java.util.Arrays; +import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; +import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.SORTED_SET; + final class ES87TSDBDocValuesConsumer extends DocValuesConsumer { IndexOutput data, meta; final int maxDoc; + private byte[] termsDictBuffer; ES87TSDBDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { + this.termsDictBuffer = new byte[1 << 14]; boolean success = false; try { final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); @@ -73,10 +90,10 @@ public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) t public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { return DocValues.singleton(valuesProducer.getNumeric(field)); } - }); + }, false); } - private long[] 
writeNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException { int numDocsWithValue = 0; long numValues = 0; @@ -130,7 +147,11 @@ private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProduc buffer[bufferSize++] = values.nextValue(); if (bufferSize == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) { indexWriter.add(data.getFilePointer() - valuesDataOffset); - encoder.encode(buffer, data); + if (ords) { + encoder.encodeOrdinals(buffer, data); + } else { + encoder.encode(buffer, data); + } bufferSize = 0; } } @@ -139,7 +160,11 @@ private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProduc indexWriter.add(data.getFilePointer() - valuesDataOffset); // Fill unused slots in the block with zeroes rather than junk Arrays.fill(buffer, bufferSize, ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L); - encoder.encode(buffer, data); + if (ords) { + encoder.encodeOrdinals(buffer, data); + } else { + encoder.encode(buffer, data); + } } final long valuesDataLength = data.getFilePointer() - valuesDataOffset; @@ -163,18 +188,219 @@ public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) th @Override public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { - throw new UnsupportedOperationException("Unsupported sorted doc values for field [" + field.name + "]"); + meta.writeInt(field.number); + meta.writeByte(ES87TSDBDocValuesFormat.SORTED); + doAddSortedField(field, valuesProducer); + } + + private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + writeNumericField( + field, + new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + SortedDocValues sorted = valuesProducer.getSorted(field); + NumericDocValues sortedOrds = 
+ new NumericDocValues() { + @Override + public long longValue() throws IOException { + return sorted.ordValue(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return sorted.advanceExact(target); + } + + @Override + public int docID() { + return sorted.docID(); + } + + @Override + public int nextDoc() throws IOException { + return sorted.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return sorted.advance(target); + } + + @Override + public long cost() { + return sorted.cost(); + } + }; + return DocValues.singleton(sortedOrds); + } + }, true); + addTermsDict(DocValues.singleton(valuesProducer.getSorted(field))); + } + + private void addTermsDict(SortedSetDocValues values) throws IOException { + final long size = values.getValueCount(); + meta.writeVLong(size); + + int blockMask = ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_MASK; + int shift = ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; + + meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT); + ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput(); + ByteBuffersIndexOutput addressOutput = + new ByteBuffersIndexOutput(addressBuffer, "temp", "temp"); + long numBlocks = (size + blockMask) >>> shift; + DirectMonotonicWriter writer = + DirectMonotonicWriter.getInstance( + meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); + + BytesRefBuilder previous = new BytesRefBuilder(); + long ord = 0; + long start = data.getFilePointer(); + int maxLength = 0, maxBlockLength = 0; + TermsEnum iterator = values.termsEnum(); + + LZ4.FastCompressionHashTable ht = new LZ4.FastCompressionHashTable(); + ByteArrayDataOutput bufferedOutput = new ByteArrayDataOutput(termsDictBuffer); + int dictLength = 0; + + for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { + if ((ord & blockMask) == 0) { + if (ord != 0) { + // flush the previous block + final int uncompressedLength = + 
compressAndGetTermsDictBlockLength(bufferedOutput, dictLength, ht); + maxBlockLength = Math.max(maxBlockLength, uncompressedLength); + bufferedOutput.reset(termsDictBuffer); + } + + writer.add(data.getFilePointer() - start); + // Write the first term both to the index output, and to the buffer where we'll use it as a + // dictionary for compression + data.writeVInt(term.length); + data.writeBytes(term.bytes, term.offset, term.length); + bufferedOutput = maybeGrowBuffer(bufferedOutput, term.length); + bufferedOutput.writeBytes(term.bytes, term.offset, term.length); + dictLength = term.length; + } else { + final int prefixLength = StringHelper.bytesDifference(previous.get(), term); + final int suffixLength = term.length - prefixLength; + assert suffixLength > 0; // terms are unique + // Will write (suffixLength + 1 byte + 2 vint) bytes. Grow the buffer in need. + bufferedOutput = maybeGrowBuffer(bufferedOutput, suffixLength + 11); + bufferedOutput.writeByte( + (byte) (Math.min(prefixLength, 15) | (Math.min(15, suffixLength - 1) << 4))); + if (prefixLength >= 15) { + bufferedOutput.writeVInt(prefixLength - 15); + } + if (suffixLength >= 16) { + bufferedOutput.writeVInt(suffixLength - 16); + } + bufferedOutput.writeBytes(term.bytes, term.offset + prefixLength, suffixLength); + } + maxLength = Math.max(maxLength, term.length); + previous.copyBytes(term); + ++ord; + } + // Compress and write out the last block + if (bufferedOutput.getPosition() > dictLength) { + final int uncompressedLength = + compressAndGetTermsDictBlockLength(bufferedOutput, dictLength, ht); + maxBlockLength = Math.max(maxBlockLength, uncompressedLength); + } + + writer.finish(); + meta.writeInt(maxLength); + // Write one more int for storing max block length. 
+ meta.writeInt(maxBlockLength); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + start = data.getFilePointer(); + addressBuffer.copyTo(data); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + + // Now write the reverse terms index + writeTermsIndex(values); + } + + private int compressAndGetTermsDictBlockLength( + ByteArrayDataOutput bufferedOutput, int dictLength, LZ4.FastCompressionHashTable ht) + throws IOException { + int uncompressedLength = bufferedOutput.getPosition() - dictLength; + data.writeVInt(uncompressedLength); + LZ4.compressWithDictionary(termsDictBuffer, 0, dictLength, uncompressedLength, data, ht); + return uncompressedLength; + } + + private ByteArrayDataOutput maybeGrowBuffer(ByteArrayDataOutput bufferedOutput, int termLength) { + int pos = bufferedOutput.getPosition(), originalLength = termsDictBuffer.length; + if (pos + termLength >= originalLength - 1) { + termsDictBuffer = ArrayUtil.grow(termsDictBuffer, originalLength + termLength); + bufferedOutput = new ByteArrayDataOutput(termsDictBuffer, pos, termsDictBuffer.length - pos); + } + return bufferedOutput; + } + + private void writeTermsIndex(SortedSetDocValues values) throws IOException { + final long size = values.getValueCount(); + meta.writeInt(ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT); + long start = data.getFilePointer(); + + long numBlocks = + 1L + + ((size + ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) + >>> ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT); + ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput(); + DirectMonotonicWriter writer; + try (ByteBuffersIndexOutput addressOutput = + new ByteBuffersIndexOutput(addressBuffer, "temp", "temp")) { + writer = + DirectMonotonicWriter.getInstance( + meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); + TermsEnum iterator = values.termsEnum(); + BytesRefBuilder previous = new BytesRefBuilder(); + long offset = 0; + long ord = 
0; + for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { + if ((ord & ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) == 0) { + writer.add(offset); + final int sortKeyLength; + if (ord == 0) { + // no previous term: no bytes to write + sortKeyLength = 0; + } else { + sortKeyLength = StringHelper.sortKeyLength(previous.get(), term); + } + offset += sortKeyLength; + data.writeBytes(term.bytes, term.offset, sortKeyLength); + } else if ((ord & ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) + == ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) { + previous.copyBytes(term); + } + ++ord; + } + writer.add(offset); + writer.finish(); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + start = data.getFilePointer(); + addressBuffer.copyTo(data); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + } } @Override public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); meta.writeByte(ES87TSDBDocValuesFormat.SORTED_NUMERIC); - writeSortedNumericField(field, valuesProducer); + writeSortedNumericField(field, valuesProducer, false); } - private void writeSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { - long[] stats = writeNumericField(field, valuesProducer); + private void writeSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException { + long[] stats = writeNumericField(field, valuesProducer, ords); int numDocsWithField = Math.toIntExact(stats[0]); long numValues = stats[1]; assert numValues >= numDocsWithField; @@ -203,9 +429,102 @@ private void writeSortedNumericField(FieldInfo field, DocValuesProducer valuesPr } } + private static boolean isSingleValued(SortedSetDocValues values) throws IOException { + if (DocValues.unwrapSingleton(values) != null) { + return true; + } + + assert values.docID() == -1; + for (int doc = 
values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + int docValueCount = values.docValueCount(); + assert docValueCount > 0; + if (docValueCount > 1) { + return false; + } + } + return true; + } + @Override - public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { - throw new UnsupportedOperationException("Unsupported sorted set doc values for field [" + field.name + "]"); + public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + meta.writeInt(field.number); + meta.writeByte(SORTED_SET); + + if (isSingleValued(valuesProducer.getSortedSet(field))) { + meta.writeByte((byte) 0); // multiValued (0 = singleValued) + doAddSortedField( + field, + new EmptyDocValuesProducer() { + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + return SortedSetSelector.wrap( + valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN); + } + }); + return; + } + meta.writeByte((byte) 1); // multiValued (1 = multiValued) + + writeSortedNumericField( + field, + new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + SortedSetDocValues values = valuesProducer.getSortedSet(field); + return new SortedNumericDocValues() { + + long[] ords = LongsRef.EMPTY_LONGS; + int i, docValueCount; + + @Override + public long nextValue() throws IOException { + return ords[i++]; + } + + @Override + public int docValueCount() { + return docValueCount; + } + + @Override + public boolean advanceExact(int target) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int docID() { + return values.docID(); + } + + @Override + public int nextDoc() throws IOException { + int doc = values.nextDoc(); + if (doc != NO_MORE_DOCS) { + docValueCount = values.docValueCount(); + ords = ArrayUtil.grow(ords, docValueCount); + for (int j = 0; j < 
docValueCount; j++) { + ords[j] = values.nextOrd(); + } + i = 0; + } + return doc; + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + return values.cost(); + } + }; + } + }, true); + + addTermsDict(valuesProducer.getSortedSet(field)); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index b4c0f70ad8c99..e90e0e2bfcea8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -178,6 +178,44 @@ void encode(long[] in, DataOutput out) throws IOException { deltaEncode(0, 0, in, out); } + void encodeOrdinals(long[] in, DataOutput out) throws IOException { + assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; + long previous = in[0]; + int repetitions = -1; + for (long l : in) { + assert l >= 0 : "Ordinals are expected to be positive"; + if (previous == l) { + repetitions++; + } else { + out.writeZLong(previous); + if (repetitions > 0) { + out.writeZLong(repetitions * -1); + } + repetitions = 0; + previous = l; + } + } + out.writeZLong(previous); + if (repetitions > 0) { + out.writeZLong(repetitions * -1); + } + } + + void decodeOrdinals(DataInput in, long[] out) throws IOException { + assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length; + for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;) { + long l = in.readZLong(); + if (l >= 0) { + out[i] = l; + i++; + } else { + int repetitions = (int) (-1 * l); + Arrays.fill(out, i, i + repetitions, out[i - 1]); + i += repetitions; + } + } + } + /** Decode longs that have been encoded with {@link #encode}. 
*/ void decode(DataInput in, long[] out) throws IOException { assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length; diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java index d8b2ea8b677b8..c5f597f27eb98 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java @@ -34,6 +34,14 @@ public class ES87TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesF static final byte SORTED_SET = 3; static final byte SORTED_NUMERIC = 4; + static final int TERMS_DICT_BLOCK_LZ4_SHIFT = 6; + static final int TERMS_DICT_BLOCK_LZ4_SIZE = 1 << TERMS_DICT_BLOCK_LZ4_SHIFT; + static final int TERMS_DICT_BLOCK_LZ4_MASK = TERMS_DICT_BLOCK_LZ4_SIZE - 1; + + static final int TERMS_DICT_REVERSE_INDEX_SHIFT = 10; + static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT; + static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1; + public ES87TSDBDocValuesFormat() { super(CODEC_NAME); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index 174c3fbc9f0b9..464ca38fccc85 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -11,21 +11,29 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene90.IndexedDISI; +import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; 
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LongValues; +import org.apache.lucene.util.compress.LZ4; import org.apache.lucene.util.packed.DirectMonotonicReader; import org.elasticsearch.core.IOUtils; @@ -33,8 +41,12 @@ import java.util.HashMap; import java.util.Map; +import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; + public class ES87TSDBDocValuesProducer extends DocValuesProducer { private final Map numerics = new HashMap<>(); + private final Map sorted = new HashMap<>(); + private final Map sortedSets = new HashMap<>(); private final Map sortedNumerics = new HashMap<>(); private final IndexInput data; private final int maxDoc; @@ -101,7 +113,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { NumericEntry entry = numerics.get(field.name); - return getNumeric(entry); + return getNumeric(entry, false); } @Override @@ -111,18 +123,439 @@ public BinaryDocValues getBinary(FieldInfo field) throws IOException { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - throw new UnsupportedOperationException("Unsupported 
sorted doc values for field [" + field.name + "]"); + SortedEntry entry = sorted.get(field.name); + return getSorted(entry); + } + + private SortedDocValues getSorted(SortedEntry entry) throws IOException { + final NumericDocValues ords = getNumeric(entry.ordsEntry, true); + return new BaseSortedDocValues(entry) { + + @Override + public int ordValue() throws IOException { + return (int) ords.longValue(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return ords.advanceExact(target); + } + + @Override + public int docID() { + return ords.docID(); + } + + @Override + public int nextDoc() throws IOException { + return ords.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return ords.advance(target); + } + + @Override + public long cost() { + return ords.cost(); + } + }; + } + + + private abstract class BaseSortedDocValues extends SortedDocValues { + + final SortedEntry entry; + final TermsEnum termsEnum; + + BaseSortedDocValues(SortedEntry entry) throws IOException { + this.entry = entry; + this.termsEnum = termsEnum(); + } + + @Override + public int getValueCount() { + return Math.toIntExact(entry.termsDictEntry.termsDictSize); + } + + @Override + public BytesRef lookupOrd(int ord) throws IOException { + termsEnum.seekExact(ord); + return termsEnum.term(); + } + + @Override + public int lookupTerm(BytesRef key) throws IOException { + TermsEnum.SeekStatus status = termsEnum.seekCeil(key); + switch (status) { + case FOUND: + return Math.toIntExact(termsEnum.ord()); + case NOT_FOUND: + case END: + default: + return Math.toIntExact(-1L - termsEnum.ord()); + } + } + + @Override + public TermsEnum termsEnum() throws IOException { + return new TermsDict(entry.termsDictEntry, data); + } + } + + private abstract class BaseSortedSetDocValues extends SortedSetDocValues { + + final SortedSetEntry entry; + final IndexInput data; + final TermsEnum termsEnum; + + BaseSortedSetDocValues(SortedSetEntry entry, 
IndexInput data) throws IOException { + this.entry = entry; + this.data = data; + this.termsEnum = termsEnum(); + } + + @Override + public long getValueCount() { + return entry.termsDictEntry.termsDictSize; + } + + @Override + public BytesRef lookupOrd(long ord) throws IOException { + termsEnum.seekExact(ord); + return termsEnum.term(); + } + + @Override + public long lookupTerm(BytesRef key) throws IOException { + TermsEnum.SeekStatus status = termsEnum.seekCeil(key); + switch (status) { + case FOUND: + return termsEnum.ord(); + case NOT_FOUND: + case END: + default: + return -1L - termsEnum.ord(); + } + } + + @Override + public TermsEnum termsEnum() throws IOException { + return new TermsDict(entry.termsDictEntry, data); + } + } + + private class TermsDict extends BaseTermsEnum { + static final int LZ4_DECOMPRESSOR_PADDING = 7; + + final TermsDictEntry entry; + final LongValues blockAddresses; + final IndexInput bytes; + final long blockMask; + final LongValues indexAddresses; + final IndexInput indexBytes; + final BytesRef term; + long ord = -1; + + BytesRef blockBuffer = null; + ByteArrayDataInput blockInput = null; + long currentCompressedBlockStart = -1; + long currentCompressedBlockEnd = -1; + + TermsDict(TermsDictEntry entry, IndexInput data) throws IOException { + this.entry = entry; + RandomAccessInput addressesSlice = + data.randomAccessSlice(entry.termsAddressesOffset, entry.termsAddressesLength); + blockAddresses = + DirectMonotonicReader.getInstance(entry.termsAddressesMeta, addressesSlice); + bytes = data.slice("terms", entry.termsDataOffset, entry.termsDataLength); + blockMask = (1L << TERMS_DICT_BLOCK_LZ4_SHIFT) - 1; + RandomAccessInput indexAddressesSlice = + data.randomAccessSlice(entry.termsIndexAddressesOffset, entry.termsIndexAddressesLength); + indexAddresses = + DirectMonotonicReader.getInstance( + entry.termsIndexAddressesMeta, indexAddressesSlice); + indexBytes = data.slice("terms-index", entry.termsIndexOffset, entry.termsIndexLength); + 
term = new BytesRef(entry.maxTermLength); + + // add the max term length for the dictionary + // add 7 padding bytes can help decompression run faster. + int bufferSize = entry.maxBlockLength + entry.maxTermLength + LZ4_DECOMPRESSOR_PADDING; + blockBuffer = new BytesRef(new byte[bufferSize], 0, bufferSize); + } + + @Override + public BytesRef next() throws IOException { + if (++ord >= entry.termsDictSize) { + return null; + } + + if ((ord & blockMask) == 0L) { + decompressBlock(); + } else { + DataInput input = blockInput; + final int token = Byte.toUnsignedInt(input.readByte()); + int prefixLength = token & 0x0F; + int suffixLength = 1 + (token >>> 4); + if (prefixLength == 15) { + prefixLength += input.readVInt(); + } + if (suffixLength == 16) { + suffixLength += input.readVInt(); + } + term.length = prefixLength + suffixLength; + input.readBytes(term.bytes, prefixLength, suffixLength); + } + return term; + } + + @Override + public void seekExact(long ord) throws IOException { + if (ord < 0 || ord >= entry.termsDictSize) { + throw new IndexOutOfBoundsException(); + } + // Signed shift since ord is -1 when the terms enum is not positioned + final long currentBlockIndex = this.ord >> TERMS_DICT_BLOCK_LZ4_SHIFT; + final long blockIndex = ord >> TERMS_DICT_BLOCK_LZ4_SHIFT; + if (ord < this.ord || blockIndex != currentBlockIndex) { + // The looked up ord is before the current ord or belongs to a different block, seek again + final long blockAddress = blockAddresses.get(blockIndex); + bytes.seek(blockAddress); + this.ord = (blockIndex << TERMS_DICT_BLOCK_LZ4_SHIFT) - 1; + } + // Scan to the looked up ord + while (this.ord < ord) { + next(); + } + } + + private BytesRef getTermFromIndex(long index) throws IOException { + assert index >= 0 && index <= (entry.termsDictSize - 1) >>> entry.termsDictIndexShift; + final long start = indexAddresses.get(index); + term.length = (int) (indexAddresses.get(index + 1) - start); + indexBytes.seek(start); + 
indexBytes.readBytes(term.bytes, 0, term.length); + return term; + } + + private long seekTermsIndex(BytesRef text) throws IOException { + long lo = 0L; + long hi = (entry.termsDictSize - 1) >> entry.termsDictIndexShift; + while (lo <= hi) { + final long mid = (lo + hi) >>> 1; + getTermFromIndex(mid); + final int cmp = term.compareTo(text); + if (cmp <= 0) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + + assert hi < 0 || getTermFromIndex(hi).compareTo(text) <= 0; + assert hi == ((entry.termsDictSize - 1) >> entry.termsDictIndexShift) + || getTermFromIndex(hi + 1).compareTo(text) > 0; + + return hi; + } + + private BytesRef getFirstTermFromBlock(long block) throws IOException { + assert block >= 0 && block <= (entry.termsDictSize - 1) >>> TERMS_DICT_BLOCK_LZ4_SHIFT; + final long blockAddress = blockAddresses.get(block); + bytes.seek(blockAddress); + term.length = bytes.readVInt(); + bytes.readBytes(term.bytes, 0, term.length); + return term; + } + + private long seekBlock(BytesRef text) throws IOException { + long index = seekTermsIndex(text); + if (index == -1L) { + return -1L; + } + + long ordLo = index << entry.termsDictIndexShift; + long ordHi = Math.min(entry.termsDictSize, ordLo + (1L << entry.termsDictIndexShift)) - 1L; + + long blockLo = ordLo >>> TERMS_DICT_BLOCK_LZ4_SHIFT; + long blockHi = ordHi >>> TERMS_DICT_BLOCK_LZ4_SHIFT; + + while (blockLo <= blockHi) { + final long blockMid = (blockLo + blockHi) >>> 1; + getFirstTermFromBlock(blockMid); + final int cmp = term.compareTo(text); + if (cmp <= 0) { + blockLo = blockMid + 1; + } else { + blockHi = blockMid - 1; + } + } + + assert blockHi < 0 || getFirstTermFromBlock(blockHi).compareTo(text) <= 0; + assert blockHi == ((entry.termsDictSize - 1) >>> TERMS_DICT_BLOCK_LZ4_SHIFT) + || getFirstTermFromBlock(blockHi + 1).compareTo(text) > 0; + + return blockHi; + } + + @Override + public SeekStatus seekCeil(BytesRef text) throws IOException { + final long block = seekBlock(text); + if (block == -1) { + // 
before the first term, or empty terms dict + if (entry.termsDictSize == 0) { + ord = 0; + return SeekStatus.END; + } else { + seekExact(0L); + return SeekStatus.NOT_FOUND; + } + } + final long blockAddress = blockAddresses.get(block); + this.ord = block << TERMS_DICT_BLOCK_LZ4_SHIFT; + bytes.seek(blockAddress); + decompressBlock(); + + while (true) { + int cmp = term.compareTo(text); + if (cmp == 0) { + return SeekStatus.FOUND; + } else if (cmp > 0) { + return SeekStatus.NOT_FOUND; + } + if (next() == null) { + return SeekStatus.END; + } + } + } + + private void decompressBlock() throws IOException { + // The first term is kept uncompressed, so no need to decompress block if only + // look up the first term when doing seek block. + term.length = bytes.readVInt(); + bytes.readBytes(term.bytes, 0, term.length); + long offset = bytes.getFilePointer(); + if (offset < entry.termsDataLength - 1) { + // Avoid decompress again if we are reading a same block. + if (currentCompressedBlockStart != offset) { + blockBuffer.offset = term.length; + blockBuffer.length = bytes.readVInt(); + // Decompress the remaining of current block, using the first term as a dictionary + System.arraycopy(term.bytes, 0, blockBuffer.bytes, 0, blockBuffer.offset); + LZ4.decompress(bytes, blockBuffer.length, blockBuffer.bytes, blockBuffer.offset); + currentCompressedBlockStart = offset; + currentCompressedBlockEnd = bytes.getFilePointer(); + } else { + // Skip decompression but need to re-seek to block end. + bytes.seek(currentCompressedBlockEnd); + } + + // Reset the buffer. 
+ blockInput = + new ByteArrayDataInput(blockBuffer.bytes, blockBuffer.offset, blockBuffer.length); + } + } + + @Override + public BytesRef term() throws IOException { + return term; + } + + @Override + public long ord() throws IOException { + return ord; + } + + @Override + public long totalTermFreq() throws IOException { + return -1L; + } + + @Override + public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public ImpactsEnum impacts(int flags) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int docFreq() throws IOException { + throw new UnsupportedOperationException(); + } } + @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { SortedNumericEntry entry = sortedNumerics.get(field.name); - return getSortedNumeric(entry); + return getSortedNumeric(entry, false); } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - throw new UnsupportedOperationException("Unsupported sorted set doc values for field [" + field.name + "]"); + SortedSetEntry entry = sortedSets.get(field.name); + if (entry.singleValueEntry != null) { + return DocValues.singleton(getSorted(entry.singleValueEntry)); + } + + SortedNumericEntry ordsEntry = entry.ordsEntry; + final SortedNumericDocValues ords = getSortedNumeric(ordsEntry, true); + return new BaseSortedSetDocValues(entry, data) { + + int i = 0; + int count = 0; + boolean set = false; + + @Override + public long nextOrd() throws IOException { + if (set == false) { + set = true; + i = 0; + count = ords.docValueCount(); + } + if (i++ == count) { + return NO_MORE_ORDS; + } + return ords.nextValue(); + } + + @Override + public int docValueCount() { + return ords.docValueCount(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + set = false; + return ords.advanceExact(target); + } + + @Override + 
public int docID() { + return ords.docID(); + } + + @Override + public int nextDoc() throws IOException { + set = false; + return ords.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + set = false; + return ords.advance(target); + } + + @Override + public long cost() { + return ords.cost(); + } + }; } @Override @@ -147,9 +580,9 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException { } else if (type == ES87TSDBDocValuesFormat.BINARY) { throw new CorruptIndexException("unsupported type: " + type, meta); } else if (type == ES87TSDBDocValuesFormat.SORTED) { - throw new CorruptIndexException("unsupported type: " + type, meta); + sorted.put(info.name, readSorted(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED_SET) { - throw new CorruptIndexException("unsupported type: " + type, meta); + sortedSets.put(info.name, readSortedSet(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED_NUMERIC) { sortedNumerics.put(info.name, readSortedNumeric(meta)); } else { @@ -202,11 +635,59 @@ private SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry return entry; } + private SortedEntry readSorted(IndexInput meta) throws IOException { + SortedEntry entry = new SortedEntry(); + entry.ordsEntry = new NumericEntry(); + readNumeric(meta, entry.ordsEntry); + entry.termsDictEntry = new TermsDictEntry(); + readTermDict(meta, entry.termsDictEntry); + return entry; + } + + private SortedSetEntry readSortedSet(IndexInput meta) throws IOException { + SortedSetEntry entry = new SortedSetEntry(); + byte multiValued = meta.readByte(); + switch (multiValued) { + case 0: // singlevalued + entry.singleValueEntry = readSorted(meta); + return entry; + case 1: // multivalued + break; + default: + throw new CorruptIndexException("Invalid multiValued flag: " + multiValued, meta); + } + entry.ordsEntry = new SortedNumericEntry(); + readSortedNumeric(meta, entry.ordsEntry); + entry.termsDictEntry = new 
TermsDictEntry(); + readTermDict(meta, entry.termsDictEntry); + return entry; + } + + private static void readTermDict(IndexInput meta, TermsDictEntry entry) throws IOException { + entry.termsDictSize = meta.readVLong(); + final int blockShift = meta.readInt(); + final long addressesSize = (entry.termsDictSize + (1L << TERMS_DICT_BLOCK_LZ4_SHIFT) - 1) >>> TERMS_DICT_BLOCK_LZ4_SHIFT; + entry.termsAddressesMeta = DirectMonotonicReader.loadMeta(meta, addressesSize, blockShift); + entry.maxTermLength = meta.readInt(); + entry.maxBlockLength = meta.readInt(); + entry.termsDataOffset = meta.readLong(); + entry.termsDataLength = meta.readLong(); + entry.termsAddressesOffset = meta.readLong(); + entry.termsAddressesLength = meta.readLong(); + entry.termsDictIndexShift = meta.readInt(); + final long indexSize = (entry.termsDictSize + (1L << entry.termsDictIndexShift) - 1) >>> entry.termsDictIndexShift; + entry.termsIndexAddressesMeta = DirectMonotonicReader.loadMeta(meta, 1 + indexSize, blockShift); + entry.termsIndexOffset = meta.readLong(); + entry.termsIndexLength = meta.readLong(); + entry.termsIndexAddressesOffset = meta.readLong(); + entry.termsIndexAddressesLength = meta.readLong(); + } + private abstract static class NumericValues { abstract long advance(long index) throws IOException; } - private NumericDocValues getNumeric(NumericEntry entry) throws IOException { + private NumericDocValues getNumeric(NumericEntry entry, boolean ords) throws IOException { if (entry.docsWithFieldOffset == -2) { // empty return DocValues.emptyNumeric(); @@ -269,7 +750,11 @@ public long longValue() throws IOException { valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - decoder.decode(valuesData, currentBlock); + if (ords) { + decoder.decodeOrdinals(valuesData, currentBlock); + } else { + decoder.decode(valuesData, currentBlock); + } } return currentBlock[blockInIndex]; } @@ -325,7 +810,11 @@ public long longValue() throws IOException { 
valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - decoder.decode(valuesData, currentBlock); + if (ords) { + decoder.decodeOrdinals(valuesData, currentBlock); + } else { + decoder.decode(valuesData, currentBlock); + } } return currentBlock[blockInIndex]; } @@ -333,7 +822,7 @@ public long longValue() throws IOException { } } - private NumericValues getValues(NumericEntry entry) throws IOException { + private NumericValues getValues(NumericEntry entry, final boolean ords) throws IOException { assert entry.numValues > 0; final RandomAccessInput indexSlice = data.randomAccessSlice(entry.indexOffset, entry.indexLength); final DirectMonotonicReader indexReader = DirectMonotonicReader.getInstance(entry.indexMeta, indexSlice); @@ -355,22 +844,26 @@ long advance(long index) throws IOException { valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - decoder.decode(valuesData, currentBlock); + if (ords) { + decoder.decodeOrdinals(valuesData, currentBlock); + } else { + decoder.decode(valuesData, currentBlock); + } } return currentBlock[blockInIndex]; } }; } - private SortedNumericDocValues getSortedNumeric(SortedNumericEntry entry) throws IOException { + private SortedNumericDocValues getSortedNumeric(SortedNumericEntry entry, boolean ords) throws IOException { if (entry.numValues == entry.numDocsWithField) { - return DocValues.singleton(getNumeric(entry)); + return DocValues.singleton(getNumeric(entry, ords)); } final RandomAccessInput addressesInput = data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput); - final NumericValues values = getValues(entry); + final NumericValues values = getValues(entry, ords); if (entry.docsWithFieldOffset == -1) { // dense @@ -514,4 +1007,33 @@ private static class SortedNumericEntry extends NumericEntry { long addressesLength; } + private static class SortedEntry { + 
NumericEntry ordsEntry; + TermsDictEntry termsDictEntry; + } + + private static class SortedSetEntry { + SortedEntry singleValueEntry; + SortedNumericEntry ordsEntry; + TermsDictEntry termsDictEntry; + } + + private static class TermsDictEntry { + long termsDictSize; + DirectMonotonicReader.Meta termsAddressesMeta; + int maxTermLength; + long termsDataOffset; + long termsDataLength; + long termsAddressesOffset; + long termsAddressesLength; + int termsDictIndexShift; + DirectMonotonicReader.Meta termsIndexAddressesMeta; + long termsIndexOffset; + long termsIndexLength; + long termsIndexAddressesOffset; + long termsIndexAddressesLength; + + int maxBlockLength; + } + } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index eb06a03cab434..e85d986519adf 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -189,4 +189,59 @@ private void doTest(long[] arr, long expectedNumBytes) throws IOException { } } } + + public void testEncodeOrdinalsSmall() throws IOException { + long[] arr = new long[blockSize]; + for (int i = 0; i < 64; ++i) { + arr[i] = 1; + } + for (int i = 64; i < blockSize; ++i) { + arr[i] = 2; + } + final long expectedNumBytes = 2 // 2 zig-zag-encoded ordinals + + 2; // 2 zig-zag-encoded negative longs, indicating repetitions + + doTestOrdinals(arr, expectedNumBytes); + } + + public void testEncodeOrdinalsSmallLarge() throws IOException { + long[] arr = new long[blockSize]; + for (int i = 0; i < 64; ++i) { + arr[i] = Long.MAX_VALUE - 1; + } + for (int i = 64; i < blockSize; ++i) { + arr[i] = Long.MAX_VALUE; + } + final long expectedNumBytes = 10 * 2 // 2 zig-zag-encoded ordinals + + 1 * 2; // 2 zig-zag-encoded negative longs, indicating repetitions + + 
doTestOrdinals(arr, expectedNumBytes); + } + + public void testEncodeOrdinalsNoRepetitions() throws IOException { + long[] arr = new long[blockSize]; + for (int i = 0; i < blockSize; ++i) { + arr[i] = i; + } + doTestOrdinals(arr, 192); + } + + private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOException { + final long[] expected = arr.clone(); + try (Directory dir = newDirectory()) { + try (IndexOutput out = dir.createOutput("tests.bin", IOContext.DEFAULT)) { + encoder.encodeOrdinals(arr, out); + assertEquals(expectedNumBytes, out.getFilePointer()); + } + try (IndexInput in = dir.openInput("tests.bin", IOContext.DEFAULT)) { + long[] decoded = new long[blockSize]; + for (int i = 0; i < decoded.length; ++i) { + decoded[i] = random().nextLong(); + } + encoder.decodeOrdinals(in, decoded); + assertEquals(in.length(), in.getFilePointer()); + assertArrayEquals(expected, decoded); + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java index 1fe24f2e740ea..60007980b2e03 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java @@ -13,7 +13,6 @@ import org.apache.lucene.tests.util.TestUtil; import java.io.IOException; -import java.util.function.Supplier; public class ES87TSDBDocValuesFormatTests extends BaseDocValuesFormatTestCase { @@ -66,31 +65,6 @@ public void testBytesMergeAwayAllValues() { assumeTrue("doc values format only supports numerics", false); } - @Override - public void testSortedBytes() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedBytesTwoDocuments() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedBytesThreeDocuments() { - 
assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedBytesTwoDocumentsMerged() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedMergeAwayAllValues() { - assumeTrue("doc values format only supports numerics", false); - } - @Override public void testBytesWithNewline() { assumeTrue("doc values format only supports numerics", false); @@ -101,16 +75,6 @@ public void testMissingSortedBytes() { assumeTrue("doc values format only supports numerics", false); } - @Override - public void testSortedTermsEnum() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testEmptySortedBytes() { - assumeTrue("doc values format only supports numerics", false); - } - @Override public void testEmptyBytes() { assumeTrue("doc values format only supports numerics", false); @@ -136,16 +100,6 @@ public void testCodecUsesOwnSortedBytes() throws IOException { assumeTrue("doc values format only supports numerics", false); } - @Override - public void testDocValuesSimple() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testRandomSortedBytes() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - @Override public void testBinaryFixedLengthVsStoredFields() { assumeTrue("doc values format only supports numerics", false); @@ -171,141 +125,6 @@ public void doTestBinaryVariableLengthVsStoredFields(double density) throws Exce assumeTrue("doc values format only supports numerics", false); } - @Override - protected void doTestSortedVsStoredFields(int numDocs, double density, Supplier bytes) throws Exception { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedFixedLengthVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void 
testSparseSortedFixedLengthVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedVariableLengthVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSparseSortedVariableLengthVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - protected void doTestSortedVsStoredFields(int numDocs, double density, int minLength, int maxLength) throws Exception { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetOneValue() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetTwoFields() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetTwoDocumentsMerged() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetTwoValues() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetTwoValuesUnordered() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetThreeValuesTwoDocs() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetTwoDocumentsLastMissing() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetTwoDocumentsFirstMissing() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void 
testSortedSetTwoDocumentsFirstMissingMerge() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetMergeAwayAllValues() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetTermsEnum() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetFixedLengthVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetVariableLengthVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetFixedLengthSingleValuedVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetVariableLengthSingleValuedVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetFixedLengthFewUniqueSetsVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetVariableLengthFewUniqueSetsVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetVariableLengthManyValuesPerDocVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetFixedLengthManyValuesPerDocVsStoredFields() { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSparseGCDCompression() { - assumeTrue("doc values format only supports numerics", false); - } - @Override public void testTwoBytesOneMissing() throws IOException { assumeTrue("doc values format only supports numerics", false); @@ -331,61 +150,6 @@ public void testEmptyBinaryValueOnPageSizes() { assumeTrue("doc values 
format only supports numerics", false); } - @Override - public void testOneSortedNumber() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testOneSortedNumberOneMissing() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testTwoSortedNumber() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testTwoSortedNumberSameValue() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testTwoSortedNumberOneMissing() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedNumberMerge() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedNumberMergeAwayAllValues() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedEnumAdvanceIndependently() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetEnumAdvanceIndependently() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedMergeAwayAllValuesLargeSegment() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - - @Override - public void testSortedSetMergeAwayAllValuesLargeSegment() throws IOException { - assumeTrue("doc values format only supports numerics", false); - } - @Override public void testBinaryMergeAwayAllValuesLargeSegment() throws IOException { assumeTrue("doc values format only supports numerics", false); @@ -396,11 +160,6 @@ public void testRandomAdvanceBinary() throws IOException { assumeTrue("doc values format only supports numerics", false); } 
- @Override - public void testHighOrdsSortedSetDV() { - assumeTrue("doc values format only supports numerics", false); - } - @Override public void testCheckIntegrityReadsAllBytes() { assumeTrue("doc values format only supports numerics", false); @@ -415,4 +174,5 @@ public void testMergeStability() { public void testRandomExceptions() { assumeTrue("doc values format only supports numerics", false); } + } From 9b666d9dff1641b655a43f00b08493426b7db7a2 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Tue, 19 Sep 2023 13:50:28 +0200 Subject: [PATCH 02/11] wip --- .../tsdb/ES87TSDBDocValuesEncoderTests.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index e85d986519adf..eb2a901c0db0d 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -12,10 +12,12 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; import java.io.IOException; +import java.nio.file.Path; import java.util.Arrays; import java.util.Random; @@ -244,4 +246,21 @@ private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOExceptio } } } + + public static void main(String[] args) throws IOException { + ES87TSDBDocValuesEncoder encoder = new ES87TSDBDocValuesEncoder(); + try ( + Directory dir = new MMapDirectory( + Path.of( + "/Users/felixbarnsteiner/.rally/benchmarks/races/0b842576-8a3a-49c8-8a74-5bfb7f6c8598/rally-node-0/install/elasticsearch-8.11.0-SNAPSHOT/data/indices/QWKdgc2xRm64O8h1ldxMjg/0/index" + ) + ) + ) { 
+ try (IndexInput in = dir.openInput("_2wh_ES87TSDB_0.dvd", IOContext.DEFAULT)) { + long[] decoded = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + encoder.decodeOrdinals(in, decoded); + System.out.println(decoded); + } + } + } } From 88d45834da8fe93adef0988a07846a213beaa392 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 21 Sep 2023 13:41:20 +0200 Subject: [PATCH 03/11] Revert "wip" This reverts commit 9b666d9dff1641b655a43f00b08493426b7db7a2. --- .../tsdb/ES87TSDBDocValuesEncoderTests.java | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index eb2a901c0db0d..e85d986519adf 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -12,12 +12,10 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; import java.io.IOException; -import java.nio.file.Path; import java.util.Arrays; import java.util.Random; @@ -246,21 +244,4 @@ private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOExceptio } } } - - public static void main(String[] args) throws IOException { - ES87TSDBDocValuesEncoder encoder = new ES87TSDBDocValuesEncoder(); - try ( - Directory dir = new MMapDirectory( - Path.of( - "/Users/felixbarnsteiner/.rally/benchmarks/races/0b842576-8a3a-49c8-8a74-5bfb7f6c8598/rally-node-0/install/elasticsearch-8.11.0-SNAPSHOT/data/indices/QWKdgc2xRm64O8h1ldxMjg/0/index" - ) - ) - ) { - try (IndexInput in = dir.openInput("_2wh_ES87TSDB_0.dvd", IOContext.DEFAULT)) { - long[] decoded 
= new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; - encoder.decodeOrdinals(in, decoded); - System.out.println(decoded); - } - } - } } From 95d63206b999af9c804fd72398ac6d4f82726473 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 29 Sep 2023 15:41:48 +0200 Subject: [PATCH 04/11] iter --- .../index/codec/PerFieldMapperCodec.java | 6 +- .../codec/tsdb/ES87TSDBDocValuesConsumer.java | 246 ++++++++---------- .../codec/tsdb/ES87TSDBDocValuesEncoder.java | 64 +++-- .../codec/tsdb/ES87TSDBDocValuesProducer.java | 59 ++--- .../tsdb/ES87TSDBDocValuesEncoderTests.java | 10 +- 5 files changed, 190 insertions(+), 195 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index 08c258f768707..d392073314843 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -108,9 +108,9 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } boolean useTSDBDocValuesFormat(final String field) { - return mapperService.getIndexSettings().isES87TSDBCodecEnabled() - && isTimeSeriesModeIndex() - && isNotSpecialField(field); + return /*mapperService.getIndexSettings().isES87TSDBCodecEnabled() + && isTimeSeriesModeIndex() + &&*/ isNotSpecialField(field); } private boolean isTimeSeriesModeIndex() { diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java index 21ff613e9c595..c0ddcfd25b7e5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java @@ -35,6 +35,7 @@ import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.compress.LZ4; import 
org.apache.lucene.util.packed.DirectMonotonicWriter; +import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.core.IOUtils; import java.io.IOException; @@ -90,10 +91,10 @@ public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) t public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { return DocValues.singleton(valuesProducer.getNumeric(field)); } - }, false); + }, -1); } - private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException { + private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProducer, long maxOrd) throws IOException { int numDocsWithValue = 0; long numValues = 0; @@ -141,14 +142,15 @@ private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProduc final ES87TSDBDocValuesEncoder encoder = new ES87TSDBDocValuesEncoder(); values = valuesProducer.getSortedNumeric(field); + final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { final int count = values.docValueCount(); for (int i = 0; i < count; ++i) { buffer[bufferSize++] = values.nextValue(); if (bufferSize == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) { indexWriter.add(data.getFilePointer() - valuesDataOffset); - if (ords) { - encoder.encodeOrdinals(buffer, data); + if (maxOrd >= 0) { + encoder.encodeOrdinals(buffer, data, bitsPerOrd); } else { encoder.encode(buffer, data); } @@ -160,8 +162,8 @@ private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProduc indexWriter.add(data.getFilePointer() - valuesDataOffset); // Fill unused slots in the block with zeroes rather than junk Arrays.fill(buffer, bufferSize, ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L); - if (ords) { - encoder.encodeOrdinals(buffer, data); + if (maxOrd >= 0) { + encoder.encodeOrdinals(buffer, data, bitsPerOrd); } else { 
encoder.encode(buffer, data); } @@ -193,49 +195,47 @@ public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) th doAddSortedField(field, valuesProducer); } - private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) - throws IOException { - writeNumericField( - field, - new EmptyDocValuesProducer() { - @Override - public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - SortedDocValues sorted = valuesProducer.getSorted(field); - NumericDocValues sortedOrds = - new NumericDocValues() { - @Override - public long longValue() throws IOException { - return sorted.ordValue(); - } + private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + SortedDocValues sorted = valuesProducer.getSorted(field); + int maxOrd = sorted.getValueCount(); + writeNumericField(field, new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + SortedDocValues sorted = valuesProducer.getSorted(field); + NumericDocValues sortedOrds = new NumericDocValues() { + @Override + public long longValue() throws IOException { + return sorted.ordValue(); + } - @Override - public boolean advanceExact(int target) throws IOException { - return sorted.advanceExact(target); - } + @Override + public boolean advanceExact(int target) throws IOException { + return sorted.advanceExact(target); + } - @Override - public int docID() { - return sorted.docID(); - } + @Override + public int docID() { + return sorted.docID(); + } - @Override - public int nextDoc() throws IOException { - return sorted.nextDoc(); - } + @Override + public int nextDoc() throws IOException { + return sorted.nextDoc(); + } - @Override - public int advance(int target) throws IOException { - return sorted.advance(target); - } + @Override + public int advance(int target) throws IOException { + return sorted.advance(target); + } - @Override - public long cost() 
{ - return sorted.cost(); - } - }; - return DocValues.singleton(sortedOrds); - } - }, true); + @Override + public long cost() { + return sorted.cost(); + } + }; + return DocValues.singleton(sortedOrds); + } + }, maxOrd); addTermsDict(DocValues.singleton(valuesProducer.getSorted(field))); } @@ -248,12 +248,9 @@ private void addTermsDict(SortedSetDocValues values) throws IOException { meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT); ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput(); - ByteBuffersIndexOutput addressOutput = - new ByteBuffersIndexOutput(addressBuffer, "temp", "temp"); + ByteBuffersIndexOutput addressOutput = new ByteBuffersIndexOutput(addressBuffer, "temp", "temp"); long numBlocks = (size + blockMask) >>> shift; - DirectMonotonicWriter writer = - DirectMonotonicWriter.getInstance( - meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); + DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); BytesRefBuilder previous = new BytesRefBuilder(); long ord = 0; @@ -269,8 +266,7 @@ private void addTermsDict(SortedSetDocValues values) throws IOException { if ((ord & blockMask) == 0) { if (ord != 0) { // flush the previous block - final int uncompressedLength = - compressAndGetTermsDictBlockLength(bufferedOutput, dictLength, ht); + final int uncompressedLength = compressAndGetTermsDictBlockLength(bufferedOutput, dictLength, ht); maxBlockLength = Math.max(maxBlockLength, uncompressedLength); bufferedOutput.reset(termsDictBuffer); } @@ -289,8 +285,7 @@ private void addTermsDict(SortedSetDocValues values) throws IOException { assert suffixLength > 0; // terms are unique // Will write (suffixLength + 1 byte + 2 vint) bytes. Grow the buffer in need. 
bufferedOutput = maybeGrowBuffer(bufferedOutput, suffixLength + 11); - bufferedOutput.writeByte( - (byte) (Math.min(prefixLength, 15) | (Math.min(15, suffixLength - 1) << 4))); + bufferedOutput.writeByte((byte) (Math.min(prefixLength, 15) | (Math.min(15, suffixLength - 1) << 4))); if (prefixLength >= 15) { bufferedOutput.writeVInt(prefixLength - 15); } @@ -305,8 +300,7 @@ private void addTermsDict(SortedSetDocValues values) throws IOException { } // Compress and write out the last block if (bufferedOutput.getPosition() > dictLength) { - final int uncompressedLength = - compressAndGetTermsDictBlockLength(bufferedOutput, dictLength, ht); + final int uncompressedLength = compressAndGetTermsDictBlockLength(bufferedOutput, dictLength, ht); maxBlockLength = Math.max(maxBlockLength, uncompressedLength); } @@ -325,8 +319,7 @@ private void addTermsDict(SortedSetDocValues values) throws IOException { writeTermsIndex(values); } - private int compressAndGetTermsDictBlockLength( - ByteArrayDataOutput bufferedOutput, int dictLength, LZ4.FastCompressionHashTable ht) + private int compressAndGetTermsDictBlockLength(ByteArrayDataOutput bufferedOutput, int dictLength, LZ4.FastCompressionHashTable ht) throws IOException { int uncompressedLength = bufferedOutput.getPosition() - dictLength; data.writeVInt(uncompressedLength); @@ -348,17 +341,12 @@ private void writeTermsIndex(SortedSetDocValues values) throws IOException { meta.writeInt(ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT); long start = data.getFilePointer(); - long numBlocks = - 1L - + ((size + ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) - >>> ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT); + long numBlocks = 1L + ((size + ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) + >>> ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT); ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput(); DirectMonotonicWriter writer; - try (ByteBuffersIndexOutput addressOutput = - new 
ByteBuffersIndexOutput(addressBuffer, "temp", "temp")) { - writer = - DirectMonotonicWriter.getInstance( - meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); + try (ByteBuffersIndexOutput addressOutput = new ByteBuffersIndexOutput(addressBuffer, "temp", "temp")) { + writer = DirectMonotonicWriter.getInstance(meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); TermsEnum iterator = values.termsEnum(); BytesRefBuilder previous = new BytesRefBuilder(); long offset = 0; @@ -375,10 +363,10 @@ private void writeTermsIndex(SortedSetDocValues values) throws IOException { } offset += sortKeyLength; data.writeBytes(term.bytes, term.offset, sortKeyLength); - } else if ((ord & ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) - == ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) { - previous.copyBytes(term); - } + } else if ((ord + & ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) == ES87TSDBDocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) { + previous.copyBytes(term); + } ++ord; } writer.add(offset); @@ -396,11 +384,11 @@ private void writeTermsIndex(SortedSetDocValues values) throws IOException { public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); meta.writeByte(ES87TSDBDocValuesFormat.SORTED_NUMERIC); - writeSortedNumericField(field, valuesProducer, false); + writeSortedNumericField(field, valuesProducer, -1); } - private void writeSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException { - long[] stats = writeNumericField(field, valuesProducer, ords); + private void writeSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer, long maxOrd) throws IOException { + long[] stats = writeNumericField(field, valuesProducer, maxOrd); int numDocsWithField = Math.toIntExact(stats[0]); long numValues = stats[1]; assert numValues >= numDocsWithField; @@ -446,83 +434,79 @@ private static boolean 
isSingleValued(SortedSetDocValues values) throws IOExcept } @Override - public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) - throws IOException { + public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); meta.writeByte(SORTED_SET); if (isSingleValued(valuesProducer.getSortedSet(field))) { meta.writeByte((byte) 0); // multiValued (0 = singleValued) - doAddSortedField( - field, - new EmptyDocValuesProducer() { - @Override - public SortedDocValues getSorted(FieldInfo field) throws IOException { - return SortedSetSelector.wrap( - valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN); - } - }); + doAddSortedField(field, new EmptyDocValuesProducer() { + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN); + } + }); return; } meta.writeByte((byte) 1); // multiValued (1 = multiValued) - writeSortedNumericField( - field, - new EmptyDocValuesProducer() { - @Override - public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - SortedSetDocValues values = valuesProducer.getSortedSet(field); - return new SortedNumericDocValues() { + SortedSetDocValues values = valuesProducer.getSortedSet(field); + long maxOrd = values.getValueCount(); + writeSortedNumericField(field, new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + SortedSetDocValues values = valuesProducer.getSortedSet(field); + return new SortedNumericDocValues() { - long[] ords = LongsRef.EMPTY_LONGS; - int i, docValueCount; + long[] ords = LongsRef.EMPTY_LONGS; + int i, docValueCount; - @Override - public long nextValue() throws IOException { - return ords[i++]; - } + @Override + public long nextValue() throws IOException { + return ords[i++]; + } - @Override - public int 
docValueCount() { - return docValueCount; - } + @Override + public int docValueCount() { + return docValueCount; + } - @Override - public boolean advanceExact(int target) throws IOException { - throw new UnsupportedOperationException(); - } + @Override + public boolean advanceExact(int target) throws IOException { + throw new UnsupportedOperationException(); + } - @Override - public int docID() { - return values.docID(); - } + @Override + public int docID() { + return values.docID(); + } - @Override - public int nextDoc() throws IOException { - int doc = values.nextDoc(); - if (doc != NO_MORE_DOCS) { - docValueCount = values.docValueCount(); - ords = ArrayUtil.grow(ords, docValueCount); - for (int j = 0; j < docValueCount; j++) { - ords[j] = values.nextOrd(); - } - i = 0; + @Override + public int nextDoc() throws IOException { + int doc = values.nextDoc(); + if (doc != NO_MORE_DOCS) { + docValueCount = values.docValueCount(); + ords = ArrayUtil.grow(ords, docValueCount); + for (int j = 0; j < docValueCount; j++) { + ords[j] = values.nextOrd(); } - return doc; + i = 0; } + return doc; + } - @Override - public int advance(int target) throws IOException { - throw new UnsupportedOperationException(); - } + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } - @Override - public long cost() { - return values.cost(); - } - }; - } - }, true); + @Override + public long cost() { + return values.cost(); + } + }; + } + }, maxOrd); addTermsDict(valuesProducer.getSortedSet(field)); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index e90e0e2bfcea8..d36ee611112c7 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -178,41 +178,49 @@ void 
encode(long[] in, DataOutput out) throws IOException { deltaEncode(0, 0, in, out); } - void encodeOrdinals(long[] in, DataOutput out) throws IOException { + void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException { assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; - long previous = in[0]; - int repetitions = -1; - for (long l : in) { - assert l >= 0 : "Ordinals are expected to be positive"; - if (previous == l) { - repetitions++; - } else { - out.writeZLong(previous); - if (repetitions > 0) { - out.writeZLong(repetitions * -1); - } - repetitions = 0; - previous = l; + int numRuns = 1; + for (int i = 1; i < in.length; ++i) { + if (in[i - 1] != in[1]) { + numRuns++; } } - out.writeZLong(previous); - if (repetitions > 0) { - out.writeZLong(repetitions * -1); + if (numRuns == 1) { + long value = in[0]; + out.writeVLong(value << 2); + } else if (numRuns == 2) { + out.writeVLong((in[0] << 2) | 0x01); + int firstRunLen = in.length; + for (int i = 1; i < in.length; ++i) { + if (in[i] != in[0]) { + firstRunLen = i; + break; + } + } + out.writeVInt(firstRunLen); + out.writeZLong(in[in.length - 1] - in[0]); + } else { + out.writeVLong(0x02); + forUtil.encode(in, bitsPerOrd, out); } } - void decodeOrdinals(DataInput in, long[] out) throws IOException { + void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException { assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length; - for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;) { - long l = in.readZLong(); - if (l >= 0) { - out[i] = l; - i++; - } else { - int repetitions = (int) (-1 * l); - Arrays.fill(out, i, i + repetitions, out[i - 1]); - i += repetitions; - } + + long header = in.readVLong(); + int token = (int) (header & 0x03L); + if (token == 0) { + Arrays.fill(out, header >>> 2); + } else if (token == 1) { + long v1 = header >>> 2; + int runLen = in.readVInt(); + long v2 = v1 + in.readZLong(); + Arrays.fill(out, 0, runLen, v1); + 
Arrays.fill(out, runLen, out.length, v2); + } else { + forUtil.decode(bitsPerOrd, in, out); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index 464ca38fccc85..d4ac9b247a655 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -35,6 +35,7 @@ import org.apache.lucene.util.LongValues; import org.apache.lucene.util.compress.LZ4; import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.core.IOUtils; import java.io.IOException; @@ -113,7 +114,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { NumericEntry entry = numerics.get(field.name); - return getNumeric(entry, false); + return getNumeric(entry, -1); } @Override @@ -128,7 +129,7 @@ public SortedDocValues getSorted(FieldInfo field) throws IOException { } private SortedDocValues getSorted(SortedEntry entry) throws IOException { - final NumericDocValues ords = getNumeric(entry.ordsEntry, true); + final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize); return new BaseSortedDocValues(entry) { @Override @@ -163,7 +164,6 @@ public long cost() { }; } - private abstract class BaseSortedDocValues extends SortedDocValues { final SortedEntry entry; @@ -265,17 +265,15 @@ private class TermsDict extends BaseTermsEnum { TermsDict(TermsDictEntry entry, IndexInput data) throws IOException { this.entry = entry; - RandomAccessInput addressesSlice = - data.randomAccessSlice(entry.termsAddressesOffset, entry.termsAddressesLength); - blockAddresses = - DirectMonotonicReader.getInstance(entry.termsAddressesMeta, addressesSlice); + 
RandomAccessInput addressesSlice = data.randomAccessSlice(entry.termsAddressesOffset, entry.termsAddressesLength); + blockAddresses = DirectMonotonicReader.getInstance(entry.termsAddressesMeta, addressesSlice); bytes = data.slice("terms", entry.termsDataOffset, entry.termsDataLength); blockMask = (1L << TERMS_DICT_BLOCK_LZ4_SHIFT) - 1; - RandomAccessInput indexAddressesSlice = - data.randomAccessSlice(entry.termsIndexAddressesOffset, entry.termsIndexAddressesLength); - indexAddresses = - DirectMonotonicReader.getInstance( - entry.termsIndexAddressesMeta, indexAddressesSlice); + RandomAccessInput indexAddressesSlice = data.randomAccessSlice( + entry.termsIndexAddressesOffset, + entry.termsIndexAddressesLength + ); + indexAddresses = DirectMonotonicReader.getInstance(entry.termsIndexAddressesMeta, indexAddressesSlice); indexBytes = data.slice("terms-index", entry.termsIndexOffset, entry.termsIndexLength); term = new BytesRef(entry.maxTermLength); @@ -354,8 +352,7 @@ private long seekTermsIndex(BytesRef text) throws IOException { } assert hi < 0 || getTermFromIndex(hi).compareTo(text) <= 0; - assert hi == ((entry.termsDictSize - 1) >> entry.termsDictIndexShift) - || getTermFromIndex(hi + 1).compareTo(text) > 0; + assert hi == ((entry.termsDictSize - 1) >> entry.termsDictIndexShift) || getTermFromIndex(hi + 1).compareTo(text) > 0; return hi; } @@ -394,7 +391,7 @@ private long seekBlock(BytesRef text) throws IOException { assert blockHi < 0 || getFirstTermFromBlock(blockHi).compareTo(text) <= 0; assert blockHi == ((entry.termsDictSize - 1) >>> TERMS_DICT_BLOCK_LZ4_SHIFT) - || getFirstTermFromBlock(blockHi + 1).compareTo(text) > 0; + || getFirstTermFromBlock(blockHi + 1).compareTo(text) > 0; return blockHi; } @@ -452,8 +449,7 @@ private void decompressBlock() throws IOException { } // Reset the buffer. 
- blockInput = - new ByteArrayDataInput(blockBuffer.bytes, blockBuffer.offset, blockBuffer.length); + blockInput = new ByteArrayDataInput(blockBuffer.bytes, blockBuffer.offset, blockBuffer.length); } } @@ -488,11 +484,10 @@ public int docFreq() throws IOException { } } - @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { SortedNumericEntry entry = sortedNumerics.get(field.name); - return getSortedNumeric(entry, false); + return getSortedNumeric(entry, -1); } @Override @@ -503,7 +498,7 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { } SortedNumericEntry ordsEntry = entry.ordsEntry; - final SortedNumericDocValues ords = getSortedNumeric(ordsEntry, true); + final SortedNumericDocValues ords = getSortedNumeric(ordsEntry, entry.termsDictEntry.termsDictSize); return new BaseSortedSetDocValues(entry, data) { int i = 0; @@ -687,7 +682,7 @@ private abstract static class NumericValues { abstract long advance(long index) throws IOException; } - private NumericDocValues getNumeric(NumericEntry entry, boolean ords) throws IOException { + private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOException { if (entry.docsWithFieldOffset == -2) { // empty return DocValues.emptyNumeric(); @@ -700,6 +695,7 @@ private NumericDocValues getNumeric(NumericEntry entry, boolean ords) throws IOE final DirectMonotonicReader indexReader = DirectMonotonicReader.getInstance(entry.indexMeta, indexSlice); final IndexInput valuesData = data.slice("values", entry.valuesOffset, entry.valuesLength); + final int bitsPerOrd = maxOrd >= 0 ? 
PackedInts.bitsRequired(maxOrd - 1) : -1; if (entry.docsWithFieldOffset == -1) { // dense return new NumericDocValues() { @@ -750,8 +746,8 @@ public long longValue() throws IOException { valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - if (ords) { - decoder.decodeOrdinals(valuesData, currentBlock); + if (maxOrd >= 0) { + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); } else { decoder.decode(valuesData, currentBlock); } @@ -810,8 +806,8 @@ public long longValue() throws IOException { valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - if (ords) { - decoder.decodeOrdinals(valuesData, currentBlock); + if (maxOrd >= 0) { + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); } else { decoder.decode(valuesData, currentBlock); } @@ -822,12 +818,13 @@ public long longValue() throws IOException { } } - private NumericValues getValues(NumericEntry entry, final boolean ords) throws IOException { + private NumericValues getValues(NumericEntry entry, final long maxOrd) throws IOException { assert entry.numValues > 0; final RandomAccessInput indexSlice = data.randomAccessSlice(entry.indexOffset, entry.indexLength); final DirectMonotonicReader indexReader = DirectMonotonicReader.getInstance(entry.indexMeta, indexSlice); final IndexInput valuesData = data.slice("values", entry.valuesOffset, entry.valuesLength); + final int bitsPerOrd = maxOrd >= 0 ? 
PackedInts.bitsRequired(maxOrd - 1) : -1; return new NumericValues() { private final ES87TSDBDocValuesEncoder decoder = new ES87TSDBDocValuesEncoder(); @@ -844,8 +841,8 @@ long advance(long index) throws IOException { valuesData.seek(indexReader.get(blockIndex)); } currentBlockIndex = blockIndex; - if (ords) { - decoder.decodeOrdinals(valuesData, currentBlock); + if (bitsPerOrd >= 0) { + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); } else { decoder.decode(valuesData, currentBlock); } @@ -855,15 +852,15 @@ long advance(long index) throws IOException { }; } - private SortedNumericDocValues getSortedNumeric(SortedNumericEntry entry, boolean ords) throws IOException { + private SortedNumericDocValues getSortedNumeric(SortedNumericEntry entry, long maxOrd) throws IOException { if (entry.numValues == entry.numDocsWithField) { - return DocValues.singleton(getNumeric(entry, ords)); + return DocValues.singleton(getNumeric(entry, maxOrd)); } final RandomAccessInput addressesInput = data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput); - final NumericValues values = getValues(entry, ords); + final NumericValues values = getValues(entry, maxOrd); if (entry.docsWithFieldOffset == -1) { // dense diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index e85d986519adf..518fbda870c73 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -14,6 +14,7 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.packed.PackedInts; import 
java.io.IOException; import java.util.Arrays; @@ -227,10 +228,15 @@ public void testEncodeOrdinalsNoRepetitions() throws IOException { } private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOException { + long maxOrd = 0; + for (long ord : arr) { + maxOrd = Math.max(maxOrd, ord + 1); + } + final int bitsPerOrd = PackedInts.bitsRequired(maxOrd - 1); final long[] expected = arr.clone(); try (Directory dir = newDirectory()) { try (IndexOutput out = dir.createOutput("tests.bin", IOContext.DEFAULT)) { - encoder.encodeOrdinals(arr, out); + encoder.encodeOrdinals(arr, out, bitsPerOrd); assertEquals(expectedNumBytes, out.getFilePointer()); } try (IndexInput in = dir.openInput("tests.bin", IOContext.DEFAULT)) { @@ -238,7 +244,7 @@ private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOExceptio for (int i = 0; i < decoded.length; ++i) { decoded[i] = random().nextLong(); } - encoder.decodeOrdinals(in, decoded); + encoder.decodeOrdinals(in, decoded, bitsPerOrd); assertEquals(in.length(), in.getFilePointer()); assertArrayEquals(expected, decoded); } From 84e3785d072abde371eb002bf21ec65787305d62 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 29 Sep 2023 16:15:19 +0200 Subject: [PATCH 05/11] iter --- .../index/codec/PerFieldMapperCodec.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index d392073314843..85806f2bb889a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -20,7 +20,9 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat; +import 
org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperService; @@ -108,9 +110,22 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } boolean useTSDBDocValuesFormat(final String field) { - return /*mapperService.getIndexSettings().isES87TSDBCodecEnabled() + if (mapperService != null) { + final MappingLookup mappingLookup = mapperService.mappingLookup(); + if (mappingLookup.getMapper(field) instanceof NumberFieldMapper) { + return true; + } + if (mappingLookup.getMapper(field) instanceof DateFieldMapper) { + return true; + } + if (mappingLookup.getMapper(field) instanceof KeywordFieldMapper) { + return true; + } + } + return false; + /*return /.getIndexSettings().isES87TSDBCodecEnabled() && isTimeSeriesModeIndex() - &&*/ isNotSpecialField(field); + && isNotSpecialField(field);*/ } private boolean isTimeSeriesModeIndex() { From ddb0b180ee43fedccc44f20aba7b37610f8500ad Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 4 Nov 2023 10:03:46 +0100 Subject: [PATCH 06/11] Variable-size header --- .../index/codec/PerFieldMapperCodec.java | 24 +----- .../codec/tsdb/ES87TSDBDocValuesEncoder.java | 49 ++++++++++--- .../tsdb/ES87TSDBDocValuesEncoderTests.java | 73 ++++++++++++++----- 3 files changed, 91 insertions(+), 55 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index 9be677a868bb7..9bf6730faef21 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -107,7 +107,7 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } boolean 
useTSDBDocValuesFormat(final String field) { - if (mapperService != null) { + if (mapperService != null && mapperService.getIndexSettings().isES87TSDBCodecEnabled() && isTimeSeriesModeIndex()) { final MappingLookup mappingLookup = mapperService.mappingLookup(); if (mappingLookup.getMapper(field) instanceof NumberFieldMapper) { return true; @@ -120,32 +120,10 @@ boolean useTSDBDocValuesFormat(final String field) { } } return false; - /*return /.getIndexSettings().isES87TSDBCodecEnabled() - && isTimeSeriesModeIndex() - && isNotSpecialField(field);*/ } private boolean isTimeSeriesModeIndex() { return IndexMode.TIME_SERIES.equals(mapperService.getIndexSettings().getMode()); } - private boolean isCounterOrGaugeMetricType(String field) { - if (mapperService != null) { - final MappingLookup mappingLookup = mapperService.mappingLookup(); - if (mappingLookup.getMapper(field) instanceof NumberFieldMapper) { - final MappedFieldType fieldType = mappingLookup.getFieldType(field); - return TimeSeriesParams.MetricType.COUNTER.equals(fieldType.getMetricType()) - || TimeSeriesParams.MetricType.GAUGE.equals(fieldType.getMetricType()); - } - } - return false; - } - - private static boolean isTimestampField(String field) { - return "@timestamp".equals(field); - } - - private static boolean isNotSpecialField(String field) { - return field.startsWith("_") == false; - } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index a38be38f087ed..f293eb86141b6 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -178,19 +178,40 @@ void encode(long[] in, DataOutput out) throws IOException { deltaEncode(0, 0, in, out); } + /** + * Optimizes for encoding sorted fields where we expect a block to mostly either be the same 
value + * or to make a transition from one value to a second one. + * <p> + * Encodes blocks in the following format: + * <ul> + * <li>byte 0: 1/2 bits header+6/7 bits data</li> + * <li>byte 1..n: data</li> + * </ul> + * The header (first 1 or 2 bits) describes how the data is encoded: + * <ul> + * <li>?0 block has a single value (vlong), 2nd bit already contains data</li> + * <li> + * 01 block has two runs, data contains value 1 (vlong), run-length (vint) of value 1, + * and delta from first to second value (zlong) + * </li> + * <li>11 block is bit-packed</li> + * </ul>
+ */ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException { assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; int numRuns = 1; for (int i = 1; i < in.length; ++i) { - if (in[i - 1] != in[1]) { + if (in[i - 1] != in[i]) { numRuns++; } } - if (numRuns == 1) { + if (numRuns == 1 && bitsPerOrd < 63) { long value = in[0]; - out.writeVLong(value << 2); - } else if (numRuns == 2) { - out.writeVLong((in[0] << 2) | 0x01); + // set first bit to 0 to indicate the block has a single run + out.writeVLong(value << 1); + } else if (numRuns == 2 && bitsPerOrd < 62) { + // set first two bits to 01 to indicate the block has two runs + out.writeVLong((in[0] << 2) | 0b01); int firstRunLen = in.length; for (int i = 1; i < in.length; ++i) { if (in[i] != in[0]) { @@ -201,7 +222,8 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio out.writeVInt(firstRunLen); out.writeZLong(in[in.length - 1] - in[0]); } else { - out.writeVLong(0x02); + // set first two bits to 11 to indicate the block is bit-packed + out.writeVLong(0b11); forUtil.encode(in, bitsPerOrd, out); } } @@ -209,17 +231,20 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException { assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length; - long header = in.readVLong(); - int token = (int) (header & 0x03L); - if (token == 0) { - Arrays.fill(out, header >>> 2); - } else if (token == 1) { - long v1 = header >>> 2; + long v1 = in.readVLong(); + int header = (int) (v1 & 0b11L); + if (header == 0b00 || header == 0b10) { + // first bit is zero -> single run + Arrays.fill(out, v1 >>> 1); + } else if (header == 0b01) { + // first two bits are 01 -> two runs + v1 = v1 >>> 2; int runLen = in.readVInt(); long v2 = v1 + in.readZLong(); Arrays.fill(out, 0, runLen, v1); Arrays.fill(out, runLen, out.length, v2); } else { + // first two bits are 11 
-> bit-packed forUtil.decode(bitsPerOrd, in, out); } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index 518fbda870c73..cd57ad07b51fa 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -191,30 +191,63 @@ private void doTest(long[] arr, long expectedNumBytes) throws IOException { } } - public void testEncodeOrdinalsSmall() throws IOException { + public void testEncodeOrdinalsSingleValueSmall() throws IOException { long[] arr = new long[blockSize]; - for (int i = 0; i < 64; ++i) { - arr[i] = 1; - } - for (int i = 64; i < blockSize; ++i) { - arr[i] = 2; - } - final long expectedNumBytes = 2 // 2 zig-zag-encoded ordinals - + 2; // 2 zig-zag-encoded negative longs, indicating repetitions + Arrays.fill(arr, 63); + final long expectedNumBytes = 1; doTestOrdinals(arr, expectedNumBytes); } - public void testEncodeOrdinalsSmallLarge() throws IOException { + public void testEncodeOrdinalsSingleValueMedium() throws IOException { long[] arr = new long[blockSize]; - for (int i = 0; i < 64; ++i) { - arr[i] = Long.MAX_VALUE - 1; - } - for (int i = 64; i < blockSize; ++i) { - arr[i] = Long.MAX_VALUE; - } - final long expectedNumBytes = 10 * 2 // 2 zig-zag-encoded ordinals - + 1 * 2; // 2 zig-zag-encoded negative longs, indicating repetitions + Arrays.fill(arr, 64); + final long expectedNumBytes = 2; + + doTestOrdinals(arr, expectedNumBytes); + } + + public void testEncodeOrdinalsSingleValueLarge() throws IOException { + long[] arr = new long[blockSize]; + final long expectedNumBytes = 3; + // each byte of a vlong can store 7 bits (first bit is continuation bit) + // first byte can only store 6 bits as the first bit is the header + Arrays.fill(arr, (1 << 6 + 7 + 7) -1); + + 
doTestOrdinals(arr, expectedNumBytes); + } + + public void testEncodeOrdinalsSingleValueGrande() throws IOException { + long[] arr = new long[blockSize]; + Arrays.fill(arr, Long.MAX_VALUE); + final long expectedNumBytes = 1 + blockSize * Long.BYTES; + + doTestOrdinals(arr, expectedNumBytes); + } + + public void testEncodeOrdinalsTwoValuesSmall() throws IOException { + long[] arr = new long[blockSize]; + Arrays.fill(arr, 63); + arr[0] = 1; + final long expectedNumBytes = 3; + + doTestOrdinals(arr, expectedNumBytes); + } + + public void testEncodeOrdinalsTwoValuesLarge() throws IOException { + long[] arr = new long[blockSize]; + Arrays.fill(arr, Long.MAX_VALUE >> 2); + arr[0] = (Long.MAX_VALUE >> 2) - 1; + final long expectedNumBytes = 11; + + doTestOrdinals(arr, expectedNumBytes); + } + + public void testEncodeOrdinalsTwoValuesGrande() throws IOException { + long[] arr = new long[blockSize]; + Arrays.fill(arr, Long.MAX_VALUE); + arr[0] = Long.MAX_VALUE - 1; + final long expectedNumBytes = 1 + blockSize * Long.BYTES; doTestOrdinals(arr, expectedNumBytes); } @@ -224,13 +257,13 @@ public void testEncodeOrdinalsNoRepetitions() throws IOException { for (int i = 0; i < blockSize; ++i) { arr[i] = i; } - doTestOrdinals(arr, 192); + doTestOrdinals(arr, 113); } private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOException { long maxOrd = 0; for (long ord : arr) { - maxOrd = Math.max(maxOrd, ord + 1); + maxOrd = Math.max(maxOrd, ord); } final int bitsPerOrd = PackedInts.bitsRequired(maxOrd - 1); final long[] expected = arr.clone(); From 863c3a8143a8ee27bde64021160e1b2a4e087113 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 4 Nov 2023 10:34:27 +0100 Subject: [PATCH 07/11] Apply spotless suggestions --- .../java/org/elasticsearch/index/codec/PerFieldMapperCodec.java | 2 -- .../index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git 
a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index 9bf6730faef21..df5f0e9cb7b95 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -23,12 +23,10 @@ import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.NumberFieldMapper; -import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; /** diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index cd57ad07b51fa..bf0737ebda47b 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -212,7 +212,7 @@ public void testEncodeOrdinalsSingleValueLarge() throws IOException { final long expectedNumBytes = 3; // each byte of a vlong can store 7 bits (first bit is continuation bit) // first byte can only store 6 bits as the first bit is the header - Arrays.fill(arr, (1 << 6 + 7 + 7) -1); + Arrays.fill(arr, (1 << 6 + 7 + 7) - 1); doTestOrdinals(arr, expectedNumBytes); } From 713ec18501bfc15770e13855e4624edc859fa48c Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 6 Dec 2023 11:59:30 +0100 Subject: [PATCH 08/11] Update docs/changelog/99747.yaml --- docs/changelog/99747.yaml | 5 +++++ 1 file 
changed, 5 insertions(+) create mode 100644 docs/changelog/99747.yaml diff --git a/docs/changelog/99747.yaml b/docs/changelog/99747.yaml new file mode 100644 index 0000000000000..94aefbf25d8e5 --- /dev/null +++ b/docs/changelog/99747.yaml @@ -0,0 +1,5 @@ +pr: 99747 +summary: TSDB dimensions encoding +area: TSDB +type: enhancement +issues: [] From fa1b9e46bf7a10904e84ec38f5723c58aadf1d7b Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 31 Jan 2024 14:08:10 +0100 Subject: [PATCH 09/11] Apply feedback from review --- .../org/elasticsearch/index/codec/PerFieldMapperCodec.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index 0b9aa7ddf1940..0e2b50257ae37 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -28,6 +28,7 @@ import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; /** @@ -121,12 +122,15 @@ boolean useTSDBDocValuesFormat(final String field) { if (mappingLookup.getMapper(field) instanceof KeywordFieldMapper) { return true; } + if (mappingLookup.getMapper(field) instanceof TimeSeriesIdFieldMapper) { + return true; + } } return false; } private boolean isTimeSeriesModeIndex() { - return IndexMode.TIME_SERIES.equals(mapperService.getIndexSettings().getMode()); + return IndexMode.TIME_SERIES == mapperService.getIndexSettings().getMode(); } } From af1c42c16378c215b69add7f21f63a6d154352b3 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 6 Feb 2024 10:27:26 +0100 Subject: [PATCH 10/11] Tweak the tsdb codec 
format to not write ordinals blocks when a field has exactly one value and use ordinal zero as constant. This could be done without changing the core format and without making the format more complex. --- .../codec/tsdb/ES87TSDBDocValuesConsumer.java | 62 ++++++----- .../codec/tsdb/ES87TSDBDocValuesProducer.java | 102 +++++++++++++++++- .../tsdb/ES87TSDBDocValuesFormatTests.java | 93 ++++++++++++++++ 3 files changed, 224 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java index c0ddcfd25b7e5..34d6ffa6535be 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java @@ -127,7 +127,8 @@ private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProduc meta.writeLong(numValues); if (numValues > 0) { - meta.writeInt(ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT); + // Special case for maxOrd of 1, signal -1 that no blocks will be written + meta.writeInt(maxOrd != 1 ? ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1); final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput(); final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance( meta, @@ -136,41 +137,46 @@ private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProduc ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT ); - final long[] buffer = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; - int bufferSize = 0; final long valuesDataOffset = data.getFilePointer(); - final ES87TSDBDocValuesEncoder encoder = new ES87TSDBDocValuesEncoder(); - - values = valuesProducer.getSortedNumeric(field); - final int bitsPerOrd = maxOrd >= 0 ? 
PackedInts.bitsRequired(maxOrd - 1) : -1; - for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - final int count = values.docValueCount(); - for (int i = 0; i < count; ++i) { - buffer[bufferSize++] = values.nextValue(); - if (bufferSize == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) { - indexWriter.add(data.getFilePointer() - valuesDataOffset); - if (maxOrd >= 0) { - encoder.encodeOrdinals(buffer, data, bitsPerOrd); - } else { - encoder.encode(buffer, data); + // Special case for maxOrd of 1, skip writing the blocks + if (maxOrd != 1) { + final long[] buffer = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + int bufferSize = 0; + final ES87TSDBDocValuesEncoder encoder = new ES87TSDBDocValuesEncoder(); + values = valuesProducer.getSortedNumeric(field); + final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + final int count = values.docValueCount(); + for (int i = 0; i < count; ++i) { + buffer[bufferSize++] = values.nextValue(); + if (bufferSize == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) { + indexWriter.add(data.getFilePointer() - valuesDataOffset); + if (maxOrd >= 0) { + encoder.encodeOrdinals(buffer, data, bitsPerOrd); + } else { + encoder.encode(buffer, data); + } + bufferSize = 0; } - bufferSize = 0; } } - } - if (bufferSize > 0) { - indexWriter.add(data.getFilePointer() - valuesDataOffset); - // Fill unused slots in the block with zeroes rather than junk - Arrays.fill(buffer, bufferSize, ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L); - if (maxOrd >= 0) { - encoder.encodeOrdinals(buffer, data, bitsPerOrd); - } else { - encoder.encode(buffer, data); + if (bufferSize > 0) { + indexWriter.add(data.getFilePointer() - valuesDataOffset); + // Fill unused slots in the block with zeroes rather than junk + Arrays.fill(buffer, bufferSize, ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L); + if 
(maxOrd >= 0) { + encoder.encodeOrdinals(buffer, data, bitsPerOrd); + } else { + encoder.encode(buffer, data); + } } } final long valuesDataLength = data.getFilePointer() - valuesDataOffset; - indexWriter.finish(); + if (maxOrd != 1) { + // Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method. + indexWriter.finish(); + } final long indexDataOffset = data.getFilePointer(); data.copyBytes(indexOut.toDataInput(), indexOut.size()); meta.writeLong(indexDataOffset); diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index c56957b5cb99a..a06227351473a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -600,11 +600,15 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx entry.numValues = meta.readLong(); if (entry.numValues > 0) { final int indexBlockShift = meta.readInt(); - entry.indexMeta = DirectMonotonicReader.loadMeta( - meta, - 1 + ((entry.numValues - 1) >>> ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), - indexBlockShift - ); + // Special case, -1 means there are no blocks, so no need to load the metadata for it + // -1 is written when the cardinality of a field is exactly one.
+ if (indexBlockShift != -1) { + entry.indexMeta = DirectMonotonicReader.loadMeta( + meta, + 1 + ((entry.numValues - 1) >>> ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), + indexBlockShift + ); + } entry.indexOffset = meta.readLong(); entry.indexLength = meta.readLong(); entry.valuesOffset = meta.readLong(); @@ -688,6 +692,94 @@ private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOEx return DocValues.emptyNumeric(); } + if (maxOrd == 1) { + // Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value + if (entry.docsWithFieldOffset == -1) { + // Special case when all docs have a value + return new NumericDocValues() { + + private final int maxDoc = ES87TSDBDocValuesProducer.this.maxDoc; + private int doc = -1; + + @Override + public long longValue() { + // Only one ordinal! + return 0L; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int advance(int target) throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } + return doc = target; + } + + @Override + public boolean advanceExact(int target) { + doc = target; + return true; + } + + @Override + public long cost() { + return maxDoc; + } + }; + } else { + final IndexedDISI disi = new IndexedDISI( + data, + entry.docsWithFieldOffset, + entry.docsWithFieldLength, + entry.jumpTableEntryCount, + entry.denseRankPower, + entry.numValues + ); + return new NumericDocValues() { + + @Override + public int advance(int target) throws IOException { + return disi.advance(target); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return disi.advanceExact(target); + } + + @Override + public int nextDoc() throws IOException { + return disi.nextDoc(); + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public long cost() { + return disi.cost(); + } + + @Override + public long 
longValue() { + return 0L; + } + }; + } + } + // NOTE: we could make this a bit simpler by reusing #getValues but this // makes things slower. diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java index 60007980b2e03..8c433173e561a 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java @@ -8,14 +8,30 @@ package org.elasticsearch.index.codec.tsdb; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; +import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BytesRef; import java.io.IOException; public class ES87TSDBDocValuesFormatTests extends BaseDocValuesFormatTestCase { + private static final int NUM_DOCS = 10; + private final Codec codec = TestUtil.alwaysDocValuesFormat(new ES87TSDBDocValuesFormat()); @Override @@ -23,6 +39,83 @@ protected Codec getCodec() { return codec; } + public void testSortedDocValuesSingleUniqueValue() throws IOException { + try (Directory directory = newDirectory()) { + Analyzer analyzer = new MockAnalyzer(random()); + IndexWriterConfig conf = 
newIndexWriterConfig(analyzer); + conf.setMergePolicy(newLogMergePolicy()); + try (RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf)) { + for (int i = 0; i < NUM_DOCS; i++) { + Document doc = new Document(); + doc.add(new SortedDocValuesField("field", newBytesRef("value"))); + doc.add(new SortedDocValuesField("field" + i, newBytesRef("value" + i))); + iwriter.addDocument(doc); + } + iwriter.forceMerge(1); + } + try (IndexReader ireader = maybeWrapWithMergingReader(DirectoryReader.open(directory))) { + assert ireader.leaves().size() == 1; + SortedDocValues field = ireader.leaves().get(0).reader().getSortedDocValues("field"); + for (int i = 0; i < NUM_DOCS; i++) { + assertEquals(i, field.nextDoc()); + assertEquals(0, field.ordValue()); + BytesRef scratch = field.lookupOrd(0); + assertEquals("value", scratch.utf8ToString()); + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, field.nextDoc()); + for (int i = 0; i < NUM_DOCS; i++) { + SortedDocValues fieldN = ireader.leaves().get(0).reader().getSortedDocValues("field" + i); + assertEquals(i, fieldN.nextDoc()); + assertEquals(0, fieldN.ordValue()); + BytesRef scratch = fieldN.lookupOrd(0); + assertEquals("value" + i, scratch.utf8ToString()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, fieldN.nextDoc()); + } + } + } + } + + public void testSortedSetDocValuesSingleUniqueValue() throws IOException { + try (Directory directory = newDirectory()) { + Analyzer analyzer = new MockAnalyzer(random()); + IndexWriterConfig conf = newIndexWriterConfig(analyzer); + conf.setMergePolicy(newLogMergePolicy()); + try (RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf)) { + for (int i = 0; i < NUM_DOCS; i++) { + Document doc = new Document(); + doc.add(new SortedSetDocValuesField("field", newBytesRef("value"))); + doc.add(new SortedSetDocValuesField("field" + i, newBytesRef("value" + i))); + iwriter.addDocument(doc); + } + iwriter.forceMerge(1); + } + + try (IndexReader ireader = 
maybeWrapWithMergingReader(DirectoryReader.open(directory))) { + assert ireader.leaves().size() == 1; + var field = ireader.leaves().get(0).reader().getSortedSetDocValues("field"); + for (int i = 0; i < NUM_DOCS; i++) { + assertEquals(i, field.nextDoc()); + assertEquals(1, field.docValueCount()); + assertEquals(0, field.nextOrd()); + BytesRef scratch = field.lookupOrd(0); + assertEquals("value", scratch.utf8ToString()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, field.nextOrd()); + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, field.nextDoc()); + for (int i = 0; i < NUM_DOCS; i++) { + var fieldN = ireader.leaves().get(0).reader().getSortedSetDocValues("field" + i); + assertEquals(i, fieldN.nextDoc()); + assertEquals(1, fieldN.docValueCount()); + assertEquals(0, fieldN.nextOrd()); + BytesRef scratch = fieldN.lookupOrd(0); + assertEquals("value" + i, scratch.utf8ToString()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, fieldN.nextDoc()); + assertEquals(SortedSetDocValues.NO_MORE_ORDS, fieldN.nextOrd()); + } + } + } + } + // NOTE: here and below we disable tests dealing with non-numeric fields // because ES87TSDBDocValuesFormat only deals with numeric fields. 
@Override From 2bdc6fc6afae4645668c2ca8336bafaebee23a28 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 6 Feb 2024 10:28:58 +0100 Subject: [PATCH 11/11] rename method --- .../index/codec/tsdb/ES87TSDBDocValuesConsumer.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java index 34d6ffa6535be..15dc386f41284 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java @@ -86,7 +86,7 @@ final class ES87TSDBDocValuesConsumer extends DocValuesConsumer { public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); meta.writeByte(ES87TSDBDocValuesFormat.NUMERIC); - writeNumericField(field, new EmptyDocValuesProducer() { + writeField(field, new EmptyDocValuesProducer() { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { return DocValues.singleton(valuesProducer.getNumeric(field)); @@ -94,7 +94,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti }, -1); } - private long[] writeNumericField(FieldInfo field, DocValuesProducer valuesProducer, long maxOrd) throws IOException { + private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, long maxOrd) throws IOException { int numDocsWithValue = 0; long numValues = 0; @@ -204,7 +204,7 @@ public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) th private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { SortedDocValues sorted = valuesProducer.getSorted(field); int maxOrd = sorted.getValueCount(); - writeNumericField(field, new EmptyDocValuesProducer() { + 
writeField(field, new EmptyDocValuesProducer() { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { SortedDocValues sorted = valuesProducer.getSorted(field); @@ -394,7 +394,7 @@ public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProdu } private void writeSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer, long maxOrd) throws IOException { - long[] stats = writeNumericField(field, valuesProducer, maxOrd); + long[] stats = writeField(field, valuesProducer, maxOrd); int numDocsWithField = Math.toIntExact(stats[0]); long numValues = stats[1]; assert numValues >= numDocsWithField;