Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change PerFieldMapperCodec to use tsdb doc values codec for all fields. #105301

Merged
merged 10 commits into from
Feb 12, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,9 @@
import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.IpFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;

/**
Expand Down Expand Up @@ -112,25 +106,17 @@ public DocValuesFormat getDocValuesFormatForField(String field) {
}

boolean useTSDBDocValuesFormat(final String field) {
if (mapperService != null && mapperService.getIndexSettings().isES87TSDBCodecEnabled() && isTimeSeriesModeIndex()) {
final MappingLookup mappingLookup = mapperService.mappingLookup();
if (mappingLookup.getMapper(field) instanceof NumberFieldMapper) {
return true;
}
if (mappingLookup.getMapper(field) instanceof DateFieldMapper) {
return true;
}
if (mappingLookup.getMapper(field) instanceof KeywordFieldMapper) {
return true;
}
if (mappingLookup.getMapper(field) instanceof TimeSeriesIdFieldMapper) {
return true;
}
if (mappingLookup.getMapper(field) instanceof IpFieldMapper) {
return true;
}
if (excludeFields(field)) {
return false;
}
return false;

return mapperService != null && isTimeSeriesModeIndex() && mapperService.getIndexSettings().isES87TSDBCodecEnabled();
}

private boolean excludeFields(String fieldName) {
// Avoid using tsdb codec for fields like _seq_no, _primary_term.
// But _tsid should always use the tesbd codec.
return fieldName.startsWith("_") && fieldName.equals("_tsid") == false;
}

private boolean isTimeSeriesModeIndex() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
Expand Down Expand Up @@ -191,7 +192,70 @@ private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, lon

@Override
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
throw new UnsupportedOperationException("Unsupported binary doc values for field [" + field.name + "]");
meta.writeInt(field.number);
meta.writeByte(ES87TSDBDocValuesFormat.BINARY);

BinaryDocValues values = valuesProducer.getBinary(field);
long start = data.getFilePointer();
meta.writeLong(start); // dataOffset
int numDocsWithField = 0;
int minLength = Integer.MAX_VALUE;
int maxLength = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
numDocsWithField++;
BytesRef v = values.binaryValue();
int length = v.length;
data.writeBytes(v.bytes, v.offset, v.length);
minLength = Math.min(length, minLength);
maxLength = Math.max(length, maxLength);
}
assert numDocsWithField <= maxDoc;
meta.writeLong(data.getFilePointer() - start); // dataLength

if (numDocsWithField == 0) {
meta.writeLong(-2); // docsWithFieldOffset
meta.writeLong(0L); // docsWithFieldLength
meta.writeShort((short) -1); // jumpTableEntryCount
meta.writeByte((byte) -1); // denseRankPower
} else if (numDocsWithField == maxDoc) {
meta.writeLong(-1); // docsWithFieldOffset
meta.writeLong(0L); // docsWithFieldLength
meta.writeShort((short) -1); // jumpTableEntryCount
meta.writeByte((byte) -1); // denseRankPower
} else {
long offset = data.getFilePointer();
meta.writeLong(offset); // docsWithFieldOffset
values = valuesProducer.getBinary(field);
final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
meta.writeShort(jumpTableEntryCount);
meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
}

meta.writeInt(numDocsWithField);
meta.writeInt(minLength);
meta.writeInt(maxLength);
if (maxLength > minLength) {
start = data.getFilePointer();
meta.writeLong(start);
meta.writeVInt(ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);

final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
meta,
data,
numDocsWithField + 1,
ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
);
long addr = 0;
writer.add(addr);
values = valuesProducer.getBinary(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
addr += values.binaryValue().length;
writer.add(addr);
}
writer.finish();
meta.writeLong(data.getFilePointer() - start);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

public class ES87TSDBDocValuesProducer extends DocValuesProducer {
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();
private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
Expand Down Expand Up @@ -119,7 +120,160 @@ public NumericDocValues getNumeric(FieldInfo field) throws IOException {

@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
throw new UnsupportedOperationException("Unsupported binary doc values for field [" + field.name + "]");
BinaryEntry entry = binaries.get(field.name);
if (entry.docsWithFieldOffset == -2) {
return DocValues.emptyBinary();
}

final IndexInput bytesSlice = data.slice("fixed-binary", entry.dataOffset, entry.dataLength);

if (entry.docsWithFieldOffset == -1) {
// dense
if (entry.minLength == entry.maxLength) {
// fixed length
final int length = entry.maxLength;
return new DenseBinaryDocValues(maxDoc) {
final BytesRef bytes = new BytesRef(new byte[length], 0, length);

@Override
public BytesRef binaryValue() throws IOException {
bytesSlice.seek((long) doc * length);
bytesSlice.readBytes(bytes.bytes, 0, length);
return bytes;
}
};
} else {
// variable length
final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
return new DenseBinaryDocValues(maxDoc) {
final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength);

@Override
public BytesRef binaryValue() throws IOException {
long startOffset = addresses.get(doc);
bytes.length = (int) (addresses.get(doc + 1L) - startOffset);
bytesSlice.seek(startOffset);
bytesSlice.readBytes(bytes.bytes, 0, bytes.length);
return bytes;
}
};
}
} else {
// sparse
final IndexedDISI disi = new IndexedDISI(
data,
entry.docsWithFieldOffset,
entry.docsWithFieldLength,
entry.jumpTableEntryCount,
entry.denseRankPower,
entry.numDocsWithField
);
if (entry.minLength == entry.maxLength) {
// fixed length
final int length = entry.maxLength;
return new SparseBinaryDocValues(disi) {
final BytesRef bytes = new BytesRef(new byte[length], 0, length);

@Override
public BytesRef binaryValue() throws IOException {
bytesSlice.seek((long) disi.index() * length);
bytesSlice.readBytes(bytes.bytes, 0, length);
return bytes;
}
};
} else {
// variable length
final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
return new SparseBinaryDocValues(disi) {
final BytesRef bytes = new BytesRef(new byte[entry.maxLength], 0, entry.maxLength);

@Override
public BytesRef binaryValue() throws IOException {
final int index = disi.index();
long startOffset = addresses.get(index);
bytes.length = (int) (addresses.get(index + 1L) - startOffset);
bytesSlice.seek(startOffset);
bytesSlice.readBytes(bytes.bytes, 0, bytes.length);
return bytes;
}
};
}
}
}

private abstract static class DenseBinaryDocValues extends BinaryDocValues {

final int maxDoc;
int doc = -1;

DenseBinaryDocValues(int maxDoc) {
this.maxDoc = maxDoc;
}

@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}

@Override
public int docID() {
return doc;
}

@Override
public long cost() {
return maxDoc;
}

@Override
public int advance(int target) throws IOException {
if (target >= maxDoc) {
return doc = NO_MORE_DOCS;
}
return doc = target;
}

@Override
public boolean advanceExact(int target) throws IOException {
doc = target;
return true;
}
}

private abstract static class SparseBinaryDocValues extends BinaryDocValues {

final IndexedDISI disi;

SparseBinaryDocValues(IndexedDISI disi) {
this.disi = disi;
}

@Override
public int nextDoc() throws IOException {
return disi.nextDoc();
}

@Override
public int docID() {
return disi.docID();
}

@Override
public long cost() {
return disi.cost();
}

@Override
public int advance(int target) throws IOException {
return disi.advance(target);
}

@Override
public boolean advanceExact(int target) throws IOException {
return disi.advanceExact(target);
}
}

@Override
Expand Down Expand Up @@ -573,7 +727,7 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
if (type == ES87TSDBDocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta));
} else if (type == ES87TSDBDocValuesFormat.BINARY) {
throw new CorruptIndexException("unsupported type: " + type, meta);
binaries.put(info.name, readBinary(meta));
} else if (type == ES87TSDBDocValuesFormat.SORTED) {
sorted.put(info.name, readSorted(meta));
} else if (type == ES87TSDBDocValuesFormat.SORTED_SET) {
Expand Down Expand Up @@ -616,6 +770,30 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx
}
}

private BinaryEntry readBinary(IndexInput meta) throws IOException {
final BinaryEntry entry = new BinaryEntry();
entry.dataOffset = meta.readLong();
entry.dataLength = meta.readLong();
entry.docsWithFieldOffset = meta.readLong();
entry.docsWithFieldLength = meta.readLong();
entry.jumpTableEntryCount = meta.readShort();
entry.denseRankPower = meta.readByte();
entry.numDocsWithField = meta.readInt();
entry.minLength = meta.readInt();
entry.maxLength = meta.readInt();
if (entry.minLength < entry.maxLength) {
entry.addressesOffset = meta.readLong();

martijnvg marked this conversation as resolved.
Show resolved Hide resolved
// Old count of uncompressed addresses
long numAddresses = entry.numDocsWithField + 1L;

final int blockShift = meta.readVInt();
entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift);
entry.addressesLength = meta.readLong();
}
return entry;
}

private static SortedNumericEntry readSortedNumeric(IndexInput meta) throws IOException {
SortedNumericEntry entry = new SortedNumericEntry();
readSortedNumeric(meta, entry);
Expand Down Expand Up @@ -1089,6 +1267,21 @@ private static class NumericEntry {
long valuesLength;
}

private static class BinaryEntry {
long dataOffset;
long dataLength;
long docsWithFieldOffset;
long docsWithFieldLength;
short jumpTableEntryCount;
byte denseRankPower;
int numDocsWithField;
int minLength;
int maxLength;
long addressesOffset;
long addressesLength;
DirectMonotonicReader.Meta addressesMeta;
}

private static class SortedNumericEntry extends NumericEntry {
int numDocsWithField;
DirectMonotonicReader.Meta addressesMeta;
Expand Down
Loading