Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TSDB dimensions encoding #99747

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/99747.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 99747
summary: TSDB dimensions encoding
area: TSDB
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@
import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesParams;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;

/**
Expand Down Expand Up @@ -110,33 +111,26 @@ public DocValuesFormat getDocValuesFormatForField(String field) {
}

/**
 * Decides whether the TSDB doc values format applies to {@code field}.
 * <p>
 * The checks run in this order and the first failing one ends the evaluation: the ES87 TSDB codec
 * must be enabled in the index settings, the index must be in time-series mode, the field must not
 * be a special field, and finally the field must be either a counter/gauge metric or the
 * timestamp field.
 *
 * @param field name of the field being examined
 * @return {@code true} when every check above passes
 */
boolean useTSDBDocValuesFormat(final String field) {
    if (mapperService.getIndexSettings().isES87TSDBCodecEnabled() == false) {
        return false;
    }
    if (isTimeSeriesModeIndex() == false) {
        return false;
    }
    if (isNotSpecialField(field) == false) {
        return false;
    }
    if (isCounterOrGaugeMetricType(field)) {
        return true;
    }
    return isTimestampField(field);
}

/**
 * Reports whether the mode found in the index settings equals {@link IndexMode#TIME_SERIES}.
 */
private boolean isTimeSeriesModeIndex() {
    final IndexMode configuredMode = mapperService.getIndexSettings().getMode();
    return IndexMode.TIME_SERIES.equals(configuredMode);
}

private boolean isCounterOrGaugeMetricType(String field) {
if (mapperService != null) {
if (mapperService != null && mapperService.getIndexSettings().isES87TSDBCodecEnabled() && isTimeSeriesModeIndex()) {
final MappingLookup mappingLookup = mapperService.mappingLookup();
if (mappingLookup.getMapper(field) instanceof NumberFieldMapper) {
final MappedFieldType fieldType = mappingLookup.getFieldType(field);
return TimeSeriesParams.MetricType.COUNTER.equals(fieldType.getMetricType())
|| TimeSeriesParams.MetricType.GAUGE.equals(fieldType.getMetricType());
return true;
}
if (mappingLookup.getMapper(field) instanceof DateFieldMapper) {
return true;
}
if (mappingLookup.getMapper(field) instanceof KeywordFieldMapper) {
return true;
felixbarny marked this conversation as resolved.
Show resolved Hide resolved
}
if (mappingLookup.getMapper(field) instanceof TimeSeriesIdFieldMapper) {
return true;
}
}
return false;
}

private static boolean isTimestampField(String field) {
return "@timestamp".equals(field);
private boolean isTimeSeriesModeIndex() {
return IndexMode.TIME_SERIES == mapperService.getIndexSettings().getMode();
}

/**
 * Tells whether a field name does not begin with an underscore.
 *
 * @param field the field name to inspect
 * @return {@code false} if {@code field} starts with {@code "_"}, {@code true} otherwise
 */
private static boolean isNotSpecialField(String field) {
    if (field.startsWith("_")) {
        return false;
    }
    return true;
}
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,77 @@ void encode(long[] in, DataOutput out) throws IOException {
deltaEncode(0, 0, in, out);
}

/**
 * Writes one block of ordinals using a layout tuned for sorted fields, where a block is expected
 * to mostly hold either one repeated value or a single switch from one value to another.
 * <p>
 * The lowest one or two bits of the leading vlong form a header that selects the layout:
 * <ul>
 *   <li>lowest bit {@code 0}: the whole block is a single value, stored in the remaining bits of
 *   that same vlong</li>
 *   <li>lowest two bits {@code 01}: the block consists of two runs; the remaining bits of the vlong
 *   hold the first value, followed by the length of the first run (vint) and the difference between
 *   the second and the first value (zlong)</li>
 *   <li>lowest two bits {@code 11}: the block is bit-packed using {@code bitsPerOrd} bits</li>
 * </ul>
 *
 * @param in         the ordinals of one block; its length is asserted to equal
 *                   {@code ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE}
 * @param out        where the encoded block is written
 * @param bitsPerOrd bit width handed to the bit-packing encoder; it also gates the compact layouts,
 *                   presumably so the header shift cannot push value bits out of the long
 * @throws IOException if writing to {@code out} fails
 */
void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
    assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;

    // Single pass: count how often the value changes and remember where it changes first.
    int transitions = 0;
    int firstChangeAt = in.length;
    for (int idx = 1; idx < in.length; idx++) {
        if (in[idx] != in[idx - 1]) {
            if (transitions == 0) {
                firstChangeAt = idx;
            }
            transitions++;
        }
    }

    final boolean singleRun = transitions == 0 && bitsPerOrd < 63;
    final boolean twoRuns = transitions == 1 && bitsPerOrd < 62;

    if (singleRun) {
        // header bit 0: one value fills the whole block
        out.writeVLong(in[0] << 1);
        return;
    }
    if (twoRuns) {
        // header bits 01: first value, length of its run, then the step to the second value
        final long firstValue = in[0];
        out.writeVLong((firstValue << 2) | 0b01);
        out.writeVInt(firstChangeAt);
        out.writeZLong(in[in.length - 1] - firstValue);
        return;
    }
    // header bits 11: fall back to bit-packing the block
    out.writeVLong(0b11);
    forUtil.encode(in, bitsPerOrd, out);
}

/**
 * Reads one block of ordinals in the layout produced by {@link #encodeOrdinals}.
 * <p>
 * The lowest bits of the leading vlong select the layout: lowest bit {@code 0} means a single
 * value, lowest two bits {@code 01} mean two runs, and {@code 11} means a bit-packed block.
 *
 * @param in         source of the encoded block
 * @param out        receives the decoded ordinals; its length is asserted to equal
 *                   {@code ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE}
 * @param bitsPerOrd bit width handed to the bit-packing decoder
 * @throws IOException if reading from {@code in} fails
 */
void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException {
    assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length;

    final long leading = in.readVLong();

    if ((leading & 0b01L) == 0) {
        // lowest bit clear: the rest of the vlong is the one value of the block
        Arrays.fill(out, leading >>> 1);
        return;
    }

    if ((leading & 0b10L) == 0) {
        // header 01: first value, length of the first run, delta to the second value
        final long firstValue = leading >>> 2;
        final int firstRunLength = in.readVInt();
        final long secondValue = firstValue + in.readZLong();
        Arrays.fill(out, 0, firstRunLength, firstValue);
        Arrays.fill(out, firstRunLength, out.length, secondValue);
        return;
    }

    // header 11: bit-packed block
    forUtil.decode(bitsPerOrd, in, out);
}

/** Decode longs that have been encoded with {@link #encode}. */
void decode(DataInput in, long[] out) throws IOException {
assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ public class ES87TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesF
static final byte SORTED_SET = 3;
static final byte SORTED_NUMERIC = 4;

// Shift / size / mask triple: size is 1 << 6 = 64 and mask is size - 1 = 63.
// NOTE(review): presumably the number of terms per LZ4-compressed terms dictionary block — confirm against the producer/consumer.
static final int TERMS_DICT_BLOCK_LZ4_SHIFT = 6;
static final int TERMS_DICT_BLOCK_LZ4_SIZE = 1 << TERMS_DICT_BLOCK_LZ4_SHIFT;
static final int TERMS_DICT_BLOCK_LZ4_MASK = TERMS_DICT_BLOCK_LZ4_SIZE - 1;

// Shift / size / mask triple: size is 1 << 10 = 1024 and mask is size - 1 = 1023.
// NOTE(review): presumably the sampling interval of the terms dictionary reverse index — confirm against the producer/consumer.
static final int TERMS_DICT_REVERSE_INDEX_SHIFT = 10;
static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT;
static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1;

/** Creates the format, passing {@code CODEC_NAME} to the superclass constructor. */
public ES87TSDBDocValuesFormat() {
super(CODEC_NAME);
}
Expand Down
Loading