Skip to content

Commit

Permalink
fix: document id generation
Browse files Browse the repository at this point in the history
  • Loading branch information
salvatore-campagna committed Oct 31, 2023
1 parent 8c2b8aa commit acc0d14
Show file tree
Hide file tree
Showing 10 changed files with 32 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ private List<InternalBucket> randomBuckets(boolean keyed, InternalAggregations a
builder.addKeywordDimension(entry.getKey(), (String) entry.getValue());
}
try {
var key = builder.build().toBytesRef();
var key = builder.withoutHash().toBytesRef();
bucketList.add(new InternalBucket(key, docCount, aggregations, keyed));
} catch (IOException e) {
throw new UncheckedIOException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public static void writeTS(RandomIndexWriter iw, long timestamp, Object[] dimens
fields.add(new DoubleDocValuesField(metrics[i].toString(), (double) metrics[i + 1]));
}
}
fields.add(new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, builder.build().toBytesRef()));
fields.add(new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, builder.withoutHash().toBytesRef()));
// TODO: Handle metrics
iw.addDocument(fields);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ generates a consistent id:
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:52:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2001818691, "rx": 802133794}}}}'
- match: {items.0.index._id: cZZNs2vcxybH5Kk3AAABeRnS7fM}
- match: {items.0.index._id: cZZNs-xII2fZweptAAABeRnS7fM}

- do:
bulk:
Expand All @@ -85,7 +85,7 @@ generates a consistent id:
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:52:04.467Z", "metricset": "pod", "k8s": {"pod": {"name": "cat", "uid":"947e4ced-1786-4e53-9e0c-5c447e959507", "ip": "10.10.55.1", "network": {"tx": 2001818691, "rx": 802133794}}}}'
- match: {items.0.index._id: cZZNs2vcxybH5Kk3AAABeRnS7fM}
- match: {items.0.index._id: cZZNs-xII2fZweptAAABeRnS7fM}

- do:
search:
Expand Down Expand Up @@ -170,7 +170,7 @@ index a new document on top of an old one:
network:
tx: 111434595272
rx: 430605511
- match: {_id: cn4exVlxKdyGtXayAAABeRnR_mY}
- match: {_id: cn4exSPK93Q9eJj8AAABeRnR_mY}

- do:
search:
Expand Down Expand Up @@ -215,7 +215,7 @@ index a new document on top of an old one over bulk:
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:51:03.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 111434595272, "rx": 430605511}}}}'
- match: {items.0.index._id: cn4exVlxKdyGtXayAAABeRnR_mY}
- match: {items.0.index._id: cn4exSPK93Q9eJj8AAABeRnR_mY}

- do:
search:
Expand All @@ -239,7 +239,7 @@ create operation on top of old document fails:
reason: id generation changed in 8.2

- do:
catch: "/\\[cn4exVlxKdyGtXayAAABeRnR_mY\\]\\[\\{.+\\}\\@2021-04-28T18:51:03.142Z\\]: version conflict, document already exists \\(current version \\[1\\]\\)/"
catch: "/\\[cn4exSPK93Q9eJj8AAABeRnR_mY\\]\\[\\{.+\\}\\@2021-04-28T18:51:03.142Z\\]: version conflict, document already exists \\(current version \\[1\\]\\)/"
index:
refresh: true
index: test
Expand Down Expand Up @@ -268,7 +268,7 @@ create operation on top of old document fails over bulk:
body:
- '{"create": {}}'
- '{"@timestamp": "2021-04-28T18:51:03.142Z", "metricset": "pod", "k8s": {"pod": {"name": "dog", "uid":"df3145b3-0563-4d3b-a0f7-897eb2876ea9", "ip": "10.10.55.3", "network": {"tx": 111434595272, "rx": 430605511}}}}'
- match: { items.0.create.error.reason: "[cn4exVlxKdyGtXayAAABeRnR_mY][{_tsid=KMQn1H8GA7xNFfZA53p2lAAAAAAAAAAAAAAAAAAAAAA5ihHD--qoyLTiOy0pmP6_RAIE-e0-dKQ}@2021-04-28T18:51:03.142Z]: version conflict, document already exists (current version [1])" }
- match: { items.0.create.error.reason: "[cn4exSPK93Q9eJj8AAABeRnR_mY][{_tsid=KMQn1H8GA7xNFfZA53p2lAAAAAAAAAAAAAAAAAAAAAA5ihHD--qoyLTiOy0pmP6_RAIE-e0-dKQ}@2021-04-28T18:51:03.142Z]: version conflict, document already exists (current version [1])" }

---
ids query:
Expand Down Expand Up @@ -339,7 +339,7 @@ get with routing:
catch: bad_request
get:
index: test
id: cZZNs4NdV58ePSPIAAABeRnSA5M
id: cZZNs-xII2fZweptAAABeRnSA5M
routing: routing

---
Expand Down Expand Up @@ -403,8 +403,8 @@ delete over _bulk:
bulk:
index: test
body:
- '{"delete": {"_id": "cn4exTOUtxytuLkQAAABeRnR_mY"}}'
- '{"delete": {"_id": "cZZNs4NdV58ePSPIAAABeRnSA5M"}}'
- '{"delete": {"_id": "cn4exSPK93Q9eJj8AAABeRnR_mY"}}'
- '{"delete": {"_id": "cZZNs-xII2fZweptAAABeRnSA5M"}}'
- '{"delete": {"_id": "not found ++ not found"}}'
- match: {items.0.delete.result: deleted}
- match: {items.1.delete.result: deleted}
Expand Down Expand Up @@ -454,7 +454,7 @@ routing_path matches deep object:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "dim": {"foo": {"bar": {"baz": {"uid": "uid1"}}}}}'
- match: {items.0.index.result: created}
- match: {items.0.index._id: OcEOGWojqLcS93iiAAABeRnRGTM}
- match: {items.0.index._id: OcEOGeykhIEIYuXSAAABeRnRGTM}

---
routing_path matches object:
Expand Down Expand Up @@ -495,4 +495,4 @@ routing_path matches object:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "dim": {"foo": {"uid": "uid1"}}}'
- match: {items.0.index.result: created}
- match: {items.0.index._id: 8bgiqVXBrZn1EpjcAAABeRnRGTM}
- match: {items.0.index._id: 8bgiqXPBT_mM6CX6AAABeRnRGTM}
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ private static IndexVersion def(int id, Version luceneVersion) {
public static final IndexVersion NEW_SPARSE_VECTOR = def(8_500_001, Version.LUCENE_9_7_0);
public static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT = def(8_500_002, Version.LUCENE_9_7_0);
public static final IndexVersion UPGRADE_LUCENE_9_8 = def(8_500_003, Version.LUCENE_9_8_0);
public static final IndexVersion TIME_SERIES_ID_HASHING = def(8_500_004, Version.LUCENE_9_8_0);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
Expand Down Expand Up @@ -136,11 +138,17 @@ public void postParse(DocumentParserContext context) throws IOException {
assert fieldType().isIndexed() == false;

final TimeSeriesIdBuilder timeSeriesIdBuilder = (TimeSeriesIdBuilder) context.getDocumentFields();
final BytesRef timeSeriesId = timeSeriesIdBuilder.build().toBytesRef();
context.doc().add(new SortedDocValuesField(fieldType().name(), timeSeriesIdBuilder.similarityHash().toBytesRef()));
final BytesRef timeSeriesId = getIndexVersionCreated(context).before(IndexVersions.TIME_SERIES_ID_HASHING)
? timeSeriesIdBuilder.withoutHash().toBytesRef()
: timeSeriesIdBuilder.withHash().toBytesRef();
context.doc().add(new SortedDocValuesField(fieldType().name(), timeSeriesId));
TsidExtractingIdFieldMapper.createField(context, timeSeriesIdBuilder.routingBuilder, timeSeriesId);
}

/**
 * Looks up the version the target index was created with, reading it from the
 * index settings exposed by the document parsing context.
 *
 * @param context the parsing context of the document currently being processed
 * @return the creation version reported by the context's index settings
 */
private IndexVersion getIndexVersionCreated(final DocumentParserContext context) {
    final var settingsOfIndex = context.indexSettings();
    return settingsOfIndex.getIndexVersionCreated();
}

@Override
protected String contentType() {
return CONTENT_TYPE;
Expand Down Expand Up @@ -210,7 +218,7 @@ public TimeSeriesIdBuilder(@Nullable IndexRouting.ExtractFromSource.Builder rout
this.routingBuilder = routingBuilder;
}

public BytesReference build() throws IOException {
public BytesReference withoutHash() throws IOException {
if (dimensions.isEmpty()) {
throw new IllegalArgumentException("Dimension fields are missing.");
}
Expand All @@ -237,7 +245,7 @@ public BytesReference build() throws IOException {
* The idea is to be able to place 'similar' time series close to each other. Two time series
* are considered 'similar' if they share the same dimensions (names and values).
*/
public BytesReference similarityHash() throws IOException {
public BytesReference withHash() throws IOException {
// NOTE: hash all dimension field names
int numberOfDimensions = Math.min(MAX_DIMENSIONS, dimensions.size());
int tsidHashIndex = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ public BytesRef parseBytesRef(Object value) {
}

try {
return builder.build().toBytesRef();
return builder.withoutHash().toBytesRef();
} catch (IOException e) {
throw new IllegalArgumentException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ private static void indexDoc(IndexRouting.ExtractFromSource routing, IndexWriter
fields.add(new SortedSetDocValuesField(dimension.field, new BytesRef(dimension.value.toString())));
}
}
BytesRef tsid = builder.build().toBytesRef();
BytesRef tsid = builder.withoutHash().toBytesRef();
fields.add(new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, tsid));
iw.addDocument(fields);
}
Expand All @@ -252,7 +252,7 @@ private static String expectedId(IndexRouting.ExtractFromSource routing, Doc doc
return TsidExtractingIdFieldMapper.createId(
false,
routingBuilder,
timeSeriesIdBuilder.build().toBytesRef(),
timeSeriesIdBuilder.withoutHash().toBytesRef(),
doc.timestamp,
new byte[16]
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ public void testParseTsid() throws IOException {
timeSeriesIdBuilder.addKeywordDimension("string", randomAlphaOfLength(10));
timeSeriesIdBuilder.addLongDimension("long", randomLong());
timeSeriesIdBuilder.addUnsignedLongDimension("ulong", randomLong());
BytesRef tsidBytes = timeSeriesIdBuilder.build().toBytesRef();
BytesRef tsidBytes = timeSeriesIdBuilder.withoutHash().toBytesRef();
Object tsidFormat = DocValueFormat.TIME_SERIES_ID.format(tsidBytes);
BytesRef tsidParse = DocValueFormat.TIME_SERIES_ID.parseBytesRef(tsidFormat);
assertEquals(tsidBytes, tsidParse);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ private List<Document> docs(long startTimestamp, String dim, long... values) thr
/**
 * Builds a time series id from a single keyword dimension named {@code "dim"}.
 *
 * @param dim value assigned to the {@code "dim"} keyword dimension
 * @return the bytes produced by the id builder for that one dimension
 * @throws IOException declared by the builder call
 */
private static BytesReference tsid(String dim) throws IOException {
// The builder is created without a routing builder (null is passed).
TimeSeriesIdFieldMapper.TimeSeriesIdBuilder idBuilder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null);
idBuilder.addKeywordDimension("dim", dim);
// NOTE(review): this commit view lists the removed build() call directly followed by
// its withoutHash() replacement; only one of the two return lines exists in either revision.
return idBuilder.build();
return idBuilder.withoutHash();
}

private Document doc(long timestamp, BytesReference tsid, long counterValue) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ private void assertGeoLine_TSDB(
ArrayList<Field> fields = new ArrayList<>(
Arrays.asList(
new SortedDocValuesField("group_id", new BytesRef(testData.groups[g])),
new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, builder.build().toBytesRef())
new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, builder.withoutHash().toBytesRef())
)
);
GeoPoint point = points.get(i);
Expand Down

0 comments on commit acc0d14

Please sign in to comment.