diff --git a/docs/changelog/99912.yaml b/docs/changelog/99912.yaml new file mode 100644 index 0000000000000..06f0f9baa9661 --- /dev/null +++ b/docs/changelog/99912.yaml @@ -0,0 +1,6 @@ +pr: 99912 +summary: Represent histogram value count as long +area: Aggregations +type: enhancement +issues: + - 99820 diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index 70164dba236ce..38887cef013b9 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -10,7 +10,7 @@ This data is defined using two paired arrays: * A `values` array of <<number, `double`>> numbers, representing the buckets for the histogram. These values must be provided in ascending order. -* A corresponding `counts` array of <<number, `integer`>> numbers, representing how +* A corresponding `counts` array of <<number, `long`>> numbers, representing how many values fall into each bucket. These numbers must be positive or zero. Because the elements in the `values` array correspond to the elements in the @@ -138,5 +138,5 @@ PUT my-index-000001/_doc/2 <1> Values for each bucket. Values in the array are treated as doubles and must be given in increasing order. For <<search-aggregations-metrics-percentile-aggregation-approximation, T-Digest>> histograms this value represents the mean value. In case of HDR histograms this represents the value iterated to. -<2> Count for each bucket. Values in the arrays are treated as integers and must be positive or zero. +<2> Count for each bucket. Values in the arrays are treated as long integers and must be positive or zero. Negative values will be rejected. The relation between a bucket and a count is given by the position in the array. 
diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 30cff2ce427be..b460c7aaf2ebc 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -148,6 +148,7 @@ static TransportVersion def(int id) { public static final TransportVersion COMPACT_FIELD_CAPS_ADDED = def(8_505_00_0); public static final TransportVersion DATA_STREAM_RESPONSE_INDEX_PROPERTIES = def(8_506_00_0); public static final TransportVersion ML_TRAINED_MODEL_CONFIG_PLATFORM_ADDED = def(8_507_00_0); + public static final TransportVersion LONG_COUNT_IN_HISTOGRAM_ADDED = def(8_508_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java index 902246b442e3b..3ee868ed85ee2 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java @@ -32,6 +32,6 @@ public abstract class HistogramValue { * The current count of the histogram * @return the current count of the histogram */ - public abstract int count(); + public abstract long count(); } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java index 2e76da1a519bd..88e039b6013e4 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java @@ -88,7 +88,7 @@ public void 
collect(int doc, long owningBucketOrd) throws IOException { double previousKey = Double.NEGATIVE_INFINITY; while (sketch.next()) { final double value = sketch.value(); - final int count = sketch.count(); + final long count = sketch.count(); double key = Math.floor((value - offset) / interval); assert key >= previousKey; diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/range/HistoBackedRangeAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/range/HistoBackedRangeAggregator.java index b2be65a9e2901..2db972115767e 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/range/HistoBackedRangeAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/range/HistoBackedRangeAggregator.java @@ -134,7 +134,7 @@ public void collect(int doc, long bucket) throws IOException { previousValue = value; // Collecting the bucket automatically increments the count by the docCountProvider, // account for that here - final int count = sketch.count() - docCountProvider.getDocCount(doc); + final long count = sketch.count() - docCountProvider.getDocCount(doc); lo = HistoBackedRangeAggregator.this.collect(sub, doc, value, bucket, lo, count); } } @@ -142,7 +142,7 @@ public void collect(int doc, long bucket) throws IOException { }; } - abstract int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound, int count) + abstract int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound, long count) throws IOException; private static class NoOverlap extends HistoBackedRangeAggregator { @@ -178,7 +178,7 @@ private NoOverlap( } @Override - public int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound, int count) + public int collect(LeafBucketCollector sub, int 
doc, double value, long owningBucketOrdinal, int lowBound, long count) throws IOException { int lo = lowBound, hi = ranges.length - 1; while (lo <= hi) { @@ -240,7 +240,7 @@ private static class Overlap extends HistoBackedRangeAggregator { } @Override - public int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound, int count) + public int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound, long count) throws IOException { int lo = lowBound, hi = ranges.length - 1; // all candidates are between these indexes int mid = (lo + hi) >>> 1; diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 08fad5dd3b83c..a06eb509d2539 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -15,6 +15,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.io.stream.BytesStreamOutput; @@ -295,7 +296,7 @@ public void parse(DocumentParserContext context) throws IOException { return; } ArrayList<Double> values = null; - ArrayList<Integer> counts = null; + ArrayList<Long> counts = null; // should be an object ensureExpectedToken(XContentParser.Token.START_OBJECT, token, context.parser()); subParser = new XContentSubParser(context.parser()); @@ -343,7 +344,7 @@ public void parse(DocumentParserContext context) throws IOException { while (token != XContentParser.Token.END_ARRAY) { // should be a number 
ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser); - counts.add(subParser.intValue()); + counts.add(subParser.longValue()); token = subParser.nextToken(); } } else { @@ -385,7 +386,7 @@ public void parse(DocumentParserContext context) throws IOException { } BytesStreamOutput streamOutput = new BytesStreamOutput(); for (int i = 0; i < values.size(); i++) { - int count = counts.get(i); + long count = counts.get(i); if (count < 0) { throw new DocumentParsingException( subParser.getTokenLocation(), @@ -393,7 +394,11 @@ public void parse(DocumentParserContext context) throws IOException { ); } else if (count > 0) { // we do not add elements with count == 0 - streamOutput.writeVInt(count); + if (streamOutput.getTransportVersion().onOrAfter(TransportVersions.LONG_COUNT_IN_HISTOGRAM_ADDED)) { + streamOutput.writeVLong(count); + } else { + streamOutput.writeVInt(Math.toIntExact(count)); + } streamOutput.writeLong(Double.doubleToRawLongBits(values.get(i))); } } @@ -431,7 +436,7 @@ public void parse(DocumentParserContext context) throws IOException { /** re-usable {@link HistogramValue} implementation */ private static class InternalHistogramValue extends HistogramValue { double value; - int count; + long count; boolean isExhausted; final ByteArrayStreamInput streamInput; @@ -450,7 +455,11 @@ void reset(BytesRef bytesRef) { @Override public boolean next() throws IOException { if (streamInput.available() > 0) { - count = streamInput.readVInt(); + if (streamInput.getTransportVersion().onOrAfter(TransportVersions.LONG_COUNT_IN_HISTOGRAM_ADDED)) { + count = streamInput.readVLong(); + } else { + count = streamInput.readVInt(); + } value = Double.longBitsToDouble(streamInput.readLong()); return true; } @@ -467,7 +476,7 @@ public double value() { } @Override - public int count() { + public long count() { if (isExhausted) { throw new IllegalArgumentException("histogram already exhausted"); } diff --git 
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index 9be00cacd0e09..2892ada15fec9 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -281,8 +281,8 @@ public void testCountIsLong() throws Exception { .field("values", new double[] { 2, 2, 3 }) .endObject() ); - Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source)); - assertThat(e.getCause().getMessage(), containsString(" out of range of int")); + ParsedDocument doc = mapper.parse(source); + assertThat(doc.rootDoc().getField("field"), notNullValue()); } public void testValuesNotInOrder() throws Exception { diff --git a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/LabelFieldProducer.java b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/LabelFieldProducer.java index c2b8f909618b7..013ad20ffe04d 100644 --- a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/LabelFieldProducer.java +++ b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/LabelFieldProducer.java @@ -158,7 +158,7 @@ public void write(XContentBuilder builder) throws IOException { if (isEmpty() == false) { final HistogramValue histogramValue = (HistogramValue) label.get(); final List<Double> values = new ArrayList<>(); - final List<Integer> counts = new ArrayList<>(); + final List<Long> counts = new ArrayList<>(); while (histogramValue.next()) { values.add(histogramValue.value()); counts.add(histogramValue.count()); diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml 
b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml index 7c1d99458291f..b2f710e5ffed8 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml @@ -251,3 +251,58 @@ histogram with synthetic source and zero counts: latency: values: [0.2, 0.4] counts: [7, 6] + + +--- +histogram with large count values: + - skip: + version: " - 8.10.99" + reason: Support for `long` values was introduced in 8.11.0 + + - do: + indices.create: + index: histo_large_count + body: + mappings: + properties: + latency: + type: histogram + - do: + bulk: + index: histo_large_count + refresh: true + body: + - '{"index": {}}' + - '{"latency": {"values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [0, 1000000000000, 10, 1000, 1000000]}}' + + - do: + search: + index: histo_large_count + body: + size: 0 + aggs: + histo: + histogram: + field: latency + interval: 0.3 + + - length: { aggregations.histo.buckets: 2 } + - match: { aggregations.histo.buckets.0.key: 0.0 } + - match: { aggregations.histo.buckets.0.doc_count: 1000000000000 } + - match: { aggregations.histo.buckets.1.key: 0.3 } + - match: { aggregations.histo.buckets.1.doc_count: 1001010 } + + - do: + search: + index: histo_large_count + body: + size: 0 + aggs: + percent: + percentiles: + field: latency + + - length: { aggregations.percent.values: 7 } + - match: { aggregations.percent.values.1\.0: 0.2 } + - match: { aggregations.percent.values.5\.0: 0.2 } + - match: { aggregations.percent.values.25\.0: 0.2 }