-
Notifications
You must be signed in to change notification settings - Fork 25k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Histogram field type support for ValueCount and Avg aggregations (#55933)
Implements value_count and avg aggregations over Histogram fields, as discussed in #53285:
- value_count returns the sum of all entries in the counts arrays of the histograms
- avg computes a weighted average of the values array of the histogram by multiplying each value by its associated element in the counts array
- Loading branch information
Showing
19 changed files
with
696 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
124 changes: 124 additions & 0 deletions
124
...java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedAvgAggregator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
package org.elasticsearch.xpack.analytics.aggregations.metrics; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.search.ScoreMode; | ||
import org.elasticsearch.common.lease.Releasables; | ||
import org.elasticsearch.common.util.BigArrays; | ||
import org.elasticsearch.common.util.DoubleArray; | ||
import org.elasticsearch.common.util.LongArray; | ||
import org.elasticsearch.index.fielddata.HistogramValue; | ||
import org.elasticsearch.index.fielddata.HistogramValues; | ||
import org.elasticsearch.search.DocValueFormat; | ||
import org.elasticsearch.search.aggregations.Aggregator; | ||
import org.elasticsearch.search.aggregations.InternalAggregation; | ||
import org.elasticsearch.search.aggregations.LeafBucketCollector; | ||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; | ||
import org.elasticsearch.search.aggregations.metrics.CompensatedSum; | ||
import org.elasticsearch.search.aggregations.metrics.InternalAvg; | ||
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator; | ||
import org.elasticsearch.search.internal.SearchContext; | ||
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource; | ||
|
||
import java.io.IOException; | ||
import java.util.Map; | ||
|
||
/** | ||
* Average aggregator operating over histogram datatypes {@link HistogramValuesSource} | ||
* The aggregation computes weighted average by taking counts into consideration for each value | ||
*/ | ||
class HistoBackedAvgAggregator extends NumericMetricsAggregator.SingleValue { | ||
|
||
private final HistogramValuesSource.Histogram valuesSource; | ||
|
||
LongArray counts; | ||
DoubleArray sums; | ||
DoubleArray compensations; | ||
DocValueFormat format; | ||
|
||
HistoBackedAvgAggregator(String name, HistogramValuesSource.Histogram valuesSource, DocValueFormat formatter, SearchContext context, | ||
Aggregator parent, Map<String, Object> metadata) throws IOException { | ||
super(name, context, parent, metadata); | ||
this.valuesSource = valuesSource; | ||
this.format = formatter; | ||
if (valuesSource != null) { | ||
final BigArrays bigArrays = context.bigArrays(); | ||
counts = bigArrays.newLongArray(1, true); | ||
sums = bigArrays.newDoubleArray(1, true); | ||
compensations = bigArrays.newDoubleArray(1, true); | ||
} | ||
} | ||
|
||
@Override | ||
public ScoreMode scoreMode() { | ||
return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES; | ||
} | ||
|
||
@Override | ||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, | ||
final LeafBucketCollector sub) throws IOException { | ||
if (valuesSource == null) { | ||
return LeafBucketCollector.NO_OP_COLLECTOR; | ||
} | ||
final BigArrays bigArrays = context.bigArrays(); | ||
final HistogramValues values = valuesSource.getHistogramValues(ctx); | ||
final CompensatedSum kahanSummation = new CompensatedSum(0, 0); | ||
|
||
return new LeafBucketCollectorBase(sub, values) { | ||
@Override | ||
public void collect(int doc, long bucket) throws IOException { | ||
counts = bigArrays.grow(counts, bucket + 1); | ||
sums = bigArrays.grow(sums, bucket + 1); | ||
compensations = bigArrays.grow(compensations, bucket + 1); | ||
|
||
if (values.advanceExact(doc)) { | ||
final HistogramValue sketch = values.histogram(); | ||
|
||
// Compute the sum of double values with Kahan summation algorithm which is more accurate than naive summation | ||
final double sum = sums.get(bucket); | ||
final double compensation = compensations.get(bucket); | ||
kahanSummation.reset(sum, compensation); | ||
while (sketch.next()) { | ||
double d = sketch.value() * sketch.count(); | ||
kahanSummation.add(d); | ||
counts.increment(bucket, sketch.count()); | ||
} | ||
|
||
sums.set(bucket, kahanSummation.value()); | ||
compensations.set(bucket, kahanSummation.delta()); | ||
} | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public double metric(long owningBucketOrd) { | ||
if (valuesSource == null || owningBucketOrd >= sums.size()) { | ||
return Double.NaN; | ||
} | ||
return sums.get(owningBucketOrd) / counts.get(owningBucketOrd); | ||
} | ||
|
||
@Override | ||
public InternalAggregation buildAggregation(long bucket) { | ||
if (valuesSource == null || bucket >= sums.size()) { | ||
return buildEmptyAggregation(); | ||
} | ||
return new InternalAvg(name, sums.get(bucket), counts.get(bucket), format, metadata()); | ||
} | ||
|
||
@Override | ||
public InternalAggregation buildEmptyAggregation() { | ||
return new InternalAvg(name, 0.0, 0L, format, metadata()); | ||
} | ||
|
||
@Override | ||
public void doClose() { | ||
Releasables.close(counts, sums, compensations); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.