-
Notifications
You must be signed in to change notification settings - Fork 25.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for histogram fields to rate aggregation #63289
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.analytics.rate; | ||
|
||
import java.io.IOException; | ||
import java.util.Map; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.elasticsearch.common.Rounding; | ||
import org.elasticsearch.common.util.BigArrays; | ||
import org.elasticsearch.index.fielddata.HistogramValue; | ||
import org.elasticsearch.index.fielddata.HistogramValues; | ||
import org.elasticsearch.search.aggregations.Aggregator; | ||
import org.elasticsearch.search.aggregations.LeafBucketCollector; | ||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; | ||
import org.elasticsearch.search.aggregations.metrics.CompensatedSum; | ||
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; | ||
import org.elasticsearch.search.internal.SearchContext; | ||
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource; | ||
|
||
public class HistogramRateAggregator extends AbstractRateAggregator { | ||
public HistogramRateAggregator( | ||
String name, | ||
ValuesSourceConfig valuesSourceConfig, | ||
Rounding.DateTimeUnit rateUnit, | ||
SearchContext context, | ||
Aggregator parent, | ||
Map<String, Object> metadata | ||
) throws IOException { | ||
super(name, valuesSourceConfig, rateUnit, context, parent, metadata); | ||
} | ||
|
||
@Override | ||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { | ||
final BigArrays bigArrays = context.bigArrays(); | ||
final CompensatedSum kahanSummation = new CompensatedSum(0, 0); | ||
final HistogramValues values = ((HistogramValuesSource.Histogram) valuesSource).getHistogramValues(ctx); | ||
return new LeafBucketCollectorBase(sub, values) { | ||
@Override | ||
public void collect(int doc, long bucket) throws IOException { | ||
sums = bigArrays.grow(sums, bucket + 1); | ||
compensations = bigArrays.grow(compensations, bucket + 1); | ||
|
||
if (values.advanceExact(doc)) { | ||
final HistogramValue sketch = values.histogram(); | ||
while (sketch.next()) { | ||
double sum = sums.get(bucket); | ||
double compensation = compensations.get(bucket); | ||
kahanSummation.reset(sum, compensation); | ||
kahanSummation.add(sketch.value()); | ||
compensations.set(bucket, kahanSummation.delta()); | ||
sums.set(bucket, kahanSummation.value()); | ||
} | ||
} | ||
} | ||
}; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.analytics.rate; | ||
|
||
import java.io.IOException; | ||
import java.util.Map; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.elasticsearch.common.Rounding; | ||
import org.elasticsearch.common.util.BigArrays; | ||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; | ||
import org.elasticsearch.search.aggregations.Aggregator; | ||
import org.elasticsearch.search.aggregations.LeafBucketCollector; | ||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; | ||
import org.elasticsearch.search.aggregations.metrics.CompensatedSum; | ||
import org.elasticsearch.search.aggregations.support.ValuesSource; | ||
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; | ||
import org.elasticsearch.search.internal.SearchContext; | ||
|
||
public class NumericRateAggregator extends AbstractRateAggregator { | ||
public NumericRateAggregator( | ||
String name, | ||
ValuesSourceConfig valuesSourceConfig, | ||
Rounding.DateTimeUnit rateUnit, | ||
SearchContext context, | ||
Aggregator parent, | ||
Map<String, Object> metadata | ||
) throws IOException { | ||
super(name, valuesSourceConfig, rateUnit, context, parent, metadata); | ||
} | ||
|
||
@Override | ||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { | ||
final BigArrays bigArrays = context.bigArrays(); | ||
final CompensatedSum kahanSummation = new CompensatedSum(0, 0); | ||
final SortedNumericDoubleValues values = ((ValuesSource.Numeric) valuesSource).doubleValues(ctx); | ||
return new LeafBucketCollectorBase(sub, values) { | ||
@Override | ||
public void collect(int doc, long bucket) throws IOException { | ||
sums = bigArrays.grow(sums, bucket + 1); | ||
compensations = bigArrays.grow(compensations, bucket + 1); | ||
|
||
if (values.advanceExact(doc)) { | ||
final int valuesCount = values.docValueCount(); | ||
// Compute the sum of double values with Kahan summation algorithm which is more | ||
// accurate than naive summation. | ||
double sum = sums.get(bucket); | ||
double compensation = compensations.get(bucket); | ||
kahanSummation.reset(sum, compensation); | ||
|
||
for (int i = 0; i < valuesCount; i++) { | ||
double value = values.nextValue(); | ||
kahanSummation.add(value); | ||
} | ||
|
||
compensations.set(bucket, kahanSummation.delta()); | ||
sums.set(bucket, kahanSummation.value()); | ||
} | ||
} | ||
}; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,10 @@ | |
|
||
package org.elasticsearch.xpack.analytics.rate; | ||
|
||
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.util.Map; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.elasticsearch.common.Rounding; | ||
import org.elasticsearch.index.query.QueryShardContext; | ||
|
@@ -19,10 +23,7 @@ | |
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; | ||
import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; | ||
import org.elasticsearch.search.internal.SearchContext; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
import java.util.Map; | ||
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType; | ||
|
||
class RateAggregatorFactory extends ValuesSourceAggregatorFactory { | ||
|
||
|
@@ -44,15 +45,21 @@ class RateAggregatorFactory extends ValuesSourceAggregatorFactory { | |
static void registerAggregators(ValuesSourceRegistry.Builder builder) { | ||
builder.register( | ||
RateAggregationBuilder.REGISTRY_KEY, | ||
List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.BOOLEAN), | ||
RateAggregator::new, | ||
Collections.singletonList(CoreValuesSourceType.NUMERIC), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it a BWC break that we're dropping boolean here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Technically, yes. But this behavior was never documented, and it doesn't make any sense in this context. I am not even sure how this number would be interpreted for a boolean field. |
||
NumericRateAggregator::new, | ||
true | ||
); | ||
builder.register( | ||
RateAggregationBuilder.REGISTRY_KEY, | ||
Collections.singletonList(AnalyticsValuesSourceType.HISTOGRAM), | ||
HistogramRateAggregator::new, | ||
true | ||
); | ||
} | ||
|
||
@Override | ||
protected Aggregator createUnmapped(SearchContext searchContext, Aggregator parent, Map<String, Object> metadata) throws IOException { | ||
return new RateAggregator(name, config, rateUnit, searchContext, parent, metadata) { | ||
return new AbstractRateAggregator(name, config, rateUnit, searchContext, parent, metadata) { | ||
@Override | ||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) { | ||
return LeafBucketCollector.NO_OP_COLLECTOR; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is fine, but just for the sake of mentioning options, we could push getting the values source into the concrete subclasses and do the cast in the constructor, rather than at access time in the leaf reader.