diff --git a/docs/reference/mapping/fields.asciidoc b/docs/reference/mapping/fields.asciidoc index 75cb29abf89fd..d2625c506f565 100644 --- a/docs/reference/mapping/fields.asciidoc +++ b/docs/reference/mapping/fields.asciidoc @@ -29,6 +29,13 @@ fields can be customized when a mapping is created. The size of the `_source` field in bytes, provided by the {plugins}/mapper-size.html[`mapper-size` plugin]. +[discrete] +=== Doc count metadata field + +<>:: + + A custom field used for storing doc counts when a document represents pre-aggregated data. + [discrete] === Indexing metadata fields @@ -55,6 +62,7 @@ fields can be customized when a mapping is created. Application specific metadata. +include::fields/doc-count-field.asciidoc[] include::fields/field-names-field.asciidoc[] @@ -69,4 +77,3 @@ include::fields/meta-field.asciidoc[] include::fields/routing-field.asciidoc[] include::fields/source-field.asciidoc[] - diff --git a/docs/reference/mapping/fields/doc-count-field.asciidoc b/docs/reference/mapping/fields/doc-count-field.asciidoc new file mode 100644 index 0000000000000..52eb8ea52a71f --- /dev/null +++ b/docs/reference/mapping/fields/doc-count-field.asciidoc @@ -0,0 +1,118 @@ +[[mapping-doc-count-field]] +=== `_doc_count` data type +++++ +_doc_count +++++ + +Bucket aggregations always return a field named `doc_count` showing the number of documents that were aggregated and partitioned +in each bucket. Computation of the value of `doc_count` is very simple. `doc_count` is incremented by 1 for every document collected +in each bucket. + +While this simple approach is effective when computing aggregations over individual documents, it fails to accurately represent +documents that store pre-aggregated data (such as `histogram` or `aggregate_metric_double` fields), because one summary field may +represent multiple documents. + +To allow for correct computation of the number of documents when working with pre-aggregated data, we have introduced a +metadata field type named `_doc_count`. `_doc_count` must always be a positive integer representing the number of documents +aggregated in a single summary field. + +When field `_doc_count` is added to a document, all bucket aggregations will respect its value and increment the bucket `doc_count` +by the value of the field. If a document does not contain any `_doc_count` field, `_doc_count = 1` is implied by default. + +[IMPORTANT] +======== +* A `_doc_count` field can only store a single positive integer per document. Nested arrays are not allowed. +* If a document contains no `_doc_count` fields, aggregators will increment by 1, which is the default behavior. +======== + +[[mapping-doc-count-field-example]] +==== Example + +The following <> API request creates a new index with the following field mappings: + +* `my_histogram`, a `histogram` field used to store percentile data +* `my_text`, a `keyword` field used to store a title for the histogram + +[source,console] +-------------------------------------------------- +PUT my_index +{ + "mappings" : { + "properties" : { + "my_histogram" : { + "type" : "histogram" + }, + "my_text" : { + "type" : "keyword" + } + } + } +} +-------------------------------------------------- + +The following <> API requests store pre-aggregated data for +two histograms: `histogram_1` and `histogram_2`. + +[source,console] +-------------------------------------------------- +PUT my_index/_doc/1 +{ + "my_text" : "histogram_1", + "my_histogram" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], + "counts" : [3, 7, 23, 12, 6] + }, + "_doc_count": 45 <1> +} + +PUT my_index/_doc/2 +{ + "my_text" : "histogram_2", + "my_histogram" : { + "values" : [0.1, 0.25, 0.35, 0.4, 0.45, 0.5], + "counts" : [8, 17, 8, 7, 6, 2] + }, + "_doc_count_": 62 <1> +} +-------------------------------------------------- +<1> Field `_doc_count` must be a positive integer storing the number of documents aggregated to produce each histogram. + +If we run the following <> on `my_index`: + +[source,console] +-------------------------------------------------- +GET /_search +{ + "aggs" : { + "histogram_titles" : { + "terms" : { "field" : "my_text" } + } + } +} +-------------------------------------------------- + +We will get the following response: + +[source,console-result] +-------------------------------------------------- +{ + ... + "aggregations" : { + "histogram_titles" : { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets" : [ + { + "key" : "histogram_2", + "doc_count" : 62 + }, + { + "key" : "histogram_1", + "doc_count" : 45 + } + ] + } + } +} +-------------------------------------------------- +// TESTRESPONSE[skip:test not setup] diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml new file mode 100644 index 0000000000000..8c19a418f0a8f --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_doc_count_field.yml @@ -0,0 +1,150 @@ +setup: + - do: + indices.create: + index: test_1 + body: + settings: + number_of_replicas: 0 + mappings: + properties: + str: + type: keyword + number: + type: integer + + - do: + bulk: + index: test_1 + refresh: true + body: + - '{"index": {}}' + - '{"_doc_count": 10, "str": "abc", "number" : 500, "unmapped": "abc" }' + - '{"index": {}}' + - '{"_doc_count": 5, "str": "xyz", "number" : 100, "unmapped": "xyz" }' + - '{"index": {}}' + - '{"_doc_count": 7, "str": "foo", "number" : 100, "unmapped": "foo" }' + - '{"index": {}}' + - '{"_doc_count": 1, "str": "foo", "number" : 200, "unmapped": "foo" }' + - '{"index": {}}' + - '{"str": "abc", "number" : 500, "unmapped": "abc" }' + +--- +"Test numeric terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + + - do: + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : { "num_terms" : { "terms" : { "field" : "number" } } } } + + - match: { hits.total: 5 } + - length: { aggregations.num_terms.buckets: 3 } + - match: { aggregations.num_terms.buckets.0.key: 100 } + - match: { aggregations.num_terms.buckets.0.doc_count: 12 } + - match: { aggregations.num_terms.buckets.1.key: 500 } + - match: { aggregations.num_terms.buckets.1.doc_count: 11 } + - match: { aggregations.num_terms.buckets.2.key: 200 } + - match: { aggregations.num_terms.buckets.2.doc_count: 1 } + + +--- +"Test keyword terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + - do: + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str" } } } } + + - match: { hits.total: 5 } + - length: { aggregations.str_terms.buckets: 3 } + - match: { aggregations.str_terms.buckets.0.key: "abc" } + - match: { aggregations.str_terms.buckets.0.doc_count: 11 } + - match: { aggregations.str_terms.buckets.1.key: "foo" } + - match: { aggregations.str_terms.buckets.1.doc_count: 8 } + - match: { aggregations.str_terms.buckets.2.key: "xyz" } + - match: { aggregations.str_terms.buckets.2.doc_count: 5 } + +--- + +"Test unmapped string terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + - do: + bulk: + index: test_2 + refresh: true + body: + - '{"index": {}}' + - '{"_doc_count": 10, "str": "abc" }' + - '{"index": {}}' + - '{"str": "abc" }' + - do: + search: + index: test_2 + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str.keyword" } } } } + + - match: { hits.total: 2 } + - length: { aggregations.str_terms.buckets: 1 } + - match: { aggregations.str_terms.buckets.0.key: "abc" } + - match: { aggregations.str_terms.buckets.0.doc_count: 11 } + +--- +"Test composite str_terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + - do: + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : + { "composite_agg" : { "composite" : + { + "sources": ["str_terms": { "terms": { "field": "str" } }] + } + } + } + } + + - match: { hits.total: 5 } + - length: { aggregations.composite_agg.buckets: 3 } + - match: { aggregations.composite_agg.buckets.0.key.str_terms: "abc" } + - match: { aggregations.composite_agg.buckets.0.doc_count: 11 } + - match: { aggregations.composite_agg.buckets.1.key.str_terms: "foo" } + - match: { aggregations.composite_agg.buckets.1.doc_count: 8 } + - match: { aggregations.composite_agg.buckets.2.key.str_terms: "xyz" } + - match: { aggregations.composite_agg.buckets.2.doc_count: 5 } + + +--- +"Test composite num_terms agg with doc_count": + - skip: + version: " - 7.99.99" + reason: "Doc count fields are only implemented in 8.0" + - do: + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : + { "composite_agg" : + { "composite" : + { + "sources": ["num_terms" : { "terms" : { "field" : "number" } }] + } + } + } + } + + - match: { hits.total: 5 } + - length: { aggregations.composite_agg.buckets: 3 } + - match: { aggregations.composite_agg.buckets.0.key.num_terms: 100 } + - match: { aggregations.composite_agg.buckets.0.doc_count: 12 } + - match: { aggregations.composite_agg.buckets.1.key.num_terms: 200 } + - match: { aggregations.composite_agg.buckets.1.doc_count: 1 } + - match: { aggregations.composite_agg.buckets.2.key.num_terms: 500 } + - match: { aggregations.composite_agg.buckets.2.doc_count: 11 } + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java new file mode 100644 index 0000000000000..611bab88f66fe --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -0,0 +1,141 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParserUtils; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +/** Mapper for the doc_count field. */ +public class DocCountFieldMapper extends MetadataFieldMapper { + + public static final String NAME = "_doc_count"; + public static final String CONTENT_TYPE = "_doc_count"; + + public static final TypeParser PARSER = new ConfigurableTypeParser( + c -> new DocCountFieldMapper(), + c -> new DocCountFieldMapper.Builder()); + + static class Builder extends MetadataFieldMapper.Builder { + + Builder() { + super(NAME); + } + + @Override + protected List> getParameters() { + return Collections.emptyList(); + } + + @Override + public DocCountFieldMapper build(BuilderContext context) { + return new DocCountFieldMapper(); + } + } + + public static final class DocCountFieldType extends MappedFieldType { + + public static final DocCountFieldType INSTANCE = new DocCountFieldType(); + + private static final Long defaultValue = 1L; + + public DocCountFieldType() { + super(NAME, false, false, true, TextSearchInfo.NONE, Collections.emptyMap()); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public String familyTypeName() { + return NumberFieldMapper.NumberType.INTEGER.typeName(); + } + + @Override + public Query existsQuery(QueryShardContext context) { + return new DocValuesFieldExistsQuery(NAME); + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + throw new QueryShardException(context, "Field [" + name() + "] of type [" + typeName() + "] is not searchable"); + } + + @Override + public ValueFetcher valueFetcher(MapperService mapperService, SearchLookup searchLookup, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + + return new SourceValueFetcher(name(), mapperService, defaultValue) { + @Override + protected Object parseSourceValue(Object value) { + if ("".equals(value)) { + return defaultValue; + } else { + return NumberFieldMapper.NumberType.objectToLong(value, false); + } + } + }; + } + } + + private DocCountFieldMapper() { + super(DocCountFieldType.INSTANCE); + } + + @Override + protected void parseCreateField(ParseContext context) throws IOException { + XContentParser parser = context.parser(); + XContentParserUtils.ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser); + + long value = parser.longValue(false); + if (value <= 0) { + throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer."); + } + final Field docCount = new NumericDocValuesField(NAME, value); + context.doc().add(docCount); + } + + @Override + public void preParse(ParseContext context) { } + + @Override + public DocCountFieldType fieldType() { + return (DocCountFieldType) super.fieldType(); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + +} diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index cade11a509b62..2ba32df753ea1 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -32,6 +32,7 @@ import org.elasticsearch.index.mapper.BooleanFieldMapper; import org.elasticsearch.index.mapper.CompletionFieldMapper; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DocCountFieldMapper; import org.elasticsearch.index.mapper.FieldAliasMapper; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.GeoPointFieldMapper; @@ -152,6 +153,7 @@ private static Map initBuiltInMetadataMa builtInMetadataMappers.put(NestedPathFieldMapper.NAME, NestedPathFieldMapper.PARSER); builtInMetadataMappers.put(VersionFieldMapper.NAME, VersionFieldMapper.PARSER); builtInMetadataMappers.put(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PARSER); + builtInMetadataMappers.put(DocCountFieldMapper.NAME, DocCountFieldMapper.PARSER); //_field_names must be added last so that it has a chance to see all the other mappers builtInMetadataMappers.put(FieldNamesFieldMapper.NAME, FieldNamesFieldMapper.PARSER); return Collections.unmodifiableMap(builtInMetadataMappers); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java index 31284edeaa478..be9c81b5759b9 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java @@ -179,7 +179,7 @@ public Map metadata() { @Override public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { - preGetSubLeafCollectors(); + preGetSubLeafCollectors(ctx); final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx); return getLeafCollector(ctx, sub); } @@ -188,7 +188,7 @@ public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws * Can be overridden by aggregator implementations that like the perform an operation before the leaf collectors * of children aggregators are instantiated for the next segment. */ - protected void preGetSubLeafCollectors() throws IOException { + protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { } /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 596aadd927e83..bb2e956a485b6 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -18,9 +18,10 @@ */ package org.elasticsearch.search.aggregations.bucket; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorBase; @@ -52,7 +53,8 @@ public abstract class BucketsAggregator extends AggregatorBase { private final BigArrays bigArrays; private final IntConsumer multiBucketConsumer; - private IntArray docCounts; + private LongArray docCounts; + protected final DocCountProvider docCountProvider; public BucketsAggregator(String name, AggregatorFactories factories, SearchContext context, Aggregator parent, CardinalityUpperBound bucketCardinality, Map metadata) throws IOException { @@ -63,7 +65,8 @@ public BucketsAggregator(String name, AggregatorFactories factories, SearchConte } else { multiBucketConsumer = (count) -> {}; } - docCounts = bigArrays.newIntArray(1, true); + docCounts = bigArrays.newLongArray(1, true); + docCountProvider = new DocCountProvider(); } /** @@ -92,7 +95,8 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long * Same as {@link #collectBucket(LeafBucketCollector, int, long)}, but doesn't check if the docCounts needs to be re-sized. */ public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException { - if (docCounts.increment(bucketOrd, 1) == 1) { + long docCount = docCountProvider.getDocCount(doc); + if (docCounts.increment(bucketOrd, docCount) == docCount) { // We calculate the final number of buckets only during the reduce phase. But we still need to // trigger bucket consumer from time to time in order to give it a chance to check available memory and break // the execution if we are running out. To achieve that we are passing 0 as a bucket count. @@ -123,11 +127,11 @@ public final void mergeBuckets(long[] mergeMap, long newNumBuckets) { * merge the actual ordinals and doc ID deltas. */ public final void rewriteBuckets(long newNumBuckets, LongUnaryOperator mergeMap){ - try (IntArray oldDocCounts = docCounts) { - docCounts = bigArrays.newIntArray(newNumBuckets, true); + try (LongArray oldDocCounts = docCounts) { + docCounts = bigArrays.newLongArray(newNumBuckets, true); docCounts.fill(0, newNumBuckets, 0); for (long i = 0; i < oldDocCounts.size(); i++) { - int docCount = oldDocCounts.get(i); + long docCount = oldDocCounts.get(i); if(docCount == 0) continue; @@ -140,14 +144,14 @@ public final void rewriteBuckets(long newNumBuckets, LongUnaryOperator mergeMap) } } - public IntArray getDocCounts() { + public LongArray getDocCounts() { return docCounts; } /** * Utility method to increment the doc counts of the given bucket (identified by the bucket ordinal) */ - public final void incrementBucketDocCount(long bucketOrd, int inc) { + public final void incrementBucketDocCount(long bucketOrd, long inc) { docCounts = bigArrays.grow(docCounts, bucketOrd + 1); docCounts.increment(bucketOrd, inc); } @@ -155,7 +159,7 @@ public final void incrementBucketDocCount(long bucketOrd, int inc) { /** * Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal) */ - public final int bucketDocCount(long bucketOrd) { + public final long bucketDocCount(long bucketOrd) { if (bucketOrd >= docCounts.size()) { // This may happen eg. if no document in the highest buckets is accepted by a sub aggregator. // For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value @@ -295,7 +299,7 @@ protected final InternalAggregation[] buildAggregationsForFixedBucketCount(l } @FunctionalInterface protected interface BucketBuilderForFixedCount { - B build(int offsetInOwningOrd, int docCount, InternalAggregations subAggregationResults); + B build(int offsetInOwningOrd, long docCount, InternalAggregations subAggregationResults); } /** @@ -366,7 +370,7 @@ protected final InternalAggregation[] buildAggregationsForVariableBuckets(lo } @FunctionalInterface protected interface BucketBuilderForVariable { - B build(long bucketValue, int docCount, InternalAggregations subAggregationResults); + B build(long bucketValue, long docCount, InternalAggregations subAggregationResults); } @FunctionalInterface protected interface ResultBuilderForVariable { @@ -394,7 +398,7 @@ public BucketComparator bucketComparator(String key, SortOrder order) { return super.bucketComparator(key, order); } if (key == null || "doc_count".equals(key)) { - return (lhs, rhs) -> order.reverseMul() * Integer.compare(bucketDocCount(lhs), bucketDocCount(rhs)); + return (lhs, rhs) -> order.reverseMul() * Long.compare(bucketDocCount(lhs), bucketDocCount(rhs)); } throw new IllegalArgumentException("Ordering on a single-bucket aggregation can only be done on its doc_count. " + "Either drop the key (a la \"" + name() + "\") or change it to \"doc_count\" (a la \"" + name() + @@ -411,4 +415,10 @@ public static boolean descendsFromGlobalAggregator(Aggregator parent) { return false; } + @Override + protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { + super.preGetSubLeafCollectors(ctx); + // Set LeafReaderContext to the doc_count provider + docCountProvider.setLeafReaderContext(ctx); + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java new file mode 100644 index 0000000000000..9cf25e098cb0f --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -0,0 +1,49 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.elasticsearch.index.mapper.DocCountFieldMapper; + +import java.io.IOException; + +/** + * An implementation of a doc_count provider that reads the value + * of the _doc_count field in the document. If a document does not have a + * _doc_count field the implementation will return 1 as the default value. + */ +public class DocCountProvider { + + private NumericDocValues docCountValues; + + public long getDocCount(int doc) throws IOException { + if (docCountValues != null && docCountValues.advanceExact(doc)) { + return docCountValues.longValue(); + } else { + return 1L; + } + } + + public void setLeafReaderContext(LeafReaderContext ctx) throws IOException { + docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java index afc1bea27c2f3..c12a6f002b418 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java @@ -200,7 +200,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I List buckets = new ArrayList<>(filters.length); for (int i = 0; i < keys.length; i++) { long bucketOrd = bucketOrd(owningBucketOrds[owningBucketOrdIdx], i); - int docCount = bucketDocCount(bucketOrd); + long docCount = bucketDocCount(bucketOrd); // Empty buckets are not returned because this aggregation will commonly be used under a // a date-histogram where we will look for transactions over time and can expect many // empty buckets. @@ -214,7 +214,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I for (int i = 0; i < keys.length; i++) { for (int j = i + 1; j < keys.length; j++) { long bucketOrd = bucketOrd(owningBucketOrds[owningBucketOrdIdx], pos); - int docCount = bucketDocCount(bucketOrd); + long docCount = bucketDocCount(bucketOrd); // Empty buckets are not returned due to potential for very sparse matrices if (docCount > 0) { String intersectKey = keys[i] + separator + keys[j]; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java index 5946507403925..5c21692f3ad5f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -155,7 +155,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I int slot = queue.pop(); CompositeKey key = queue.toCompositeKey(slot); InternalAggregations aggs = subAggsForBuckets[slot]; - int docCount = queue.getDocCount(slot); + long docCount = queue.getDocCount(slot); buckets[queue.size()] = new InternalComposite.InternalBucket(sourceNames, formats, key, reverseMuls, docCount, aggs); } CompositeKey lastBucket = num > 0 ? buckets[num-1].getRawKey() : null; @@ -430,7 +430,8 @@ private LeafBucketCollector getFirstPassCollector(RoaringDocIdSet.Builder builde @Override public void collect(int doc, long bucket) throws IOException { try { - if (queue.addIfCompetitive(indexSortPrefix)) { + long docCount = docCountProvider.getDocCount(doc); + if (queue.addIfCompetitive(indexSortPrefix, docCount)) { if (builder != null && lastDoc != doc) { builder.add(doc); lastDoc = doc; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java index 70887e0724c8a..35caefb495da4 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java @@ -25,7 +25,7 @@ import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.LeafBucketCollector; import java.io.IOException; @@ -65,7 +65,7 @@ public int hashCode() { private final Map map; private final SingleDimensionValuesSource[] arrays; - private IntArray docCounts; + private LongArray docCounts; private boolean afterKeyIsSet = false; /** @@ -88,7 +88,7 @@ public int hashCode() { sources[i].setAfter(afterKey.get(i)); } } - this.docCounts = bigArrays.newIntArray(1, false); + this.docCounts = bigArrays.newLongArray(1, false); } @Override @@ -127,19 +127,19 @@ Comparable getUpperValueLeadSource() throws IOException { /** * Returns the document count in slot. */ - int getDocCount(int slot) { + long getDocCount(int slot) { return docCounts.get(slot); } /** * Copies the current value in slot. */ - private void copyCurrent(int slot) { + private void copyCurrent(int slot, long value) { for (int i = 0; i < arrays.length; i++) { arrays[i].copyCurrent(slot); } docCounts = bigArrays.grow(docCounts, slot+1); - docCounts.set(slot, 1); + docCounts.set(slot, value); } /** @@ -248,8 +248,8 @@ LeafBucketCollector getLeafCollector(Comparable forceLeadSourceValue, * Check if the current candidate should be added in the queue. * @return true if the candidate is competitive (added or already in the queue). */ - boolean addIfCompetitive() { - return addIfCompetitive(0); + boolean addIfCompetitive(long inc) { + return addIfCompetitive(0, inc); } @@ -263,12 +263,12 @@ boolean addIfCompetitive() { * * @throws CollectionTerminatedException if the current collection can be terminated early due to index sorting. */ - boolean addIfCompetitive(int indexSortSourcePrefix) { + boolean addIfCompetitive(int indexSortSourcePrefix, long inc) { // checks if the candidate key is competitive Integer topSlot = compareCurrent(); if (topSlot != null) { // this key is already in the top N, skip it - docCounts.increment(topSlot, 1); + docCounts.increment(topSlot, inc); return true; } if (afterKeyIsSet) { @@ -309,7 +309,7 @@ boolean addIfCompetitive(int indexSortSourcePrefix) { newSlot = size(); } // move the candidate key to its new slot - copyCurrent(newSlot); + copyCurrent(newSlot, inc); map.put(new Slot(newSlot), newSlot); add(newSlot); return true; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java index 01e2c0a3ae5b0..7a5cef87b6731 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java @@ -27,6 +27,7 @@ import org.apache.lucene.util.DocIdSetBuilder; import org.elasticsearch.common.Nullable; import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.bucket.DocCountProvider; import java.io.IOException; @@ -54,6 +55,8 @@ protected boolean processBucket(CompositeValuesCollectorQueue queue, LeafReaderC Comparable leadSourceBucket, @Nullable DocIdSetBuilder builder) throws IOException { final int[] topCompositeCollected = new int[1]; final boolean[] hasCollected = new boolean[1]; + final DocCountProvider docCountProvider = new DocCountProvider(); + docCountProvider.setLeafReaderContext(context); final LeafBucketCollector queueCollector = new LeafBucketCollector() { int lastDoc = -1; @@ -66,7 +69,8 @@ protected boolean processBucket(CompositeValuesCollectorQueue queue, LeafReaderC @Override public void collect(int doc, long bucket) throws IOException { hasCollected[0] = true; - if (queue.addIfCompetitive()) { + long docCount = docCountProvider.getDocCount(doc); + if (queue.addIfCompetitive(docCount)) { topCompositeCollected[0]++; if (adder != null && doc != lastDoc) { if (remainingBits == 0) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index f481d30d8b0fd..6e40d7217fbcd 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -107,7 +107,8 @@ public void collect(int parentDoc, long bucket) throws IOException { } @Override - protected void preGetSubLeafCollectors() throws IOException { + protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException { + super.preGetSubLeafCollectors(ctx); processBufferedDocs(); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 4dad0ca1d4af6..f26dccf900315 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -30,7 +30,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; -import org.elasticsearch.common.util.IntArray; import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.LongHash; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -272,7 +271,7 @@ protected void doClose() { static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { private LongUnaryOperator mapping; - private IntArray segmentDocCounts; + private LongArray segmentDocCounts; LowCardinality( String name, @@ -292,7 +291,7 @@ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { super(name, factories, resultStrategy, valuesSource, order, format, bucketCountThresholds, null, context, parent, remapGlobalOrds, collectionMode, showTermDocCountError, CardinalityUpperBound.ONE, metadata); assert factories == null || factories.countAggregators() == 0; - this.segmentDocCounts = context.bigArrays().newIntArray(1, true); + this.segmentDocCounts = context.bigArrays().newLongArray(1, true); } @Override @@ -316,7 +315,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } int ord = singleValues.ordValue(); - segmentDocCounts.increment(ord + 1, 1); + long docCount = docCountProvider.getDocCount(doc); + segmentDocCounts.increment(ord + 1, docCount); } }); } @@ -329,7 +329,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) { - segmentDocCounts.increment(segmentOrd + 1, 1); + long docCount = docCountProvider.getDocCount(doc); + segmentDocCounts.increment(segmentOrd + 1, docCount); } } }); @@ -353,7 +354,7 @@ private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IO for (long i = 1; i < segmentDocCounts.size(); i++) { // We use set(...) here, because we need to reset the slow to 0. // segmentDocCounts get reused over the segments and otherwise counts would be too high. - int inc = segmentDocCounts.set(i, 0); + long inc = segmentDocCounts.set(i, 0); if (inc == 0) { continue; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java new file mode 100644 index 0000000000000..25cf8c064a847 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -0,0 +1,71 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexableField; + +import static org.hamcrest.Matchers.containsString; + +public class DocCountFieldMapperTests extends MapperServiceTestCase { + + private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE; + private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME; + + /** + * Test parsing field mapping and adding simple field + */ + public void testParseValue() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + ParsedDocument doc = mapper.parse(source(b -> + b.field("foo", 500) + .field(CONTENT_TYPE, 100) + )); + + IndexableField field = doc.rootDoc().getField(DOC_COUNT_FIELD); + assertEquals(100L, field.numericValue()); + assertEquals(DocValuesType.NUMERIC, field.fieldType().docValuesType()); + assertEquals(1, doc.rootDoc().getFields(DOC_COUNT_FIELD).length); + } + + public void testInvalidDocument_NegativeDocCount() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, -100)))); + assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer")); + } + + public void testInvalidDocument_ZeroDocCount() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 0)))); + assertThat(e.getCause().getMessage(), containsString("Field [_doc_count] must be a positive integer")); + } + + public void testInvalidDocument_NonNumericDocCount() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, "foo")))); + assertThat(e.getCause().getMessage(), + containsString("Failed to parse object: expecting token of type [VALUE_NUMBER] but found [VALUE_STRING]")); + } + + public void testInvalidDocument_FractionalDocCount() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23)))); + assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Long without data loss")); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java new file mode 100644 index 0000000000000..f8f5d3c2e810c --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java @@ -0,0 +1,55 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.elasticsearch.index.query.QueryShardException; + +import java.io.IOException; +import java.util.Arrays; + +public class DocCountFieldTypeTests extends FieldTypeTestCase { + + public void testTermQuery() { + MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType(); + QueryShardException e = expectThrows(QueryShardException.class, + () -> ft.termQuery(10L, randomMockShardContext())); + assertEquals("Field [_doc_count] of type [_doc_count] is not searchable", e.getMessage()); + } + + public void testRangeQuery() { + MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType(); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> ft.rangeQuery(null, null, randomBoolean(), randomBoolean(), null, null, null, null)); + assertEquals("Field [_doc_count] of type [_doc_count] does not support range queries", e.getMessage()); + } + + public void testExistsQuery() { + MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType(); + assertTrue(ft.existsQuery(randomMockShardContext()) instanceof DocValuesFieldExistsQuery); + } + + public void testFetchSourceValue() throws IOException { + MappedFieldType fieldType = new DocCountFieldMapper.DocCountFieldType(); + assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, 14)); + assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, "14")); + assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, "")); + assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, null)); + } +} diff --git a/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java b/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java index 6ee6fb0cd3972..7c58d977154af 100644 --- a/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java +++ b/server/src/test/java/org/elasticsearch/indices/IndicesModuleTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.indices; import org.elasticsearch.Version; +import org.elasticsearch.index.mapper.DocCountFieldMapper; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.IgnoredFieldMapper; @@ -76,7 +77,8 @@ public Map getMetadataMappers() { private static final String[] EXPECTED_METADATA_FIELDS = new String[]{ IgnoredFieldMapper.NAME, IdFieldMapper.NAME, RoutingFieldMapper.NAME, IndexFieldMapper.NAME, SourceFieldMapper.NAME, - NestedPathFieldMapper.NAME, VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, FieldNamesFieldMapper.NAME }; + NestedPathFieldMapper.NAME, VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, DocCountFieldMapper.NAME, + FieldNamesFieldMapper.NAME }; public void testBuiltinMappers() { IndicesModule module = new IndicesModule(Collections.emptyList()); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java new file mode 100644 index 0000000000000..f7ba8db8a66f2 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java @@ -0,0 +1,108 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.index.mapper.DocCountFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal; + +import java.io.IOException; +import java.util.List; +import java.util.function.Consumer; + +import static java.util.Collections.singleton; + + +public class DocCountProviderTests extends AggregatorTestCase { + + private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME; + private static final String NUMBER_FIELD = "number"; + + public void testDocsWithDocCount() throws IOException { + testAggregation(new MatchAllDocsQuery(), iw -> { + iw.addDocument(List.of( + new NumericDocValuesField(DOC_COUNT_FIELD, 4), + new SortedNumericDocValuesField(NUMBER_FIELD, 1) + )); + iw.addDocument(List.of( + new NumericDocValuesField(DOC_COUNT_FIELD, 5), + new SortedNumericDocValuesField(NUMBER_FIELD, 7) + )); + iw.addDocument(List.of( + // Intentionally omit doc_count field + new SortedNumericDocValuesField(NUMBER_FIELD, 1) + )); + }, global -> { + assertEquals(10, global.getDocCount()); + }); + } + + public void testDocsWithoutDocCount() throws IOException { + testAggregation(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 1))); + iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 7))); + iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD, 1))); + }, global -> { + assertEquals(3, global.getDocCount()); + }); + } + + public void testQueryFiltering() throws IOException { + testAggregation(IntPoint.newRangeQuery(NUMBER_FIELD, 4, 5), iw -> { + iw.addDocument(List.of( + new NumericDocValuesField(DOC_COUNT_FIELD, 4), + new IntPoint(NUMBER_FIELD, 6) + )); + iw.addDocument(List.of( + new NumericDocValuesField(DOC_COUNT_FIELD, 2), + new IntPoint(NUMBER_FIELD, 5) + )); + iw.addDocument(List.of( + // Intentionally omit doc_count field + new IntPoint(NUMBER_FIELD, 1) + )); + iw.addDocument(List.of( + // Intentionally omit doc_count field + new IntPoint(NUMBER_FIELD, 5) + )); + }, global -> { + assertEquals(3, global.getDocCount()); + }); + } + + private void testAggregation(Query query, + CheckedConsumer indexer, + Consumer verify) throws IOException { + GlobalAggregationBuilder aggregationBuilder = new GlobalAggregationBuilder("_name"); + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD, NumberFieldMapper.NumberType.LONG); + MappedFieldType docCountFieldType = new DocCountFieldMapper.DocCountFieldType(); + testCase(aggregationBuilder, query, indexer, verify, fieldType, docCountFieldType); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java index 8cb35b6b861b4..57f04d5192529 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java @@ -309,7 +309,7 @@ private void testRandomCase(boolean forceMerge, final LeafBucketCollector leafCollector = new LeafBucketCollector() { @Override public void collect(int doc, long bucket) throws IOException { - queue.addIfCompetitive(indexSortSourcePrefix); + queue.addIfCompetitive(indexSortSourcePrefix, 1); } }; final LeafBucketCollector queueCollector = queue.getLeafCollector(leafReaderContext, leafCollector); diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 70a197f7619aa..bd0bafcee4c2f 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -278,6 +278,12 @@ public boolean shouldCache(Query query) { // TODO: now just needed for top_hits, this will need to be revised for other agg unit tests: MapperService mapperService = mapperServiceMock(); + Map fieldNameToType = new HashMap<>(); + fieldNameToType.putAll(Arrays.stream(fieldTypes) + .filter(Objects::nonNull) + .collect(Collectors.toMap(MappedFieldType::name, Function.identity()))); + fieldNameToType.putAll(getFieldAliases(fieldTypes)); + when(mapperService.fieldTypes()).thenReturn(fieldNameToType.values()); when(mapperService.getIndexSettings()).thenReturn(indexSettings); when(mapperService.hasNested()).thenReturn(false); QueryShardContext queryShardContext = @@ -292,14 +298,7 @@ public boolean shouldCache(Query query) { return null; }); - Map fieldNameToType = new HashMap<>(); - fieldNameToType.putAll(Arrays.stream(fieldTypes) - .filter(Objects::nonNull) - .collect(Collectors.toMap(MappedFieldType::name, Function.identity()))); - fieldNameToType.putAll(getFieldAliases(fieldTypes)); - - registerFieldTypes(searchContext, mapperService, - fieldNameToType); + registerFieldTypes(searchContext, mapperService, fieldNameToType); doAnswer(invocation -> { /* Store the release-ables so we can release them at the end of the test case. This is important because aggregations don't * close their sub-aggregations. This is fairly similar to what the production code does. */ diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java index de5afebeec253..e163f71e187df 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/aggregatemetric/AggregateMetricFeatureSetUsage.java @@ -24,7 +24,8 @@ public AggregateMetricFeatureSetUsage(boolean available, boolean enabled) { super(XPackField.AGGREGATE_METRIC, available, enabled); } - @Override public Version getMinimalSupportedVersion() { + @Override + public Version getMinimalSupportedVersion() { return Version.V_8_0_0; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java index 57288547cf119..7ef9e7ddecd98 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java @@ -59,7 +59,7 @@ public class ExtractedFieldsDetector { */ private static final List IGNORE_FIELDS = Arrays.asList("_id", "_field_names", "_index", "_parent", "_routing", "_seq_no", "_source", "_type", "_uid", "_version", "_feature", "_ignored", "_nested_path", DestinationIndex.INCREMENTAL_ID, - "_data_stream_timestamp"); + "_data_stream_timestamp", "_doc_count"); private final DataFrameAnalyticsConfig config; private final int docValueFieldsLimit;