diff --git a/docs/reference/aggregations/metrics/valuecount-aggregation.asciidoc b/docs/reference/aggregations/metrics/valuecount-aggregation.asciidoc index 74cc0ae76447b..36539eef1257c 100644 --- a/docs/reference/aggregations/metrics/valuecount-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/valuecount-aggregation.asciidoc @@ -6,6 +6,9 @@ These values can be extracted either from specific fields in the documents, or b this aggregator will be used in conjunction with other single-value aggregations. For example, when computing the `avg` one might be interested in the number of values the average is computed over. +`value_count` does not de-duplicate values, so even if a field has duplicates (or a script generates multiple +identical values for a single document), each value will be counted individually. + [source,console] -------------------------------------------------- POST /sales/_search?size=0 @@ -77,3 +80,7 @@ POST /sales/_search?size=0 } -------------------------------------------------- // TEST[setup:sales,stored_example_script] + +NOTE: Because `value_count` is designed to work with any field, it internally treats all values as simple bytes. +Due to this implementation, if the `_value` script variable is used to fetch a value instead of accessing the field +directly (e.g. a "value script"), the field value will be returned as a string instead of its native format. 
diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java index e3060d4c78ef0..559aec4d231dc 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java @@ -20,10 +20,12 @@ package org.elasticsearch.search.aggregations.metrics; import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; @@ -35,6 +37,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.mapper.BooleanFieldMapper; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.GeoPointFieldMapper; @@ -44,14 +47,28 @@ import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.RangeFieldMapper; import org.elasticsearch.index.mapper.RangeType; +import org.elasticsearch.script.MockScriptEngine; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptEngine; +import org.elasticsearch.script.ScriptModule; +import org.elasticsearch.script.ScriptService; +import org.elasticsearch.script.ScriptType; +import org.elasticsearch.search.aggregations.AggregationBuilder; import 
org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.Consumer; +import java.util.function.Function; import static java.util.Collections.singleton; @@ -59,6 +76,41 @@ public class ValueCountAggregatorTests extends AggregatorTestCase { private static final String FIELD_NAME = "field"; + /** Script to return the {@code _value} provided by aggs framework. */ + private static final String VALUE_SCRIPT = "_value"; + private static final String SINGLE_SCRIPT = "single"; + + @Override + protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { + return new ValueCountAggregationBuilder("foo", null).field(fieldName); + } + + @Override + protected List getSupportedValuesSourceTypes() { + return List.of( + CoreValuesSourceType.NUMERIC, + CoreValuesSourceType.BYTES, + CoreValuesSourceType.GEOPOINT, + CoreValuesSourceType.RANGE, + CoreValuesSourceType.HISTOGRAM + ); + } + + @Override + protected ScriptService getMockScriptService() { + Map, Object>> scripts = new HashMap<>(); + + scripts.put(VALUE_SCRIPT, vars -> (Double.valueOf((String) vars.get("_value")) + 1)); + scripts.put(SINGLE_SCRIPT, vars -> 1); + + MockScriptEngine scriptEngine = new MockScriptEngine(MockScriptEngine.NAME, + scripts, + Collections.emptyMap()); + Map engines = Collections.singletonMap(scriptEngine.getType(), scriptEngine); + + return new ScriptService(Settings.EMPTY, engines, ScriptModule.CORE_CONTEXTS); + } + public void testNoDocs() throws IOException { for 
(ValueType valueType : ValueType.values()) { testCase(new MatchAllDocsQuery(), valueType, iw -> { @@ -185,6 +237,105 @@ public void testRangeFieldValues() throws IOException { }, fieldType); } + public void testValueScriptNumber() throws IOException { + ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name", null) + .field(FIELD_NAME) + .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, Collections.emptyMap())); + + MappedFieldType fieldType = createMappedFieldType(ValueType.NUMERIC); + fieldType.setName(FIELD_NAME); + fieldType.setHasDocValues(true); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new NumericDocValuesField(FIELD_NAME, 7))); + iw.addDocument(singleton(new NumericDocValuesField(FIELD_NAME, 8))); + iw.addDocument(singleton(new NumericDocValuesField(FIELD_NAME, 9))); + }, card -> { + assertEquals(3, card.getValue(), 0); + assertTrue(AggregationInspectionHelper.hasValue(card)); + }, fieldType); + } + + public void testSingleScriptNumber() throws IOException { + ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name", null) + .field(FIELD_NAME); + + MappedFieldType fieldType = createMappedFieldType(ValueType.NUMERIC); + fieldType.setName(FIELD_NAME); + fieldType.setHasDocValues(true); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField(FIELD_NAME, 7)); + doc.add(new SortedNumericDocValuesField(FIELD_NAME, 7)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new SortedNumericDocValuesField(FIELD_NAME, 8)); + doc.add(new SortedNumericDocValuesField(FIELD_NAME, 8)); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new SortedNumericDocValuesField(FIELD_NAME, 1)); + doc.add(new SortedNumericDocValuesField(FIELD_NAME, 1)); + iw.addDocument(doc); + }, card -> { + // note: this is 6, even though the script returns a 
single value. ValueCount does not de-dedupe + assertEquals(6, card.getValue(), 0); + assertTrue(AggregationInspectionHelper.hasValue(card)); + }, fieldType); + } + + public void testValueScriptString() throws IOException { + ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name", null) + .field(FIELD_NAME) + .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, Collections.emptyMap())); + + MappedFieldType fieldType = createMappedFieldType(ValueType.STRING); + fieldType.setName(FIELD_NAME); + fieldType.setHasDocValues(true); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new SortedDocValuesField(FIELD_NAME, new BytesRef("1")))); + iw.addDocument(singleton(new SortedDocValuesField(FIELD_NAME, new BytesRef("2")))); + iw.addDocument(singleton(new SortedDocValuesField(FIELD_NAME, new BytesRef("3")))); + }, card -> { + assertEquals(3, card.getValue(), 0); + assertTrue(AggregationInspectionHelper.hasValue(card)); + }, fieldType); + } + + public void testSingleScriptString() throws IOException { + ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name", null) + .field(FIELD_NAME); + + MappedFieldType fieldType = createMappedFieldType(ValueType.STRING); + fieldType.setName(FIELD_NAME); + fieldType.setHasDocValues(true); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + Document doc = new Document(); + // Note: unlike numerics, lucene de-dupes strings so we increment here + doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("1"))); + doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("2"))); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("3"))); + doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("4"))); + iw.addDocument(doc); + + doc = new Document(); + doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("5"))); + 
doc.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("6"))); + iw.addDocument(doc); + }, card -> { + // note: this is 6, even though the script returns a single value. ValueCount does not de-dedupe + assertEquals(6, card.getValue(), 0); + assertTrue(AggregationInspectionHelper.hasValue(card)); + }, fieldType); + } + private void testCase(Query query, ValueType valueType, CheckedConsumer indexer,