diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 6033df8d9fa4e..a553940c85376 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -683,10 +683,11 @@ public final void testSupportedFieldTypes() throws IOException { fail("Aggregator [" + aggregationBuilder.getType() + "] should not support field type [" + fieldType.typeName() + "] but executing against the field did not throw an exception"); } - } catch (Exception e) { + } catch (Exception | AssertionError e) { if (supportedVSTypes.contains(vst) && unsupportedMappedFieldTypes.contains(fieldType.typeName()) == false) { - fail("Aggregator [" + aggregationBuilder.getType() + "] supports field type [" - + fieldType.typeName() + "] but executing against the field threw an exception: [" + e.getMessage() + "]"); + throw new AssertionError("Aggregator [" + aggregationBuilder.getType() + "] supports field type [" + + fieldType.typeName() + "] but executing against the field threw an exception: [" + e.getMessage() + "]", + e); } } } @@ -706,7 +707,7 @@ private void writeTestDoc(MappedFieldType fieldType, String fieldName, RandomInd ValuesSourceType vst = fieldType.getValuesSourceType(); Document doc = new Document(); String json; - + if (vst.equals(CoreValuesSourceType.NUMERIC)) { // TODO note: once VS refactor adds DATE/BOOLEAN, this conditional will go away long v; diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/stringstats/StringStatsAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/stringstats/StringStatsAggregatorTests.java index ff4bae131dd8a..c9790638da078 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/stringstats/StringStatsAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/stringstats/StringStatsAggregatorTests.java @@ -18,24 +18,46 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.mapper.IpFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.script.MockScriptEngine; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptEngine; +import org.elasticsearch.script.ScriptModule; +import org.elasticsearch.script.ScriptService; +import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; +import org.elasticsearch.search.lookup.LeafDocLookup; import java.io.IOException; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.function.Consumer; +import java.util.function.Function; +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; import static java.util.Collections.singleton; +import static java.util.Collections.singletonList; +import static java.util.Collections.singletonMap; +import static java.util.stream.Collectors.toList; public class StringStatsAggregatorTests extends AggregatorTestCase { + private static final String VALUE_SCRIPT_NAME = "value_script"; + private static final String FIELD_SCRIPT_NAME = "field_script"; + private void testCase(Query query, CheckedConsumer buildIndex, Consumer verify) throws IOException { @@ -119,6 +141,32 @@ public void testUnmappedWithMissingField() throws IOException { }, null); } + public void testMissing() throws IOException { + final TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType(); + fieldType.setName("text"); + fieldType.setFielddata(true); + + final StringStatsAggregationBuilder aggregationBuilder = new StringStatsAggregationBuilder("_name") + .field(fieldType.name()) + .missing("b"); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new TextField(fieldType.name(), "a", Field.Store.NO))); + iw.addDocument(emptySet()); + iw.addDocument(singleton(new TextField(fieldType.name(), "a", Field.Store.NO))); + iw.addDocument(emptySet()); + }, stats -> { + assertEquals(4, stats.getCount()); + assertEquals(1, stats.getMaxLength()); + assertEquals(1, stats.getMinLength()); + assertEquals(1.0, stats.getAvgLength(), 0); + assertEquals(2, stats.getDistribution().size()); + assertEquals(0.5, stats.getDistribution().get("a"), 0); + assertEquals(0.5, stats.getDistribution().get("b"), 0); + assertEquals(1.0, stats.getEntropy(), 0); + }, fieldType); + } + public void testSingleValuedField() throws IOException { testCase(new MatchAllDocsQuery(), iw -> { for(int i=0; i < 10; i++) { @@ -258,4 +306,144 @@ public void testNestedAggregation() throws IOException { directory.close(); } + public void testValueScriptSingleValuedField() throws IOException { + final TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType(); + fieldType.setName("text"); + fieldType.setFielddata(true); + + final StringStatsAggregationBuilder aggregationBuilder = new StringStatsAggregationBuilder("_name") + .field(fieldType.name()) + .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT_NAME, emptyMap())); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new TextField(fieldType.name(), "b", Field.Store.NO))); + iw.addDocument(singleton(new TextField(fieldType.name(), "b", Field.Store.NO))); + }, stats -> { + assertEquals(2, stats.getCount()); + assertEquals(2, stats.getMaxLength()); + assertEquals(2, stats.getMinLength()); + assertEquals(2.0, stats.getAvgLength(), 0); + assertEquals(2, stats.getDistribution().size()); + assertEquals(0.5, stats.getDistribution().get("a"), 0); + assertEquals(0.5, stats.getDistribution().get("b"), 0); + assertEquals(1.0, stats.getEntropy(), 0); + }, fieldType); + } + + public void testValueScriptMultiValuedField() throws IOException { + final TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType(); + fieldType.setName("text"); + fieldType.setFielddata(true); + + final StringStatsAggregationBuilder aggregationBuilder = new StringStatsAggregationBuilder("_name") + .field(fieldType.name()) + .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT_NAME, emptyMap())); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(Set.of( + new TextField(fieldType.name(), "b", Field.Store.NO), + new TextField(fieldType.name(), "c", Field.Store.NO) + )); + iw.addDocument(Set.of( + new TextField(fieldType.name(), "b", Field.Store.NO), + new TextField(fieldType.name(), "c", Field.Store.NO) + )); + }, stats -> { + assertEquals(4, stats.getCount()); + assertEquals(2, stats.getMaxLength()); + assertEquals(2, stats.getMinLength()); + assertEquals(2.0, stats.getAvgLength(), 0); + assertEquals(3, stats.getDistribution().size()); + assertEquals(0.5, stats.getDistribution().get("a"), 0); + assertEquals(0.25, stats.getDistribution().get("b"), 0); + assertEquals(0.25, stats.getDistribution().get("c"), 0); + assertEquals(1.5, stats.getEntropy(), 0); + }, fieldType); + } + + public void testFieldScriptSingleValuedField() throws IOException { + final TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType(); + fieldType.setName("text"); + fieldType.setFielddata(true); + + final StringStatsAggregationBuilder aggregationBuilder = new StringStatsAggregationBuilder("_name") + .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, FIELD_SCRIPT_NAME, singletonMap("field", fieldType.name()))); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new TextField(fieldType.name(), "b", Field.Store.NO))); + iw.addDocument(singleton(new TextField(fieldType.name(), "b", Field.Store.NO))); + }, stats -> { + assertEquals(2, stats.getCount()); + assertEquals(2, stats.getMaxLength()); + assertEquals(2, stats.getMinLength()); + assertEquals(2.0, stats.getAvgLength(), 0); + assertEquals(2, stats.getDistribution().size()); + assertEquals(0.5, stats.getDistribution().get("a"), 0); + assertEquals(0.5, stats.getDistribution().get("b"), 0); + assertEquals(1.0, stats.getEntropy(), 0); + }, fieldType); + } + + public void testFieldScriptMultiValuedField() throws IOException { + final TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType(); + fieldType.setName("text"); + fieldType.setFielddata(true); + + final StringStatsAggregationBuilder aggregationBuilder = new StringStatsAggregationBuilder("_name") + .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, FIELD_SCRIPT_NAME, singletonMap("field", fieldType.name()))); + + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(Set.of( + new TextField(fieldType.name(), "b", Field.Store.NO), + new TextField(fieldType.name(), "c", Field.Store.NO) + )); + iw.addDocument(Set.of( + new TextField(fieldType.name(), "b", Field.Store.NO), + new TextField(fieldType.name(), "c", Field.Store.NO) + )); + }, stats -> { + assertEquals(4, stats.getCount()); + assertEquals(2, stats.getMaxLength()); + assertEquals(2, stats.getMinLength()); + assertEquals(2.0, stats.getAvgLength(), 0); + assertEquals(3, stats.getDistribution().size()); + assertEquals(0.5, stats.getDistribution().get("a"), 0); + assertEquals(0.25, stats.getDistribution().get("b"), 0); + assertEquals(0.25, stats.getDistribution().get("c"), 0); + assertEquals(1.5, stats.getEntropy(), 0); + }, fieldType); + } + + @Override + protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { + return new StringStatsAggregationBuilder("_name") + .field(fieldName); + } + + @Override + protected List getSupportedValuesSourceTypes() { + return singletonList(CoreValuesSourceType.BYTES); + } + + @Override + protected List unsupportedMappedFieldTypes() { + return singletonList(IpFieldMapper.CONTENT_TYPE); + } + + @Override + protected ScriptService getMockScriptService() { + final Map, Object>> scripts = Map.of( + VALUE_SCRIPT_NAME, vars -> "a" + vars.get("_value"), + FIELD_SCRIPT_NAME, vars -> { + final String fieldName = (String) vars.get("field"); + final LeafDocLookup lookup = (LeafDocLookup) vars.get("doc"); + return lookup.get(fieldName).stream() + .map(value -> "a" + value) + .collect(toList()); + } + ); + final MockScriptEngine engine = new MockScriptEngine(MockScriptEngine.NAME, scripts, emptyMap()); + final Map engines = singletonMap(engine.getType(), engine); + return new ScriptService(Settings.EMPTY, engines, ScriptModule.CORE_CONTEXTS); + } } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/string_stats.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/string_stats.yml new file mode 100644 index 0000000000000..daa34a6d021e6 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/string_stats.yml @@ -0,0 +1,53 @@ +setup: + - do: + indices.create: + index: test + body: + mappings: + properties: + value: + type: keyword + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{"value": "a"}' + - '{"index": {}}' + - '{"value": "b"}' + +--- +"basic": + - do: + search: + size: 0 + body: + aggs: + my_agg: + string_stats: + field: value + - match: { aggregations.my_agg.count: 2 } + - match: { aggregations.my_agg.min_length: 1 } + - match: { aggregations.my_agg.max_length: 1 } + - match: { aggregations.my_agg.avg_length: 1.0 } + - match: { aggregations.my_agg.entropy: 1.0 } + +--- +"show_distribution": + - do: + search: + size: 0 + body: + aggs: + my_agg: + string_stats: + field: value + show_distribution: true + - match: { aggregations.my_agg.count: 2 } + - match: { aggregations.my_agg.min_length: 1 } + - match: { aggregations.my_agg.max_length: 1 } + - match: { aggregations.my_agg.avg_length: 1.0 } + - match: { aggregations.my_agg.entropy: 1.0 } + - match: { aggregations.my_agg.distribution.a: 0.5 } + - match: { aggregations.my_agg.distribution.b: 0.5 }