From 5aeab57f4105d045859f8776a6438e04e91509e7 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 18 Apr 2018 15:28:05 -0400 Subject: [PATCH 1/6] Handle missing and multiple values in script Previously in script for numeric fields, there was no way to check if a document is missing a value. Also certain operations on multiple- values fields were missing. This PR adds the following: - return null for doc['field'] if a document is missing a 'field1': Now we can do this: if (doc['field'] == null) {return -1;} return doc['field'].value; or doc['field']?.value ?: -1 - add the following functions for multiple-valued numeric fields: doc['field'].min returns the minumum amoung values doc['field'].max returns the maximum amoung values doc['field'].sum returns the sum of amoung values doc['field'].avg returns the average of values Closes #29286 --- .../painless-getting-started.asciidoc | 16 +++ .../painless/spi/org.elasticsearch.txt | 8 ++ .../test/painless/20_scriptfield.yml | 2 +- .../70_missing_and_multiple_values.yml | 117 ++++++++++++++++++ .../index/fielddata/ScriptDocValues.java | 101 +++++++++++++-- .../index/mapper/IpFieldMapper.java | 6 +- .../search/lookup/LeafDocLookup.java | 3 +- .../fielddata/ScriptDocValuesLongsTests.java | 33 +++++ .../aggregations/bucket/MinDocCountIT.java | 6 +- .../aggregations/metrics/CardinalityIT.java | 7 +- .../search/functionscore/FunctionScoreIT.java | 2 +- 11 files changed, 277 insertions(+), 24 deletions(-) create mode 100644 modules/lang-painless/src/test/resources/rest-api-spec/test/painless/70_missing_and_multiple_values.yml diff --git a/docs/painless/painless-getting-started.asciidoc b/docs/painless/painless-getting-started.asciidoc index 2cf91666ba48d..73da36e432f61 100644 --- a/docs/painless/painless-getting-started.asciidoc +++ b/docs/painless/painless-getting-started.asciidoc @@ -68,6 +68,7 @@ GET hockey/_search ---------------------------------------------------------------- // CONSOLE + Alternatively, you could do the same thing using a script field instead of a function score: [source,js] @@ -119,6 +120,21 @@ GET hockey/_search ---------------------------------------------------------------- // CONSOLE +[float] +===== Missing and multiple values + +If a document is missing a field `field`, `doc['field']` for this document +will return null. + +There is also a number of operations designed for numeric fields, +if a document has multiple values in such a field: + +- `doc['field'].min` - gets the minimum value among values +- `doc['field'].max` - gets the maximum value among values +- `doc['field'].sum` - gets the sum of all values +- `doc['field'].avg` - gets the average of all values + + [float] ==== Updating Fields with Painless diff --git a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.txt b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.txt index 51a1b7cecb3f8..9ae832579f9f4 100644 --- a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.txt +++ b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.txt @@ -73,6 +73,10 @@ class org.elasticsearch.index.fielddata.ScriptDocValues$Strings { class org.elasticsearch.index.fielddata.ScriptDocValues$Longs { Long get(int) long getValue() + long getMin() + long getMax() + long getSum() + double getAvg() List getValues() org.joda.time.ReadableDateTime getDate() List getDates() @@ -89,6 +93,10 @@ class org.elasticsearch.index.fielddata.ScriptDocValues$Dates { class org.elasticsearch.index.fielddata.ScriptDocValues$Doubles { Double get(int) double getValue() + double getMin() + double getMax() + double getSum() + double getAvg() List getValues() } diff --git a/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/20_scriptfield.yml b/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/20_scriptfield.yml index e498a1737576e..27f6678e31ed3 100644 --- a/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/20_scriptfield.yml +++ b/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/20_scriptfield.yml @@ -95,7 +95,7 @@ setup: script_fields: bar: script: - source: "(doc['missing'].value?.length() ?: 0) + params.x;" + source: "(doc['missing']?.value?.length() ?: 0) + params.x;" params: x: 5 diff --git a/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/70_missing_and_multiple_values.yml b/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/70_missing_and_multiple_values.yml new file mode 100644 index 0000000000000..7ed21655c8d57 --- /dev/null +++ b/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/70_missing_and_multiple_values.yml @@ -0,0 +1,117 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + mappings: + _doc: + properties: + dval: + type: double + lval: + type: long + + - do: + index: + index: test + type: _doc + id: 1 + body: { "dval": 5.5, "lval": 5 } + + - do: + index: + index: test + type: _doc + id: 2 + body: { "dval": [5.5, 3.5, 4.5] } + + + - do: + index: + index: test + type: _doc + id: 3 + body: { "lval": [5, 3, 4] } + + - do: + indices.refresh: {} + +--- +"check double and long values: missing values and operations on multiple values": + - skip: + version: " - 6.99.99" + reason: Handling missing values and operations on multiple values were added after these versions + + - do: + search: + body: + script_fields: + val_dval: + script: + source: "doc['dval']?.value ?: -1.0" + min_dval: + script: + source: "doc['dval']?.min ?: -1.0" + max_dval: + script: + source: "doc['dval']?.max ?: -1.0" + sum_dval: + script: + source: "doc['dval']?.sum ?: -1.0" + avg_dval: + script: + source: "doc['dval']?.avg ?: -1.0" + val_lval: + script: + source: "doc['lval']?.value ?: -1" + min_lval: + script: + source: "doc['lval']?.min ?: -1" + max_lval: + script: + source: "doc['lval']?.max ?: -1" + sum_lval: + script: + source: "doc['lval']?.sum ?: -1" + avg_lval: + script: + source: "doc['lval']?.avg ?: -1" + + - match: { hits.hits.0.fields.val_dval.0: 5.5} + - match: { hits.hits.0.fields.min_dval.0: 5.5} + - match: { hits.hits.0.fields.max_dval.0: 5.5} + - match: { hits.hits.0.fields.sum_dval.0: 5.5} + - match: { hits.hits.0.fields.avg_dval.0: 5.5} + + - match: { hits.hits.0.fields.val_lval.0: 5} + - match: { hits.hits.0.fields.min_lval.0: 5} + - match: { hits.hits.0.fields.max_lval.0: 5} + - match: { hits.hits.0.fields.sum_lval.0: 5} + - match: { hits.hits.0.fields.avg_lval.0: 5} + + - match: { hits.hits.1.fields.val_dval.0: 3.5} + - match: { hits.hits.1.fields.min_dval.0: 3.5} + - match: { hits.hits.1.fields.max_dval.0: 5.5} + - match: { hits.hits.1.fields.sum_dval.0: 13.5} + - match: { hits.hits.1.fields.avg_dval.0: 4.5} + + - match: { hits.hits.1.fields.val_lval.0: -1} + - match: { hits.hits.1.fields.min_lval.0: -1} + - match: { hits.hits.1.fields.max_lval.0: -1} + - match: { hits.hits.1.fields.sum_lval.0: -1} + - match: { hits.hits.1.fields.avg_lval.0: -1} + + - match: { hits.hits.2.fields.val_dval.0: -1.0} + - match: { hits.hits.2.fields.min_dval.0: -1.0} + - match: { hits.hits.2.fields.max_dval.0: -1.0} + - match: { hits.hits.2.fields.sum_dval.0: -1.0} + - match: { hits.hits.2.fields.avg_dval.0: -1.0} + + - match: { hits.hits.2.fields.val_lval.0: 3} + - match: { hits.hits.2.fields.min_lval.0: 3} + - match: { hits.hits.2.fields.max_lval.0: 5} + - match: { hits.hits.2.fields.sum_lval.0: 12} + - match: { hits.hits.2.fields.avg_lval.0: 4} + diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java index 3d07a0f87aa5e..a2d841d376ecb 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java @@ -58,7 +58,7 @@ public abstract class ScriptDocValues extends AbstractList { /** * Set the current doc ID. */ - public abstract void setNextDocId(int docId) throws IOException; + public abstract boolean setNextDocId(int docId) throws IOException; /** * Return a copy of the list of the values for the current document. @@ -124,9 +124,10 @@ public Longs(SortedNumericDocValues in) { } @Override - public void setNextDocId(int docId) throws IOException { + public boolean setNextDocId(int docId) throws IOException { this.docId = docId; - if (in.advanceExact(docId)) { + boolean docHasValues = in.advanceExact(docId); + if (docHasValues) { resize(in.docValueCount()); for (int i = 0; i < count; i++) { values[i] = in.nextValue(); @@ -137,6 +138,7 @@ public void setNextDocId(int docId) throws IOException { if (dates != null) { dates.setNextDocId(docId); } + return docHasValues; } /** @@ -159,6 +161,37 @@ public long getValue() { return values[0]; } + public long getMin() { + if (count == 0) { + return 0L; + }; + return values[0]; + } + + public long getMax() { + if (count == 0) { + return 0L; + }; + return values[count - 1]; + } + + public long getSum() { + if (count == 0) { + return 0L; + }; + long sum = 0L; + for (int i = 0; i < count; i++) + sum += values[i]; + return sum; + } + + public double getAvg() { + if (count == 0) { + return 0d; + }; + return getSum() * 1.0/count; + } + @Deprecated public ReadableDateTime getDate() throws IOException { deprecated("getDate on numeric fields is deprecated. Use a date field to get dates."); @@ -285,13 +318,15 @@ public int size() { } @Override - public void setNextDocId(int docId) throws IOException { - if (in.advanceExact(docId)) { + public boolean setNextDocId(int docId) throws IOException { + boolean docHasValues = in.advanceExact(docId); + if (docHasValues) { count = in.docValueCount(); } else { count = 0; } refreshArray(); + return docHasValues; } /** @@ -355,8 +390,9 @@ public Doubles(SortedNumericDoubleValues in) { } @Override - public void setNextDocId(int docId) throws IOException { - if (in.advanceExact(docId)) { + public boolean setNextDocId(int docId) throws IOException { + boolean docHasValues = in.advanceExact(docId); + if (docHasValues) { resize(in.docValueCount()); for (int i = 0; i < count; i++) { values[i] = in.nextValue(); @@ -364,6 +400,7 @@ public void setNextDocId(int docId) throws IOException { } else { resize(0); } + return docHasValues; } /** @@ -386,6 +423,38 @@ public double getValue() { return values[0]; } + public double getMin() { + if (count == 0) { + return 0d; + }; + return values[0]; + } + + public double getMax() { + if (count == 0) { + return 0d; + }; + return values[count - 1]; + } + + public double getSum() { + if (count == 0) { + return 0d; + }; + double sum = 0d; + for (int i = 0; i < count; i++) + sum += values[i]; + return sum; + } + + public double getAvg() { + if (count == 0) { + return 0d; + }; + return getSum() / count; + } + + @Override public Double get(int index) { return values[index]; @@ -408,8 +477,9 @@ public GeoPoints(MultiGeoPointValues in) { } @Override - public void setNextDocId(int docId) throws IOException { - if (in.advanceExact(docId)) { + public boolean setNextDocId(int docId) throws IOException { + boolean docHasValues = in.advanceExact(docId); + if (docHasValues) { resize(in.docValueCount()); for (int i = 0; i < count; i++) { GeoPoint point = in.nextValue(); @@ -418,6 +488,7 @@ public void setNextDocId(int docId) throws IOException { } else { resize(0); } + return docHasValues; } /** @@ -528,8 +599,9 @@ public Booleans(SortedNumericDocValues in) { } @Override - public void setNextDocId(int docId) throws IOException { - if (in.advanceExact(docId)) { + public boolean setNextDocId(int docId) throws IOException { + boolean docHasValues = in.advanceExact(docId); + if (docHasValues) { resize(in.docValueCount()); for (int i = 0; i < count; i++) { values[i] = in.nextValue() == 1; @@ -537,6 +609,7 @@ public void setNextDocId(int docId) throws IOException { } else { resize(0); } + return docHasValues; } /** @@ -584,8 +657,9 @@ abstract static class BinaryScriptDocValues extends ScriptDocValues { } @Override - public void setNextDocId(int docId) throws IOException { - if (in.advanceExact(docId)) { + public boolean setNextDocId(int docId) throws IOException { + boolean docHasValues = in.advanceExact(docId); + if (docHasValues) { resize(in.docValueCount()); for (int i = 0; i < count; i++) { // We need to make a copy here, because BytesBinaryDVAtomicFieldData's SortedBinaryDocValues @@ -596,6 +670,7 @@ public void setNextDocId(int docId) throws IOException { } else { resize(0); } + return docHasValues; } /** diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java index fd5dc080011e6..17d1c16b4048b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java @@ -252,14 +252,16 @@ public IpScriptDocValues(SortedSetDocValues in) { } @Override - public void setNextDocId(int docId) throws IOException { + public boolean setNextDocId(int docId) throws IOException { count = 0; - if (in.advanceExact(docId)) { + boolean docHasValues = in.advanceExact(docId); + if (docHasValues) { for (long ord = in.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = in.nextOrd()) { ords = ArrayUtil.grow(ords, count + 1); ords[count++] = ord; } } + return docHasValues; } public String getValue() { diff --git a/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java index 33645340c7802..d1548bd5b5e67 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java @@ -91,7 +91,8 @@ public ScriptDocValues run() { localCacheFieldData.put(fieldName, scriptValues); } try { - scriptValues.setNextDocId(docId); + boolean docHasValues = scriptValues.setNextDocId(docId); + if (!docHasValues) return null; } catch (IOException e) { throw ExceptionsHelper.convertToElastic(e); } diff --git a/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesLongsTests.java b/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesLongsTests.java index 8b20e9a9f3a71..a5104b46a8bdb 100644 --- a/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesLongsTests.java +++ b/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesLongsTests.java @@ -32,6 +32,7 @@ import java.security.Permissions; import java.security.PrivilegedAction; import java.security.ProtectionDomain; +import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.function.Consumer; @@ -128,6 +129,38 @@ public Void run() { "getDates on numeric fields is deprecated. Use a date field to get dates.")); } + public void testLongsMinMaxSumAvg() throws IOException { + long[][] values = new long[between(3, 10)][]; + long[] mins = new long[values.length]; + long[] maxs = new long[values.length]; + long[] sums = new long[values.length]; + double[] avgs = new double[values.length]; + for (int d = 0; d < values.length; d++) { + values[d] = new long[randomBoolean() ? randomBoolean() ? 0 : 1 : between(2, 100)]; + mins[d] = values[d].length > 0 ? Long.MAX_VALUE : 0L; + maxs[d] = values[d].length > 0 ? Long.MIN_VALUE : 0L; + sums[d] = 0L; + for (int i = 0; i < values[d].length; i++) { + values[d][i] = randomLong(); + mins[d] = mins[d] > values[d][i] ? values[d][i] : mins[d]; + maxs[d] = maxs[d] < values[d][i] ? values[d][i] : maxs[d]; + sums[d] += values[d][i]; + } + avgs[d] = values[d].length > 0 ? sums[d] * 1.0/ values[d].length : 0L; + Arrays.sort(values[d]); + } + Longs longs = wrap(values, deprecationMessage -> {fail("unexpected deprecation: " + deprecationMessage);}); + + for (int round = 0; round < 10; round++) { + int d = between(0, values.length - 1); + longs.setNextDocId(d); + assertEquals(mins[d], longs.getMin()); + assertEquals(maxs[d], longs.getMax()); + assertEquals(sums[d], longs.getSum()); + assertEquals(avgs[d], longs.getAvg(), 0.0); + } + } + private Longs wrap(long[][] values, Consumer deprecationCallback) { return new Longs(new AbstractSortedNumericDocValues() { long[] current; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountIT.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountIT.java index 015664109cdfe..84128d120cdd2 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountIT.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountIT.java @@ -85,19 +85,19 @@ protected Map, Object>> pluginScripts() { scripts.put("doc['d'].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Doubles value = (ScriptDocValues.Doubles) doc.get("d"); - return value.getValues(); + if (value != null) { return value.getValues(); } else return null; }); scripts.put("doc['l'].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Longs value = (ScriptDocValues.Longs) doc.get("l"); - return value.getValues(); + if (value != null) { return value.getValues(); } else return null; }); scripts.put("doc['s'].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Strings value = (ScriptDocValues.Strings) doc.get("s"); - return value.getValues(); + if (value != null) { return value.getValues(); } else return null; }); return scripts; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityIT.java b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityIT.java index b8b33b97e4d00..96ac161c52230 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityIT.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityIT.java @@ -76,7 +76,7 @@ protected Map, Object>> pluginScripts() { scripts.put("doc['str_values'].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Strings strValue = (ScriptDocValues.Strings) doc.get("str_values"); - return strValue.getValues(); + if (strValue != null) { return strValue.getValues(); } else return null; }); scripts.put("doc[' + singleNumericField() + '].value", vars -> { @@ -86,7 +86,8 @@ protected Map, Object>> pluginScripts() { scripts.put("doc[' + multiNumericField(false) + '].values", vars -> { Map doc = (Map) vars.get("doc"); - return ((ScriptDocValues) doc.get(multiNumericField(false))).getValues(); + ScriptDocValues numValue = (ScriptDocValues) doc.get(multiNumericField(false)); + if (numValue != null) { return numValue.getValues(); } else return null; }); return scripts; @@ -129,7 +130,7 @@ public void setupSuiteScopeCluster() throws Exception { .endObject() .endObject().endObject().endObject()).execute().actionGet(); - numDocs = randomIntBetween(2, 100); + numDocs = randomIntBetween(2, 3); precisionThreshold = randomIntBetween(0, 1 << randomInt(20)); IndexRequestBuilder[] builders = new IndexRequestBuilder[(int) numDocs]; for (int i = 0; i < numDocs; ++i) { diff --git a/server/src/test/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java b/server/src/test/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java index 0e92aba2a8552..c476bbb6efc0a 100644 --- a/server/src/test/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java +++ b/server/src/test/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java @@ -83,7 +83,7 @@ protected Map, Object>> pluginScripts() { scripts.put("doc['random_score']", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Doubles randomScore = (ScriptDocValues.Doubles) doc.get("random_score"); - return randomScore.getValue(); + if (randomScore != null) {return randomScore.getValue();} else return 0; }); return scripts; } From 3b42f1f1d9e864ee0e389463042a32ad0ee1c718 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 1 May 2018 06:21:19 -0400 Subject: [PATCH 2/6] Handle missing and multiple values in script Previously in script for numeric fields, there was no way to check if a document is missing a value. Also certain operations on multiple- values fields were missing. This PR adds the following: - add the following functions for multiple-valued numeric fields: doc['field'].min returns the minumum amoung values doc['field'].max returns the maximum amoung values doc['field'].sum returns the sum of amoung values doc['field'].avg returns the average of values - return null for doc['field'] if a document is missing a 'field1': Now we can do this: if (doc['field'] == null) {return -1;} return doc['field'].value; or doc['field']?.value ?: -1 This new behaviour will only work if the following system property is set: `export ES_JAVA_OPTS="-Des.script.null_for_missing_value=true"' Closes #29286 --- docs/CHANGELOG.asciidoc | 1 + .../painless/painless-getting-started.asciidoc | 18 ++++++++++++++---- modules/lang-painless/build.gradle | 1 + .../main/java/org/elasticsearch/node/Node.java | 4 ++++ .../org/elasticsearch/script/ScriptModule.java | 4 ++++ .../search/lookup/LeafDocLookup.java | 3 ++- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 9c6e36a2e1d0d..83a50be4f6f78 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -12,6 +12,7 @@ === Breaking Java Changes === Deprecations +Returning 0 for missing numeric fields in script is deprecated. PR: 29611 === New Features diff --git a/docs/painless/painless-getting-started.asciidoc b/docs/painless/painless-getting-started.asciidoc index 73da36e432f61..ab4ed6a3f8465 100644 --- a/docs/painless/painless-getting-started.asciidoc +++ b/docs/painless/painless-getting-started.asciidoc @@ -121,12 +121,22 @@ GET hockey/_search // CONSOLE [float] -===== Missing and multiple values +===== Missing values -If a document is missing a field `field`, `doc['field']` for this document -will return null. +Currently by default, if a document is missing a numeric field `field`, +`doc['field'].value` returns `0` for this document. This default behaviour +will be changed in the next major version of elasticsearch: +if a document is missing a field `field`, `doc['field']` for this document +will return `null`. You can set a system property +`export ES_JAVA_OPTS="-Des.script.null_for_missing_value=true"' on a node +to make this node's behaviour compatible with the future major version. +Otherwise, every time the node starts a deprecation warning will remind you +about this forthcoming change. -There is also a number of operations designed for numeric fields, + +===== Multiple values + +There is a number of operations designed for numeric fields, if a document has multiple values in such a field: - `doc['field'].min` - gets the minimum value among values diff --git a/modules/lang-painless/build.gradle b/modules/lang-painless/build.gradle index d287d7ee02378..6d759a497f5b2 100644 --- a/modules/lang-painless/build.gradle +++ b/modules/lang-painless/build.gradle @@ -25,6 +25,7 @@ esplugin { } integTestCluster { + systemProperty 'es.script.null_for_missing_value', 'true' module project.project(':modules:mapper-extras') } diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 64d176c01fa99..3dde15b710298 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -716,6 +716,10 @@ public void onTimeout(TimeValue timeout) { writePortsFile("transport", transport.boundAddress()); } + if (!ScriptModule.NULL_FOR_MISSING_VALUE) + logger.warn("Script: returning 0 for missing numeric values is deprecated. " + + "Set system property '-Des.script.null_for_missing_value=true' to make behaviour compatible with future major versions."); + logger.info("started"); pluginsService.filterPlugins(ClusterPlugin.class).forEach(ClusterPlugin::onNodeStarted); diff --git a/server/src/main/java/org/elasticsearch/script/ScriptModule.java b/server/src/main/java/org/elasticsearch/script/ScriptModule.java index 727651be6a565..579d63b38b7fe 100644 --- a/server/src/main/java/org/elasticsearch/script/ScriptModule.java +++ b/server/src/main/java/org/elasticsearch/script/ScriptModule.java @@ -27,6 +27,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import org.elasticsearch.common.Booleans; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.ScriptPlugin; @@ -52,6 +53,9 @@ public class ScriptModule { ).collect(Collectors.toMap(c -> c.name, Function.identity())); } + public static final boolean NULL_FOR_MISSING_VALUE = + Booleans.parseBoolean(System.getProperty("es.script.null_for_missing_value", "false")); + private final ScriptService scriptService; public ScriptModule(Settings settings, List scriptPlugins) { diff --git a/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java index d1548bd5b5e67..2fcc72eb54e32 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java @@ -25,6 +25,7 @@ import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.script.ScriptModule; import java.io.IOException; import java.security.AccessController; @@ -92,7 +93,7 @@ public ScriptDocValues run() { } try { boolean docHasValues = scriptValues.setNextDocId(docId); - if (!docHasValues) return null; + if (!docHasValues && ScriptModule.NULL_FOR_MISSING_VALUE) return null; } catch (IOException e) { throw ExceptionsHelper.convertToElastic(e); } From 757fe0fe74ebc9582cec38cf1368d85acb0011dc Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 15 May 2018 11:28:08 -0400 Subject: [PATCH 3/6] Handle missing and multiple values in script Previously in script for numeric fields, there was no way to check if a document is missing a value. Also certain operations on multiple- values fields were missing. This PR adds the following: - add the following functions for multiple-valued numeric fields: doc['field'].min returns the minumum amoung values doc['field'].max returns the maximum amoung values doc['field'].sum returns the sum of amoung values doc['field'].avg returns the average of values - return null for doc['field'] if a document is missing a 'field1': Now we can do this: if (doc['field'] == null) {return -1;} return doc['field'].value; or doc['field']?.value ?: -1 This new behaviour will only work if the following system property is set: `export ES_JAVA_OPTS="-Des.script.null_for_missing_value=true"' --- docs/CHANGELOG.asciidoc | 1 - .../painless-getting-started.asciidoc | 26 +++++++----- modules/lang-painless/build.gradle | 10 ++++- .../70_missing_and_multiple_values.yml | 40 +++++++++---------- .../java/org/elasticsearch/node/Node.java | 4 -- .../elasticsearch/script/ScriptModule.java | 8 ++++ .../search/lookup/LeafDocLookup.java | 2 +- .../aggregations/bucket/MinDocCountIT.java | 6 +-- .../aggregations/metrics/CardinalityIT.java | 4 +- .../search/functionscore/FunctionScoreIT.java | 2 +- 10 files changed, 60 insertions(+), 43 deletions(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index a0578ca5b05cc..6eb26fde8f9f8 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -85,7 +85,6 @@ Machine Learning:: [float] === Deprecations -Returning 0 for missing numeric fields in script is deprecated. PR: 29611 Monitoring:: * The `xpack.monitoring.collection.interval` setting can no longer be set to `-1` to disable monitoring data collection. Use `xpack.monitoring.collection.enabled` diff --git a/docs/painless/painless-getting-started.asciidoc b/docs/painless/painless-getting-started.asciidoc index ab4ed6a3f8465..2d9c86f53da45 100644 --- a/docs/painless/painless-getting-started.asciidoc +++ b/docs/painless/painless-getting-started.asciidoc @@ -120,19 +120,25 @@ GET hockey/_search ---------------------------------------------------------------- // CONSOLE + [float] ===== Missing values -Currently by default, if a document is missing a numeric field `field`, -`doc['field'].value` returns `0` for this document. This default behaviour -will be changed in the next major version of elasticsearch: -if a document is missing a field `field`, `doc['field']` for this document -will return `null`. You can set a system property -`export ES_JAVA_OPTS="-Des.script.null_for_missing_value=true"' on a node -to make this node's behaviour compatible with the future major version. -Otherwise, every time the node starts a deprecation warning will remind you -about this forthcoming change. - +If you request the value from a field `field` that isn’t in +the document, `doc['field'].value` for this document returns: + +- `0` if a `field` has a numeric datatype (long, double etc.) +- `false` is a `field` has a boolean datatype +- epoch date if a `field` has a date datatype +- `null` if a `field` has a string datatype +- `null` if a `field` has a geo datatype +- `""` if a `field` has a binary datatype + +IMPORTANT: Starting in 7.0, `doc['field']` returns a value of null if +the field is missing in a document. To enable this behavior now, +set a <> +`-Des.script.null_for_missing_value=true` on a node. If you do not enable +this behavior, a deprecation warning is logged on start up. ===== Multiple values diff --git a/modules/lang-painless/build.gradle b/modules/lang-painless/build.gradle index 6d759a497f5b2..a7d927b6215f5 100644 --- a/modules/lang-painless/build.gradle +++ b/modules/lang-painless/build.gradle @@ -17,7 +17,7 @@ * under the License. */ -import org.apache.tools.ant.types.Path +import org.elasticsearch.gradle.test.RestIntegTestTask esplugin { description 'An easy, safe and fast scripting language for Elasticsearch' @@ -25,7 +25,15 @@ esplugin { } integTestCluster { + module project.project(':modules:mapper-extras') +} + +Task additionalClusterTest = tasks.create( + name: "additionalClusterTest", type: RestIntegTestTask) +additionalClusterTestCluster { systemProperty 'es.script.null_for_missing_value', 'true' + distribution = 'integ-test-zip' + module project // add the lang-painless module itself module project.project(':modules:mapper-extras') } diff --git a/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/70_missing_and_multiple_values.yml b/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/70_missing_and_multiple_values.yml index 7ed21655c8d57..ce5e3ada60eaf 100644 --- a/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/70_missing_and_multiple_values.yml +++ b/modules/lang-painless/src/test/resources/rest-api-spec/test/painless/70_missing_and_multiple_values.yml @@ -50,34 +50,34 @@ setup: script_fields: val_dval: script: - source: "doc['dval']?.value ?: -1.0" + source: "doc['dval']?.value ?: 0" min_dval: script: - source: "doc['dval']?.min ?: -1.0" + source: "doc['dval']?.min ?: 0" max_dval: script: - source: "doc['dval']?.max ?: -1.0" + source: "doc['dval']?.max ?: 0" sum_dval: script: - source: "doc['dval']?.sum ?: -1.0" + source: "doc['dval']?.sum ?: 0" avg_dval: script: - source: "doc['dval']?.avg ?: -1.0" + source: "doc['dval']?.avg ?: 0" val_lval: script: - source: "doc['lval']?.value ?: -1" + source: "doc['lval']?.value ?: 0" min_lval: script: - source: "doc['lval']?.min ?: -1" + source: "doc['lval']?.min ?: 0" max_lval: script: - source: "doc['lval']?.max ?: -1" + source: "doc['lval']?.max ?: 0" sum_lval: script: - source: "doc['lval']?.sum ?: -1" + source: "doc['lval']?.sum ?: 0" avg_lval: script: - source: "doc['lval']?.avg ?: -1" + source: "doc['lval']?.avg ?: 0" - match: { hits.hits.0.fields.val_dval.0: 5.5} - match: { hits.hits.0.fields.min_dval.0: 5.5} @@ -97,17 +97,17 @@ setup: - match: { hits.hits.1.fields.sum_dval.0: 13.5} - match: { hits.hits.1.fields.avg_dval.0: 4.5} - - match: { hits.hits.1.fields.val_lval.0: -1} - - match: { hits.hits.1.fields.min_lval.0: -1} - - match: { hits.hits.1.fields.max_lval.0: -1} - - match: { hits.hits.1.fields.sum_lval.0: -1} - - match: { hits.hits.1.fields.avg_lval.0: -1} + - match: { hits.hits.1.fields.val_lval.0: 0} + - match: { hits.hits.1.fields.min_lval.0: 0} + - match: { hits.hits.1.fields.max_lval.0: 0} + - match: { hits.hits.1.fields.sum_lval.0: 0} + - match: { hits.hits.1.fields.avg_lval.0: 0} - - match: { hits.hits.2.fields.val_dval.0: -1.0} - - match: { hits.hits.2.fields.min_dval.0: -1.0} - - match: { hits.hits.2.fields.max_dval.0: -1.0} - - match: { hits.hits.2.fields.sum_dval.0: -1.0} - - match: { hits.hits.2.fields.avg_dval.0: -1.0} + - match: { hits.hits.2.fields.val_dval.0: 0} + - match: { hits.hits.2.fields.min_dval.0: 0} + - match: { hits.hits.2.fields.max_dval.0: 0} + - match: { hits.hits.2.fields.sum_dval.0: 0} + - match: { hits.hits.2.fields.avg_dval.0: 0} - match: { hits.hits.2.fields.val_lval.0: 3} - match: { hits.hits.2.fields.min_lval.0: 3} diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 4c6cf2c1932c1..054b91dc51101 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -699,10 +699,6 @@ public void onTimeout(TimeValue timeout) { writePortsFile("http", http.boundAddress()); } - if (!ScriptModule.NULL_FOR_MISSING_VALUE) - logger.warn("Script: returning 0 for missing numeric values is deprecated. " + - "Set system property '-Des.script.null_for_missing_value=true' to make behaviour compatible with future major versions."); - logger.info("started"); pluginsService.filterPlugins(ClusterPlugin.class).forEach(ClusterPlugin::onNodeStarted); diff --git a/server/src/main/java/org/elasticsearch/script/ScriptModule.java b/server/src/main/java/org/elasticsearch/script/ScriptModule.java index 579d63b38b7fe..e8c4445b82767 100644 --- a/server/src/main/java/org/elasticsearch/script/ScriptModule.java +++ b/server/src/main/java/org/elasticsearch/script/ScriptModule.java @@ -28,6 +28,8 @@ import java.util.stream.Stream; import org.elasticsearch.common.Booleans; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.ScriptPlugin; @@ -56,6 +58,8 @@ public class ScriptModule { public static final boolean NULL_FOR_MISSING_VALUE = Booleans.parseBoolean(System.getProperty("es.script.null_for_missing_value", "false")); + private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(ScriptModule.class)); + private final ScriptService scriptService; public ScriptModule(Settings settings, List scriptPlugins) { @@ -79,6 +83,10 @@ public ScriptModule(Settings settings, List scriptPlugins) { } } } + if (NULL_FOR_MISSING_VALUE == false) + DEPRECATION_LOGGER.deprecated("Script: returning 0 for missing numeric values is deprecated. " + + "Set system property '-Des.script.null_for_missing_value=true' to make behaviour compatible with future major versions."); + scriptService = new ScriptService(settings, Collections.unmodifiableMap(engines), Collections.unmodifiableMap(contexts)); } diff --git a/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java index 2fcc72eb54e32..573aec7e7851c 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/LeafDocLookup.java @@ -93,7 +93,7 @@ public ScriptDocValues run() { } try { boolean docHasValues = scriptValues.setNextDocId(docId); - if (!docHasValues && ScriptModule.NULL_FOR_MISSING_VALUE) return null; + if ((docHasValues == false) && ScriptModule.NULL_FOR_MISSING_VALUE) return null; } catch (IOException e) { throw ExceptionsHelper.convertToElastic(e); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountIT.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountIT.java index 84128d120cdd2..da77f5fc9e82e 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountIT.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/MinDocCountIT.java @@ -85,19 +85,19 @@ protected Map, Object>> pluginScripts() { scripts.put("doc['d'].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Doubles value = (ScriptDocValues.Doubles) doc.get("d"); - if (value != null) { return value.getValues(); } else return null; + return value == null ? null : value.getValues(); }); scripts.put("doc['l'].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Longs value = (ScriptDocValues.Longs) doc.get("l"); - if (value != null) { return value.getValues(); } else return null; + return value == null ? null : value.getValues(); }); scripts.put("doc['s'].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Strings value = (ScriptDocValues.Strings) doc.get("s"); - if (value != null) { return value.getValues(); } else return null; + return value == null ? null : value.getValues(); }); return scripts; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityIT.java b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityIT.java index 96ac161c52230..11cca5d57f385 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityIT.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityIT.java @@ -76,7 +76,7 @@ protected Map, Object>> pluginScripts() { scripts.put("doc['str_values'].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Strings strValue = (ScriptDocValues.Strings) doc.get("str_values"); - if (strValue != null) { return strValue.getValues(); } else return null; + return strValue == null ? null : strValue.getValues(); }); scripts.put("doc[' + singleNumericField() + '].value", vars -> { @@ -87,7 +87,7 @@ protected Map, Object>> pluginScripts() { scripts.put("doc[' + multiNumericField(false) + '].values", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues numValue = (ScriptDocValues) doc.get(multiNumericField(false)); - if (numValue != null) { return numValue.getValues(); } else return null; + return numValue == null ? null : numValue.getValues(); }); return scripts; diff --git a/server/src/test/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java b/server/src/test/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java index c476bbb6efc0a..6368933a4289d 100644 --- a/server/src/test/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java +++ b/server/src/test/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java @@ -83,7 +83,7 @@ protected Map, Object>> pluginScripts() { scripts.put("doc['random_score']", vars -> { Map doc = (Map) vars.get("doc"); ScriptDocValues.Doubles randomScore = (ScriptDocValues.Doubles) doc.get("random_score"); - if (randomScore != null) {return randomScore.getValue();} else return 0; + return randomScore == null ? 0 : randomScore.getValue(); }); return scripts; } From 359a000e3d16a281cc2bf1f44f31838ae068fe24 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 16 May 2018 15:23:24 -0400 Subject: [PATCH 4/6] Correct painless-getting-started for multi values --- docs/painless/painless-getting-started.asciidoc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/painless/painless-getting-started.asciidoc b/docs/painless/painless-getting-started.asciidoc index 2d9c86f53da45..1812a129ab243 100644 --- a/docs/painless/painless-getting-started.asciidoc +++ b/docs/painless/painless-getting-started.asciidoc @@ -142,13 +142,12 @@ this behavior, a deprecation warning is logged on start up. ===== Multiple values -There is a number of operations designed for numeric fields, -if a document has multiple values in such a field: +Painless supports the following operations for multi-value numeric fields: -- `doc['field'].min` - gets the minimum value among values -- `doc['field'].max` - gets the maximum value among values -- `doc['field'].sum` - gets the sum of all values -- `doc['field'].avg` - gets the average of all values +- `doc['field'].min` - gets the minimum value +- `doc['field'].max` - gets the maximum value +- `doc['field'].sum` - gets the sum of the values +- `doc['field'].avg` - gets the average of the values [float] From b0c891ad94b716b4de9072dd0a9a0458ae72ed6c Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 17 May 2018 17:02:48 -0400 Subject: [PATCH 5/6] Set system property for test framework --- test/framework/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/test/framework/build.gradle b/test/framework/build.gradle index 193fcb30988c6..a543fd81e7c75 100644 --- a/test/framework/build.gradle +++ b/test/framework/build.gradle @@ -73,4 +73,5 @@ precommit.dependsOn namingConventionsMain test.configure { systemProperty 'tests.gradle_index_compat_versions', bwcVersions.indexCompatible.join(',') systemProperty 'tests.gradle_wire_compat_versions', bwcVersions.wireCompatible.join(',') + systemProperty 'es.script.null_for_missing_value', 'true' } From 3b5f42127351443b2732ea3e1ad06701db0b3797 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 18 May 2018 12:39:21 -0400 Subject: [PATCH 6/6] Set system property for tests and update painless-getting-started --- .../painless-getting-started.asciidoc | 19 ++++++++++--------- modules/parent-join/build.gradle | 3 +++ modules/percolator/build.gradle | 4 ++++ server/build.gradle | 4 ++++ x-pack/plugin/security/build.gradle | 1 + 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/docs/painless/painless-getting-started.asciidoc b/docs/painless/painless-getting-started.asciidoc index 1812a129ab243..47f8bf03c5f0a 100644 --- a/docs/painless/painless-getting-started.asciidoc +++ b/docs/painless/painless-getting-started.asciidoc @@ -140,15 +140,6 @@ set a <> `-Des.script.null_for_missing_value=true` on a node. If you do not enable this behavior, a deprecation warning is logged on start up. -===== Multiple values - -Painless supports the following operations for multi-value numeric fields: - -- `doc['field'].min` - gets the minimum value -- `doc['field'].max` - gets the maximum value -- `doc['field'].sum` - gets the sum of the values -- `doc['field'].avg` - gets the average of the values - [float] ==== Updating Fields with Painless @@ -388,6 +379,16 @@ Note: all of the `_update_by_query` examples above could really do with a as using any other query because script queries aren't able to use the inverted index to limit the documents that they have to check. +[[multi-value-field-operations]] +=== Multi-value field operations + +Painless supports the following operations for multi-value numeric fields: + +- `doc['field'].min` - gets the minimum value +- `doc['field'].max` - gets the maximum value +- `doc['field'].sum` - gets the sum of the values +- `doc['field'].avg` - gets the average of the values + [[modules-scripting-painless-dispatch]] === How painless dispatches functions diff --git a/modules/parent-join/build.gradle b/modules/parent-join/build.gradle index 67bcc9d54e8e7..38563e467914b 100644 --- a/modules/parent-join/build.gradle +++ b/modules/parent-join/build.gradle @@ -22,3 +22,6 @@ esplugin { classname 'org.elasticsearch.join.ParentJoinPlugin' hasClientJar = true } +test.configure { + systemProperty 'es.script.null_for_missing_value', 'true' +} diff --git a/modules/percolator/build.gradle b/modules/percolator/build.gradle index db4a716af6513..7be888d377dd3 100644 --- a/modules/percolator/build.gradle +++ b/modules/percolator/build.gradle @@ -36,3 +36,7 @@ dependencyLicenses { compileJava.options.compilerArgs << "-Xlint:-deprecation,-rawtypes" compileTestJava.options.compilerArgs << "-Xlint:-deprecation,-rawtypes" + +test.configure { + systemProperty 'es.script.null_for_missing_value', 'true' +} diff --git a/server/build.gradle b/server/build.gradle index 7e880e0dae4d2..2c3ae6e6573e3 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -338,3 +338,7 @@ if (isEclipse == false || project.path == ":server-tests") { check.dependsOn integTest integTest.mustRunAfter test } + +test.configure { + systemProperty 'es.script.null_for_missing_value', 'true' +} diff --git a/x-pack/plugin/security/build.gradle b/x-pack/plugin/security/build.gradle index 12533a389b5f1..e4b24343c4a37 100644 --- a/x-pack/plugin/security/build.gradle +++ b/x-pack/plugin/security/build.gradle @@ -225,6 +225,7 @@ test { * other if we allow them to set the number of available processors as it's set-once in Netty. */ systemProperty 'es.set.netty.runtime.available.processors', 'false' + systemProperty 'es.script.null_for_missing_value', 'true' } // xpack modules are installed in real clusters as the meta plugin, so