diff --git a/docs/changelog/117201.yaml b/docs/changelog/117201.yaml new file mode 100644 index 0000000000000..f8a2be35c70a3 --- /dev/null +++ b/docs/changelog/117201.yaml @@ -0,0 +1,6 @@ +pr: 117201 +summary: "Use `field_caps` native nested fields filtering" +area: ES|QL +type: bug +issues: + - 117054 diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/FieldExtractorTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/FieldExtractorTestCase.java index d124fdb5755c3..4a1874cc46612 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/FieldExtractorTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/FieldExtractorTestCase.java @@ -14,6 +14,7 @@ import org.elasticsearch.client.ResponseException; import org.elasticsearch.common.Strings; import org.elasticsearch.common.network.NetworkAddress; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.geo.GeometryTestUtils; import org.elasticsearch.index.mapper.BlockLoader; @@ -27,6 +28,7 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.hamcrest.Matcher; import org.junit.Before; @@ -1106,6 +1108,323 @@ public void testTypeConflictInObject() throws IOException { ); } + /** + * Test for https://github.com/elastic/elasticsearch/issues/117054 fix + */ + public void testOneNestedSubField_AndSameNameSupportedField() throws IOException { + assumeIndexResolverNestedFieldsNameClashFixed(); + ESRestTestCase.createIndex("test", Settings.EMPTY, """ + "properties": { + "Responses": { + "properties": { + "process": { + "type": "nested", + "properties": { + "pid": { + "type": "long" + } + } + } + } + }, + "process": { + "properties": { + "parent": { + "properties": { + "command_line": { + "type": "wildcard", + "fields": { + "text": { + "type": "text" + } + } + } + } + } + } + } + } + """); + + Map result = runEsql("FROM test"); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text")) + ).entry("values", Collections.EMPTY_LIST) + ); + + index("test", """ + {"Responses.process.pid": 123,"process.parent.command_line":"run.bat"}"""); + + result = runEsql("FROM test"); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text")) + ).entry("values", List.of(matchesList().item("run.bat").item("run.bat"))) + ); + + result = runEsql(""" + FROM test | where process.parent.command_line == "run.bat" + """); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text")) + ).entry("values", List.of(matchesList().item("run.bat").item("run.bat"))) + ); + + ResponseException e = expectThrows(ResponseException.class, () -> runEsql("FROM test | SORT Responses.process.pid")); + String err = EntityUtils.toString(e.getResponse().getEntity()); + assertThat(err, containsString("line 1:18: Unknown column [Responses.process.pid]")); + + e = expectThrows(ResponseException.class, () -> runEsql(""" + FROM test + | SORT Responses.process.pid + | WHERE Responses.process IS NULL + """)); + err = EntityUtils.toString(e.getResponse().getEntity()); + assertThat(err, containsString("line 2:8: Unknown column [Responses.process.pid]")); + } + + public void testOneNestedSubField_AndSameNameSupportedField_TwoIndices() throws IOException { + assumeIndexResolverNestedFieldsNameClashFixed(); + ESRestTestCase.createIndex("test1", Settings.EMPTY, """ + "properties": { + "Responses": { + "properties": { + "process": { + "type": "nested", + "properties": { + "pid": { + "type": "long" + } + } + } + } + } + } + """); + ESRestTestCase.createIndex("test2", Settings.EMPTY, """ + "properties": { + "process": { + "properties": { + "parent": { + "properties": { + "command_line": { + "type": "wildcard", + "fields": { + "text": { + "type": "text" + } + } + } + } + } + } + } + } + """); + index("test1", """ + {"Responses.process.pid": 123}"""); + index("test2", """ + {"process.parent.command_line":"run.bat"}"""); + + Map result = runEsql("FROM test* | SORT process.parent.command_line ASC NULLS FIRST"); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text")) + ).entry("values", List.of(matchesList().item(null).item(null), matchesList().item("run.bat").item("run.bat"))) + ); + + result = runEsql(""" + FROM test* | where process.parent.command_line == "run.bat" + """); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of(columnInfo("process.parent.command_line", "keyword"), columnInfo("process.parent.command_line.text", "text")) + ).entry("values", List.of(matchesList().item("run.bat").item("run.bat"))) + ); + + ResponseException e = expectThrows(ResponseException.class, () -> runEsql("FROM test* | SORT Responses.process.pid")); + String err = EntityUtils.toString(e.getResponse().getEntity()); + assertThat(err, containsString("line 1:19: Unknown column [Responses.process.pid]")); + + e = expectThrows(ResponseException.class, () -> runEsql(""" + FROM test* + | SORT Responses.process.pid + | WHERE Responses.process IS NULL + """)); + err = EntityUtils.toString(e.getResponse().getEntity()); + assertThat(err, containsString("line 2:8: Unknown column [Responses.process.pid]")); + } + + public void testOneNestedField_AndSameNameSupportedField_TwoIndices() throws IOException { + assumeIndexResolverNestedFieldsNameClashFixed(); + ESRestTestCase.createIndex("test1", Settings.EMPTY, """ + "properties": { + "Responses": { + "properties": { + "process": { + "type": "nested", + "properties": { + "pid": { + "type": "long" + } + } + } + } + }, + "process": { + "properties": { + "parent": { + "properties": { + "command_line": { + "type": "wildcard", + "fields": { + "text": { + "type": "text" + } + } + } + } + } + } + } + } + """); + ESRestTestCase.createIndex("test2", Settings.EMPTY, """ + "properties": { + "Responses": { + "properties": { + "process": { + "type": "integer", + "fields": { + "pid": { + "type": "long" + } + } + } + } + }, + "process": { + "properties": { + "parent": { + "properties": { + "command_line": { + "type": "wildcard", + "fields": { + "text": { + "type": "text" + } + } + } + } + } + } + } + } + """); + index("test1", """ + {"Responses.process.pid": 111,"process.parent.command_line":"run1.bat"}"""); + index("test2", """ + {"Responses.process": 222,"process.parent.command_line":"run2.bat"}"""); + + Map result = runEsql("FROM test* | SORT process.parent.command_line"); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of( + columnInfo("Responses.process", "integer"), + columnInfo("Responses.process.pid", "long"), + columnInfo("process.parent.command_line", "keyword"), + columnInfo("process.parent.command_line.text", "text") + ) + ) + .entry( + "values", + List.of( + matchesList().item(null).item(null).item("run1.bat").item("run1.bat"), + matchesList().item(222).item(222).item("run2.bat").item("run2.bat") + ) + ) + ); + + result = runEsql(""" + FROM test* | where Responses.process.pid == 111 + """); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of( + columnInfo("Responses.process", "integer"), + columnInfo("Responses.process.pid", "long"), + columnInfo("process.parent.command_line", "keyword"), + columnInfo("process.parent.command_line.text", "text") + ) + ).entry("values", List.of()) + ); + + result = runEsql("FROM test* | SORT process.parent.command_line"); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of( + columnInfo("Responses.process", "integer"), + columnInfo("Responses.process.pid", "long"), + columnInfo("process.parent.command_line", "keyword"), + columnInfo("process.parent.command_line.text", "text") + ) + ) + .entry( + "values", + List.of( + matchesList().item(null).item(null).item("run1.bat").item("run1.bat"), + matchesList().item(222).item(222).item("run2.bat").item("run2.bat") + ) + ) + ); + + result = runEsql(""" + FROM test* + | SORT process.parent.command_line + | WHERE Responses.process IS NULL + """); + assertMap( + result, + matchesMapWithOptionalTook(result.get("took")).entry( + "columns", + List.of( + columnInfo("Responses.process", "integer"), + columnInfo("Responses.process.pid", "long"), + columnInfo("process.parent.command_line", "keyword"), + columnInfo("process.parent.command_line.text", "text") + ) + ).entry("values", List.of(matchesList().item(null).item(null).item("run1.bat").item("run1.bat"))) + ); + } + + private void assumeIndexResolverNestedFieldsNameClashFixed() throws IOException { + // especially for BWC tests but also for regular tests + var capsName = EsqlCapabilities.Cap.FIX_NESTED_FIELDS_NAME_CLASH_IN_INDEXRESOLVER.name().toLowerCase(Locale.ROOT); + boolean requiredClusterCapability = clusterHasCapability("POST", "/_query", List.of(), List.of(capsName)).orElse(false); + assumeTrue( + "This test makes sense for versions that have the fix for https://github.com/elastic/elasticsearch/issues/117054", + requiredClusterCapability + ); + } + private CheckedConsumer empNoInObject(String empNoType) { return index -> { index.startObject("properties"); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index c4ce8fbc8b19c..82942119dead4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -504,7 +504,12 @@ public enum Cap { /** * LOOKUP JOIN */ - JOIN_LOOKUP(Build.current().isSnapshot()); + JOIN_LOOKUP(Build.current().isSnapshot()), + + /** + * Fix for https://github.com/elastic/elasticsearch/issues/117054 + */ + FIX_NESTED_FIELDS_NAME_CLASH_IN_INDEXRESOLVER; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index 0be8cf820d345..f61be4b59830e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -98,9 +98,8 @@ public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResp // TODO flattened is simpler - could we get away with that? String[] names = fieldsCaps.keySet().toArray(new String[0]); Arrays.sort(names); - Set forbiddenFields = new HashSet<>(); Map rootFields = new HashMap<>(); - name: for (String name : names) { + for (String name : names) { Map fields = rootFields; String fullName = name; boolean isAlias = false; @@ -111,9 +110,6 @@ public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResp break; } String parent = name.substring(0, nextDot); - if (forbiddenFields.contains(parent)) { - continue name; - } EsField obj = fields.get(parent); if (obj == null) { obj = new EsField(parent, OBJECT, new HashMap<>(), false, true); @@ -125,16 +121,10 @@ public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResp fields = obj.getProperties(); name = name.substring(nextDot + 1); } - - List caps = fieldsCaps.get(fullName); - if (allNested(caps)) { - forbiddenFields.add(name); - continue; - } // TODO we're careful to make isAlias match IndexResolver - but do we use it? EsField field = firstUnsupportedParent == null - ? createField(fieldCapsResponse, name, fullName, caps, isAlias) + ? createField(fieldCapsResponse, name, fullName, fieldsCaps.get(fullName), isAlias) : new UnsupportedEsField( fullName, firstUnsupportedParent.getOriginalType(), @@ -164,15 +154,6 @@ public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResp return IndexResolution.valid(new EsIndex(indexPattern, rootFields, concreteIndices), concreteIndices.keySet(), unavailableRemotes); } - private boolean allNested(List caps) { - for (IndexFieldCapabilities cap : caps) { - if (false == cap.type().equalsIgnoreCase("nested")) { - return false; - } - } - return true; - } - private static Map> collectFieldCaps(FieldCapabilitiesResponse fieldCapsResponse) { Set seenHashes = new HashSet<>(); Map> fieldsCaps = new HashMap<>(); @@ -278,6 +259,8 @@ private static FieldCapabilitiesRequest createFieldCapsRequest(String index, Set // lenient because we throw our own errors looking at the response e.g. if something was not resolved // also because this way security doesn't throw authorization exceptions but rather honors ignore_unavailable req.indicesOptions(FIELD_CAPS_INDICES_OPTIONS); + // we ignore the nested data type fields starting with https://github.com/elastic/elasticsearch/pull/111495 + req.filters("-nested"); req.setMergeResults(false); return req; }