Skip to content

Commit

Permalink
ESQL: Remove the NESTED DataType (#111495)
Browse files Browse the repository at this point in the history
We don't support nested fields at the moment and when we do I'm not sure
it'll *be* a DataType. Maybe. Probably. But let's deal with that when we
support it.
  • Loading branch information
nik9000 authored Aug 6, 2024
1 parent 51ba611 commit 3d6b7aa
Show file tree
Hide file tree
Showing 12 changed files with 173 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ static TransportVersion def(int id) {
public static final TransportVersion INDEX_REQUEST_UPDATE_BY_DOC_ORIGIN = def(8_714_00_0);
public static final TransportVersion ESQL_ATTRIBUTE_CACHED_SERIALIZATION = def(8_715_00_0);
public static final TransportVersion REGISTER_SLM_STATS = def(8_716_00_0);
public static final TransportVersion ESQL_NESTED_UNSUPPORTED = def(8_717_00_0);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ public enum DataType {
// 8.15.2-SNAPSHOT is 15 bytes, most are shorter, some can be longer
VERSION(builder().esType("version").estimatedSize(15).docValues()),
OBJECT(builder().esType("object").unknownSize()),
NESTED(builder().esType("nested").unknownSize()),
SOURCE(builder().esType(SourceFieldMapper.NAME).unknownSize()),
DATE_PERIOD(builder().typeName("DATE_PERIOD").estimatedSize(3 * Integer.BYTES)),
TIME_DURATION(builder().typeName("TIME_DURATION").estimatedSize(Integer.BYTES + Long.BYTES)),
Expand Down Expand Up @@ -227,6 +226,11 @@ public static Collection<DataType> types() {
return TYPES;
}

/**
* Resolve a type from a name. This name is sometimes user supplied,
* like in the case of {@code ::<typename>} and is sometimes the name
* used over the wire, like in {@link #readFrom(String)}.
*/
public static DataType fromTypeName(String name) {
return NAME_TO_TYPE.get(name.toLowerCase(Locale.ROOT));
}
Expand Down Expand Up @@ -287,7 +291,7 @@ public static boolean isPrimitiveAndSupported(DataType t) {
}

public static boolean isPrimitive(DataType t) {
return t != OBJECT && t != NESTED;
return t != OBJECT;
}

public static boolean isNull(DataType t) {
Expand Down Expand Up @@ -335,7 +339,6 @@ public static boolean areCompatible(DataType left, DataType right) {
*/
public static boolean isRepresentable(DataType t) {
return t != OBJECT
&& t != NESTED
&& t != UNSUPPORTED
&& t != DATE_PERIOD
&& t != TIME_DURATION
Expand Down Expand Up @@ -442,8 +445,20 @@ public void writeTo(StreamOutput out) throws IOException {

public static DataType readFrom(StreamInput in) throws IOException {
// TODO: Use our normal enum serialization pattern
String name = in.readString();
return readFrom(in.readString());
}

/**
* Resolve a {@link DataType} from a name read from a {@link StreamInput}.
* @throws IOException on an unknown dataType
*/
public static DataType readFrom(String name) throws IOException {
if (name.equalsIgnoreCase(DataType.DOC_DATA_TYPE.nameUpper())) {
/*
* DOC is not declared in fromTypeName because fromTypeName is
* exposed to users for things like `::<typename>` and we don't
* want folks to be able to convert to `DOC`.
*/
return DataType.DOC_DATA_TYPE;
}
DataType dataType = DataType.fromTypeName(name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/
package org.elasticsearch.xpack.esql.core.type;

import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.NamedWriteable;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
Expand Down Expand Up @@ -54,16 +55,32 @@ public EsField(String name, DataType esDataType, Map<String, EsField> properties

public EsField(StreamInput in) throws IOException {
this.name = in.readString();
this.esDataType = DataType.readFrom(in);
this.esDataType = readDataType(in);
this.properties = in.readImmutableMap(i -> i.readNamedWriteable(EsField.class));
this.aggregatable = in.readBoolean();
this.isAlias = in.readBoolean();
}

private DataType readDataType(StreamInput in) throws IOException {
String name = in.readString();
if (in.getTransportVersion().before(TransportVersions.ESQL_NESTED_UNSUPPORTED) && name.equalsIgnoreCase("NESTED")) {
/*
* The "nested" data type existed in older versions of ESQL but was
* entirely used to filter mappings away. Those versions will still
* sometimes send it inside EsField when hitting `nested` fields in
* indices. But the rest of ESQL will never see that type. Thus, we
* translate it here. We translate to UNSUPPORTED because that seems
* to work. We've already performed any required filtering.
*/
return DataType.UNSUPPORTED;
}
return DataType.readFrom(name);
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeString(esDataType.typeName());
esDataType.writeTo(out);
out.writeMap(properties, StreamOutput::writeNamedWriteable);
out.writeBoolean(aggregatable);
out.writeBoolean(isAlias);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import static java.util.Collections.emptyMap;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.NESTED;
import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED;
Expand Down Expand Up @@ -79,10 +78,14 @@ private static void walkMapping(String name, Object value, Map<String, EsField>
if (value instanceof Map) {
Map<String, Object> content = (Map<String, Object>) value;

if ("nested".equals(content.get("type"))) {
// Nested fields are entirely removed by IndexResolver so we mimic it.
return;
}
// extract field type
DataType esDataType = getType(content);
final Map<String, EsField> properties;
if (esDataType == OBJECT || esDataType == NESTED) {
if (esDataType == OBJECT) {
properties = fromEs(content);
} else if (content.containsKey("fields")) {
// Check for multifields
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Pa
}
}
};
case DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, SHORT, BYTE, OBJECT, NESTED, FLOAT, HALF_FLOAT, SCALED_FLOAT,
case DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, SHORT, BYTE, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT,
PARTIAL_AGG -> throw new IllegalArgumentException("can't convert values of type [" + columnInfo.type() + "]");
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ private static Object valueAt(DataType dataType, Block block, int offset, BytesR
throw new UncheckedIOException(e);
}
}
case SHORT, BYTE, FLOAT, HALF_FLOAT, SCALED_FLOAT, OBJECT, NESTED, DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE,
NULL, PARTIAL_AGG -> throw EsqlIllegalArgumentException.illegalDataType(dataType);
case SHORT, BYTE, FLOAT, HALF_FLOAT, SCALED_FLOAT, OBJECT, DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, NULL,
PARTIAL_AGG -> throw EsqlIllegalArgumentException.illegalDataType(dataType);
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@
import static org.elasticsearch.xpack.esql.core.type.DataType.IP;
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
import static org.elasticsearch.xpack.esql.core.type.DataType.NESTED;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount;
Expand Down Expand Up @@ -245,8 +244,8 @@ private static void mappingAsAttributes(List<Attribute> list, Source source, Fie
if (DataType.isPrimitive(type)) {
list.add(attribute);
}
// allow compound object even if they are unknown (but not NESTED)
if (type != NESTED && fieldProperties.isEmpty() == false) {
// allow compound object even if they are unknown
if (fieldProperties.isEmpty() == false) {
mappingAsAttributes(list, source, attribute, fieldProperties);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte
case TEXT, KEYWORD -> TopNEncoder.UTF8;
case VERSION -> TopNEncoder.VERSION;
case BOOLEAN, NULL, BYTE, SHORT, INTEGER, LONG, DOUBLE, FLOAT, HALF_FLOAT, DATETIME, DATE_PERIOD, TIME_DURATION, OBJECT,
NESTED, SCALED_FLOAT, UNSIGNED_LONG, DOC_DATA_TYPE, TSID_DATA_TYPE -> TopNEncoder.DEFAULT_SORTABLE;
SCALED_FLOAT, UNSIGNED_LONG, DOC_DATA_TYPE, TSID_DATA_TYPE -> TopNEncoder.DEFAULT_SORTABLE;
case GEO_POINT, CARTESIAN_POINT, GEO_SHAPE, CARTESIAN_SHAPE, COUNTER_LONG, COUNTER_INTEGER, COUNTER_DOUBLE ->
TopNEncoder.DEFAULT_UNSORTABLE;
// unsupported fields are encoded as BytesRef, we'll use the same encoder; all values should be null at this point
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,8 @@ public static ElementType toElementType(DataType dataType, MappedFieldType.Field
case GEO_POINT, CARTESIAN_POINT -> fieldExtractPreference == DOC_VALUES ? ElementType.LONG : ElementType.BYTES_REF;
case GEO_SHAPE, CARTESIAN_SHAPE -> ElementType.BYTES_REF;
case PARTIAL_AGG -> ElementType.COMPOSITE;
case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, NESTED, FLOAT, HALF_FLOAT, SCALED_FLOAT ->
throw EsqlIllegalArgumentException.illegalDataType(dataType);
case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT -> throw EsqlIllegalArgumentException
.illegalDataType(dataType);
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ public void resolveAsMergedMapping(String indexWildcard, Set<String> fieldNames,
);
}

// public for testing only
public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResponse fieldCapsResponse) {
assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.SEARCH_COORDINATION); // too expensive to run this on a transport worker
if (fieldCapsResponse.getIndexResponses().isEmpty()) {
Expand All @@ -93,8 +94,9 @@ public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResp
// TODO flattened is simpler - could we get away with that?
String[] names = fieldsCaps.keySet().toArray(new String[0]);
Arrays.sort(names);
Set<String> forbiddenFields = new HashSet<>();
Map<String, EsField> rootFields = new HashMap<>();
for (String name : names) {
name: for (String name : names) {
Map<String, EsField> fields = rootFields;
String fullName = name;
boolean isAlias = false;
Expand All @@ -105,6 +107,9 @@ public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResp
break;
}
String parent = name.substring(0, nextDot);
if (forbiddenFields.contains(parent)) {
continue name;
}
EsField obj = fields.get(parent);
if (obj == null) {
obj = new EsField(parent, OBJECT, new HashMap<>(), false, true);
Expand All @@ -116,9 +121,16 @@ public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResp
fields = obj.getProperties();
name = name.substring(nextDot + 1);
}

List<IndexFieldCapabilities> caps = fieldsCaps.get(fullName);
if (allNested(caps)) {
forbiddenFields.add(name);
continue;
}
// TODO we're careful to make isAlias match IndexResolver - but do we use it?

EsField field = firstUnsupportedParent == null
? createField(fieldCapsResponse, name, fullName, fieldsCaps.get(fullName), isAlias)
? createField(fieldCapsResponse, name, fullName, caps, isAlias)
: new UnsupportedEsField(
fullName,
firstUnsupportedParent.getOriginalType(),
Expand All @@ -144,6 +156,15 @@ public IndexResolution mergedMappings(String indexPattern, FieldCapabilitiesResp
return IndexResolution.valid(new EsIndex(indexPattern, rootFields, concreteIndices));
}

private boolean allNested(List<IndexFieldCapabilities> caps) {
for (IndexFieldCapabilities cap : caps) {
if (false == cap.type().equalsIgnoreCase("nested")) {
return false;
}
}
return true;
}

private static Map<String, List<IndexFieldCapabilities>> collectFieldCaps(FieldCapabilitiesResponse fieldCapsResponse) {
Set<String> seenHashes = new HashSet<>();
Map<String, List<IndexFieldCapabilities>> fieldsCaps = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public static Literal randomLiteral(DataType type) {
throw new UncheckedIOException(e);
}
}
case UNSUPPORTED, OBJECT, NESTED, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG -> throw new IllegalArgumentException(
case UNSUPPORTED, OBJECT, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG -> throw new IllegalArgumentException(
"can't make random values for [" + type.typeName() + "]"
);
}, type);
Expand Down Expand Up @@ -481,7 +481,7 @@ public static Stream<DataType> validFunctionParameters() {
*/
return false;
}
if (t == DataType.OBJECT || t == DataType.NESTED) {
if (t == DataType.OBJECT) {
// Object and nested fields aren't supported by any functions yet
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,99 @@ unsupported with sort:
- match: { values.0.26: xy }
- match: { values.0.27: "foo bar" }
- match: { values.0.28: 3 }

---
nested declared inline:
- do:
bulk:
index: test
refresh: true
body:
- { "index": { } }
- {
"find_me": 1,
"nested": {
"foo": 1,
"bar": "bar",
"baz": 1.9
}
}

- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
- "No limit defined, adding default limit of \\[.*\\]"
esql.query:
body:
query: 'FROM test | WHERE find_me == 1 | KEEP n*'

# The `nested` field is not visible, nor are any of it's subfields.
- match: { columns: [{name: name, type: keyword}] }
- match: { values: [[null]] }

---
nested declared in mapping:
- do:
indices.create:
index: test_nested
body:
settings:
number_of_shards: 5
mappings:
properties:
name:
type: keyword
nested:
type: nested
properties:
foo:
type: keyword
bar:
type: keyword

- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
- "No limit defined, adding default limit of \\[.*\\]"
esql.query:
body:
query: 'FROM test_nested'

# The `nested` field is not visible, nor are any of it's subfields.
- match: { columns: [{name: name, type: keyword}] }

---
double nested declared in mapping:
- do:
indices.create:
index: test_nested
body:
settings:
number_of_shards: 5
mappings:
properties:
name:
type: keyword
nested:
type: nested
properties:
bort:
type: keyword
nested:
type: nested
properties:
foo:
type: keyword
bar:
type: keyword

- do:
allowed_warnings_regex:
- "Field \\[.*\\] cannot be retrieved, it is unsupported or not indexed; returning null"
- "No limit defined, adding default limit of \\[.*\\]"
esql.query:
body:
query: 'FROM test_nested'

# The `nested` field is not visible, nor are any of it's subfields.
- match: { columns: [{name: name, type: keyword}] }

0 comments on commit 3d6b7aa

Please sign in to comment.