From b47a894732a9caf7eb9b1549872f4a2a1bd8c19f Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto <104530826+acarbonetto@users.noreply.github.com> Date: Mon, 24 Oct 2022 16:45:35 -0700 Subject: [PATCH] 639: allow metadata fields (_id) to be used as QualifiedName --- .../org/opensearch/sql/analysis/Analyzer.java | 7 +++++ .../sql/ast/expression/QualifiedName.java | 17 ++++++++++- .../OpenSearchDescribeIndexRequest.java | 5 ++++ .../response/OpenSearchResponse.java | 28 ++++++++++++++----- .../antlr/OpenSearchSQLIdentifierParser.g4 | 9 ++++++ sql/src/main/antlr/OpenSearchSQLLexer.g4 | 14 +++++++++- .../sql/sql/parser/AstExpressionBuilder.java | 11 ++++---- 7 files changed, 77 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 5fc642fa06..fcf3e08940 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -153,6 +153,13 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { } table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); + // add OpenSearch metadata types + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_index"), ExprCoreType.STRING); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_id"), ExprCoreType.STRING); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_score"), ExprCoreType.FLOAT); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_maxscore"), ExprCoreType.FLOAT); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_sort"), ExprCoreType.LONG); + // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. curEnv.define(new Symbol(Namespace.INDEX_NAME, diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index 8b16119dc0..a6c10c55dd 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -25,14 +25,27 @@ public class QualifiedName extends UnresolvedExpression { private final List parts; + @Getter + private final Boolean isMetadataField; + public QualifiedName(String name) { + this(name, Boolean.FALSE); + } + + public QualifiedName(String name, Boolean isMetadataField) { this.parts = Collections.singletonList(name); + this.isMetadataField = isMetadataField; + } + + public QualifiedName(Iterable parts) { + this(parts, Boolean.FALSE); } /** * QualifiedName Constructor. */ - public QualifiedName(Iterable parts) { + public QualifiedName(Iterable parts, Boolean isMetadataField) { + this.isMetadataField = isMetadataField; List partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList()); if (partsList.isEmpty()) { throw new IllegalArgumentException("parts is empty"); @@ -110,4 +123,6 @@ public List getChild() { public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitQualifiedName(this, context); } + + public Boolean isMetadataField() { return Boolean.TRUE; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java index f321497099..4bfa3ed0d6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java @@ -118,6 +118,11 @@ public Map getFieldTypes() { .filter(entry -> !ExprCoreType.UNKNOWN.equals(entry.getValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } + fieldTypes.put("_index", ExprCoreType.STRING); + fieldTypes.put("_id", ExprCoreType.STRING); + fieldTypes.put("_score", ExprCoreType.FLOAT); + fieldTypes.put("_maxscore", ExprCoreType.FLOAT); + fieldTypes.put("_sort", ExprCoreType.LONG); return fieldTypes; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index aadd73efdd..34a9f440dc 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -17,6 +17,9 @@ import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprLongValue; +import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; @@ -92,14 +95,25 @@ public Iterator iterator() { return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build()); }).iterator(); } else { + float maxScore = hits.getMaxScore(); return Arrays.stream(hits.getHits()) .map(hit -> { - ExprValue docData = exprValueFactory.construct(hit.getSourceAsString()); - if (hit.getHighlightFields().isEmpty()) { - return docData; - } else { - ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); - builder.putAll(docData.tupleValue()); + String source = hit.getSourceAsString(); + ExprValue docData = exprValueFactory.construct(source); + + ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); + builder.putAll(docData.tupleValue()); + builder.put("_index", new ExprStringValue(hit.getIndex())); + builder.put("_id", new ExprStringValue(hit.getId())); + if (!Float.isNaN(hit.getScore())) { + builder.put("_score", new ExprFloatValue(hit.getScore())); + } + if (!Float.isNaN(maxScore)) { + builder.put("_maxscore", new ExprLongValue(maxScore)); + } + builder.put("_sort", new ExprLongValue(hit.getSeqNo())); + + if (!hit.getHighlightFields().isEmpty()) { var hlBuilder = ImmutableMap.builder(); for (var es : hit.getHighlightFields().entrySet()) { hlBuilder.put(es.getKey(), ExprValueUtils.collectionValue( @@ -107,8 +121,8 @@ public Iterator iterator() { t -> (t.toString())).collect(Collectors.toList()))); } builder.put("_highlight", ExprTupleValue.fromExprValueMap(hlBuilder.build())); - return ExprTupleValue.fromExprValueMap(builder.build()); } + return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build()); }).iterator(); } } diff --git a/sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4 b/sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4 index 665d48c97c..cf5091e8cb 100644 --- a/sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4 @@ -54,9 +54,18 @@ qualifiedName ident : DOT? ID | BACKTICK_QUOTE_ID + | metadataField | keywordsCanBeId ; +metadataField + : META_INDEX + | META_ID + | META_SCORE + | META_MAXSCORE + | META_SORT + ; + keywordsCanBeId : FULL | FIELD | D | T | TS // OD SQL and ODBC special diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 470ff5050f..297c077e02 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -135,6 +135,15 @@ SUBSTRING: 'SUBSTRING'; TRIM: 'TRIM'; +// Metadata fields can be ID + +META_INDEX: '_ID'; +META_ID: '_ID'; +META_SCORE: '_SCORE'; +META_MAXSCORE: '_MAXSCORE'; +META_SORT: '_SORT'; + + // Keywords, but can be ID // Common Keywords, but can be ID @@ -441,7 +450,6 @@ BACKTICK_QUOTE_ID: BQUOTA_STRING; // Fragments for Literal primitives fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+; -fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*; fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; @@ -449,6 +457,10 @@ fragment HEX_DIGIT: [0-9A-F]; fragment DEC_DIGIT: [0-9]; fragment BIT_STRING_L: 'B' '\'' [01]+ '\''; +// Identifiers cannot start with a single '_' since this an OpebSearch reserved +// metadata field. Two underscores (or more) is acceptable, such as '__field'. +fragment ID_LITERAL: ([_][_]|[@*A-Z])+?[*A-Z_\-0-9]*; + // Last tokens must generate Errors ERROR_RECOGNITION: . -> channel(ERRORCHANNEL); diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index ebfafeec23..cab2eae389 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -430,12 +430,13 @@ private UnresolvedExpression visitConstantFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { + Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent(); return new QualifiedName( - identifiers.stream() - .map(RuleContext::getText) - .map(StringUtils::unquoteIdentifier) - .collect(Collectors.toList()) - ); + identifiers.stream() + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()), + isMetadataField); } private List singleFieldRelevanceArguments(