From 62120fd0db56bd777b12a9a2d4c9592acb501ba8 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 May 2023 12:48:19 -0700 Subject: [PATCH 1/3] Create new anonymizer for new engine (#1665) * Create new anonymizer for new engine (#266) * Created anonymizer listener for anonymizing SQL queries through the new engine Signed-off-by: Matthew Wells * Update for review comments Signed-off-by: Andrew Carbonetto * added missing file header, change public variable to private Signed-off-by: Matthew Wells --------- Signed-off-by: Andrew Carbonetto Signed-off-by: Matthew Wells Co-authored-by: Andrew Carbonetto --- .../sql/legacy/plugin/RestSqlAction.java | 4 +- .../sql/sql/antlr/AnonymizerListener.java | 113 ++++++++++ .../sql/sql/antlr/SQLSyntaxParser.java | 13 +- .../sql/parser/AnonymizerListenerTest.java | 207 ++++++++++++++++++ 4 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 sql/src/main/java/org/opensearch/sql/sql/antlr/AnonymizerListener.java create mode 100644 sql/src/test/java/org/opensearch/sql/sql/parser/AnonymizerListenerTest.java diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java index 88ed42010b..5249d2d5d0 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlAction.java @@ -141,8 +141,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } } - LOG.info("[{}] Incoming request {}: {}", QueryContext.getRequestId(), request.uri(), - QueryDataAnonymizer.anonymizeData(sqlRequest.getSql())); + LOG.info("[{}] Incoming request {}", QueryContext.getRequestId(), request.uri()); Format format = SqlRequestParam.getFormat(request.params()); @@ -157,6 +156,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } LOG.debug("[{}] Request {} is not supported and falling back to old SQL 
engine", QueryContext.getRequestId(), newSqlRequest); + LOG.info("Request Query: {}", QueryDataAnonymizer.anonymizeData(sqlRequest.getSql())); QueryAction queryAction = explainRequest(client, sqlRequest, format); executeSqlRequest(request, queryAction, client, restChannel); } catch (Exception e) { diff --git a/sql/src/main/java/org/opensearch/sql/sql/antlr/AnonymizerListener.java b/sql/src/main/java/org/opensearch/sql/sql/antlr/AnonymizerListener.java new file mode 100644 index 0000000000..bd7b5cbedf --- /dev/null +++ b/sql/src/main/java/org/opensearch/sql/sql/antlr/AnonymizerListener.java @@ -0,0 +1,113 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.sql.antlr; + +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.BACKTICK_QUOTE_ID; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.BOOLEAN; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.COMMA; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.DECIMAL_LITERAL; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.DOT; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.EQUAL_SYMBOL; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.EXCLAMATION_SYMBOL; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.FALSE; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.FROM; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.GREATER_SYMBOL; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.ID; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.LESS_SYMBOL; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.ONE_DECIMAL; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.REAL_LITERAL; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.STRING_LITERAL; +import static 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.TIMESTAMP; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.TRUE; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.TWO_DECIMAL; +import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer.ZERO_DECIMAL; + +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.tree.ErrorNode; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.TerminalNode; + +/** + * Parse tree listener for anonymizing SQL requests. + */ +public class AnonymizerListener implements ParseTreeListener { + private String anonymizedQueryString = ""; + private static final int NO_TYPE = -1; + private int previousType = NO_TYPE; + + @Override + public void enterEveryRule(ParserRuleContext ctx) { + } + + @Override + public void exitEveryRule(ParserRuleContext ctx) { + } + + @Override + public void visitTerminal(TerminalNode node) { + // In these situations don't add a space prior: + // 1. a DOT between two identifiers + // 2. before a comma + // 3. between equal comparison tokens: e.g <= + // 4. 
between alt not equals: <> + int token = node.getSymbol().getType(); + boolean isDotIdentifiers = token == DOT || previousType == DOT; + boolean isComma = token == COMMA; + boolean isEqualComparison = ((token == EQUAL_SYMBOL) + && (previousType == LESS_SYMBOL + || previousType == GREATER_SYMBOL + || previousType == EXCLAMATION_SYMBOL)); + boolean isNotEqualComparisonAlternative = + previousType == LESS_SYMBOL && token == GREATER_SYMBOL; + if (!isDotIdentifiers && !isComma && !isEqualComparison && !isNotEqualComparisonAlternative) { + anonymizedQueryString += " "; + } + + // anonymize the following tokens + switch (node.getSymbol().getType()) { + case ID: + case TIMESTAMP: + case BACKTICK_QUOTE_ID: + if (previousType == FROM) { + anonymizedQueryString += "table"; + } else { + anonymizedQueryString += "identifier"; + } + break; + case ZERO_DECIMAL: + case ONE_DECIMAL: + case TWO_DECIMAL: + case DECIMAL_LITERAL: + case REAL_LITERAL: + anonymizedQueryString += "number"; + break; + case STRING_LITERAL: + anonymizedQueryString += "'string_literal'"; + break; + case BOOLEAN: + case TRUE: + case FALSE: + anonymizedQueryString += "boolean_literal"; + break; + case NO_TYPE: + // end of file + break; + default: + anonymizedQueryString += node.getText().toUpperCase(); + } + previousType = node.getSymbol().getType(); + } + + @Override + public void visitErrorNode(ErrorNode node) { + + } + + public String getAnonymizedQueryString() { + return "(" + anonymizedQueryString + ")"; + } +} diff --git a/sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java b/sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java index ee1e991bd4..4f7b925718 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java +++ b/sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java @@ -8,6 +8,8 @@ import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.tree.ParseTree; +import org.apache.logging.log4j.LogManager; +import 
org.apache.logging.log4j.Logger; import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; import org.opensearch.sql.common.antlr.Parser; import org.opensearch.sql.common.antlr.SyntaxAnalysisErrorListener; @@ -18,6 +20,7 @@ * SQL syntax parser which encapsulates an ANTLR parser. */ public class SQLSyntaxParser implements Parser { + private static final Logger LOG = LogManager.getLogger(SQLSyntaxParser.class); /** * Parse a SQL query by ANTLR parser. @@ -26,10 +29,16 @@ public class SQLSyntaxParser implements Parser { */ @Override public ParseTree parse(String query) { + AnonymizerListener anonymizer = new AnonymizerListener(); + OpenSearchSQLLexer lexer = new OpenSearchSQLLexer(new CaseInsensitiveCharStream(query)); OpenSearchSQLParser parser = new OpenSearchSQLParser(new CommonTokenStream(lexer)); parser.addErrorListener(new SyntaxAnalysisErrorListener()); - return parser.root(); - } + parser.addParseListener(anonymizer); + ParseTree parseTree = parser.root(); + LOG.info("New Engine Request Query: {}", anonymizer.getAnonymizedQueryString()); + + return parseTree; + } } diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AnonymizerListenerTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AnonymizerListenerTest.java new file mode 100644 index 0000000000..59d723e3a2 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AnonymizerListenerTest.java @@ -0,0 +1,207 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.sql.parser; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.ErrorNode; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; +import org.opensearch.sql.sql.antlr.AnonymizerListener; +import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer; +import 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; + +public class AnonymizerListenerTest { + + private final AnonymizerListener anonymizerListener = new AnonymizerListener(); + + /** + * Helper function to parse SQl queries for testing purposes. + * @param query SQL query to be anonymized. + */ + private void parse(String query) { + OpenSearchSQLLexer lexer = new OpenSearchSQLLexer(new CaseInsensitiveCharStream(query)); + OpenSearchSQLParser parser = new OpenSearchSQLParser(new CommonTokenStream(lexer)); + parser.addParseListener(anonymizerListener); + + parser.root(); + } + + @Test + public void queriesShouldHaveAnonymousFieldAndIndex() { + String query = "SELECT ABS(balance) FROM accounts WHERE age > 30 GROUP BY ABS(balance)"; + String expectedQuery = "( SELECT ABS ( identifier ) FROM table " + + "WHERE identifier > number GROUP BY ABS ( identifier ) )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesShouldAnonymousNumbers() { + String query = "SELECT ABS(20), LOG(20.20) FROM accounts"; + String expectedQuery = "( SELECT ABS ( number ), LOG ( number ) FROM table )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesShouldHaveAnonymousBooleanLiterals() { + String query = "SELECT TRUE FROM accounts"; + String expectedQuery = "( SELECT boolean_literal FROM table )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesShouldHaveAnonymousInputStrings() { + String query = "SELECT * FROM accounts WHERE name = 'Oliver'"; + String expectedQuery = "( SELECT * FROM table WHERE identifier = 'string_literal' )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithAliasesShouldAnonymizeSensitiveData() { + String query = "SELECT balance AS b FROM accounts AS 
a"; + String expectedQuery = "( SELECT identifier AS identifier FROM table AS identifier )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithFunctionsShouldAnonymizeSensitiveData() { + String query = "SELECT LTRIM(firstname) FROM accounts"; + String expectedQuery = "( SELECT LTRIM ( identifier ) FROM table )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithAggregatesShouldAnonymizeSensitiveData() { + String query = "SELECT MAX(price) - MIN(price) from tickets"; + String expectedQuery = "( SELECT MAX ( identifier ) - MIN ( identifier ) FROM table )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithSubqueriesShouldAnonymizeSensitiveData() { + String query = "SELECT a.f, a.l, a.a FROM " + + "(SELECT firstname AS f, lastname AS l, age AS a FROM accounts WHERE age > 30) a"; + String expectedQuery = + "( SELECT identifier.identifier, identifier.identifier, identifier.identifier FROM " + + "( SELECT identifier AS identifier, identifier AS identifier, identifier AS identifier " + + "FROM table WHERE identifier > number ) identifier )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithLimitShouldAnonymizeSensitiveData() { + String query = "SELECT balance FROM accounts LIMIT 5"; + String expectedQuery = "( SELECT identifier FROM table LIMIT number )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithOrderByShouldAnonymizeSensitiveData() { + String query = "SELECT firstname FROM accounts ORDER BY lastname"; + String expectedQuery = "( SELECT identifier FROM table ORDER BY identifier )"; + parse(query); + assertEquals(expectedQuery, 
anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithHavingShouldAnonymizeSensitiveData() { + String query = "SELECT SUM(balance) FROM accounts GROUP BY lastname HAVING COUNT(balance) > 2"; + String expectedQuery = "( SELECT SUM ( identifier ) FROM table " + + "GROUP BY identifier HAVING COUNT ( identifier ) > number )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithHighlightShouldAnonymizeSensitiveData() { + String query = "SELECT HIGHLIGHT(str0) FROM CALCS WHERE QUERY_STRING(['str0'], 'FURNITURE')"; + String expectedQuery = "( SELECT HIGHLIGHT ( identifier ) FROM table WHERE " + + "QUERY_STRING ( [ 'string_literal' ], 'string_literal' ) )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithMatchShouldAnonymizeSensitiveData() { + String query = "SELECT str0 FROM CALCS WHERE MATCH(str0, 'FURNITURE')"; + String expectedQuery = "( SELECT identifier FROM table " + + "WHERE MATCH ( identifier, 'string_literal' ) )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithPositionShouldAnonymizeSensitiveData() { + String query = "SELECT POSITION('world' IN 'helloworld')"; + String expectedQuery = "( SELECT POSITION ( 'string_literal' IN 'string_literal' ) )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithMatch_Bool_Prefix_ShouldAnonymizeSensitiveData() { + String query = "SELECT firstname, address FROM accounts WHERE " + + "match_bool_prefix(address, 'Bristol Street', minimum_should_match=2)"; + String expectedQuery = "( SELECT identifier, identifier FROM table WHERE MATCH_BOOL_PREFIX " + + "( identifier, 'string_literal', MINIMUM_SHOULD_MATCH = number ) )"; + parse(query); + assertEquals(expectedQuery, 
anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithGreaterOrEqualShouldAnonymizeSensitiveData() { + String query = "SELECT int0 FROM accounts WHERE int0 >= 0"; + String expectedQuery = "( SELECT identifier FROM table WHERE identifier >= number )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithLessOrEqualShouldAnonymizeSensitiveData() { + String query = "SELECT int0 FROM accounts WHERE int0 <= 0"; + String expectedQuery = "( SELECT identifier FROM table WHERE identifier <= number )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithNotEqualShouldAnonymizeSensitiveData() { + String query = "SELECT int0 FROM accounts WHERE int0 != 0"; + String expectedQuery = "( SELECT identifier FROM table WHERE identifier != number )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + @Test + public void queriesWithNotEqualAlternateShouldAnonymizeSensitiveData() { + String query = "SELECT int0 FROM calcs WHERE int0 <> 0"; + String expectedQuery = "( SELECT identifier FROM table WHERE identifier <> number )"; + parse(query); + assertEquals(expectedQuery, anonymizerListener.getAnonymizedQueryString()); + } + + + /** + * Test added for coverage, but the errorNode will not be hit normally. 
+ */ + @Test + public void enterErrorNote() { + ErrorNode node = mock(ErrorNode.class); + anonymizerListener.visitErrorNode(node); + } +} From 567cabcb28b720b309a02eb104794d80b91addf8 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Tue, 30 May 2023 13:10:57 -0700 Subject: [PATCH 2/3] Release Notes for 2.8.0 (#1676) Signed-off-by: Andrew Carbonetto --- .../opensearch-sql.release-notes-2.8.0.0.md | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 release-notes/opensearch-sql.release-notes-2.8.0.0.md diff --git a/release-notes/opensearch-sql.release-notes-2.8.0.0.md b/release-notes/opensearch-sql.release-notes-2.8.0.0.md new file mode 100644 index 0000000000..b7eeff664b --- /dev/null +++ b/release-notes/opensearch-sql.release-notes-2.8.0.0.md @@ -0,0 +1,37 @@ +Compatible with OpenSearch and OpenSearch Dashboards Version 2.8.0 + +### Features + +* Support for pagination in v2 engine of SELECT * FROM queries ([#1666](https://github.com/opensearch-project/sql/pull/1666)) +* Support Alternate Datetime Formats ([#1664](https://github.com/opensearch-project/sql/pull/1664)) +* Create new anonymizer for new engine ([#1665](https://github.com/opensearch-project/sql/pull/1665)) +* Add Support for Nested Function Use In WHERE Clause Predicate Expression ([#1657](https://github.com/opensearch-project/sql/pull/1657)) +* Cross cluster search in PPL ([#1512](https://github.com/opensearch-project/sql/pull/1512)) +* Added COSH to V2 engine ([#1428](https://github.com/opensearch-project/sql/pull/1428)) +* REST API for GET,PUT and DELETE ([#1482](https://github.com/opensearch-project/sql/pull/1482)) + +### Enhancements + +* Minor clean up of datetime and other classes ([#1310](https://github.com/opensearch-project/sql/pull/1310)) +* Add integration JDBC tests for cursor/fetch_size feature ([#1315](https://github.com/opensearch-project/sql/pull/1315)) +* Refactoring datasource changes to a new module. 
([#1504](https://github.com/opensearch-project/sql/pull/1504)) + +### Bug Fixes + +* Fixing bug where Nested functions used in WHERE, GROUP BY, HAVING, and ORDER BY clauses don't fall back to legacy engine. ([#1549](https://github.com/opensearch-project/sql/pull/1549)) + +### Documentation + +* Add Nested Documentation for 2.7 Related Features ([#1620](https://github.com/opensearch-project/sql/pull/1620)) +* Update usage example doc for PPL cross-cluster search ([#1610](https://github.com/opensearch-project/sql/pull/1610)) +* Documentation and other papercuts for datasource api launch ([#1530](https://github.com/opensearch-project/sql/pull/1530)) + +### Maintenance + +* Fix IT - address breaking changes from upstream. ([#1659](https://github.com/opensearch-project/sql/pull/1659)) +* Increment version to 2.8.0-SNAPSHOT ([#1552](https://github.com/opensearch-project/sql/pull/1552)) +* Backport maintainer list update to `2.x`. ([#1650](https://github.com/opensearch-project/sql/pull/1650)) +* Backport jackson and gradle update from #1580 to 2.x ([#1596](https://github.com/opensearch-project/sql/pull/1596)) +* adding reflections as a dependency ([#1559](https://github.com/opensearch-project/sql/pull/1559)) +* Bump org.json dependency version ([#1586](https://github.com/opensearch-project/sql/pull/1586)) +* Integ Test Fix ([#1541](https://github.com/opensearch-project/sql/pull/1541)) \ No newline at end of file From 986db39c8612848ef3dff1ff77a5190a84180672 Mon Sep 17 00:00:00 2001 From: Guian Gumpac Date: Tue, 30 May 2023 13:16:27 -0700 Subject: [PATCH 3/3] Support Alternate Datetime Formats (#1664) * Support Alternate Datetime Formats (#268) * Add OpenSearchDateType as a datatype for matching with Date/Time OpenSearch types Signed-off-by: Andrew Carbonetto --------- Signed-off-by: Andrew Carbonetto Signed-off-by: GabeFernandez310 Signed-off-by: Guian Gumpac Signed-off-by: MaxKsyunz Co-authored-by: Andrew Carbonetto Co-authored-by: GabeFernandez310 Co-authored-by: 
MaxKsyunz --- docs/user/general/datatypes.rst | 8 +- .../sql/legacy/SQLIntegTestCase.java | 4 + .../opensearch/sql/legacy/TestsConstants.java | 1 + .../org/opensearch/sql/sql/AggregationIT.java | 44 +-- .../opensearch/sql/sql/DateTimeFormatsIT.java | 70 ++++ .../sql/sql/DateTimeFunctionIT.java | 63 ++-- .../src/test/resources/date_formats.json | 4 + .../calcs_index_mappings.json | 8 +- .../date_formats_index_mapping.json | 306 ++++++++++++++++++ .../data/type/OpenSearchDataType.java | 124 ++++--- .../data/type/OpenSearchDateType.java | 298 +++++++++++++++++ .../data/type/OpenSearchTextType.java | 16 +- .../value/OpenSearchExprValueFactory.java | 204 +++++++++--- .../sql/opensearch/mapping/IndexMapping.java | 33 +- .../data/type/OpenSearchDataTypeTest.java | 154 +++++---- .../data/type/OpenSearchDateTypeTest.java | 204 ++++++++++++ .../value/OpenSearchExprTextValueTest.java | 4 +- .../value/OpenSearchExprValueFactoryTest.java | 173 ++++++++-- .../AggregationQueryBuilderTest.java | 56 ++++ .../filter/ExpressionFilterScriptTest.java | 34 +- 20 files changed, 1536 insertions(+), 272 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFormatsIT.java create mode 100644 integ-test/src/test/resources/date_formats.json create mode 100644 integ-test/src/test/resources/indexDefinitions/date_formats_index_mapping.json create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateType.java create mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateTypeTest.java diff --git a/docs/user/general/datatypes.rst b/docs/user/general/datatypes.rst index 5899488ab0..4f1f3100c2 100644 --- a/docs/user/general/datatypes.rst +++ b/docs/user/general/datatypes.rst @@ -91,7 +91,7 @@ The table below list the mapping between OpenSearch Data Type, OpenSearch SQL Da +-----------------+---------------------+-----------+ | text | text | VARCHAR | 
+-----------------+---------------------+-----------+ -| date | timestamp | TIMESTAMP | +| date* | timestamp | TIMESTAMP | +-----------------+---------------------+-----------+ | date_nanos | timestamp | TIMESTAMP | +-----------------+---------------------+-----------+ @@ -104,7 +104,11 @@ The table below list the mapping between OpenSearch Data Type, OpenSearch SQL Da | nested | array | STRUCT | +-----------------+---------------------+-----------+ -Notes: Not all the OpenSearch SQL Type has correspond OpenSearch Type. e.g. data and time. To use function which required such data type, user should explicitly convert the data type. +Notes: +* Not all OpenSearch SQL types have a corresponding OpenSearch type, e.g. date and time. To use a function that requires such a data type, the user should explicitly convert the data type. +* date*: Maps to `timestamp` by default. Based on the "format" property `date` can map to `date` or `time`. See list of supported named formats `here `_. +For example, `basic_date` will map to a `date` type, and `basic_time` will map to a `time` type. 
+ Data Type Conversion ==================== diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index f6e4b23708..d1bcc94506 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -651,6 +651,10 @@ public enum Index { "calcs", getMappingFile("calcs_index_mappings.json"), "src/test/resources/calcs.json"), + DATE_FORMATS(TestsConstants.TEST_INDEX_DATE_FORMATS, + "date_formats", + getMappingFile("date_formats_index_mapping.json"), + "src/test/resources/date_formats.json"), WILDCARD(TestsConstants.TEST_INDEX_WILDCARD, "wildcard", getMappingFile("wildcard_index_mappings.json"), diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index c3af98b794..338be25a0c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -56,6 +56,7 @@ public class TestsConstants { public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer"; public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing"; public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs"; + public final static String TEST_INDEX_DATE_FORMATS = TEST_INDEX + "_date_formats"; public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard"; public final static String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; public final static String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java index 487699cf79..1075b14431 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java @@ -383,17 +383,17 @@ public void testAvgDoubleInMemory() throws IOException { @Test public void testMaxDatePushedDown() throws IOException { - var response = executeQuery(String.format("SELECT max(CAST(date0 AS date))" + var response = executeQuery(String.format("SELECT max(date0)" + " from %s", TEST_INDEX_CALCS)); - verifySchema(response, schema("max(CAST(date0 AS date))", null, "date")); + verifySchema(response, schema("max(date0)", null, "date")); verifyDataRows(response, rows("2004-06-19")); } @Test public void testAvgDatePushedDown() throws IOException { - var response = executeQuery(String.format("SELECT avg(CAST(date0 AS date))" + var response = executeQuery(String.format("SELECT avg(date0)" + " from %s", TEST_INDEX_CALCS)); - verifySchema(response, schema("avg(CAST(date0 AS date))", null, "date")); + verifySchema(response, schema("avg(date0)", null, "date")); verifyDataRows(response, rows("1992-04-23")); } @@ -423,25 +423,25 @@ public void testAvgDateTimePushedDown() throws IOException { @Test public void testMinTimePushedDown() throws IOException { - var response = executeQuery(String.format("SELECT min(CAST(time1 AS time))" + var response = executeQuery(String.format("SELECT min(time1)" + " from %s", TEST_INDEX_CALCS)); - verifySchema(response, schema("min(CAST(time1 AS time))", null, "time")); + verifySchema(response, schema("min(time1)", null, "time")); verifyDataRows(response, rows("00:05:57")); } @Test public void testMaxTimePushedDown() throws IOException { - var response = executeQuery(String.format("SELECT max(CAST(time1 AS time))" + var response = executeQuery(String.format("SELECT max(time1)" + " from %s", TEST_INDEX_CALCS)); - verifySchema(response, schema("max(CAST(time1 AS time))", null, "time")); + verifySchema(response, schema("max(time1)", null, "time")); verifyDataRows(response, rows("22:50:16")); } 
@Test public void testAvgTimePushedDown() throws IOException { - var response = executeQuery(String.format("SELECT avg(CAST(time1 AS time))" + var response = executeQuery(String.format("SELECT avg(time1)" + " from %s", TEST_INDEX_CALCS)); - verifySchema(response, schema("avg(CAST(time1 AS time))", null, "time")); + verifySchema(response, schema("avg(time1)", null, "time")); verifyDataRows(response, rows("13:06:36.25")); } @@ -471,28 +471,28 @@ public void testAvgTimeStampPushedDown() throws IOException { @Test public void testMinDateInMemory() throws IOException { - var response = executeQuery(String.format("SELECT min(CAST(date0 AS date))" + var response = executeQuery(String.format("SELECT min(date0)" + " OVER(PARTITION BY datetime1) from %s", TEST_INDEX_CALCS)); verifySchema(response, - schema("min(CAST(date0 AS date)) OVER(PARTITION BY datetime1)", null, "date")); + schema("min(date0) OVER(PARTITION BY datetime1)", null, "date")); verifySome(response.getJSONArray("datarows"), rows("1972-07-04")); } @Test public void testMaxDateInMemory() throws IOException { - var response = executeQuery(String.format("SELECT max(CAST(date0 AS date))" + var response = executeQuery(String.format("SELECT max(date0)" + " OVER(PARTITION BY datetime1) from %s", TEST_INDEX_CALCS)); verifySchema(response, - schema("max(CAST(date0 AS date)) OVER(PARTITION BY datetime1)", null, "date")); + schema("max(date0) OVER(PARTITION BY datetime1)", null, "date")); verifySome(response.getJSONArray("datarows"), rows("2004-06-19")); } @Test public void testAvgDateInMemory() throws IOException { - var response = executeQuery(String.format("SELECT avg(CAST(date0 AS date))" + var response = executeQuery(String.format("SELECT avg(date0)" + " OVER(PARTITION BY datetime1) from %s", TEST_INDEX_CALCS)); verifySchema(response, - schema("avg(CAST(date0 AS date)) OVER(PARTITION BY datetime1)", null, "date")); + schema("avg(date0) OVER(PARTITION BY datetime1)", null, "date")); 
verifySome(response.getJSONArray("datarows"), rows("1992-04-23")); } @@ -525,28 +525,28 @@ public void testAvgDateTimeInMemory() throws IOException { @Test public void testMinTimeInMemory() throws IOException { - var response = executeQuery(String.format("SELECT min(CAST(time1 AS time))" + var response = executeQuery(String.format("SELECT min(time1)" + " OVER(PARTITION BY datetime1) from %s", TEST_INDEX_CALCS)); verifySchema(response, - schema("min(CAST(time1 AS time)) OVER(PARTITION BY datetime1)", null, "time")); + schema("min(time1) OVER(PARTITION BY datetime1)", null, "time")); verifySome(response.getJSONArray("datarows"), rows("00:05:57")); } @Test public void testMaxTimeInMemory() throws IOException { - var response = executeQuery(String.format("SELECT max(CAST(time1 AS time))" + var response = executeQuery(String.format("SELECT max(time1)" + " OVER(PARTITION BY datetime1) from %s", TEST_INDEX_CALCS)); verifySchema(response, - schema("max(CAST(time1 AS time)) OVER(PARTITION BY datetime1)", null, "time")); + schema("max(time1) OVER(PARTITION BY datetime1)", null, "time")); verifySome(response.getJSONArray("datarows"), rows("22:50:16")); } @Test public void testAvgTimeInMemory() throws IOException { - var response = executeQuery(String.format("SELECT avg(CAST(time1 AS time))" + var response = executeQuery(String.format("SELECT avg(time1)" + " OVER(PARTITION BY datetime1) from %s", TEST_INDEX_CALCS)); verifySchema(response, - schema("avg(CAST(time1 AS time)) OVER(PARTITION BY datetime1)", null, "time")); + schema("avg(time1) OVER(PARTITION BY datetime1)", null, "time")); verifySome(response.getJSONArray("datarows"), rows("13:06:36.25")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFormatsIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFormatsIT.java new file mode 100644 index 0000000000..7cd95fb509 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFormatsIT.java @@ -0,0 +1,70 @@ +/* + * 
Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DATE_FORMATS; +import static org.opensearch.sql.legacy.plugin.RestSqlAction.QUERY_API_ENDPOINT; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; +import static org.opensearch.sql.util.TestUtils.getResponseBody; + +import java.io.IOException; +import java.util.Locale; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class DateTimeFormatsIT extends SQLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + loadIndex(Index.DATE_FORMATS); + } + + @Test + public void testReadingDateFormats() throws IOException { + String query = String.format("SELECT weekyear_week_day, hour_minute_second_millis," + + " strict_ordinal_date_time FROM %s LIMIT 1", TEST_INDEX_DATE_FORMATS); + JSONObject result = executeQuery(query); + verifySchema(result, + schema("weekyear_week_day", null, "date"), + schema("hour_minute_second_millis", null, "time"), + schema("strict_ordinal_date_time", null, "timestamp")); + verifyDataRows(result, + rows("1984-04-12", + "09:07:42", + "1984-04-12 09:07:42.000123456" + )); + } + + @Test + public void testDateFormatsWithOr() throws IOException { + String query = String.format("SELECT yyyy-MM-dd_OR_epoch_millis FROM %s", TEST_INDEX_DATE_FORMATS); + JSONObject result = executeQuery(query); + verifyDataRows(result, + rows("1984-04-12 00:00:00"), + rows("1984-04-12 09:07:42.000123456")); + } + + protected JSONObject executeQuery(String query) throws 
IOException { + Request request = new Request("POST", QUERY_API_ENDPOINT); + request.setJsonEntity(String.format(Locale.ROOT, "{\n" + " \"query\": \"%s\"\n" + "}", query)); + + RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); + restOptionsBuilder.addHeader("Content-Type", "application/json"); + request.setOptions(restOptionsBuilder); + + Response response = client().performRequest(request); + return new JSONObject(getResponseBody(response)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFunctionIT.java index b5677b04a7..91457296d6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFunctionIT.java @@ -40,7 +40,6 @@ public void init() throws Exception { loadIndex(Index.BANK); loadIndex(Index.CALCS); loadIndex(Index.PEOPLE2); - loadIndex(Index.CALCS); } // Integration test framework sets for OpenSearch instance a random timezone. 
@@ -283,9 +282,9 @@ public void testDayOfMonthAliasesReturnTheSameResults() throws IOException { result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT dayofmonth(CAST(date0 AS date)) FROM %s", TEST_INDEX_CALCS)); + "SELECT dayofmonth(date0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT day_of_month(CAST(date0 AS date)) FROM %s", TEST_INDEX_CALCS)); + "SELECT day_of_month(date0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( @@ -301,9 +300,9 @@ public void testDayOfMonthAliasesReturnTheSameResults() throws IOException { result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT dayofmonth(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT dayofmonth(datetime0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT day_of_month(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT day_of_month(datetime0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); } @Test @@ -336,9 +335,9 @@ public void testDayOfWeekAliasesReturnTheSameResults() throws IOException { result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT dayofweek(CAST(date0 AS date)) FROM %s", TEST_INDEX_CALCS)); + "SELECT dayofweek(date0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT day_of_week(CAST(date0 AS date)) FROM %s", TEST_INDEX_CALCS)); + "SELECT day_of_week(date0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( @@ -354,9 +353,9 @@ public void testDayOfWeekAliasesReturnTheSameResults() throws IOException { 
result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT dayofweek(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT dayofweek(datetime0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT day_of_week(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT day_of_week(datetime0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); } @@ -398,9 +397,9 @@ public void testDayOfYearAlternateSyntaxesReturnTheSameResults() throws IOExcept result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT dayofyear(CAST(date0 AS date)) FROM %s", TEST_INDEX_CALCS)); + "SELECT dayofyear(date0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT day_of_year(CAST(date0 AS date)) FROM %s", TEST_INDEX_CALCS)); + "SELECT day_of_year(date0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( @@ -416,9 +415,9 @@ public void testDayOfYearAlternateSyntaxesReturnTheSameResults() throws IOExcept result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT dayofyear(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT dayofyear(datetime0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT day_of_year(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT day_of_year(datetime0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); } @Test @@ -485,7 +484,7 @@ public void testExtractWithDatetime() throws IOException { public void testExtractWithTime() throws IOException { JSONObject timeResult = executeQuery( String.format( - "SELECT extract(HOUR_SECOND FROM cast(time0 
AS TIME)) FROM %s LIMIT 1", + "SELECT extract(HOUR_SECOND FROM time0) FROM %s LIMIT 1", TEST_INDEX_CALCS)); verifyDataRows(timeResult, rows(210732)); @@ -495,7 +494,7 @@ public void testExtractWithTime() throws IOException { public void testExtractWithDate() throws IOException { JSONObject dateResult = executeQuery( String.format( - "SELECT extract(YEAR_MONTH FROM cast(date0 AS DATE)) FROM %s LIMIT 1", + "SELECT extract(YEAR_MONTH FROM date0) FROM %s LIMIT 1", TEST_INDEX_CALCS)); verifyDataRows(dateResult, rows(200404)); } @@ -533,16 +532,16 @@ public void testHourFunctionAliasesReturnTheSameResults() throws IOException { result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT hour(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT hour(datetime0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT hour_of_day(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT hour_of_day(datetime0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); } @Test public void testLastDay() throws IOException { JSONObject result = executeQuery( - String.format("SELECT last_day(cast(date0 as date)) FROM %s LIMIT 3", + String.format("SELECT last_day(date0) FROM %s LIMIT 3", TEST_INDEX_CALCS)); verifyDataRows(result, rows("2004-04-30"), @@ -550,7 +549,7 @@ public void testLastDay() throws IOException { rows("1975-11-30")); result = executeQuery( - String.format("SELECT last_day(datetime(cast(date0 AS string))) FROM %s LIMIT 3", + String.format("SELECT last_day(date0) FROM %s LIMIT 3", TEST_INDEX_CALCS)); verifyDataRows(result, rows("2004-04-30"), @@ -558,7 +557,7 @@ public void testLastDay() throws IOException { rows("1975-11-30")); result = executeQuery( - String.format("SELECT last_day(cast(date0 AS timestamp)) FROM %s LIMIT 3", + String.format("SELECT last_day(date0) FROM %s LIMIT 3", TEST_INDEX_CALCS)); 
verifyDataRows(result, rows("2004-04-30"), @@ -690,9 +689,9 @@ public void testMinuteFunctionAliasesReturnTheSameResults() throws IOException { result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT minute(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT minute(datetime0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT minute_of_hour(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT minute_of_hour(datetime0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); } @@ -734,9 +733,9 @@ public void testMonthAlternateSyntaxesReturnTheSameResults() throws IOException result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT month(CAST(date0 AS date)) FROM %s", TEST_INDEX_CALCS)); + "SELECT month(date0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT month_of_year(CAST(date0 AS date)) FROM %s", TEST_INDEX_CALCS)); + "SELECT month_of_year(date0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( @@ -752,9 +751,9 @@ public void testMonthAlternateSyntaxesReturnTheSameResults() throws IOException result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT month(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT month(datetime0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT month_of_year(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT month_of_year(datetime0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); } @@ -847,9 +846,9 @@ public void testSecondFunctionAliasesReturnTheSameResults() throws IOException { 
result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1 = executeQuery(String.format( - "SELECT second(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT second(datetime0) FROM %s", TEST_INDEX_CALCS)); result2 = executeQuery(String.format( - "SELECT second_of_minute(CAST(datetime0 AS timestamp)) FROM %s", TEST_INDEX_CALCS)); + "SELECT second_of_minute(datetime0) FROM %s", TEST_INDEX_CALCS)); result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); } @@ -994,7 +993,7 @@ public void testToDays() throws IOException { @Test public void testToSeconds() throws IOException { JSONObject result = executeQuery( - String.format("select to_seconds(date(date0)) FROM %s LIMIT 2", TEST_INDEX_CALCS)); + String.format("select to_seconds(date0) FROM %s LIMIT 2", TEST_INDEX_CALCS)); verifyDataRows(result, rows(63249206400L), rows(62246275200L)); result = executeQuery( @@ -1002,7 +1001,7 @@ public void testToSeconds() throws IOException { verifyDataRows(result, rows(63256587455L), rows(63258064234L)); result = executeQuery(String.format( - "select to_seconds(timestamp(datetime0)) FROM %s LIMIT 2", TEST_INDEX_CALCS)); + "select to_seconds(datetime0) FROM %s LIMIT 2", TEST_INDEX_CALCS)); verifyDataRows(result, rows(63256587455L), rows(63258064234L)); } @@ -1091,10 +1090,10 @@ public void testWeekAlternateSyntaxesReturnTheSameResults() throws IOException { result1.getJSONArray("datarows").similar(result2.getJSONArray("datarows")); result1.getJSONArray("datarows").similar(result3.getJSONArray("datarows")); - compareWeekResults("CAST(date0 AS date)", TEST_INDEX_CALCS); + compareWeekResults("date0", TEST_INDEX_CALCS); compareWeekResults("datetime(CAST(time0 AS STRING))", TEST_INDEX_CALCS); compareWeekResults("CAST(time0 AS STRING)", TEST_INDEX_CALCS); - compareWeekResults("CAST(datetime0 AS timestamp)", TEST_INDEX_CALCS); + compareWeekResults("datetime0", TEST_INDEX_CALCS); } @Test diff --git 
a/integ-test/src/test/resources/date_formats.json b/integ-test/src/test/resources/date_formats.json new file mode 100644 index 0000000000..cc694930e9 --- /dev/null +++ b/integ-test/src/test/resources/date_formats.json @@ -0,0 +1,4 @@ +{"index": {}} +{"epoch_millis": "450608862000.123456", "epoch_second": "450608862.000123456", "date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time_nanos": "1984-04-12T09:07:42.000123456Z", "basic_date": "19840412", "basic_date_time": "19840412T090742.000Z", "basic_date_time_no_millis": "19840412T090742Z", "basic_ordinal_date": "1984103", "basic_ordinal_date_time": "1984103T090742.000Z", "basic_ordinal_date_time_no_millis": "1984103T090742Z", "basic_time": "090742.000Z", "basic_time_no_millis": "090742Z", "basic_t_time": "T090742.000Z", "basic_t_time_no_millis": "T090742Z", "basic_week_date": "1984W154", "strict_basic_week_date": "1984W154", "basic_week_date_time": "1984W154T090742.000Z", "strict_basic_week_date_time": "1984W154T090742.000Z", "basic_week_date_time_no_millis": "1984W154T090742Z", "strict_basic_week_date_time_no_millis": "1984W154T090742Z", "date": "1984-04-12", "strict_date": "1984-04-12", "date_hour": "1984-04-12T09", "strict_date_hour": "1984-04-12T09", "date_hour_minute": "1984-04-12T09:07", "strict_date_hour_minute": "1984-04-12T09:07", "date_hour_minute_second": "1984-04-12T09:07:42", "strict_date_hour_minute_second": "1984-04-12T09:07:42", "date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "date_time": "1984-04-12T09:07:42.000Z", "strict_date_time": "1984-04-12T09:07:42.000123456Z", "date_time_no_millis": "1984-04-12T09:07:42Z", "strict_date_time_no_millis": "1984-04-12T09:07:42Z", "hour": "09", "strict_hour": "09", 
"hour_minute": "09:07", "strict_hour_minute": "09:07", "hour_minute_second": "09:07:42", "strict_hour_minute_second": "09:07:42", "hour_minute_second_fraction": "09:07:42.000", "strict_hour_minute_second_fraction": "09:07:42.000", "hour_minute_second_millis": "09:07:42.000", "strict_hour_minute_second_millis": "09:07:42.000", "ordinal_date": "1984-103", "strict_ordinal_date": "1984-103", "ordinal_date_time": "1984-103T09:07:42.000123456Z", "strict_ordinal_date_time": "1984-103T09:07:42.000123456Z", "ordinal_date_time_no_millis": "1984-103T09:07:42Z", "strict_ordinal_date_time_no_millis": "1984-103T09:07:42Z", "time": "09:07:42.000Z", "strict_time": "09:07:42.000Z", "time_no_millis": "09:07:42Z", "strict_time_no_millis": "09:07:42Z", "t_time": "T09:07:42.000Z", "strict_t_time": "T09:07:42.000Z", "t_time_no_millis": "T09:07:42Z", "strict_t_time_no_millis": "T09:07:42Z", "week_date": "1984-W15-4", "strict_week_date": "1984-W15-4", "week_date_time": "1984-W15-4T09:07:42.000Z", "strict_week_date_time": "1984-W15-4T09:07:42.000Z", "week_date_time_no_millis": "1984-W15-4T09:07:42Z", "strict_week_date_time_no_millis": "1984-W15-4T09:07:42Z", "weekyear_week_day": "1984-W15-4", "strict_weekyear_week_day": "1984-W15-4", "year_month_day": "1984-04-12", "strict_year_month_day": "1984-04-12", "yyyy-MM-dd": "1984-04-12", "HH:mm:ss": "09:07:42", "yyyy-MM-dd_OR_epoch_millis": "1984-04-12", "hour_minute_second_OR_t_time": "09:07:42"} +{"index": {}} +{"epoch_millis": "450608862000.123456", "epoch_second": "450608862.000123456", "date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time_nanos": "1984-04-12T09:07:42.000123456Z", "basic_date": "19840412", "basic_date_time": "19840412T090742.000Z", "basic_date_time_no_millis": "19840412T090742Z", "basic_ordinal_date": "1984103", "basic_ordinal_date_time": "1984103T090742.000Z", "basic_ordinal_date_time_no_millis": "1984103T090742Z", "basic_time": "090742.000Z", 
"basic_time_no_millis": "090742Z", "basic_t_time": "T090742.000Z", "basic_t_time_no_millis": "T090742Z", "basic_week_date": "1984W154", "strict_basic_week_date": "1984W154", "basic_week_date_time": "1984W154T090742.000Z", "strict_basic_week_date_time": "1984W154T090742.000Z", "basic_week_date_time_no_millis": "1984W154T090742Z", "strict_basic_week_date_time_no_millis": "1984W154T090742Z", "date": "1984-04-12", "strict_date": "1984-04-12", "date_hour": "1984-04-12T09", "strict_date_hour": "1984-04-12T09", "date_hour_minute": "1984-04-12T09:07", "strict_date_hour_minute": "1984-04-12T09:07", "date_hour_minute_second": "1984-04-12T09:07:42", "strict_date_hour_minute_second": "1984-04-12T09:07:42", "date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "date_time": "1984-04-12T09:07:42.000Z", "strict_date_time": "1984-04-12T09:07:42.000123456Z", "date_time_no_millis": "1984-04-12T09:07:42Z", "strict_date_time_no_millis": "1984-04-12T09:07:42Z", "hour": "09", "strict_hour": "09", "hour_minute": "09:07", "strict_hour_minute": "09:07", "hour_minute_second": "09:07:42", "strict_hour_minute_second": "09:07:42", "hour_minute_second_fraction": "09:07:42.000", "strict_hour_minute_second_fraction": "09:07:42.000", "hour_minute_second_millis": "09:07:42.000", "strict_hour_minute_second_millis": "09:07:42.000", "ordinal_date": "1984-103", "strict_ordinal_date": "1984-103", "ordinal_date_time": "1984-103T09:07:42.000123456Z", "strict_ordinal_date_time": "1984-103T09:07:42.000123456Z", "ordinal_date_time_no_millis": "1984-103T09:07:42Z", "strict_ordinal_date_time_no_millis": "1984-103T09:07:42Z", "time": "09:07:42.000Z", "strict_time": "09:07:42.000Z", "time_no_millis": "09:07:42Z", "strict_time_no_millis": "09:07:42Z", "t_time": "T09:07:42.000Z", "strict_t_time": 
"T09:07:42.000Z", "t_time_no_millis": "T09:07:42Z", "strict_t_time_no_millis": "T09:07:42Z", "week_date": "1984-W15-4", "strict_week_date": "1984-W15-4", "week_date_time": "1984-W15-4T09:07:42.000Z", "strict_week_date_time": "1984-W15-4T09:07:42.000Z", "week_date_time_no_millis": "1984-W15-4T09:07:42Z", "strict_week_date_time_no_millis": "1984-W15-4T09:07:42Z", "weekyear_week_day": "1984-W15-4", "strict_weekyear_week_day": "1984-W15-4", "year_month_day": "1984-04-12", "strict_year_month_day": "1984-04-12", "yyyy-MM-dd": "1984-04-12", "HH:mm:ss": "09:07:42", "yyyy-MM-dd_OR_epoch_millis": "450608862000.123456", "hour_minute_second_OR_t_time": "T09:07:42.000Z"} diff --git a/integ-test/src/test/resources/indexDefinitions/calcs_index_mappings.json b/integ-test/src/test/resources/indexDefinitions/calcs_index_mappings.json index 08a88a9d32..bc1cd79a90 100644 --- a/integ-test/src/test/resources/indexDefinitions/calcs_index_mappings.json +++ b/integ-test/src/test/resources/indexDefinitions/calcs_index_mappings.json @@ -57,19 +57,19 @@ }, "date0" : { "type" : "date", - "format": "yyyy-MM-dd" + "format": "year_month_day" }, "date1" : { "type" : "date", - "format": "yyyy-MM-dd" + "format": "year_month_day" }, "date2" : { "type" : "date", - "format": "yyyy-MM-dd" + "format": "year_month_day" }, "date3" : { "type" : "date", - "format": "yyyy-MM-dd" + "format": "year_month_day" }, "time0" : { "type" : "date", diff --git a/integ-test/src/test/resources/indexDefinitions/date_formats_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/date_formats_index_mapping.json new file mode 100644 index 0000000000..938f598d0b --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/date_formats_index_mapping.json @@ -0,0 +1,306 @@ +{ + "mappings" : { + "properties" : { + "epoch_millis" : { + "type" : "date", + "format" : "epoch_millis" + }, + "epoch_second" : { + "type" : "date", + "format" : "epoch_second" + }, + "date_optional_time" : { + "type" : "date", + "format" 
: "date_optional_time" + }, + "strict_date_optional_time" : { + "type" : "date", + "format" : "strict_date_optional_time" + }, + "strict_date_optional_time_nanos" : { + "type" : "date", + "format" : "strict_date_optional_time_nanos" + }, + "basic_date" : { + "type" : "date", + "format" : "basic_date" + }, + "basic_date_time" : { + "type" : "date", + "format" : "basic_date_time" + }, + "basic_date_time_no_millis" : { + "type" : "date", + "format" : "basic_date_time_no_millis" + }, + "basic_ordinal_date" : { + "type" : "date", + "format" : "basic_ordinal_date" + }, + "basic_ordinal_date_time" : { + "type" : "date", + "format" : "basic_ordinal_date_time" + }, + "basic_ordinal_date_time_no_millis" : { + "type" : "date", + "format" : "basic_ordinal_date_time_no_millis" + }, + "basic_time" : { + "type" : "date", + "format" : "basic_time" + }, + "basic_time_no_millis" : { + "type" : "date", + "format" : "basic_time_no_millis" + }, + "basic_t_time" : { + "type" : "date", + "format" : "basic_t_time" + }, + "basic_t_time_no_millis" : { + "type" : "date", + "format" : "basic_t_time_no_millis" + }, + "basic_week_date" : { + "type" : "date", + "format" : "basic_week_date" + }, + "strict_basic_week_date" : { + "type" : "date", + "format" : "strict_basic_week_date" + }, + "basic_week_date_time" : { + "type" : "date", + "format" : "basic_week_date_time" + }, + "strict_basic_week_date_time" : { + "type" : "date", + "format" : "strict_basic_week_date_time" + }, + "basic_week_date_time_no_millis" : { + "type" : "date", + "format" : "basic_week_date_time_no_millis" + }, + "strict_basic_week_date_time_no_millis" : { + "type" : "date", + "format" : "strict_basic_week_date_time_no_millis" + }, + "date" : { + "type" : "date", + "format" : "date" + }, + "strict_date" : { + "type" : "date", + "format" : "strict_date" + }, + "date_hour" : { + "type" : "date", + "format" : "date_hour" + }, + "strict_date_hour" : { + "type" : "date", + "format" : "strict_date_hour" + }, + "date_hour_minute" : 
{ + "type" : "date", + "format" : "date_hour_minute" + }, + "strict_date_hour_minute" : { + "type" : "date", + "format" : "strict_date_hour_minute" + }, + "date_hour_minute_second" : { + "type" : "date", + "format" : "date_hour_minute_second" + }, + "strict_date_hour_minute_second" : { + "type" : "date", + "format" : "strict_date_hour_minute_second" + }, + "date_hour_minute_second_fraction" : { + "type" : "date", + "format" : "date_hour_minute_second_fraction" + }, + "strict_date_hour_minute_second_fraction" : { + "type" : "date", + "format" : "strict_date_hour_minute_second_fraction" + }, + "date_hour_minute_second_millis" : { + "type" : "date", + "format" : "date_hour_minute_second_millis" + }, + "strict_date_hour_minute_second_millis" : { + "type" : "date", + "format" : "strict_date_hour_minute_second_millis" + }, + "date_time" : { + "type" : "date", + "format" : "date_time" + }, + "strict_date_time" : { + "type" : "date", + "format" : "strict_date_time" + }, + "date_time_no_millis" : { + "type" : "date", + "format" : "date_time_no_millis" + }, + "strict_date_time_no_millis" : { + "type" : "date", + "format" : "strict_date_time_no_millis" + }, + "hour" : { + "type" : "date", + "format" : "hour" + }, + "strict_hour" : { + "type" : "date", + "format" : "strict_hour" + }, + "hour_minute" : { + "type" : "date", + "format" : "hour_minute" + }, + "strict_hour_minute" : { + "type" : "date", + "format" : "strict_hour_minute" + }, + "hour_minute_second" : { + "type" : "date", + "format" : "hour_minute_second" + }, + "strict_hour_minute_second" : { + "type" : "date", + "format" : "strict_hour_minute_second" + }, + "hour_minute_second_fraction" : { + "type" : "date", + "format" : "hour_minute_second_fraction" + }, + "strict_hour_minute_second_fraction" : { + "type" : "date", + "format" : "strict_hour_minute_second_fraction" + }, + "hour_minute_second_millis" : { + "type" : "date", + "format" : "hour_minute_second_millis" + }, + "strict_hour_minute_second_millis" : { + 
"type" : "date", + "format" : "strict_hour_minute_second_millis" + }, + "ordinal_date" : { + "type" : "date", + "format" : "ordinal_date" + }, + "strict_ordinal_date" : { + "type" : "date", + "format" : "strict_ordinal_date" + }, + "ordinal_date_time" : { + "type" : "date", + "format" : "ordinal_date_time" + }, + "strict_ordinal_date_time" : { + "type" : "date", + "format" : "strict_ordinal_date_time" + }, + "ordinal_date_time_no_millis" : { + "type" : "date", + "format" : "ordinal_date_time_no_millis" + }, + "strict_ordinal_date_time_no_millis" : { + "type" : "date", + "format" : "strict_ordinal_date_time_no_millis" + }, + "time" : { + "type" : "date", + "format" : "time" + }, + "strict_time" : { + "type" : "date", + "format" : "strict_time" + }, + "time_no_millis" : { + "type" : "date", + "format" : "time_no_millis" + }, + "strict_time_no_millis" : { + "type" : "date", + "format" : "strict_time_no_millis" + }, + "t_time" : { + "type" : "date", + "format" : "t_time" + }, + "strict_t_time" : { + "type" : "date", + "format" : "strict_t_time" + }, + "t_time_no_millis" : { + "type" : "date", + "format" : "t_time_no_millis" + }, + "strict_t_time_no_millis" : { + "type" : "date", + "format" : "strict_t_time_no_millis" + }, + "week_date" : { + "type" : "date", + "format" : "week_date" + }, + "strict_week_date" : { + "type" : "date", + "format" : "strict_week_date" + }, + "week_date_time" : { + "type" : "date", + "format" : "week_date_time" + }, + "strict_week_date_time" : { + "type" : "date", + "format" : "strict_week_date_time" + }, + "week_date_time_no_millis" : { + "type" : "date", + "format" : "week_date_time_no_millis" + }, + "strict_week_date_time_no_millis" : { + "type" : "date", + "format" : "strict_week_date_time_no_millis" + }, + "weekyear_week_day" : { + "type" : "date", + "format" : "weekyear_week_day" + }, + "strict_weekyear_week_day" : { + "type" : "date", + "format" : "strict_weekyear_week_day" + }, + "year_month_day" : { + "type" : "date", + "format" : 
"year_month_day" + }, + "strict_year_month_day" : { + "type" : "date", + "format" : "strict_year_month_day" + }, + "yyyy-MM-dd" : { + "type" : "date", + "format": "yyyy-MM-dd" + }, + "HH:mm:ss" : { + "type" : "date", + "format": "HH:mm:ss" + }, + "yyyy-MM-dd_OR_epoch_millis" : { + "type" : "date", + "format": "yyyy-MM-dd||epoch_millis" + }, + "hour_minute_second_OR_t_time" : { + "type" : "date", + "format": "hour_minute_second||t_time" + } + } + } +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java index 2fda12a567..273b980d2a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java @@ -65,6 +65,7 @@ public String toString() { } @EqualsAndHashCode.Exclude + @Getter protected MappingType mappingType; // resolved ExprCoreType @@ -97,56 +98,86 @@ public ExprType getExprType() { instances.put(t.toString(), OpenSearchDataType.of(t))); } + /** + * Parses index mapping and maps it to a Data type in the SQL plugin. + * @param indexMapping An input with keys and objects that need to be mapped to a data type. + * @return The mapping. + */ + public static Map parseMapping(Map indexMapping) { + Map result = new LinkedHashMap<>(); + + if (indexMapping == null) { + return result; + } + + indexMapping.forEach((k, v) -> { + var innerMap = (Map)v; + // by default, the type is treated as an Object if "type" is not provided + var type = ((String) innerMap + .getOrDefault( + "type", + "object")) + .replace("_", ""); + if (!EnumUtils.isValidEnumIgnoreCase(OpenSearchDataType.MappingType.class, type)) { + // unknown type, e.g. 
`alias` + // TODO resolve alias reference + return; + } + // create OpenSearchDataType + result.put(k, OpenSearchDataType.of( + EnumUtils.getEnumIgnoreCase(OpenSearchDataType.MappingType.class, type), + innerMap) + ); + }); + return result; + } + /** * A constructor function which builds proper `OpenSearchDataType` for given mapping `Type`. * @param mappingType A mapping type. * @return An instance or inheritor of `OpenSearchDataType`. */ - public static OpenSearchDataType of(MappingType mappingType) { - var res = instances.getOrDefault(mappingType.toString(), null); - if (res != null) { - return res; - } - ExprCoreType exprCoreType = mappingType.getExprCoreType(); - if (exprCoreType == ExprCoreType.UNKNOWN) { - switch (mappingType) { + public static OpenSearchDataType of(MappingType mappingType, Map innerMap) { + OpenSearchDataType res = instances.getOrDefault(mappingType.toString(), + new OpenSearchDataType(mappingType) + ); + switch (mappingType) { + case Object: + // TODO: use Object type once it has been added + case Nested: + if (innerMap.isEmpty()) { + return res; + } + Map properties = + parseMapping((Map) innerMap.getOrDefault("properties", Map.of())); + OpenSearchDataType objectDataType = res.cloneEmpty(); + objectDataType.properties = properties; + return objectDataType; + case Text: // TODO update these 2 below #1038 https://github.com/opensearch-project/sql/issues/1038 - case Text: return OpenSearchTextType.of(); - case GeoPoint: return OpenSearchGeoPointType.of(); - case Binary: return OpenSearchBinaryType.of(); - case Ip: return OpenSearchIpType.of(); - default: - throw new IllegalArgumentException(mappingType.toString()); - } + Map fields = + parseMapping((Map) innerMap.getOrDefault("fields", Map.of())); + return (!fields.isEmpty()) ? 
OpenSearchTextType.of(fields) : OpenSearchTextType.of(); + case GeoPoint: return OpenSearchGeoPointType.of(); + case Binary: return OpenSearchBinaryType.of(); + case Ip: return OpenSearchIpType.of(); + case Date: + // Default date formatter is used when "" is passed as the second parameter + String format = (String) innerMap.getOrDefault("format", ""); + return OpenSearchDateType.of(format); + default: + return res; } - res = new OpenSearchDataType(mappingType); - res.exprCoreType = exprCoreType; - return res; } /** * A constructor function which builds proper `OpenSearchDataType` for given mapping `Type`. * Designed to be called by the mapping parser only (and tests). * @param mappingType A mapping type. - * @param properties Properties to set. - * @param fields Fields to set. * @return An instance or inheritor of `OpenSearchDataType`. */ - public static OpenSearchDataType of(MappingType mappingType, - Map properties, - Map fields) { - var res = of(mappingType); - if (!properties.isEmpty() || !fields.isEmpty()) { - // Clone to avoid changing the singleton instance. 
- res = res.cloneEmpty(); - res.properties = ImmutableMap.copyOf(properties); - res.fields = ImmutableMap.copyOf(fields); - } - return res; - } - - protected OpenSearchDataType(MappingType mappingType) { - this.mappingType = mappingType; + public static OpenSearchDataType of(MappingType mappingType) { + return of(mappingType, Map.of()); } /** @@ -162,14 +193,20 @@ public static OpenSearchDataType of(ExprType type) { if (res != null) { return res; } + if (OpenSearchDateType.isDateTypeCompatible(type)) { + return OpenSearchDateType.of(type); + } + return new OpenSearchDataType((ExprCoreType) type); } - protected OpenSearchDataType(ExprCoreType type) { - this.exprCoreType = type; + protected OpenSearchDataType(MappingType mappingType) { + this.mappingType = mappingType; + this.exprCoreType = mappingType.getExprCoreType(); } - protected OpenSearchDataType() { + protected OpenSearchDataType(ExprCoreType type) { + this.exprCoreType = type; } // For datatypes with properties (example: object and nested types) @@ -178,11 +215,6 @@ protected OpenSearchDataType() { @EqualsAndHashCode.Exclude Map properties = ImmutableMap.of(); - // text could have fields - // a read-only collection - @EqualsAndHashCode.Exclude - Map fields = ImmutableMap.of(); - @Override // Called when building TypeEnvironment and when serializing PPL response public String typeName() { @@ -209,10 +241,8 @@ public String legacyTypeName() { * @return A cloned object. */ protected OpenSearchDataType cloneEmpty() { - var copy = new OpenSearchDataType(); - copy.mappingType = mappingType; - copy.exprCoreType = exprCoreType; - return copy; + return this.mappingType == null + ? 
new OpenSearchDataType(this.exprCoreType) : new OpenSearchDataType(this.mappingType); } /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateType.java new file mode 100644 index 0000000000..3554a5b2b4 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateType.java @@ -0,0 +1,298 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.data.type; + +import static org.opensearch.common.time.DateFormatter.splitCombinedPatterns; +import static org.opensearch.common.time.DateFormatter.strip8Prefix; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; + +import java.util.List; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import org.opensearch.common.time.DateFormatter; +import org.opensearch.common.time.FormatNames; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; + +/** + * Date type with support for predefined and custom formats read from the index mapping. 
+ */ +@EqualsAndHashCode(callSuper = true) +public class OpenSearchDateType extends OpenSearchDataType { + + private static final OpenSearchDateType instance = new OpenSearchDateType(); + + public static final List SUPPORTED_NAMED_DATETIME_FORMATS = List.of( + FormatNames.ISO8601, + FormatNames.EPOCH_MILLIS, + FormatNames.EPOCH_SECOND, + FormatNames.BASIC_DATE_TIME, + FormatNames.BASIC_DATE_TIME_NO_MILLIS, + FormatNames.BASIC_ORDINAL_DATE_TIME, + FormatNames.BASIC_ORDINAL_DATE_TIME_NO_MILLIS, + FormatNames.BASIC_WEEK_DATE_TIME, + FormatNames.STRICT_BASIC_WEEK_DATE_TIME, + FormatNames.BASIC_WEEK_DATE_TIME_NO_MILLIS, + FormatNames.STRICT_BASIC_WEEK_DATE_TIME_NO_MILLIS, + FormatNames.BASIC_WEEK_DATE, + FormatNames.STRICT_BASIC_WEEK_DATE, + FormatNames.DATE_OPTIONAL_TIME, + FormatNames.STRICT_DATE_OPTIONAL_TIME, + FormatNames.STRICT_DATE_OPTIONAL_TIME_NANOS, + FormatNames.DATE_TIME, + FormatNames.STRICT_DATE_TIME, + FormatNames.DATE_TIME_NO_MILLIS, + FormatNames.STRICT_DATE_TIME_NO_MILLIS, + FormatNames.DATE_HOUR_MINUTE_SECOND_FRACTION, + FormatNames.STRICT_DATE_HOUR_MINUTE_SECOND_FRACTION, + FormatNames.DATE_HOUR_MINUTE_SECOND_FRACTION, + FormatNames.DATE_HOUR_MINUTE_SECOND_MILLIS, + FormatNames.STRICT_DATE_HOUR_MINUTE_SECOND_MILLIS, + FormatNames.DATE_HOUR_MINUTE_SECOND, + FormatNames.STRICT_DATE_HOUR_MINUTE_SECOND, + FormatNames.DATE_HOUR_MINUTE, + FormatNames.STRICT_DATE_HOUR_MINUTE, + FormatNames.DATE_HOUR, + FormatNames.STRICT_DATE_HOUR, + FormatNames.ORDINAL_DATE_TIME, + FormatNames.STRICT_ORDINAL_DATE_TIME, + FormatNames.ORDINAL_DATE_TIME_NO_MILLIS, + FormatNames.STRICT_ORDINAL_DATE_TIME_NO_MILLIS, + FormatNames.WEEK_DATE_TIME, + FormatNames.STRICT_WEEK_DATE_TIME, + FormatNames.WEEK_DATE_TIME_NO_MILLIS, + FormatNames.STRICT_WEEK_DATE_TIME_NO_MILLIS + ); + + // list of named formats that only support year/month/day + public static final List SUPPORTED_NAMED_DATE_FORMATS = List.of( + FormatNames.BASIC_DATE, + FormatNames.BASIC_ORDINAL_DATE, + FormatNames.DATE, + 
FormatNames.STRICT_DATE, + FormatNames.YEAR_MONTH_DAY, + FormatNames.STRICT_YEAR_MONTH_DAY, + FormatNames.YEAR_MONTH, + FormatNames.STRICT_YEAR_MONTH, + FormatNames.YEAR, + FormatNames.STRICT_YEAR, + FormatNames.ORDINAL_DATE, + FormatNames.STRICT_ORDINAL_DATE, + FormatNames.WEEK_DATE, + FormatNames.STRICT_WEEK_DATE, + FormatNames.WEEKYEAR_WEEK_DAY, + FormatNames.STRICT_WEEKYEAR_WEEK_DAY, + FormatNames.WEEK_YEAR, + FormatNames.WEEK_YEAR_WEEK, + FormatNames.STRICT_WEEKYEAR_WEEK, + FormatNames.WEEKYEAR, + FormatNames.STRICT_WEEKYEAR + ); + + // list of named formats that only support hour/minute/second + public static final List SUPPORTED_NAMED_TIME_FORMATS = List.of( + FormatNames.BASIC_TIME, + FormatNames.BASIC_TIME_NO_MILLIS, + FormatNames.BASIC_T_TIME, + FormatNames.BASIC_T_TIME_NO_MILLIS, + FormatNames.TIME, + FormatNames.STRICT_TIME, + FormatNames.TIME_NO_MILLIS, + FormatNames.STRICT_TIME_NO_MILLIS, + FormatNames.HOUR_MINUTE_SECOND_FRACTION, + FormatNames.STRICT_HOUR_MINUTE_SECOND_FRACTION, + FormatNames.HOUR_MINUTE_SECOND_MILLIS, + FormatNames.STRICT_HOUR_MINUTE_SECOND_MILLIS, + FormatNames.HOUR_MINUTE_SECOND, + FormatNames.STRICT_HOUR_MINUTE_SECOND, + FormatNames.HOUR_MINUTE, + FormatNames.STRICT_HOUR_MINUTE, + FormatNames.HOUR, + FormatNames.STRICT_HOUR, + FormatNames.T_TIME, + FormatNames.STRICT_T_TIME, + FormatNames.T_TIME_NO_MILLIS, + FormatNames.STRICT_T_TIME_NO_MILLIS + ); + + @EqualsAndHashCode.Exclude + String formatString; + + private OpenSearchDateType() { + super(MappingType.Date); + this.formatString = ""; + } + + private OpenSearchDateType(ExprCoreType exprCoreType) { + this(); + this.exprCoreType = exprCoreType; + } + + private OpenSearchDateType(ExprType exprType) { + this(); + this.exprCoreType = (ExprCoreType) exprType; + } + + private OpenSearchDateType(String formatStringArg) { + super(MappingType.Date); + this.formatString = formatStringArg; + this.exprCoreType = getExprTypeFromFormatString(formatStringArg); + } + + /** + * Retrieves and 
splits a user defined format string from the mapping into a list of formats. + * @return A list of format names and user defined formats. + */ + private List getFormatList() { + String format = strip8Prefix(formatString); + List patterns = splitCombinedPatterns(format); + return patterns; + } + + + /** + * Retrieves a list of named OpenSearch formatters given by user mapping. + * @return a list of DateFormatters that can be used to parse a Date/Time/Timestamp. + */ + public List getAllNamedFormatters() { + return getFormatList().stream() + .filter(formatString -> FormatNames.forName(formatString) != null) + .map(DateFormatter::forPattern).collect(Collectors.toList()); + } + + /** + * Retrieves a list of custom formatters defined by the user. + * @return a list of DateFormatters that can be used to parse a Date/Time/Timestamp. + */ + public List getAllCustomFormatters() { + return getFormatList().stream() + .filter(formatString -> FormatNames.forName(formatString) == null) + .map(DateFormatter::forPattern).collect(Collectors.toList()); + } + + /** + * Retrieves a list of named formatters that format for dates. + * + * @return a list of DateFormatters that can be used to parse a Date. + */ + public List getDateNamedFormatters() { + return getFormatList().stream() + .filter(formatString -> { + FormatNames namedFormat = FormatNames.forName(formatString); + return SUPPORTED_NAMED_DATE_FORMATS.contains(namedFormat); + }) + .map(DateFormatter::forPattern).collect(Collectors.toList()); + } + + /** + * Retrieves a list of named formatters that format for Times. + * + * @return a list of DateFormatters that can be used to parse a Time. 
+ */ + public List getTimeNamedFormatters() { + return getFormatList().stream() + .filter(formatString -> { + FormatNames namedFormat = FormatNames.forName(formatString); + return SUPPORTED_NAMED_TIME_FORMATS.contains(namedFormat); + }) + .map(DateFormatter::forPattern).collect(Collectors.toList()); + } + + private ExprCoreType getExprTypeFromFormatString(String formatString) { + if (formatString.isEmpty()) { + // FOLLOW-UP: check the default formatter - and set it here instead + // of assuming that the default is always a timestamp + return TIMESTAMP; + } + + List namedFormatters = getAllNamedFormatters(); + + if (namedFormatters.isEmpty()) { + return TIMESTAMP; + } + + if (!getAllCustomFormatters().isEmpty()) { + // FOLLOW-UP: support custom formats; fall back to TIMESTAMP for now + return TIMESTAMP; + } + + // if there is nothing in the dateformatter that accepts a year/month/day, then + // we can assume the type is strictly a Time object + if (namedFormatters.size() == getTimeNamedFormatters().size()) { + return TIME; + } + + // if there is nothing in the dateformatter that accepts a hour/minute/second, then + // we can assume the type is strictly a Date object + if (namedFormatters.size() == getDateNamedFormatters().size()) { + return DATE; + } + + // According to the user mapping, this field may contain a DATE or a TIME + return TIMESTAMP; + } + + /** + * Check if ExprType is compatible for creation of OpenSearchDateType object. + * + * @param exprType type of the field in the SQL query + * @return a boolean if type is a date/time/timestamp type + */ + public static boolean isDateTypeCompatible(ExprType exprType) { + if (!(exprType instanceof ExprCoreType)) { + return false; + } + switch ((ExprCoreType) exprType) { + case TIMESTAMP: + case DATETIME: + case DATE: + case TIME: + return true; + default: + return false; + } + } + + /** + * Create a Date type from a mapping format string. + * @return A new type object. 
+ */ + public static OpenSearchDateType of(String format) { + return new OpenSearchDateType(format); + } + + public static OpenSearchDateType of(ExprCoreType exprCoreType) { + return new OpenSearchDateType(exprCoreType); + } + + public static OpenSearchDateType of(ExprType exprType) { + return new OpenSearchDateType(exprType); + } + + public static OpenSearchDateType of() { + return OpenSearchDateType.instance; + } + + @Override + public List getParent() { + return List.of(this.exprCoreType); + } + + @Override + public boolean shouldCast(ExprType other) { + return false; + } + + @Override + protected OpenSearchDataType cloneEmpty() { + if (this.formatString.isEmpty()) { + return OpenSearchDateType.of(this.exprCoreType); + } + return OpenSearchDateType.of(this.formatString); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchTextType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchTextType.java index 1098662e65..67b7296834 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchTextType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchTextType.java @@ -18,24 +18,28 @@ * The type of a text value. See * doc */ -@EqualsAndHashCode(callSuper = false) public class OpenSearchTextType extends OpenSearchDataType { private static final OpenSearchTextType instance = new OpenSearchTextType(); + // text could have fields + // a read-only collection + @EqualsAndHashCode.Exclude + Map fields = ImmutableMap.of(); + private OpenSearchTextType() { super(MappingType.Text); exprCoreType = UNKNOWN; } /** - * Create a Text type which has fields. - * @param fields Fields to set for the new type. - * @return A new type object. + * Constructs a Text Type using the passed in fields argument. + * @param fields The fields to be used to construct the text type. 
+ * @return A new OpenSearchTextType object */ public static OpenSearchTextType of(Map fields) { var res = new OpenSearchTextType(); - res.fields = ImmutableMap.copyOf(fields); + res.fields = fields; return res; } @@ -59,7 +63,7 @@ public Map getFields() { @Override protected OpenSearchDataType cloneEmpty() { - return OpenSearchTextType.of(fields); + return OpenSearchTextType.of(Map.copyOf(this.fields)); } /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 0c4548a368..1ff5af7304 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -13,6 +13,9 @@ import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.utils.DateTimeFormatters.DATE_TIME_FORMATTER; +import static org.opensearch.sql.utils.DateTimeFormatters.STRICT_HOUR_MINUTE_SECOND_FORMATTER; +import static org.opensearch.sql.utils.DateTimeFormatters.STRICT_YEAR_MONTH_DAY_FORMATTER; +import static org.opensearch.sql.utils.DateTimeUtils.UTC_ZONE_ID; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; @@ -20,21 +23,26 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterators; import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZonedDateTime; import java.time.format.DateTimeParseException; +import java.time.temporal.TemporalAccessor; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.function.Function; +import 
java.util.function.BiFunction; import lombok.Getter; import lombok.Setter; +import org.opensearch.common.time.DateFormatter; import org.opensearch.common.time.DateFormatters; import org.opensearch.sql.data.model.ExprBooleanValue; import org.opensearch.sql.data.model.ExprByteValue; import org.opensearch.sql.data.model.ExprCollectionValue; import org.opensearch.sql.data.model.ExprDateValue; -import org.opensearch.sql.data.model.ExprDatetimeValue; import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprIntegerValue; @@ -48,6 +56,7 @@ import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; import org.opensearch.sql.opensearch.data.utils.Content; import org.opensearch.sql.opensearch.data.utils.ObjectContent; import org.opensearch.sql.opensearch.data.utils.OpenSearchJsonContent; @@ -85,40 +94,42 @@ public void extendTypeMapping(Map typeMapping) { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final Map> typeActionMap = - new ImmutableMap.Builder>() + private final Map> typeActionMap = + new ImmutableMap.Builder>() .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), - c -> new ExprIntegerValue(c.intValue())) + (c, dt) -> new ExprIntegerValue(c.intValue())) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Long), - c -> new ExprLongValue(c.longValue())) + (c, dt) -> new ExprLongValue(c.longValue())) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Short), - c -> new ExprShortValue(c.shortValue())) + (c, dt) -> new ExprShortValue(c.shortValue())) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Byte), - c -> new ExprByteValue(c.byteValue())) + (c, dt) -> new ExprByteValue(c.byteValue())) 
.put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Float), - c -> new ExprFloatValue(c.floatValue())) + (c, dt) -> new ExprFloatValue(c.floatValue())) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), - c -> new ExprDoubleValue(c.doubleValue())) + (c, dt) -> new ExprDoubleValue(c.doubleValue())) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Text), - c -> new OpenSearchExprTextValue(c.stringValue())) + (c, dt) -> new OpenSearchExprTextValue(c.stringValue())) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Keyword), - c -> new ExprStringValue(c.stringValue())) + (c, dt) -> new ExprStringValue(c.stringValue())) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Boolean), - c -> ExprBooleanValue.of(c.booleanValue())) - .put(OpenSearchDataType.of(TIMESTAMP), this::parseTimestamp) - .put(OpenSearchDataType.of(DATE), - c -> new ExprDateValue(parseTimestamp(c).dateValue().toString())) - .put(OpenSearchDataType.of(TIME), - c -> new ExprTimeValue(parseTimestamp(c).timeValue().toString())) - .put(OpenSearchDataType.of(DATETIME), - c -> new ExprDatetimeValue(parseTimestamp(c).datetimeValue())) + (c, dt) -> ExprBooleanValue.of(c.booleanValue())) + //Handles the creation of DATE, TIME & DATETIME + .put(OpenSearchDateType.of(TIME), + this::createOpenSearchDateType) + .put(OpenSearchDateType.of(DATE), + this::createOpenSearchDateType) + .put(OpenSearchDateType.of(TIMESTAMP), + this::createOpenSearchDateType) + .put(OpenSearchDateType.of(DATETIME), + this::createOpenSearchDateType) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Ip), - c -> new OpenSearchExprIpValue(c.stringValue())) + (c, dt) -> new OpenSearchExprIpValue(c.stringValue())) .put(OpenSearchDataType.of(OpenSearchDataType.MappingType.GeoPoint), - c -> new OpenSearchExprGeoPointValue(c.geoValue().getLeft(), + (c, dt) -> new OpenSearchExprGeoPointValue(c.geoValue().getLeft(), c.geoValue().getRight())) 
.put(OpenSearchDataType.of(OpenSearchDataType.MappingType.Binary), - c -> new OpenSearchExprBinaryValue(c.stringValue())) + (c, dt) -> new OpenSearchExprBinaryValue(c.stringValue())) .build(); /** @@ -173,7 +184,7 @@ private ExprValue parse(Content content, String field, Optional fieldT return parseArray(content, field); } else { if (typeActionMap.containsKey(type)) { - return typeActionMap.get(type).apply(content); + return typeActionMap.get(type).apply(content, type); } else { throw new IllegalStateException( String.format( @@ -191,33 +202,140 @@ private Optional type(String field) { } /** - * Only default strict_date_optional_time||epoch_millis is supported, - * strict_date_optional_time_nanos||epoch_millis if field is date_nanos. - * - * docs - * The customized date_format is not supported. + * Parses value with the first matching formatter as an Instant in UTC. + * + * @param value - timestamp as string + * @param dateType - field type + * @return Instant without timezone */ - private ExprValue constructTimestamp(String value) { - try { - return new ExprTimestampValue( - // Using OpenSearch DateFormatters for now. 
- DateFormatters.from(DATE_TIME_FORMATTER.parse(value)).toInstant()); - } catch (DateTimeParseException e) { - throw new IllegalStateException( + private ExprValue parseTimestampString(String value, OpenSearchDateType dateType) { + Instant parsed = null; + for (DateFormatter formatter : dateType.getAllNamedFormatters()) { + try { + TemporalAccessor accessor = formatter.parse(value); + ZonedDateTime zonedDateTime = DateFormatters.from(accessor); + // remove the Zone + parsed = zonedDateTime.withZoneSameLocal(ZoneId.of("Z")).toInstant(); + } catch (IllegalArgumentException ignored) { + // nothing to do, try another format + } + } + + // FOLLOW-UP PR: Check custom formatters too + + // if no named formatters are available, use the default + if (dateType.getAllNamedFormatters().size() == 0 + || dateType.getAllCustomFormatters().size() > 0) { + try { + parsed = DateFormatters.from(DATE_TIME_FORMATTER.parse(value)).toInstant(); + } catch (DateTimeParseException e) { + // ignored + } + } + + if (parsed == null) { + // otherwise, throw an error that no formatters worked + throw new IllegalArgumentException( String.format( - "Construct ExprTimestampValue from \"%s\" failed, unsupported date format.", value), - e); + "Construct ExprTimestampValue from \"%s\" failed, unsupported date format.", value) + ); + } + + return new ExprTimestampValue(parsed); + } + + /** + * return the first matching formatter as a time without timezone. 
+ * + * @param value - time as string + * @param dateType - field data type + * @return time without timezone + */ + private ExprValue parseTimeString(String value, OpenSearchDateType dateType) { + for (DateFormatter formatter : dateType.getAllNamedFormatters()) { + try { + TemporalAccessor accessor = formatter.parse(value); + ZonedDateTime zonedDateTime = DateFormatters.from(accessor); + return new ExprTimeValue( + zonedDateTime.withZoneSameLocal(ZoneId.of("Z")).toLocalTime()); + } catch (IllegalArgumentException ignored) { + // nothing to do, try another format + } + } + + // if no named formatters are available, use the default + if (dateType.getAllNamedFormatters().size() == 0) { + try { + return new ExprTimeValue( + DateFormatters.from(STRICT_HOUR_MINUTE_SECOND_FORMATTER.parse(value)).toLocalTime()); + } catch (DateTimeParseException e) { + // ignored + } } + throw new IllegalArgumentException("Construct ExprTimeValue from \"" + value + + "\" failed, unsupported time format."); } - private ExprValue parseTimestamp(Content value) { + /** + * return the first matching formatter as a date without timezone. 
+ * + * @param value - date as string + * @param dateType - field data type + * @return date without timezone + */ + private ExprValue parseDateString(String value, OpenSearchDateType dateType) { + for (DateFormatter formatter : dateType.getAllNamedFormatters()) { + try { + TemporalAccessor accessor = formatter.parse(value); + ZonedDateTime zonedDateTime = DateFormatters.from(accessor); + // return the first matching formatter as a date without timezone + return new ExprDateValue( + zonedDateTime.withZoneSameLocal(ZoneId.of("Z")).toLocalDate()); + } catch (IllegalArgumentException ignored) { + // nothing to do, try another format + } + } + + // if no named formatters are available, use the default + if (dateType.getAllNamedFormatters().size() == 0) { + try { + return new ExprDateValue( + DateFormatters.from(STRICT_YEAR_MONTH_DAY_FORMATTER.parse(value)).toLocalDate()); + } catch (DateTimeParseException e) { + // ignored + } + } + throw new IllegalArgumentException("Construct ExprDateValue from \"" + value + + "\" failed, unsupported date format."); + } + + private ExprValue createOpenSearchDateType(Content value, ExprType type) { + OpenSearchDateType dt = (OpenSearchDateType) type; + ExprType returnFormat = dt.getExprType(); + if (value.isNumber()) { + Instant epochMillis = Instant.ofEpochMilli(value.longValue()); + if (returnFormat == TIME) { + return new ExprTimeValue(LocalTime.from(epochMillis.atZone(UTC_ZONE_ID))); + } + if (returnFormat == DATE) { + return new ExprDateValue(LocalDate.ofInstant(epochMillis, UTC_ZONE_ID)); + } return new ExprTimestampValue(Instant.ofEpochMilli(value.longValue())); - } else if (value.isString()) { - return constructTimestamp(value.stringValue()); - } else { - return new ExprTimestampValue((Instant) value.objectValue()); } + + if (value.isString()) { + if (returnFormat == TIME) { + return parseTimeString(value.stringValue(), dt); + } + if (returnFormat == DATE) { + return parseDateString(value.stringValue(), dt); + } + // else 
timestamp/datetime + return parseTimestampString(value.stringValue(), dt); + } + + return new ExprTimestampValue((Instant) value.objectValue()); } private ExprValue parseStruct(Content content, String prefix) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/mapping/IndexMapping.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/mapping/IndexMapping.java index 4fdcf0c637..0185ca95b6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/mapping/IndexMapping.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/mapping/IndexMapping.java @@ -27,10 +27,15 @@ public class IndexMapping { @Getter private final Map fieldMappings; + /** + * Maps each column in the index definition to an OpenSearchSQL datatype. + * @param metaData The metadata retrieved from the index mapping defined by the user. + */ @SuppressWarnings("unchecked") public IndexMapping(MappingMetadata metaData) { - this.fieldMappings = parseMapping((Map) metaData.getSourceAsMap() - .getOrDefault("properties", null)); + this.fieldMappings = OpenSearchDataType.parseMapping( + (Map) metaData.getSourceAsMap().getOrDefault("properties", null) + ); } /** @@ -41,28 +46,4 @@ public IndexMapping(MappingMetadata metaData) { public int size() { return fieldMappings.size(); } - - @SuppressWarnings("unchecked") - private Map parseMapping(Map indexMapping) { - Map result = new LinkedHashMap<>(); - if (indexMapping != null) { - indexMapping.forEach((k, v) -> { - var innerMap = (Map)v; - // TODO: confirm that only `object` mappings can omit `type` field. - var type = ((String) innerMap.getOrDefault("type", "object")).replace("_", ""); - if (!EnumUtils.isValidEnumIgnoreCase(OpenSearchDataType.MappingType.class, type)) { - // unknown type, e.g. 
`alias` - // TODO resolve alias reference - return; - } - // TODO read formats for date type - result.put(k, OpenSearchDataType.of( - EnumUtils.getEnumIgnoreCase(OpenSearchDataType.MappingType.class, type), - parseMapping((Map) innerMap.getOrDefault("properties", null)), - parseMapping((Map) innerMap.getOrDefault("fields", null)) - )); - }); - } - return result; - } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java index 5cd76b1962..8d69b3d855 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java @@ -9,6 +9,7 @@ import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotSame; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertSame; @@ -18,6 +19,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.BYTE; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -49,6 +51,10 @@ class OpenSearchDataTypeTest { private static final OpenSearchDataType textKeywordType = OpenSearchTextType.of(Map.of("words", OpenSearchTextType.of(MappingType.Keyword))); + private static final String emptyFormatString = ""; + + private static final OpenSearchDateType 
dateType = OpenSearchDateType.of(emptyFormatString); + @Test public void isCompatible() { assertTrue(STRING.isCompatible(textType)); @@ -131,11 +137,13 @@ public void of_MappingType(MappingType mappingType, String name, ExprType dataTy public void of_ExprCoreType(ExprCoreType coreType) { assumeFalse(coreType == UNKNOWN); var type = OpenSearchDataType.of(coreType); - assertAll( - () -> assertEquals(coreType.toString(), type.typeName()), - () -> assertEquals(coreType.toString(), type.legacyTypeName()), - () -> assertEquals(coreType, type.getExprType()) - ); + if (type instanceof OpenSearchDateType) { + assertEquals(coreType, type.getExprType()); + } else { + assertEquals(coreType.toString(), type.typeName()); + assertEquals(coreType.toString(), type.legacyTypeName()); + assertEquals(coreType, type.getExprType()); + } } @ParameterizedTest(name = "{0}") @@ -157,15 +165,20 @@ public void of_OpenSearchDataType_from_MappingType(OpenSearchDataType.MappingTyp @Test // All types without `fields` and `properties` are singletones unless cloned. 
- public void types_but_clones_are_singletones_and_cached() { + public void types_but_clones_are_singletons_and_cached() { var type = OpenSearchDataType.of(MappingType.Object); var alsoType = OpenSearchDataType.of(MappingType.Object); - var typeWithProperties = OpenSearchDataType.of(MappingType.Object, - Map.of("subfield", OpenSearchDataType.of(INTEGER)), Map.of()); - var typeWithFields = OpenSearchDataType.of(MappingType.Text, - Map.of(), Map.of("subfield", OpenSearchDataType.of(INTEGER))); - + Map properties = Map.of( + "properties", + Map.of("number", Map.of("type", "integer"))); + var typeWithProperties = OpenSearchDataType.of( + MappingType.Object, + properties); + var typeWithFields = OpenSearchDataType.of( + MappingType.Text, + Map.of()); var cloneType = type.cloneEmpty(); + assertAll( () -> assertSame(type, alsoType), () -> assertNotSame(type, cloneType), @@ -173,6 +186,7 @@ public void types_but_clones_are_singletones_and_cached() { () -> assertNotSame(type, typeWithFields), () -> assertNotSame(typeWithProperties, typeWithProperties.cloneEmpty()), () -> assertNotSame(typeWithFields, typeWithFields.cloneEmpty()), + () -> assertNotSame(dateType, dateType.cloneEmpty()), () -> assertSame(OpenSearchDataType.of(MappingType.Text), OpenSearchTextType.of()), () -> assertSame(OpenSearchDataType.of(MappingType.Binary), @@ -182,7 +196,7 @@ public void types_but_clones_are_singletones_and_cached() { () -> assertSame(OpenSearchDataType.of(MappingType.Ip), OpenSearchIpType.of()), () -> assertNotSame(OpenSearchTextType.of(), - OpenSearchTextType.of(Map.of("subfield", OpenSearchDataType.of(INTEGER)))), + OpenSearchTextType.of(Map.of("properties", OpenSearchDataType.of(INTEGER)))), () -> assertSame(OpenSearchDataType.of(INTEGER), OpenSearchDataType.of(INTEGER)), () -> assertSame(OpenSearchDataType.of(STRING), OpenSearchDataType.of(STRING)), () -> assertSame(OpenSearchDataType.of(STRUCT), OpenSearchDataType.of(STRUCT)), @@ -213,26 +227,23 @@ public void 
fields_and_properties_are_readonly() { @Test // Test and type added for coverage only public void of_null_MappingType() { - assertThrows(IllegalArgumentException.class, () -> OpenSearchDataType.of(MappingType.Invalid)); + assertNotNull(OpenSearchDataType.of(MappingType.Invalid)); } @Test // cloneEmpty doesn't clone properties and fields. // Fields are cloned by OpenSearchTextType::cloneEmpty, because it is used in that type only. public void cloneEmpty() { - var type = OpenSearchDataType.of(MappingType.Object, - Map.of("val", OpenSearchDataType.of(INTEGER)), - Map.of("words", OpenSearchDataType.of(STRING))); + var type = OpenSearchDataType.of( + MappingType.Object, + Map.of("val", OpenSearchDataType.of(INTEGER)) + ); var clone = type.cloneEmpty(); var textClone = textKeywordType.cloneEmpty(); assertAll( // can compare because `properties` and `fields` are marked as @EqualsAndHashCode.Exclude () -> assertEquals(type, clone), - // read private field `fields` - () -> assertTrue( - ((Map) FieldUtils.readField(clone, "fields", true)) - .isEmpty()), () -> assertTrue(clone.getProperties().isEmpty()), () -> assertEquals(textKeywordType, textClone), () -> assertEquals(FieldUtils.readField(textKeywordType, "fields", true), @@ -261,17 +272,17 @@ public void cloneEmpty() { // ================= // as // ================= - // type : Object - // type.subtype : Object - // type.subtype.subsubtype : Object - // type.subtype.subsubtype.textWithKeywordType : Text + // mapping : Object + // mapping.submapping : Object + // mapping.submapping.subsubmapping : Object + // mapping.submapping.subsubmapping.textWithKeywordType : Text // |- keyword : Keyword - // type.subtype.subsubtype.INTEGER : INTEGER - // type.subtype.geo_point : GeoPoint - // type.subtype.textWithFieldsType: Text + // mapping.submapping.subsubmapping.INTEGER : INTEGER + // mapping.submapping.geo_point : GeoPoint + // mapping.submapping.textWithFieldsType: Text // |- words : Keyword - // type.text : Text - // 
type.keyword : Keyword + // mapping.text : Text + // mapping.keyword : Keyword // ================== // Objects are flattened by OpenSearch, but Texts aren't // TODO Arrays @@ -281,28 +292,27 @@ public void traverseAndFlatten() { var objectType = OpenSearchDataType.of(MappingType.Object); assertAll( () -> assertEquals(9, flattened.size()), - () -> assertTrue(flattened.get("type").getProperties().isEmpty()), - () -> assertTrue(flattened.get("type.subtype").getProperties().isEmpty()), - () -> assertTrue(flattened.get("type.subtype.subsubtype").getProperties().isEmpty()), + () -> assertTrue(flattened.get("mapping").getProperties().isEmpty()), + () -> assertTrue(flattened.get("mapping.submapping").getProperties().isEmpty()), + () -> assertTrue( + flattened.get("mapping.submapping.subsubmapping").getProperties().isEmpty()), - () -> assertEquals(objectType, flattened.get("type")), - () -> assertEquals(objectType, flattened.get("type.subtype")), - () -> assertEquals(objectType, flattened.get("type.subtype.subsubtype")), + () -> assertEquals(objectType, flattened.get("mapping")), + () -> assertEquals(objectType, flattened.get("mapping.submapping")), + () -> assertEquals(objectType, flattened.get("mapping.submapping.subsubmapping")), () -> assertEquals(OpenSearchDataType.of(MappingType.Keyword), - flattened.get("type.keyword")), + flattened.get("mapping.keyword")), () -> assertEquals(OpenSearchDataType.of(MappingType.Text), - flattened.get("type.text")), - + flattened.get("mapping.text")), () -> assertEquals(OpenSearchGeoPointType.of(), - flattened.get("type.subtype.geo_point")), + flattened.get("mapping.submapping.geo_point")), () -> assertEquals(OpenSearchTextType.of(), - flattened.get("type.subtype.textWithFieldsType")), - + flattened.get("mapping.submapping.textWithFieldsType")), () -> assertEquals(OpenSearchTextType.of(), - flattened.get("type.subtype.subsubtype.textWithKeywordType")), + flattened.get("mapping.submapping.subsubmapping.texttype")), () -> 
assertEquals(OpenSearchDataType.of(INTEGER), - flattened.get("type.subtype.subsubtype.INTEGER")) + flattened.get("mapping.submapping.subsubmapping.INTEGER")) ); } @@ -313,13 +323,13 @@ public void resolve() { assertAll( () -> assertNull(OpenSearchDataType.resolve(mapping, "incorrect")), () -> assertEquals(OpenSearchDataType.of(MappingType.Object), - OpenSearchDataType.resolve(mapping, "type")), + OpenSearchDataType.resolve(mapping, "mapping")), () -> assertEquals(OpenSearchDataType.of(MappingType.Object), - OpenSearchDataType.resolve(mapping, "subtype")), + OpenSearchDataType.resolve(mapping, "submapping")), () -> assertEquals(OpenSearchDataType.of(MappingType.Object), - OpenSearchDataType.resolve(mapping, "subsubtype")), + OpenSearchDataType.resolve(mapping, "subsubmapping")), () -> assertEquals(OpenSearchDataType.of(MappingType.Text), - OpenSearchDataType.resolve(mapping, "textWithKeywordType")), + OpenSearchDataType.resolve(mapping, "texttype")), () -> assertEquals(OpenSearchDataType.of(MappingType.Text), OpenSearchDataType.resolve(mapping, "textWithFieldsType")), () -> assertEquals(OpenSearchDataType.of(MappingType.Text), @@ -358,28 +368,31 @@ public void text_type_with_fields_ctor() { } private Map getSampleMapping() { - var textWithKeywordType = OpenSearchTextType.of(Map.of("keyword", - OpenSearchDataType.of(MappingType.Keyword))); + Map subsubmapping = Map.of( + "properties", Map.of( + "texttype", Map.of("type", "text"), + "INTEGER", Map.of("type", "integer") + ) + ); - var subsubsubtypes = Map.of( - "textWithKeywordType", textWithKeywordType, - "INTEGER", OpenSearchDataType.of(INTEGER)); - - var subsubtypes = Map.of( - "subsubtype", OpenSearchDataType.of(MappingType.Object, - subsubsubtypes, Map.of()), - "textWithFieldsType", OpenSearchDataType.of(MappingType.Text, Map.of(), - Map.of("words", OpenSearchDataType.of(MappingType.Keyword))), - "geo_point", OpenSearchGeoPointType.of()); - - var subtypes = Map.of( - "subtype", 
OpenSearchDataType.of(MappingType.Object, - subsubtypes, Map.of()), - "keyword", OpenSearchDataType.of(MappingType.Keyword), - "text", OpenSearchDataType.of(MappingType.Text)); - - var type = OpenSearchDataType.of(MappingType.Object, subtypes, Map.of()); - return Map.of("type", type); + Map submapping = Map.of( + "properties", Map.of( + "subsubmapping", subsubmapping, + "textWithFieldsType", Map.of("type", "text", "fieldsType", true), + "geo_point", Map.of("type", "geo_point") + ) + ); + + Map types = Map.of( + "properties", Map.of( + "submapping", submapping, + "keyword", Map.of("type", "keyword"), + "text", Map.of("type", "text") + ) + ); + + var mapping = OpenSearchDataType.of(MappingType.Object, types); + return Map.of("mapping", mapping); } @Test @@ -392,4 +405,9 @@ public void test_getExprType() { assertEquals(DOUBLE, OpenSearchDataType.of(MappingType.ScaledFloat).getExprType()); assertEquals(TIMESTAMP, OpenSearchDataType.of(MappingType.Date).getExprType()); } + + @Test + public void test_shouldCastFunction() { + assertFalse(dateType.shouldCast(DATE)); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateTypeTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateTypeTest.java new file mode 100644 index 0000000000..f0add5bcd9 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateTypeTest.java @@ -0,0 +1,204 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.opensearch.data.type; + +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; +import static 
org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeFalse; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import static org.opensearch.sql.opensearch.data.type.OpenSearchDateType.SUPPORTED_NAMED_DATETIME_FORMATS; +import static org.opensearch.sql.opensearch.data.type.OpenSearchDateType.SUPPORTED_NAMED_DATE_FORMATS; +import static org.opensearch.sql.opensearch.data.type.OpenSearchDateType.SUPPORTED_NAMED_TIME_FORMATS; +import static org.opensearch.sql.opensearch.data.type.OpenSearchDateType.isDateTypeCompatible; + +import java.util.EnumSet; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.opensearch.common.time.FormatNames; +import org.opensearch.sql.data.type.ExprType; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class OpenSearchDateTypeTest { + private static final String defaultFormatString = ""; + + private static final String dateFormatString = "date"; + + private static final String timeFormatString = "hourMinuteSecond"; + + private static final String datetimeFormatString = "basic_date_time"; + + private static final OpenSearchDateType defaultDateType = + OpenSearchDateType.of(defaultFormatString); + private static final OpenSearchDateType dateDateType = + OpenSearchDateType.of(dateFormatString); + private static final OpenSearchDateType timeDateType = + OpenSearchDateType.of(timeFormatString); + private static final OpenSearchDateType datetimeDateType = + OpenSearchDateType.of(datetimeFormatString); + + @Test + public void isCompatible() { + // 
timestamp type is compatible with all date-types + assertTrue(TIMESTAMP.isCompatible(defaultDateType)); + assertTrue(TIMESTAMP.isCompatible(dateDateType)); + assertTrue(TIMESTAMP.isCompatible(timeDateType)); + assertTrue(TIMESTAMP.isCompatible(datetimeDateType)); + + // datetime + assertFalse(DATETIME.isCompatible(defaultDateType)); + assertTrue(DATETIME.isCompatible(dateDateType)); + assertTrue(DATETIME.isCompatible(timeDateType)); + assertFalse(DATETIME.isCompatible(datetimeDateType)); + + // time type + assertFalse(TIME.isCompatible(defaultDateType)); + assertFalse(TIME.isCompatible(dateDateType)); + assertTrue(TIME.isCompatible(timeDateType)); + assertFalse(TIME.isCompatible(datetimeDateType)); + + // date type + assertFalse(DATE.isCompatible(defaultDateType)); + assertTrue(DATE.isCompatible(dateDateType)); + assertFalse(DATE.isCompatible(timeDateType)); + assertFalse(DATE.isCompatible(datetimeDateType)); + } + + // `typeName` and `legacyTypeName` return the same thing for date objects: + // https://github.com/opensearch-project/sql/issues/1296 + @Test + public void check_typeName() { + // always use the MappingType of "DATE" + assertEquals("DATE", defaultDateType.typeName()); + assertEquals("DATE", timeDateType.typeName()); + assertEquals("DATE", dateDateType.typeName()); + assertEquals("DATE", datetimeDateType.typeName()); + } + + @Test + public void check_legacyTypeName() { + // always use the legacy "DATE" type + assertEquals("DATE", defaultDateType.legacyTypeName()); + assertEquals("DATE", timeDateType.legacyTypeName()); + assertEquals("DATE", dateDateType.legacyTypeName()); + assertEquals("DATE", datetimeDateType.legacyTypeName()); + } + + @Test + public void check_exprTypeName() { + // exprType changes based on type (no datetime): + assertEquals(TIMESTAMP, defaultDateType.getExprType()); + assertEquals(TIME, timeDateType.getExprType()); + assertEquals(DATE, dateDateType.getExprType()); + assertEquals(TIMESTAMP, datetimeDateType.getExprType()); + } + +
@Test + public void checkSupportedFormatNamesCoverage() { + EnumSet allFormatNames = EnumSet.allOf(FormatNames.class); + allFormatNames.stream().forEach(formatName -> { + assertTrue( + SUPPORTED_NAMED_DATETIME_FORMATS.contains(formatName) + || SUPPORTED_NAMED_DATE_FORMATS.contains(formatName) + || SUPPORTED_NAMED_TIME_FORMATS.contains(formatName), + formatName + " not supported"); + }); + } + + @Test + public void checkTimestampFormatNames() { + SUPPORTED_NAMED_DATETIME_FORMATS.stream().forEach( + datetimeFormat -> { + String camelCaseName = datetimeFormat.getCamelCaseName(); + if (camelCaseName != null && !camelCaseName.isEmpty()) { + OpenSearchDateType dateType = + OpenSearchDateType.of(camelCaseName); + assertTrue(dateType.getExprType() == TIMESTAMP, camelCaseName + + " does not format to a TIMESTAMP type, instead got " + + dateType.getExprType()); + } + + String snakeCaseName = datetimeFormat.getSnakeCaseName(); + if (snakeCaseName != null && !snakeCaseName.isEmpty()) { + OpenSearchDateType dateType = OpenSearchDateType.of(snakeCaseName); + assertTrue(dateType.getExprType() == TIMESTAMP, snakeCaseName + + " does not format to a TIMESTAMP type, instead got " + + dateType.getExprType()); + } + } + ); + + // check the default format case + OpenSearchDateType dateType = OpenSearchDateType.of(""); + assertTrue(dateType.getExprType() == TIMESTAMP); + } + + @Test + public void checkDateFormatNames() { + SUPPORTED_NAMED_DATE_FORMATS.stream().forEach( + dateFormat -> { + String camelCaseName = dateFormat.getCamelCaseName(); + if (camelCaseName != null && !camelCaseName.isEmpty()) { + OpenSearchDateType dateType = + OpenSearchDateType.of(camelCaseName); + assertTrue(dateType.getExprType() == DATE, camelCaseName + + " does not format to a DATE type, instead got " + + dateType.getExprType()); + } + + String snakeCaseName = dateFormat.getSnakeCaseName(); + if (snakeCaseName != null && !snakeCaseName.isEmpty()) { + OpenSearchDateType dateType = 
OpenSearchDateType.of(snakeCaseName); + assertTrue(dateType.getExprType() == DATE, snakeCaseName + + " does not format to a DATE type, instead got " + + dateType.getExprType()); + } + } + ); + } + + @Test + public void checkTimeFormatNames() { + SUPPORTED_NAMED_TIME_FORMATS.stream().forEach( + timeFormat -> { + String camelCaseName = timeFormat.getCamelCaseName(); + if (camelCaseName != null && !camelCaseName.isEmpty()) { + OpenSearchDateType dateType = + OpenSearchDateType.of(camelCaseName); + assertTrue(dateType.getExprType() == TIME, camelCaseName + + " does not format to a TIME type, instead got " + + dateType.getExprType()); + } + + String snakeCaseName = timeFormat.getSnakeCaseName(); + if (snakeCaseName != null && !snakeCaseName.isEmpty()) { + OpenSearchDateType dateType = OpenSearchDateType.of(snakeCaseName); + assertTrue(dateType.getExprType() == TIME, snakeCaseName + + " does not format to a TIME type, instead got " + + dateType.getExprType()); + } + } + ); + } + + @Test + public void checkIfDateTypeCompatible() { + assertTrue(isDateTypeCompatible(DATE)); + assertFalse(isDateTypeCompatible(OpenSearchDataType.of( + OpenSearchDataType.MappingType.Text))); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprTextValueTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprTextValueTest.java index 2dfa5de93a..b60402e746 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprTextValueTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprTextValueTest.java @@ -55,9 +55,9 @@ void non_text_types_arent_converted() { @Test void non_text_types_with_nested_objects_arent_converted() { var objectType = OpenSearchDataType.of(OpenSearchDataType.MappingType.Object, - Map.of("subfield", OpenSearchDataType.of(STRING)), Map.of()); + Map.of("subfield", OpenSearchDataType.of(STRING))); var arrayType = 
OpenSearchDataType.of(OpenSearchDataType.MappingType.Nested, - Map.of("subfield", OpenSearchDataType.of(STRING)), Map.of()); + Map.of("subfield", OpenSearchDataType.of(STRING))); assertAll( () -> assertEquals("field", OpenSearchTextType.convertTextToKeyword("field", objectType)), () -> assertEquals("field", OpenSearchTextType.convertTextToKeyword("field", arrayType)) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 8f2c954f65..81ac39ede0 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -33,12 +33,15 @@ import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import static org.opensearch.sql.utils.DateTimeUtils.UTC_ZONE_ID; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; import java.util.LinkedHashMap; import java.util.Map; import lombok.EqualsAndHashCode; @@ -53,6 +56,7 @@ import org.opensearch.sql.data.model.ExprTupleValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.data.utils.OpenSearchJsonContent; @@ -67,10 +71,20 @@ class OpenSearchExprValueFactoryTest { .put("floatV", OpenSearchDataType.of(FLOAT)) 
.put("doubleV", OpenSearchDataType.of(DOUBLE)) .put("stringV", OpenSearchDataType.of(STRING)) - .put("dateV", OpenSearchDataType.of(DATE)) - .put("datetimeV", OpenSearchDataType.of(DATETIME)) - .put("timeV", OpenSearchDataType.of(TIME)) - .put("timestampV", OpenSearchDataType.of(TIMESTAMP)) + .put("dateV", OpenSearchDateType.of(DATE)) + .put("datetimeV", OpenSearchDateType.of(DATETIME)) + .put("timeV", OpenSearchDateType.of(TIME)) + .put("timestampV", OpenSearchDateType.of(TIMESTAMP)) + .put("datetimeDefaultV", OpenSearchDateType.of()) + .put("dateStringV", OpenSearchDateType.of("date")) + .put("timeStringV", OpenSearchDateType.of("time")) + .put("epochMillisV", OpenSearchDateType.of("epoch_millis")) + .put("dateOrEpochMillisV", OpenSearchDateType.of("date_time_no_millis||epoch_millis")) + .put("timeNoMillisOrTimeV", OpenSearchDateType.of("time_no_millis||time")) + .put("dateOrOrdinalDateV", OpenSearchDateType.of("date||ordinal_date")) + .put("customFormatV", OpenSearchDateType.of("yyyy-MM-dd-HH-mm-ss")) + .put("customAndEpochMillisV", + OpenSearchDateType.of("yyyy-MM-dd-HH-mm-ss||epoch_millis")) .put("boolV", OpenSearchDataType.of(BOOLEAN)) .put("structV", OpenSearchDataType.of(STRUCT)) .put("structV.id", OpenSearchDataType.of(INTEGER)) @@ -192,7 +206,43 @@ public void constructText() { } @Test - public void constructDate() { + public void constructDates() { + ExprValue dateStringV = constructFromObject("dateStringV", "1984-04-12"); + assertEquals(new ExprDateValue("1984-04-12"), dateStringV); + + assertEquals( + new ExprDateValue(LocalDate.ofInstant(Instant.ofEpochMilli(450576000000L), + UTC_ZONE_ID)), + constructFromObject("dateV", 450576000000L)); + + assertEquals( + new ExprDateValue("1984-04-12"), + constructFromObject("dateOrOrdinalDateV", "1984-103")); + assertEquals( + new ExprDateValue("2015-01-01"), + tupleValue("{\"dateV\":\"2015-01-01\"}").get("dateV")); + } + + @Test + public void constructTimes() { + ExprValue timeStringV = 
constructFromObject("timeStringV","12:10:30.000Z"); + assertTrue(timeStringV.isDateTime()); + assertTrue(timeStringV instanceof ExprTimeValue); + assertEquals(new ExprTimeValue("12:10:30"), timeStringV); + + assertEquals( + new ExprTimeValue(LocalTime.from(Instant.ofEpochMilli(1420070400001L).atZone(UTC_ZONE_ID))), + constructFromObject("timeV", 1420070400001L)); + assertEquals( + new ExprTimeValue("09:07:42.000"), + constructFromObject("timeNoMillisOrTimeV", "09:07:42.000Z")); + assertEquals( + new ExprTimeValue("09:07:42"), + tupleValue("{\"timeV\":\"09:07:42\"}").get("timeV")); + } + + @Test + public void constructDatetime() { assertEquals( new ExprTimestampValue("2015-01-01 00:00:00"), tupleValue("{\"timestampV\":\"2015-01-01\"}").get("timestampV")); @@ -208,41 +258,126 @@ public void constructDate() { assertEquals( new ExprTimestampValue(Instant.ofEpochMilli(1420070400001L)), tupleValue("{\"timestampV\":1420070400001}").get("timestampV")); - assertEquals( - new ExprTimeValue("19:36:22"), - tupleValue("{\"timestampV\":\"19:36:22\"}").get("timestampV")); - assertEquals( new ExprTimestampValue(Instant.ofEpochMilli(1420070400001L)), constructFromObject("timestampV", 1420070400001L)); assertEquals( new ExprTimestampValue(Instant.ofEpochMilli(1420070400001L)), constructFromObject("timestampV", Instant.ofEpochMilli(1420070400001L))); + assertEquals( + new ExprTimestampValue(Instant.ofEpochMilli(1420070400001L)), + constructFromObject("epochMillisV", "1420070400001")); + assertEquals( + new ExprTimestampValue(Instant.ofEpochMilli(1420070400001L)), + constructFromObject("epochMillisV", 1420070400001L)); assertEquals( new ExprTimestampValue("2015-01-01 12:10:30"), constructFromObject("timestampV", "2015-01-01 12:10:30")); assertEquals( - new ExprDateValue("2015-01-01"), - constructFromObject("dateV","2015-01-01")); + new ExprDatetimeValue("2015-01-01 12:10:30"), + constructFromObject("datetimeV", "2015-01-01 12:10:30")); assertEquals( - new ExprTimeValue("12:10:30"), - 
constructFromObject("timeV","12:10:30")); + new ExprDatetimeValue("2015-01-01 12:10:30"), + constructFromObject("datetimeDefaultV", "2015-01-01 12:10:30")); + assertEquals( + new ExprTimestampValue(Instant.ofEpochMilli(1420070400001L)), + constructFromObject("dateOrEpochMillisV", "1420070400001")); + + // case: timestamp-formatted field, but it only gets a time: should match a time + assertEquals( + new ExprTimeValue("19:36:22"), + tupleValue("{\"timestampV\":\"19:36:22\"}").get("timestampV")); + + // case: timestamp-formatted field, but it only gets a date: should match a date + assertEquals( + new ExprDateValue("2011-03-03"), + tupleValue("{\"timestampV\":\"2011-03-03\"}").get("timestampV")); + } + + @Test + public void constructDatetime_fromCustomFormat() { + // this is not the desirable behaviour - instead it accepts the default formatter assertEquals( new ExprDatetimeValue("2015-01-01 12:10:30"), - constructFromObject("datetimeV", "2015-01-01 12:10:30")); + constructFromObject("customFormatV", "2015-01-01 12:10:30")); + + // this should pass when custom formats are supported + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, + () -> constructFromObject("customFormatV", "2015-01-01-12-10-30")); + assertEquals( + "Construct ExprTimestampValue from \"2015-01-01-12-10-30\" failed, " + + "unsupported date format.", + exception.getMessage()); + + assertEquals( + new ExprDatetimeValue("2015-01-01 12:10:30"), + constructFromObject("customAndEpochMillisV", "2015-01-01 12:10:30")); + + // this should pass when custom formats are supported + exception = + assertThrows(IllegalArgumentException.class, + () -> constructFromObject("customAndEpochMillisV", "2015-01-01-12-10-30")); + assertEquals( + "Construct ExprTimestampValue from \"2015-01-01-12-10-30\" failed, " + + "unsupported date format.", + exception.getMessage()); } @Test - public void constructDateFromUnsupportedFormatThrowException() { - IllegalStateException exception = -
assertThrows( - IllegalStateException.class, () -> tupleValue("{\"timestampV\":\"2015-01-01 12:10\"}")); + public void constructDatetimeFromUnsupportedFormat_ThrowIllegalArgumentException() { + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, + () -> constructFromObject("timestampV", "2015-01-01 12:10")); + assertEquals( + "Construct ExprTimestampValue from \"2015-01-01 12:10\" failed, " + + "unsupported date format.", + exception.getMessage()); + + // fail with missing seconds + exception = + assertThrows(IllegalArgumentException.class, + () -> constructFromObject("dateOrEpochMillisV", "2015-01-01 12:10")); assertEquals( "Construct ExprTimestampValue from \"2015-01-01 12:10\" failed, " + "unsupported date format.", exception.getMessage()); } + @Test + public void constructTimeFromUnsupportedFormat_ThrowIllegalArgumentException() { + IllegalArgumentException exception = assertThrows( + IllegalArgumentException.class, () -> constructFromObject("timeV", "2015-01-01")); + assertEquals( + "Construct ExprTimeValue from \"2015-01-01\" failed, " + + "unsupported time format.", + exception.getMessage()); + + exception = assertThrows( + IllegalArgumentException.class, () -> constructFromObject("timeStringV", "10:10")); + assertEquals( + "Construct ExprTimeValue from \"10:10\" failed, " + + "unsupported time format.", + exception.getMessage()); + } + + @Test + public void constructDateFromUnsupportedFormat_ThrowIllegalArgumentException() { + IllegalArgumentException exception = assertThrows( + IllegalArgumentException.class, () -> constructFromObject("dateV", "12:10:10")); + assertEquals( + "Construct ExprDateValue from \"12:10:10\" failed, " + + "unsupported date format.", + exception.getMessage()); + + exception = assertThrows( + IllegalArgumentException.class, () -> constructFromObject("dateStringV", "abc")); + assertEquals( + "Construct ExprDateValue from \"abc\" failed, " + + "unsupported date format.", + exception.getMessage()); + } + 
@Test public void constructArray() { assertEquals( @@ -431,7 +566,7 @@ private ExprValue constructFromObject(String fieldName, Object value) { private static class TestType extends OpenSearchDataType { public TestType() { - mappingType = null; + super(MappingType.Invalid); } @Override diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java index e771e01bce..03f5cc8b52 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilderTest.java @@ -14,9 +14,11 @@ import static org.mockito.Mockito.doAnswer; import static org.opensearch.sql.common.utils.StringUtils.format; import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.expression.DSL.literal; import static org.opensearch.sql.expression.DSL.named; @@ -50,8 +52,10 @@ import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.aggregation.AvgAggregator; import org.opensearch.sql.expression.aggregation.CountAggregator; +import org.opensearch.sql.expression.aggregation.MaxAggregator; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; import 
org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer; @@ -148,6 +152,58 @@ void should_build_type_mapping_for_field_reference() { )); } + @Test + void should_build_type_mapping_for_datetime_type() { + assertThat( + buildTypeMapping(Arrays.asList( + named("avg(datetime)", + new AvgAggregator(Arrays.asList(ref("datetime", DATETIME)), DATETIME))), + Arrays.asList(named("datetime", ref("datetime", DATETIME)))), + containsInAnyOrder( + map("avg(datetime)", OpenSearchDateType.of(DATETIME)), + map("datetime", OpenSearchDateType.of(DATETIME)) + )); + } + + @Test + void should_build_type_mapping_for_timestamp_type() { + assertThat( + buildTypeMapping(Arrays.asList( + named("avg(timestamp)", + new AvgAggregator(Arrays.asList(ref("timestamp", TIMESTAMP)), TIMESTAMP))), + Arrays.asList(named("timestamp", ref("timestamp", TIMESTAMP)))), + containsInAnyOrder( + map("avg(timestamp)", OpenSearchDateType.of()), + map("timestamp", OpenSearchDateType.of()) + )); + } + + @Test + void should_build_type_mapping_for_date_type() { + assertThat( + buildTypeMapping(Arrays.asList( + named("avg(date)", + new AvgAggregator(Arrays.asList(ref("date", DATE)), DATE))), + Arrays.asList(named("date", ref("date", DATE)))), + containsInAnyOrder( + map("avg(date)", OpenSearchDateType.of(DATE)), + map("date", OpenSearchDateType.of(DATE)) + )); + } + + @Test + void should_build_type_mapping_for_time_type() { + assertThat( + buildTypeMapping(Arrays.asList( + named("avg(time)", + new AvgAggregator(Arrays.asList(ref("time", TIME)), TIME))), + Arrays.asList(named("time", ref("time", TIME)))), + containsInAnyOrder( + map("avg(time)", OpenSearchDateType.of(TIME)), + map("time", OpenSearchDateType.of(TIME)) + )); + } + @Test void should_build_composite_aggregation_for_field_reference_of_keyword() { assertEquals(format( diff --git 
a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/ExpressionFilterScriptTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/ExpressionFilterScriptTest.java index b21ab95927..61a3e9d35f 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/ExpressionFilterScriptTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/ExpressionFilterScriptTest.java @@ -15,9 +15,12 @@ import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.DATETIME; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.expression.DSL.literal; import static org.opensearch.sql.expression.DSL.ref; @@ -39,6 +42,8 @@ import org.opensearch.search.lookup.LeafDocLookup; import org.opensearch.search.lookup.LeafSearchLookup; import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.sql.data.model.ExprDateValue; +import org.opensearch.sql.data.model.ExprTimeValue; import org.opensearch.sql.data.model.ExprTimestampValue; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.Expression; @@ -109,7 +114,7 @@ void can_execute_expression_with_float_field() { } @Test - void can_execute_expression_with_date_field() { + void can_execute_expression_with_timestamp_field() { ExprTimestampValue ts = new ExprTimestampValue("2020-08-04 10:00:00"); assertThat() .docValues("birthday", ZonedDateTime.parse("2020-08-04T10:00:00Z")) @@ -117,6 +122,33 @@ void 
can_execute_expression_with_date_field() { .shouldMatch(); } + @Test + void can_execute_expression_with_datetime_field() { + ExprTimestampValue ts = new ExprTimestampValue("2020-08-04 10:00:00"); + assertThat() + .docValues("birthday", ZonedDateTime.parse("2020-08-04T10:00:00Z")) + .filterBy(DSL.equal(ref("birthday", DATETIME), new LiteralExpression(ts))) + .shouldMatch(); + } + + @Test + void can_execute_expression_with_date_field() { + ExprDateValue date = new ExprDateValue("2020-08-04"); + assertThat() + .docValues("birthday", "2020-08-04") + .filterBy(DSL.equal(ref("birthday", DATE), new LiteralExpression(date))) + .shouldMatch(); + } + + @Test + void can_execute_expression_with_time_field() { + ExprTimeValue time = new ExprTimeValue("10:00:01"); + assertThat() + .docValues("birthday", "10:00:01") + .filterBy(DSL.equal(ref("birthday", TIME), new LiteralExpression(time))) + .shouldMatch(); + } + @Test void can_execute_expression_with_missing_field() { assertThat()