From 9d51dd10aa1542b55e68cd661485e1f49688fffd Mon Sep 17 00:00:00 2001 From: Nong Li Date: Tue, 15 Dec 2015 21:03:41 -0800 Subject: [PATCH 01/14] [SPARK-12363] [SQL] Inline Hive parser into spark sql. This is a WIP. This inlines the hive sql grammar parser into spark sql in the hive subproject. This should eventually be moved into the SQL core project once all the hive dependencies are removed. This patch does some of that by cleaning up the hive code to remove much of semantic analysis. --- pom.xml | 5 + project/plugins.sbt | 2 + sql/hive/pom.xml | 22 + .../spark/sql/parser/FromClauseParser.g | 329 +++ .../spark/sql/parser/IdentifiersParser.g | 696 +++++ .../spark/sql/parser/SelectClauseParser.g | 226 ++ .../apache/spark/sql/parser/SparkSqlLexer.g | 473 ++++ .../apache/spark/sql/parser/SparkSqlParser.g | 2456 +++++++++++++++++ .../apache/spark/sql/parser/ASTErrorNode.java | 49 + .../org/apache/spark/sql/parser/ASTNode.java | 263 ++ .../spark/sql/parser/ASTNodeOrigin.java | 95 + .../apache/spark/sql/parser/ParseDriver.java | 261 ++ .../apache/spark/sql/parser/ParseError.java | 54 + .../spark/sql/parser/ParseException.java | 51 + .../apache/spark/sql/parser/ParseUtils.java | 133 + .../apache/spark/sql/parser/RowResolver.java | 388 +++ .../spark/sql/parser/SemanticAnalyzer.java | 732 +++++ .../apache/spark/sql/parser/TypeCheckCtx.java | 212 ++ .../sql/parser/TypeCheckProcFactory.java | 1394 ++++++++++ .../spark/sql/parser/UnparseTranslator.java | 274 ++ .../org/apache/spark/sql/hive/HiveQl.scala | 98 +- 21 files changed, 8165 insertions(+), 48 deletions(-) create mode 100644 sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g create mode 100644 sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g create mode 100644 sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SelectClauseParser.g create mode 100644 sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g create mode 100644 sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/ASTErrorNode.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNode.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNodeOrigin.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/ParseError.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/ParseException.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/RowResolver.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckCtx.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckProcFactory.java create mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/UnparseTranslator.java diff --git a/pom.xml b/pom.xml index 32918d6a74af..3f5fb02f7e59 100644 --- a/pom.xml +++ b/pom.xml @@ -1951,6 +1951,11 @@ + + org.antlr + antlr3-maven-plugin + 3.4 + org.apache.maven.plugins diff --git a/project/plugins.sbt b/project/plugins.sbt index 5e23224cf8aa..e671b61d9181 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -24,6 +24,8 @@ addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2") addSbtPlugin("io.spray" % "sbt-revolver" % 
"0.7.2") +addSbtPlugin("org.antlr" % "antlr4-maven-plugin" % "3.4") + libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3" libraryDependencies += "org.ow2.asm" % "asm-commons" % "5.0.3" diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index e9885f668202..ffabb92179a1 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -232,6 +232,7 @@ v${hive.version.short}/src/main/scala + ${project.build.directory/generated-sources/antlr @@ -260,6 +261,27 @@ + + + + org.antlr + antlr3-maven-plugin + + + + antlr + + + + + ${basedir}/src/main/antlr3 + + **/SparkSqlLexer.g + **/SparkSqlParser.g + + + + diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g new file mode 100644 index 000000000000..084c4213da37 --- /dev/null +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g @@ -0,0 +1,329 @@ +/** + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +parser grammar FromClauseParser; + +options +{ +output=AST; +ASTLabelType=CommonTree; +backtrack=false; +k=3; +} + +@members { + @Override + public Object recoverFromMismatchedSet(IntStream input, + RecognitionException re, BitSet follow) throws RecognitionException { + throw re; + } + @Override + public void displayRecognitionError(String[] tokenNames, + RecognitionException e) { + gParent.errors.add(new ParseError(gParent, e, tokenNames)); + } + protected boolean useSQL11ReservedKeywordsForIdentifier() { + return gParent.useSQL11ReservedKeywordsForIdentifier(); + } +} + +@rulecatch { +catch (RecognitionException e) { + throw e; +} +} + +//----------------------------------------------------------------------------------- + +tableAllColumns + : STAR + -> ^(TOK_ALLCOLREF) + | tableName DOT STAR + -> ^(TOK_ALLCOLREF tableName) + ; + +// (table|column) +tableOrColumn +@init { gParent.pushMsg("table or column identifier", state); } +@after { gParent.popMsg(state); } + : + identifier -> ^(TOK_TABLE_OR_COL identifier) + ; + +expressionList +@init { gParent.pushMsg("expression list", state); } +@after { gParent.popMsg(state); } + : + expression (COMMA expression)* -> ^(TOK_EXPLIST expression+) + ; + +aliasList +@init { gParent.pushMsg("alias list", state); } +@after { gParent.popMsg(state); } + : + identifier (COMMA identifier)* -> ^(TOK_ALIASLIST identifier+) + ; + +//----------------------- Rules for parsing fromClause ------------------------------ +// from [col1, col2, col3] table1, [col4, col5] table2 +fromClause +@init { gParent.pushMsg("from clause", state); } +@after { gParent.popMsg(state); } + : + KW_FROM joinSource -> ^(TOK_FROM joinSource) + ; + +joinSource +@init { gParent.pushMsg("join source", state); } +@after { gParent.popMsg(state); } + : fromSource ( joinToken^ fromSource ( KW_ON! 
expression {$joinToken.start.getType() != COMMA}? )? )* + | uniqueJoinToken^ uniqueJoinSource (COMMA! uniqueJoinSource)+ + ; + +uniqueJoinSource +@init { gParent.pushMsg("unique join source", state); } +@after { gParent.popMsg(state); } + : KW_PRESERVE? fromSource uniqueJoinExpr + ; + +uniqueJoinExpr +@init { gParent.pushMsg("unique join expression list", state); } +@after { gParent.popMsg(state); } + : LPAREN e1+=expression (COMMA e1+=expression)* RPAREN + -> ^(TOK_EXPLIST $e1*) + ; + +uniqueJoinToken +@init { gParent.pushMsg("unique join", state); } +@after { gParent.popMsg(state); } + : KW_UNIQUEJOIN -> TOK_UNIQUEJOIN; + +joinToken +@init { gParent.pushMsg("join type specifier", state); } +@after { gParent.popMsg(state); } + : + KW_JOIN -> TOK_JOIN + | KW_INNER KW_JOIN -> TOK_JOIN + | COMMA -> TOK_JOIN + | KW_CROSS KW_JOIN -> TOK_CROSSJOIN + | KW_LEFT (KW_OUTER)? KW_JOIN -> TOK_LEFTOUTERJOIN + | KW_RIGHT (KW_OUTER)? KW_JOIN -> TOK_RIGHTOUTERJOIN + | KW_FULL (KW_OUTER)? KW_JOIN -> TOK_FULLOUTERJOIN + | KW_LEFT KW_SEMI KW_JOIN -> TOK_LEFTSEMIJOIN + ; + +lateralView +@init {gParent.pushMsg("lateral view", state); } +@after {gParent.popMsg(state); } + : + (KW_LATERAL KW_VIEW KW_OUTER) => KW_LATERAL KW_VIEW KW_OUTER function tableAlias (KW_AS identifier ((COMMA)=> COMMA identifier)*)? + -> ^(TOK_LATERAL_VIEW_OUTER ^(TOK_SELECT ^(TOK_SELEXPR function identifier* tableAlias))) + | + KW_LATERAL KW_VIEW function tableAlias (KW_AS identifier ((COMMA)=> COMMA identifier)*)? + -> ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR function identifier* tableAlias))) + ; + +tableAlias +@init {gParent.pushMsg("table alias", state); } +@after {gParent.popMsg(state); } + : + identifier -> ^(TOK_TABALIAS identifier) + ; + +fromSource +@init { gParent.pushMsg("from source", state); } +@after { gParent.popMsg(state); } + : + (LPAREN KW_VALUES) => fromSource0 + | (LPAREN) => LPAREN joinSource RPAREN -> joinSource + | fromSource0 + ; + + +fromSource0 +@init { gParent.pushMsg("from source 0", state); } +@after { gParent.popMsg(state); } + : + ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource | virtualTableSource) (lateralView^)* + ; + +tableBucketSample +@init { gParent.pushMsg("table bucket sample specification", state); } +@after { gParent.popMsg(state); } + : + KW_TABLESAMPLE LPAREN KW_BUCKET (numerator=Number) KW_OUT KW_OF (denominator=Number) (KW_ON expr+=expression (COMMA expr+=expression)*)? RPAREN -> ^(TOK_TABLEBUCKETSAMPLE $numerator $denominator $expr*) + ; + +splitSample +@init { gParent.pushMsg("table split sample specification", state); } +@after { gParent.popMsg(state); } + : + KW_TABLESAMPLE LPAREN (numerator=Number) (percent=KW_PERCENT|KW_ROWS) RPAREN + -> {percent != null}? ^(TOK_TABLESPLITSAMPLE TOK_PERCENT $numerator) + -> ^(TOK_TABLESPLITSAMPLE TOK_ROWCOUNT $numerator) + | + KW_TABLESAMPLE LPAREN (numerator=ByteLengthLiteral) RPAREN + -> ^(TOK_TABLESPLITSAMPLE TOK_LENGTH $numerator) + ; + +tableSample +@init { gParent.pushMsg("table sample specification", state); } +@after { gParent.popMsg(state); } + : + tableBucketSample | + splitSample + ; + +tableSource +@init { gParent.pushMsg("table source", state); } +@after { gParent.popMsg(state); } + : tabname=tableName + ((tableProperties) => props=tableProperties)? + ((tableSample) => ts=tableSample)? + ((KW_AS) => (KW_AS alias=Identifier) + | + (Identifier) => (alias=Identifier))? + -> ^(TOK_TABREF $tabname $props? $ts? $alias?) 
+ ; + +tableName +@init { gParent.pushMsg("table name", state); } +@after { gParent.popMsg(state); } + : + db=identifier DOT tab=identifier + -> ^(TOK_TABNAME $db $tab) + | + tab=identifier + -> ^(TOK_TABNAME $tab) + ; + +viewName +@init { gParent.pushMsg("view name", state); } +@after { gParent.popMsg(state); } + : + (db=identifier DOT)? view=identifier + -> ^(TOK_TABNAME $db? $view) + ; + +subQuerySource +@init { gParent.pushMsg("subquery source", state); } +@after { gParent.popMsg(state); } + : + LPAREN queryStatementExpression[false] RPAREN KW_AS? identifier -> ^(TOK_SUBQUERY queryStatementExpression identifier) + ; + +//---------------------- Rules for parsing PTF clauses ----------------------------- +partitioningSpec +@init { gParent.pushMsg("partitioningSpec clause", state); } +@after { gParent.popMsg(state); } + : + partitionByClause orderByClause? -> ^(TOK_PARTITIONINGSPEC partitionByClause orderByClause?) | + orderByClause -> ^(TOK_PARTITIONINGSPEC orderByClause) | + distributeByClause sortByClause? -> ^(TOK_PARTITIONINGSPEC distributeByClause sortByClause?) | + sortByClause -> ^(TOK_PARTITIONINGSPEC sortByClause) | + clusterByClause -> ^(TOK_PARTITIONINGSPEC clusterByClause) + ; + +partitionTableFunctionSource +@init { gParent.pushMsg("partitionTableFunctionSource clause", state); } +@after { gParent.popMsg(state); } + : + subQuerySource | + tableSource | + partitionedTableFunction + ; + +partitionedTableFunction +@init { gParent.pushMsg("ptf clause", state); } +@after { gParent.popMsg(state); } + : + name=Identifier LPAREN KW_ON + ((partitionTableFunctionSource) => (ptfsrc=partitionTableFunctionSource spec=partitioningSpec?)) + ((Identifier LPAREN expression RPAREN ) => Identifier LPAREN expression RPAREN ( COMMA Identifier LPAREN expression RPAREN)*)? + ((RPAREN) => (RPAREN)) ((Identifier) => alias=Identifier)? + -> ^(TOK_PTBLFUNCTION $name $alias? $ptfsrc $spec? expression*) + ; + +//----------------------- Rules for parsing whereClause ----------------------------- +// where a=b and ... +whereClause +@init { gParent.pushMsg("where clause", state); } +@after { gParent.popMsg(state); } + : + KW_WHERE searchCondition -> ^(TOK_WHERE searchCondition) + ; + +searchCondition +@init { gParent.pushMsg("search condition", state); } +@after { gParent.popMsg(state); } + : + expression + ; + +//----------------------------------------------------------------------------------- + +//-------- Row Constructor ---------------------------------------------------------- +//in support of SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as FOO(a,b,c) and +// INSERT INTO (col1,col2,...) VALUES(...),(...),... +// INSERT INTO
(col1,col2,...) SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as Foo(a,b,c) +valueRowConstructor +@init { gParent.pushMsg("value row constructor", state); } +@after { gParent.popMsg(state); } + : + LPAREN precedenceUnaryPrefixExpression (COMMA precedenceUnaryPrefixExpression)* RPAREN -> ^(TOK_VALUE_ROW precedenceUnaryPrefixExpression+) + ; + +valuesTableConstructor +@init { gParent.pushMsg("values table constructor", state); } +@after { gParent.popMsg(state); } + : + valueRowConstructor (COMMA valueRowConstructor)* -> ^(TOK_VALUES_TABLE valueRowConstructor+) + ; + +/* +VALUES(1),(2) means 2 rows, 1 column each. +VALUES(1,2),(3,4) means 2 rows, 2 columns each. +VALUES(1,2,3) means 1 row, 3 columns +*/ +valuesClause +@init { gParent.pushMsg("values clause", state); } +@after { gParent.popMsg(state); } + : + KW_VALUES valuesTableConstructor -> valuesTableConstructor + ; + +/* +This represents a clause like this: +(VALUES(1,2),(2,3)) as VirtTable(col1,col2) +*/ +virtualTableSource +@init { gParent.pushMsg("virtual table source", state); } +@after { gParent.popMsg(state); } + : + LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause) + ; +/* +e.g. as VirtTable(col1,col2) +Note that we only want literals as column names +*/ +tableNameColList +@init { gParent.pushMsg("from source", state); } +@after { gParent.popMsg(state); } + : + KW_AS? identifier LPAREN identifier (COMMA identifier)* RPAREN -> ^(TOK_VIRTUAL_TABREF ^(TOK_TABNAME identifier) ^(TOK_COL_NAME identifier+)) + ; + +//----------------------------------------------------------------------------------- diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g new file mode 100644 index 000000000000..bac0d2254c17 --- /dev/null +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g @@ -0,0 +1,696 @@ +/** + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +parser grammar IdentifiersParser; + +options +{ +output=AST; +ASTLabelType=CommonTree; +backtrack=false; +k=3; +} + +@members { + @Override + public Object recoverFromMismatchedSet(IntStream input, + RecognitionException re, BitSet follow) throws RecognitionException { + throw re; + } + @Override + public void displayRecognitionError(String[] tokenNames, + RecognitionException e) { + gParent.errors.add(new ParseError(gParent, e, tokenNames)); + } + protected boolean useSQL11ReservedKeywordsForIdentifier() { + return gParent.useSQL11ReservedKeywordsForIdentifier(); + } +} + +@rulecatch { +catch (RecognitionException e) { + throw e; +} +} + +//----------------------------------------------------------------------------------- + +// group by a,b +groupByClause +@init { gParent.pushMsg("group by clause", state); } +@after { gParent.popMsg(state); } + : + KW_GROUP KW_BY + expression + ( COMMA expression)* + ((rollup=KW_WITH KW_ROLLUP) | (cube=KW_WITH KW_CUBE)) ? + (sets=KW_GROUPING KW_SETS + LPAREN groupingSetExpression ( COMMA groupingSetExpression)* RPAREN ) ? + -> {rollup != null}? ^(TOK_ROLLUP_GROUPBY expression+) + -> {cube != null}? ^(TOK_CUBE_GROUPBY expression+) + -> {sets != null}? ^(TOK_GROUPING_SETS expression+ groupingSetExpression+) + -> ^(TOK_GROUPBY expression+) + ; + +groupingSetExpression +@init {gParent.pushMsg("grouping set expression", state); } +@after {gParent.popMsg(state); } + : + (LPAREN) => groupingSetExpressionMultiple + | + groupingExpressionSingle + ; + +groupingSetExpressionMultiple +@init {gParent.pushMsg("grouping set part expression", state); } +@after {gParent.popMsg(state); } + : + LPAREN + expression? (COMMA expression)* + RPAREN + -> ^(TOK_GROUPING_SETS_EXPRESSION expression*) + ; + +groupingExpressionSingle +@init { gParent.pushMsg("groupingExpression expression", state); } +@after { gParent.popMsg(state); } + : + expression -> ^(TOK_GROUPING_SETS_EXPRESSION expression) + ; + +havingClause +@init { gParent.pushMsg("having clause", state); } +@after { gParent.popMsg(state); } + : + KW_HAVING havingCondition -> ^(TOK_HAVING havingCondition) + ; + +havingCondition +@init { gParent.pushMsg("having condition", state); } +@after { gParent.popMsg(state); } + : + expression + ; + +expressionsInParenthese + : + LPAREN expression (COMMA expression)* RPAREN -> expression+ + ; + +expressionsNotInParenthese + : + expression (COMMA expression)* -> expression+ + ; + +columnRefOrderInParenthese + : + LPAREN columnRefOrder (COMMA columnRefOrder)* RPAREN -> columnRefOrder+ + ; + +columnRefOrderNotInParenthese + : + columnRefOrder (COMMA columnRefOrder)* -> columnRefOrder+ + ; + +// order by a,b +orderByClause +@init { gParent.pushMsg("order by clause", state); } +@after { gParent.popMsg(state); } + : + KW_ORDER KW_BY columnRefOrder ( COMMA columnRefOrder)* -> ^(TOK_ORDERBY columnRefOrder+) + ; + +clusterByClause +@init { gParent.pushMsg("cluster by clause", state); } +@after { gParent.popMsg(state); } + : + KW_CLUSTER KW_BY + ( + (LPAREN) => expressionsInParenthese -> ^(TOK_CLUSTERBY expressionsInParenthese) + | + expressionsNotInParenthese -> ^(TOK_CLUSTERBY expressionsNotInParenthese) + ) + ; + +partitionByClause +@init { gParent.pushMsg("partition by clause", state); } +@after { gParent.popMsg(state); } + : + KW_PARTITION KW_BY + ( + (LPAREN) => expressionsInParenthese -> ^(TOK_DISTRIBUTEBY expressionsInParenthese) + | + expressionsNotInParenthese -> ^(TOK_DISTRIBUTEBY expressionsNotInParenthese) + ) + ; + +distributeByClause +@init { gParent.pushMsg("distribute by 
clause", state); } +@after { gParent.popMsg(state); } + : + KW_DISTRIBUTE KW_BY + ( + (LPAREN) => expressionsInParenthese -> ^(TOK_DISTRIBUTEBY expressionsInParenthese) + | + expressionsNotInParenthese -> ^(TOK_DISTRIBUTEBY expressionsNotInParenthese) + ) + ; + +sortByClause +@init { gParent.pushMsg("sort by clause", state); } +@after { gParent.popMsg(state); } + : + KW_SORT KW_BY + ( + (LPAREN) => columnRefOrderInParenthese -> ^(TOK_SORTBY columnRefOrderInParenthese) + | + columnRefOrderNotInParenthese -> ^(TOK_SORTBY columnRefOrderNotInParenthese) + ) + ; + +// fun(par1, par2, par3) +function +@init { gParent.pushMsg("function specification", state); } +@after { gParent.popMsg(state); } + : + functionName + LPAREN + ( + (STAR) => (star=STAR) + | (dist=KW_DISTINCT)? (selectExpression (COMMA selectExpression)*)? + ) + RPAREN (KW_OVER ws=window_specification)? + -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName $ws?) + -> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)? $ws?) + -> ^(TOK_FUNCTIONDI functionName (selectExpression+)?) + ; + +functionName +@init { gParent.pushMsg("function name", state); } +@after { gParent.popMsg(state); } + : // Keyword IF is also a function name + (KW_IF | KW_ARRAY | KW_MAP | KW_STRUCT | KW_UNIONTYPE) => (KW_IF | KW_ARRAY | KW_MAP | KW_STRUCT | KW_UNIONTYPE) + | + (functionIdentifier) => functionIdentifier + | + {!useSQL11ReservedKeywordsForIdentifier()}? sql11ReservedKeywordsUsedAsCastFunctionName -> Identifier[$sql11ReservedKeywordsUsedAsCastFunctionName.text] + ; + +castExpression +@init { gParent.pushMsg("cast expression", state); } +@after { gParent.popMsg(state); } + : + KW_CAST + LPAREN + expression + KW_AS + primitiveType + RPAREN -> ^(TOK_FUNCTION primitiveType expression) + ; + +caseExpression +@init { gParent.pushMsg("case expression", state); } +@after { gParent.popMsg(state); } + : + KW_CASE expression + (KW_WHEN expression KW_THEN expression)+ + (KW_ELSE expression)? + KW_END -> ^(TOK_FUNCTION KW_CASE expression*) + ; + +whenExpression +@init { gParent.pushMsg("case expression", state); } +@after { gParent.popMsg(state); } + : + KW_CASE + ( KW_WHEN expression KW_THEN expression)+ + (KW_ELSE expression)? + KW_END -> ^(TOK_FUNCTION KW_WHEN expression*) + ; + +constant +@init { gParent.pushMsg("constant", state); } +@after { gParent.popMsg(state); } + : + Number + | dateLiteral + | timestampLiteral + | intervalLiteral + | StringLiteral + | stringLiteralSequence + | BigintLiteral + | SmallintLiteral + | TinyintLiteral + | DecimalLiteral + | charSetStringLiteral + | booleanValue + ; + +stringLiteralSequence + : + StringLiteral StringLiteral+ -> ^(TOK_STRINGLITERALSEQUENCE StringLiteral StringLiteral+) + ; + +charSetStringLiteral +@init { gParent.pushMsg("character string literal", state); } +@after { gParent.popMsg(state); } + : + csName=CharSetName csLiteral=CharSetLiteral -> ^(TOK_CHARSETLITERAL $csName $csLiteral) + ; + +dateLiteral + : + KW_DATE StringLiteral -> + { + // Create DateLiteral token, but with the text of the string value + // This makes the dateLiteral more consistent with the other type literals. 
+ adaptor.create(TOK_DATELITERAL, $StringLiteral.text) + } + | + KW_CURRENT_DATE -> ^(TOK_FUNCTION KW_CURRENT_DATE) + ; + +timestampLiteral + : + KW_TIMESTAMP StringLiteral -> + { + adaptor.create(TOK_TIMESTAMPLITERAL, $StringLiteral.text) + } + | + KW_CURRENT_TIMESTAMP -> ^(TOK_FUNCTION KW_CURRENT_TIMESTAMP) + ; + +intervalLiteral + : + KW_INTERVAL StringLiteral qualifiers=intervalQualifiers -> + { + adaptor.create(qualifiers.tree.token.getType(), $StringLiteral.text) + } + ; + +intervalQualifiers + : + KW_YEAR KW_TO KW_MONTH -> TOK_INTERVAL_YEAR_MONTH_LITERAL + | KW_DAY KW_TO KW_SECOND -> TOK_INTERVAL_DAY_TIME_LITERAL + | KW_YEAR -> TOK_INTERVAL_YEAR_LITERAL + | KW_MONTH -> TOK_INTERVAL_MONTH_LITERAL + | KW_DAY -> TOK_INTERVAL_DAY_LITERAL + | KW_HOUR -> TOK_INTERVAL_HOUR_LITERAL + | KW_MINUTE -> TOK_INTERVAL_MINUTE_LITERAL + | KW_SECOND -> TOK_INTERVAL_SECOND_LITERAL + ; + +expression +@init { gParent.pushMsg("expression specification", state); } +@after { gParent.popMsg(state); } + : + precedenceOrExpression + ; + +atomExpression + : + (KW_NULL) => KW_NULL -> TOK_NULL + | (constant) => constant + | castExpression + | caseExpression + | whenExpression + | (functionName LPAREN) => function + | tableOrColumn + | LPAREN! expression RPAREN! + ; + + +precedenceFieldExpression + : + atomExpression ((LSQUARE^ expression RSQUARE!) | (DOT^ identifier))* + ; + +precedenceUnaryOperator + : + PLUS | MINUS | TILDE + ; + +nullCondition + : + KW_NULL -> ^(TOK_ISNULL) + | KW_NOT KW_NULL -> ^(TOK_ISNOTNULL) + ; + +precedenceUnaryPrefixExpression + : + (precedenceUnaryOperator^)* precedenceFieldExpression + ; + +precedenceUnarySuffixExpression + : precedenceUnaryPrefixExpression (a=KW_IS nullCondition)? + -> {$a != null}? ^(TOK_FUNCTION nullCondition precedenceUnaryPrefixExpression) + -> precedenceUnaryPrefixExpression + ; + + +precedenceBitwiseXorOperator + : + BITWISEXOR + ; + +precedenceBitwiseXorExpression + : + precedenceUnarySuffixExpression (precedenceBitwiseXorOperator^ precedenceUnarySuffixExpression)* + ; + + +precedenceStarOperator + : + STAR | DIVIDE | MOD | DIV + ; + +precedenceStarExpression + : + precedenceBitwiseXorExpression (precedenceStarOperator^ precedenceBitwiseXorExpression)* + ; + + +precedencePlusOperator + : + PLUS | MINUS + ; + +precedencePlusExpression + : + precedenceStarExpression (precedencePlusOperator^ precedenceStarExpression)* + ; + + +precedenceAmpersandOperator + : + AMPERSAND + ; + +precedenceAmpersandExpression + : + precedencePlusExpression (precedenceAmpersandOperator^ precedencePlusExpression)* + ; + + +precedenceBitwiseOrOperator + : + BITWISEOR + ; + +precedenceBitwiseOrExpression + : + precedenceAmpersandExpression (precedenceBitwiseOrOperator^ precedenceAmpersandExpression)* + ; + + +// Equal operators supporting NOT prefix +precedenceEqualNegatableOperator + : + KW_LIKE | KW_RLIKE | KW_REGEXP + ; + +precedenceEqualOperator + : + precedenceEqualNegatableOperator | EQUAL | EQUAL_NS | NOTEQUAL | LESSTHANOREQUALTO | LESSTHAN | GREATERTHANOREQUALTO | GREATERTHAN + ; + +subQueryExpression + : + LPAREN! selectStatement[true] RPAREN! 
+ ; + +precedenceEqualExpression + : + (LPAREN precedenceBitwiseOrExpression COMMA) => precedenceEqualExpressionMutiple + | + precedenceEqualExpressionSingle + ; + +precedenceEqualExpressionSingle + : + (left=precedenceBitwiseOrExpression -> $left) + ( + (KW_NOT precedenceEqualNegatableOperator notExpr=precedenceBitwiseOrExpression) + -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpressionSingle $notExpr)) + | (precedenceEqualOperator equalExpr=precedenceBitwiseOrExpression) + -> ^(precedenceEqualOperator $precedenceEqualExpressionSingle $equalExpr) + | (KW_NOT KW_IN LPAREN KW_SELECT)=> (KW_NOT KW_IN subQueryExpression) + -> ^(KW_NOT ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle)) + | (KW_NOT KW_IN expressions) + -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions)) + | (KW_IN LPAREN KW_SELECT)=> (KW_IN subQueryExpression) + -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle) + | (KW_IN expressions) + -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions) + | ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) + -> ^(TOK_FUNCTION Identifier["between"] KW_TRUE $left $min $max) + | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) + -> ^(TOK_FUNCTION Identifier["between"] KW_FALSE $left $min $max) + )* + | (KW_EXISTS LPAREN KW_SELECT)=> (KW_EXISTS subQueryExpression) -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_EXISTS) subQueryExpression) + ; + +expressions + : + LPAREN expression (COMMA expression)* RPAREN -> expression+ + ; + +//we transform the (col0, col1) in ((v00,v01),(v10,v11)) into struct(col0, col1) in (struct(v00,v01),struct(v10,v11)) +precedenceEqualExpressionMutiple + : + (LPAREN precedenceBitwiseOrExpression (COMMA precedenceBitwiseOrExpression)+ RPAREN -> ^(TOK_FUNCTION Identifier["struct"] precedenceBitwiseOrExpression+)) + ( (KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN) + -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple expressionsToStruct+) + | (KW_NOT KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN) + -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple expressionsToStruct+))) + ; + +expressionsToStruct + : + LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_FUNCTION Identifier["struct"] expression+) + ; + +precedenceNotOperator + : + KW_NOT + ; + +precedenceNotExpression + : + (precedenceNotOperator^)* precedenceEqualExpression + ; + + +precedenceAndOperator + : + KW_AND + ; + +precedenceAndExpression + : + precedenceNotExpression (precedenceAndOperator^ precedenceNotExpression)* + ; + + +precedenceOrOperator + : + KW_OR + ; + +precedenceOrExpression + : + precedenceAndExpression (precedenceOrOperator^ precedenceAndExpression)* + ; + + +booleanValue + : + KW_TRUE^ | KW_FALSE^ + ; + +booleanValueTok + : + KW_TRUE -> TOK_TRUE + | KW_FALSE -> TOK_FALSE + ; + +tableOrPartition + : + tableName partitionSpec? -> ^(TOK_TAB tableName partitionSpec?) + ; + +partitionSpec + : + KW_PARTITION + LPAREN partitionVal (COMMA partitionVal )* RPAREN -> ^(TOK_PARTSPEC partitionVal +) + ; + +partitionVal + : + identifier (EQUAL constant)? -> ^(TOK_PARTVAL identifier constant?) 
+ ; + +dropPartitionSpec + : + KW_PARTITION + LPAREN dropPartitionVal (COMMA dropPartitionVal )* RPAREN -> ^(TOK_PARTSPEC dropPartitionVal +) + ; + +dropPartitionVal + : + identifier dropPartitionOperator constant -> ^(TOK_PARTVAL identifier dropPartitionOperator constant) + ; + +dropPartitionOperator + : + EQUAL | NOTEQUAL | LESSTHANOREQUALTO | LESSTHAN | GREATERTHANOREQUALTO | GREATERTHAN + ; + +sysFuncNames + : + KW_AND + | KW_OR + | KW_NOT + | KW_LIKE + | KW_IF + | KW_CASE + | KW_WHEN + | KW_TINYINT + | KW_SMALLINT + | KW_INT + | KW_BIGINT + | KW_FLOAT + | KW_DOUBLE + | KW_BOOLEAN + | KW_STRING + | KW_BINARY + | KW_ARRAY + | KW_MAP + | KW_STRUCT + | KW_UNIONTYPE + | EQUAL + | EQUAL_NS + | NOTEQUAL + | LESSTHANOREQUALTO + | LESSTHAN + | GREATERTHANOREQUALTO + | GREATERTHAN + | DIVIDE + | PLUS + | MINUS + | STAR + | MOD + | DIV + | AMPERSAND + | TILDE + | BITWISEOR + | BITWISEXOR + | KW_RLIKE + | KW_REGEXP + | KW_IN + | KW_BETWEEN + ; + +descFuncNames + : + (sysFuncNames) => sysFuncNames + | StringLiteral + | functionIdentifier + ; + +identifier + : + Identifier + | nonReserved -> Identifier[$nonReserved.text] + // If it decides to support SQL11 reserved keywords, i.e., useSQL11ReservedKeywordsForIdentifier()=false, + // the sql11keywords in existing q tests will NOT be added back. + | {useSQL11ReservedKeywordsForIdentifier()}? sql11ReservedKeywordsUsedAsIdentifier -> Identifier[$sql11ReservedKeywordsUsedAsIdentifier.text] + ; + +functionIdentifier +@init { gParent.pushMsg("function identifier", state); } +@after { gParent.popMsg(state); } + : db=identifier DOT fn=identifier + -> Identifier[$db.text + "." + $fn.text] + | + identifier + ; + +principalIdentifier +@init { gParent.pushMsg("identifier for principal spec", state); } +@after { gParent.popMsg(state); } + : identifier + | QuotedIdentifier + ; + +//The new version of nonReserved + sql11ReservedKeywordsUsedAsIdentifier = old version of nonReserved +//Non reserved keywords are basically the keywords that can be used as identifiers. +//All the KW_* are automatically not only keywords, but also reserved keywords. +//That means, they can NOT be used as identifiers. +//If you would like to use them as identifiers, put them in the nonReserved list below. 
+//If you are not sure, please refer to the SQL2011 column in +//http://www.postgresql.org/docs/9.5/static/sql-keywords-appendix.html +nonReserved + : + KW_ADD | KW_ADMIN | KW_AFTER | KW_ANALYZE | KW_ARCHIVE | KW_ASC | KW_BEFORE | KW_BUCKET | KW_BUCKETS + | KW_CASCADE | KW_CHANGE | KW_CLUSTER | KW_CLUSTERED | KW_CLUSTERSTATUS | KW_COLLECTION | KW_COLUMNS + | KW_COMMENT | KW_COMPACT | KW_COMPACTIONS | KW_COMPUTE | KW_CONCATENATE | KW_CONTINUE | KW_DATA | KW_DAY + | KW_DATABASES | KW_DATETIME | KW_DBPROPERTIES | KW_DEFERRED | KW_DEFINED | KW_DELIMITED | KW_DEPENDENCY + | KW_DESC | KW_DIRECTORIES | KW_DIRECTORY | KW_DISABLE | KW_DISTRIBUTE | KW_ELEM_TYPE + | KW_ENABLE | KW_ESCAPED | KW_EXCLUSIVE | KW_EXPLAIN | KW_EXPORT | KW_FIELDS | KW_FILE | KW_FILEFORMAT + | KW_FIRST | KW_FORMAT | KW_FORMATTED | KW_FUNCTIONS | KW_HOLD_DDLTIME | KW_HOUR | KW_IDXPROPERTIES | KW_IGNORE + | KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR + | KW_KEYS | KW_KEY_TYPE | KW_LIMIT | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG + | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_OFFLINE + | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY + | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER + | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE + | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED + | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED + | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED + | KW_TINYINT | KW_TOUCH | KW_TRANSACTIONS | KW_UNARCHIVE | KW_UNDO | KW_UNIONTYPE | KW_UNLOCK | KW_UNSET + | KW_UNSIGNED | KW_URI | KW_USE | KW_UTC | KW_UTCTIMESTAMP | KW_VALUE_TYPE | KW_VIEW | KW_WHILE | KW_YEAR + | KW_WORK + | KW_TRANSACTION + | KW_WRITE + | KW_ISOLATION + | KW_LEVEL + | KW_SNAPSHOT + | KW_AUTOCOMMIT +; + +//The following SQL2011 reserved keywords are used as cast function name only, but not as identifiers. +sql11ReservedKeywordsUsedAsCastFunctionName + : + KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_INT | KW_SMALLINT | KW_TIMESTAMP + ; + +//The following SQL2011 reserved keywords are used as identifiers in many q tests, they may be added back due to backward compatibility. +//We are planning to remove the following whole list after several releases. +//Thus, please do not change the following list unless you know what to do. 
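To make the keyword buckets concrete before the reserved list that follows: anything in nonReserved above can be written directly as a column or table name, while the SQL2011 reserved keywords below are only accepted as identifiers when useSQL11ReservedKeywordsForIdentifier() returns true (the identifier rule then rewrites them to plain Identifier tokens). A minimal, hypothetical driver sketch, assuming the ParseDriver and ASTNode classes added by this patch keep Hive's parse(String) entry point and CommonTree base class; neither signature is shown in this hunk, and the class name NonReservedExample is illustrative:

    // Hypothetical usage sketch; ParseDriver.parse(String) and ASTNode#toStringTree()
    // are assumed to behave as in Hive's parser, on which this code is based.
    import org.apache.spark.sql.parser.ASTNode;
    import org.apache.spark.sql.parser.ParseDriver;
    import org.apache.spark.sql.parser.ParseException;

    public class NonReservedExample {
      public static void main(String[] args) throws ParseException {
        ParseDriver pd = new ParseDriver();
        // YEAR and MONTH appear in nonReserved, so they are legal column names.
        ASTNode ast = pd.parse("SELECT year, month FROM logs");
        System.out.println(ast.toStringTree());
        // DATE or USER, being SQL2011-reserved, only parse as identifiers when
        // useSQL11ReservedKeywordsForIdentifier() is true, or when back-quoted.
      }
    }
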
+sql11ReservedKeywordsUsedAsIdentifier + : + KW_ALL | KW_ALTER | KW_ARRAY | KW_AS | KW_AUTHORIZATION | KW_BETWEEN | KW_BIGINT | KW_BINARY | KW_BOOLEAN + | KW_BOTH | KW_BY | KW_CREATE | KW_CUBE | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_CURSOR | KW_DATE | KW_DECIMAL | KW_DELETE | KW_DESCRIBE + | KW_DOUBLE | KW_DROP | KW_EXISTS | KW_EXTERNAL | KW_FALSE | KW_FETCH | KW_FLOAT | KW_FOR | KW_FULL | KW_GRANT + | KW_GROUP | KW_GROUPING | KW_IMPORT | KW_IN | KW_INNER | KW_INSERT | KW_INT | KW_INTERSECT | KW_INTO | KW_IS | KW_LATERAL + | KW_LEFT | KW_LIKE | KW_LOCAL | KW_NONE | KW_NULL | KW_OF | KW_ORDER | KW_OUT | KW_OUTER | KW_PARTITION + | KW_PERCENT | KW_PROCEDURE | KW_RANGE | KW_READS | KW_REVOKE | KW_RIGHT + | KW_ROLLUP | KW_ROW | KW_ROWS | KW_SET | KW_SMALLINT | KW_TABLE | KW_TIMESTAMP | KW_TO | KW_TRIGGER | KW_TRUE + | KW_TRUNCATE | KW_UNION | KW_UPDATE | KW_USER | KW_USING | KW_VALUES | KW_WITH +//The following two keywords come from MySQL. Although they are not keywords in SQL2011, they are reserved keywords in MySQL. + | KW_REGEXP | KW_RLIKE + ; diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SelectClauseParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SelectClauseParser.g new file mode 100644 index 000000000000..48bc8b0a300a --- /dev/null +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SelectClauseParser.g @@ -0,0 +1,226 @@ +/** + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +parser grammar SelectClauseParser; + +options +{ +output=AST; +ASTLabelType=CommonTree; +backtrack=false; +k=3; +} + +@members { + @Override + public Object recoverFromMismatchedSet(IntStream input, + RecognitionException re, BitSet follow) throws RecognitionException { + throw re; + } + @Override + public void displayRecognitionError(String[] tokenNames, + RecognitionException e) { + gParent.errors.add(new ParseError(gParent, e, tokenNames)); + } + protected boolean useSQL11ReservedKeywordsForIdentifier() { + return gParent.useSQL11ReservedKeywordsForIdentifier(); + } +} + +@rulecatch { +catch (RecognitionException e) { + throw e; +} +} + +//----------------------- Rules for parsing selectClause ----------------------------- +// select a,b,c ... +selectClause +@init { gParent.pushMsg("select clause", state); } +@after { gParent.popMsg(state); } + : + KW_SELECT hintClause? (((KW_ALL | dist=KW_DISTINCT)? selectList) + | (transform=KW_TRANSFORM selectTrfmClause)) + -> {$transform == null && $dist == null}? ^(TOK_SELECT hintClause? selectList) + -> {$transform == null && $dist != null}? ^(TOK_SELECTDI hintClause? selectList) + -> ^(TOK_SELECT hintClause? 
^(TOK_SELEXPR selectTrfmClause) ) + | + trfmClause ->^(TOK_SELECT ^(TOK_SELEXPR trfmClause)) + ; + +selectList +@init { gParent.pushMsg("select list", state); } +@after { gParent.popMsg(state); } + : + selectItem ( COMMA selectItem )* -> selectItem+ + ; + +selectTrfmClause +@init { gParent.pushMsg("transform clause", state); } +@after { gParent.popMsg(state); } + : + LPAREN selectExpressionList RPAREN + inSerde=rowFormat inRec=recordWriter + KW_USING StringLiteral + ( KW_AS ((LPAREN (aliasList | columnNameTypeList) RPAREN) | (aliasList | columnNameTypeList)))? + outSerde=rowFormat outRec=recordReader + -> ^(TOK_TRANSFORM selectExpressionList $inSerde $inRec StringLiteral $outSerde $outRec aliasList? columnNameTypeList?) + ; + +hintClause +@init { gParent.pushMsg("hint clause", state); } +@after { gParent.popMsg(state); } + : + DIVIDE STAR PLUS hintList STAR DIVIDE -> ^(TOK_HINTLIST hintList) + ; + +hintList +@init { gParent.pushMsg("hint list", state); } +@after { gParent.popMsg(state); } + : + hintItem (COMMA hintItem)* -> hintItem+ + ; + +hintItem +@init { gParent.pushMsg("hint item", state); } +@after { gParent.popMsg(state); } + : + hintName (LPAREN hintArgs RPAREN)? -> ^(TOK_HINT hintName hintArgs?) + ; + +hintName +@init { gParent.pushMsg("hint name", state); } +@after { gParent.popMsg(state); } + : + KW_MAPJOIN -> TOK_MAPJOIN + | KW_STREAMTABLE -> TOK_STREAMTABLE + ; + +hintArgs +@init { gParent.pushMsg("hint arguments", state); } +@after { gParent.popMsg(state); } + : + hintArgName (COMMA hintArgName)* -> ^(TOK_HINTARGLIST hintArgName+) + ; + +hintArgName +@init { gParent.pushMsg("hint argument name", state); } +@after { gParent.popMsg(state); } + : + identifier + ; + +selectItem +@init { gParent.pushMsg("selection target", state); } +@after { gParent.popMsg(state); } + : + (tableAllColumns) => tableAllColumns -> ^(TOK_SELEXPR tableAllColumns) + | + ( expression + ((KW_AS? identifier) | (KW_AS LPAREN identifier (COMMA identifier)* RPAREN))? + ) -> ^(TOK_SELEXPR expression identifier*) + ; + +trfmClause +@init { gParent.pushMsg("transform clause", state); } +@after { gParent.popMsg(state); } + : + ( KW_MAP selectExpressionList + | KW_REDUCE selectExpressionList ) + inSerde=rowFormat inRec=recordWriter + KW_USING StringLiteral + ( KW_AS ((LPAREN (aliasList | columnNameTypeList) RPAREN) | (aliasList | columnNameTypeList)))? + outSerde=rowFormat outRec=recordReader + -> ^(TOK_TRANSFORM selectExpressionList $inSerde $inRec StringLiteral $outSerde $outRec aliasList? columnNameTypeList?) + ; + +selectExpression +@init { gParent.pushMsg("select expression", state); } +@after { gParent.popMsg(state); } + : + (tableAllColumns) => tableAllColumns + | + expression + ; + +selectExpressionList +@init { gParent.pushMsg("select expression list", state); } +@after { gParent.popMsg(state); } + : + selectExpression (COMMA selectExpression)* -> ^(TOK_EXPLIST selectExpression+) + ; + +//---------------------- Rules for windowing clauses ------------------------------- +window_clause +@init { gParent.pushMsg("window_clause", state); } +@after { gParent.popMsg(state); } +: + KW_WINDOW window_defn (COMMA window_defn)* -> ^(KW_WINDOW window_defn+) +; + +window_defn +@init { gParent.pushMsg("window_defn", state); } +@after { gParent.popMsg(state); } +: + Identifier KW_AS window_specification -> ^(TOK_WINDOWDEF Identifier window_specification) +; + +window_specification +@init { gParent.pushMsg("window_specification", state); } +@after { gParent.popMsg(state); } +: + (Identifier | ( LPAREN Identifier? 
partitioningSpec? window_frame? RPAREN)) -> ^(TOK_WINDOWSPEC Identifier? partitioningSpec? window_frame?) +; + +window_frame : + window_range_expression | + window_value_expression +; + +window_range_expression +@init { gParent.pushMsg("window_range_expression", state); } +@after { gParent.popMsg(state); } +: + KW_ROWS sb=window_frame_start_boundary -> ^(TOK_WINDOWRANGE $sb) | + KW_ROWS KW_BETWEEN s=window_frame_boundary KW_AND end=window_frame_boundary -> ^(TOK_WINDOWRANGE $s $end) +; + +window_value_expression +@init { gParent.pushMsg("window_value_expression", state); } +@after { gParent.popMsg(state); } +: + KW_RANGE sb=window_frame_start_boundary -> ^(TOK_WINDOWVALUES $sb) | + KW_RANGE KW_BETWEEN s=window_frame_boundary KW_AND end=window_frame_boundary -> ^(TOK_WINDOWVALUES $s $end) +; + +window_frame_start_boundary +@init { gParent.pushMsg("windowframestartboundary", state); } +@after { gParent.popMsg(state); } +: + KW_UNBOUNDED KW_PRECEDING -> ^(KW_PRECEDING KW_UNBOUNDED) | + KW_CURRENT KW_ROW -> ^(KW_CURRENT) | + Number KW_PRECEDING -> ^(KW_PRECEDING Number) +; + +window_frame_boundary +@init { gParent.pushMsg("windowframeboundary", state); } +@after { gParent.popMsg(state); } +: + KW_UNBOUNDED (r=KW_PRECEDING|r=KW_FOLLOWING) -> ^($r KW_UNBOUNDED) | + KW_CURRENT KW_ROW -> ^(KW_CURRENT) | + Number (d=KW_PRECEDING | d=KW_FOLLOWING ) -> ^($d Number) +; + diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g new file mode 100644 index 000000000000..35cf7fd6797c --- /dev/null +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g @@ -0,0 +1,473 @@ +/** + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +lexer grammar SparkSqlLexer; + +@lexer::header { +package org.apache.spark.sql.parser; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +} + +@lexer::members { + private Configuration hiveConf; + + public void setHiveConf(Configuration hiveConf) { + this.hiveConf = hiveConf; + } + + protected boolean allowQuotedId() { + String supportedQIds = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); + return !"none".equals(supportedQIds); + } +} + +// Keywords + +KW_TRUE : 'TRUE'; +KW_FALSE : 'FALSE'; +KW_ALL : 'ALL'; +KW_NONE: 'NONE'; +KW_AND : 'AND'; +KW_OR : 'OR'; +KW_NOT : 'NOT' | '!'; +KW_LIKE : 'LIKE'; + +KW_IF : 'IF'; +KW_EXISTS : 'EXISTS'; + +KW_ASC : 'ASC'; +KW_DESC : 'DESC'; +KW_ORDER : 'ORDER'; +KW_GROUP : 'GROUP'; +KW_BY : 'BY'; +KW_HAVING : 'HAVING'; +KW_WHERE : 'WHERE'; +KW_FROM : 'FROM'; +KW_AS : 'AS'; +KW_SELECT : 'SELECT'; +KW_DISTINCT : 'DISTINCT'; +KW_INSERT : 'INSERT'; +KW_OVERWRITE : 'OVERWRITE'; +KW_OUTER : 'OUTER'; +KW_UNIQUEJOIN : 'UNIQUEJOIN'; +KW_PRESERVE : 'PRESERVE'; +KW_JOIN : 'JOIN'; +KW_LEFT : 'LEFT'; +KW_RIGHT : 'RIGHT'; +KW_FULL : 'FULL'; +KW_ON : 'ON'; +KW_PARTITION : 'PARTITION'; +KW_PARTITIONS : 'PARTITIONS'; +KW_TABLE: 'TABLE'; +KW_TABLES: 'TABLES'; +KW_COLUMNS: 'COLUMNS'; +KW_INDEX: 'INDEX'; +KW_INDEXES: 'INDEXES'; +KW_REBUILD: 'REBUILD'; +KW_FUNCTIONS: 'FUNCTIONS'; +KW_SHOW: 'SHOW'; +KW_MSCK: 'MSCK'; +KW_REPAIR: 'REPAIR'; +KW_DIRECTORY: 'DIRECTORY'; +KW_LOCAL: 'LOCAL'; +KW_TRANSFORM : 'TRANSFORM'; +KW_USING: 'USING'; +KW_CLUSTER: 'CLUSTER'; +KW_DISTRIBUTE: 'DISTRIBUTE'; +KW_SORT: 'SORT'; +KW_UNION: 'UNION'; +KW_LOAD: 'LOAD'; +KW_EXPORT: 'EXPORT'; +KW_IMPORT: 'IMPORT'; +KW_REPLICATION: 'REPLICATION'; +KW_METADATA: 'METADATA'; +KW_DATA: 'DATA'; +KW_INPATH: 'INPATH'; +KW_IS: 'IS'; +KW_NULL: 'NULL'; +KW_CREATE: 'CREATE'; +KW_EXTERNAL: 'EXTERNAL'; +KW_ALTER: 'ALTER'; +KW_CHANGE: 'CHANGE'; +KW_COLUMN: 'COLUMN'; +KW_FIRST: 'FIRST'; +KW_AFTER: 'AFTER'; +KW_DESCRIBE: 'DESCRIBE'; +KW_DROP: 'DROP'; +KW_RENAME: 'RENAME'; +KW_TO: 'TO'; +KW_COMMENT: 'COMMENT'; +KW_BOOLEAN: 'BOOLEAN'; +KW_TINYINT: 'TINYINT'; +KW_SMALLINT: 'SMALLINT'; +KW_INT: 'INT'; +KW_BIGINT: 'BIGINT'; +KW_FLOAT: 'FLOAT'; +KW_DOUBLE: 'DOUBLE'; +KW_DATE: 'DATE'; +KW_DATETIME: 'DATETIME'; +KW_TIMESTAMP: 'TIMESTAMP'; +KW_INTERVAL: 'INTERVAL'; +KW_DECIMAL: 'DECIMAL'; +KW_STRING: 'STRING'; +KW_CHAR: 'CHAR'; +KW_VARCHAR: 'VARCHAR'; +KW_ARRAY: 'ARRAY'; +KW_STRUCT: 'STRUCT'; +KW_MAP: 'MAP'; +KW_UNIONTYPE: 'UNIONTYPE'; +KW_REDUCE: 'REDUCE'; +KW_PARTITIONED: 'PARTITIONED'; +KW_CLUSTERED: 'CLUSTERED'; +KW_SORTED: 'SORTED'; +KW_INTO: 'INTO'; +KW_BUCKETS: 'BUCKETS'; +KW_ROW: 'ROW'; +KW_ROWS: 'ROWS'; +KW_FORMAT: 'FORMAT'; +KW_DELIMITED: 'DELIMITED'; +KW_FIELDS: 'FIELDS'; +KW_TERMINATED: 'TERMINATED'; +KW_ESCAPED: 'ESCAPED'; +KW_COLLECTION: 'COLLECTION'; +KW_ITEMS: 'ITEMS'; +KW_KEYS: 'KEYS'; +KW_KEY_TYPE: '$KEY$'; +KW_LINES: 'LINES'; +KW_STORED: 'STORED'; +KW_FILEFORMAT: 'FILEFORMAT'; +KW_INPUTFORMAT: 'INPUTFORMAT'; +KW_OUTPUTFORMAT: 'OUTPUTFORMAT'; +KW_INPUTDRIVER: 'INPUTDRIVER'; +KW_OUTPUTDRIVER: 'OUTPUTDRIVER'; +KW_ENABLE: 'ENABLE'; +KW_DISABLE: 'DISABLE'; +KW_LOCATION: 'LOCATION'; +KW_TABLESAMPLE: 'TABLESAMPLE'; +KW_BUCKET: 'BUCKET'; +KW_OUT: 'OUT'; +KW_OF: 'OF'; +KW_PERCENT: 'PERCENT'; +KW_CAST: 'CAST'; +KW_ADD: 'ADD'; +KW_REPLACE: 'REPLACE'; +KW_RLIKE: 'RLIKE'; +KW_REGEXP: 'REGEXP'; +KW_TEMPORARY: 'TEMPORARY'; +KW_FUNCTION: 'FUNCTION'; +KW_MACRO: 'MACRO'; +KW_FILE: 'FILE'; +KW_JAR: 'JAR'; +KW_EXPLAIN: 'EXPLAIN'; +KW_EXTENDED: 'EXTENDED'; 
+KW_FORMATTED: 'FORMATTED'; +KW_PRETTY: 'PRETTY'; +KW_DEPENDENCY: 'DEPENDENCY'; +KW_LOGICAL: 'LOGICAL'; +KW_SERDE: 'SERDE'; +KW_WITH: 'WITH'; +KW_DEFERRED: 'DEFERRED'; +KW_SERDEPROPERTIES: 'SERDEPROPERTIES'; +KW_DBPROPERTIES: 'DBPROPERTIES'; +KW_LIMIT: 'LIMIT'; +KW_SET: 'SET'; +KW_UNSET: 'UNSET'; +KW_TBLPROPERTIES: 'TBLPROPERTIES'; +KW_IDXPROPERTIES: 'IDXPROPERTIES'; +KW_VALUE_TYPE: '$VALUE$'; +KW_ELEM_TYPE: '$ELEM$'; +KW_DEFINED: 'DEFINED'; +KW_CASE: 'CASE'; +KW_WHEN: 'WHEN'; +KW_THEN: 'THEN'; +KW_ELSE: 'ELSE'; +KW_END: 'END'; +KW_MAPJOIN: 'MAPJOIN'; +KW_STREAMTABLE: 'STREAMTABLE'; +KW_CLUSTERSTATUS: 'CLUSTERSTATUS'; +KW_UTC: 'UTC'; +KW_UTCTIMESTAMP: 'UTC_TMESTAMP'; +KW_LONG: 'LONG'; +KW_DELETE: 'DELETE'; +KW_PLUS: 'PLUS'; +KW_MINUS: 'MINUS'; +KW_FETCH: 'FETCH'; +KW_INTERSECT: 'INTERSECT'; +KW_VIEW: 'VIEW'; +KW_IN: 'IN'; +KW_DATABASE: 'DATABASE'; +KW_DATABASES: 'DATABASES'; +KW_MATERIALIZED: 'MATERIALIZED'; +KW_SCHEMA: 'SCHEMA'; +KW_SCHEMAS: 'SCHEMAS'; +KW_GRANT: 'GRANT'; +KW_REVOKE: 'REVOKE'; +KW_SSL: 'SSL'; +KW_UNDO: 'UNDO'; +KW_LOCK: 'LOCK'; +KW_LOCKS: 'LOCKS'; +KW_UNLOCK: 'UNLOCK'; +KW_SHARED: 'SHARED'; +KW_EXCLUSIVE: 'EXCLUSIVE'; +KW_PROCEDURE: 'PROCEDURE'; +KW_UNSIGNED: 'UNSIGNED'; +KW_WHILE: 'WHILE'; +KW_READ: 'READ'; +KW_READS: 'READS'; +KW_PURGE: 'PURGE'; +KW_RANGE: 'RANGE'; +KW_ANALYZE: 'ANALYZE'; +KW_BEFORE: 'BEFORE'; +KW_BETWEEN: 'BETWEEN'; +KW_BOTH: 'BOTH'; +KW_BINARY: 'BINARY'; +KW_CROSS: 'CROSS'; +KW_CONTINUE: 'CONTINUE'; +KW_CURSOR: 'CURSOR'; +KW_TRIGGER: 'TRIGGER'; +KW_RECORDREADER: 'RECORDREADER'; +KW_RECORDWRITER: 'RECORDWRITER'; +KW_SEMI: 'SEMI'; +KW_LATERAL: 'LATERAL'; +KW_TOUCH: 'TOUCH'; +KW_ARCHIVE: 'ARCHIVE'; +KW_UNARCHIVE: 'UNARCHIVE'; +KW_COMPUTE: 'COMPUTE'; +KW_STATISTICS: 'STATISTICS'; +KW_USE: 'USE'; +KW_OPTION: 'OPTION'; +KW_CONCATENATE: 'CONCATENATE'; +KW_SHOW_DATABASE: 'SHOW_DATABASE'; +KW_UPDATE: 'UPDATE'; +KW_RESTRICT: 'RESTRICT'; +KW_CASCADE: 'CASCADE'; +KW_SKEWED: 'SKEWED'; +KW_ROLLUP: 'ROLLUP'; +KW_CUBE: 'CUBE'; +KW_DIRECTORIES: 'DIRECTORIES'; +KW_FOR: 'FOR'; +KW_WINDOW: 'WINDOW'; +KW_UNBOUNDED: 'UNBOUNDED'; +KW_PRECEDING: 'PRECEDING'; +KW_FOLLOWING: 'FOLLOWING'; +KW_CURRENT: 'CURRENT'; +KW_CURRENT_DATE: 'CURRENT_DATE'; +KW_CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; +KW_LESS: 'LESS'; +KW_MORE: 'MORE'; +KW_OVER: 'OVER'; +KW_GROUPING: 'GROUPING'; +KW_SETS: 'SETS'; +KW_TRUNCATE: 'TRUNCATE'; +KW_NOSCAN: 'NOSCAN'; +KW_PARTIALSCAN: 'PARTIALSCAN'; +KW_USER: 'USER'; +KW_ROLE: 'ROLE'; +KW_ROLES: 'ROLES'; +KW_INNER: 'INNER'; +KW_EXCHANGE: 'EXCHANGE'; +KW_URI: 'URI'; +KW_SERVER : 'SERVER'; +KW_ADMIN: 'ADMIN'; +KW_OWNER: 'OWNER'; +KW_PRINCIPALS: 'PRINCIPALS'; +KW_COMPACT: 'COMPACT'; +KW_COMPACTIONS: 'COMPACTIONS'; +KW_TRANSACTIONS: 'TRANSACTIONS'; +KW_REWRITE : 'REWRITE'; +KW_AUTHORIZATION: 'AUTHORIZATION'; +KW_CONF: 'CONF'; +KW_VALUES: 'VALUES'; +KW_RELOAD: 'RELOAD'; +KW_YEAR: 'YEAR'; +KW_MONTH: 'MONTH'; +KW_DAY: 'DAY'; +KW_HOUR: 'HOUR'; +KW_MINUTE: 'MINUTE'; +KW_SECOND: 'SECOND'; +KW_START: 'START'; +KW_TRANSACTION: 'TRANSACTION'; +KW_COMMIT: 'COMMIT'; +KW_ROLLBACK: 'ROLLBACK'; +KW_WORK: 'WORK'; +KW_ONLY: 'ONLY'; +KW_WRITE: 'WRITE'; +KW_ISOLATION: 'ISOLATION'; +KW_LEVEL: 'LEVEL'; +KW_SNAPSHOT: 'SNAPSHOT'; +KW_AUTOCOMMIT: 'AUTOCOMMIT'; + +// Operators +// NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. 
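Before the operator tokens that follow, a note on driving this lexer on its own: the keywords above are declared in upper case, so the raw generated lexer is case sensitive (case folding is expected to happen in the driver), and quoted-identifier support depends on the allowQuotedId() predicate defined in the members block. A minimal standalone sketch against the ANTLR 3 runtime; the class name LexerExample is illustrative and a constructible HiveConf is assumed to be on the classpath:

    import org.antlr.runtime.ANTLRStringStream;
    import org.antlr.runtime.Token;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.spark.sql.parser.SparkSqlLexer;

    public class LexerExample {
      public static void main(String[] args) {
        SparkSqlLexer lexer = new SparkSqlLexer(new ANTLRStringStream("SELECT A + 1 FROM T"));
        lexer.setHiveConf(new HiveConf());  // consulted by the allowQuotedId() predicate
        // Hidden-channel tokens (whitespace, comments) are printed too in this raw loop.
        for (Token t = lexer.nextToken(); t.getType() != Token.EOF; t = lexer.nextToken()) {
          System.out.println(t.getType() + " -> '" + t.getText() + "'");
        }
      }
    }
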
+ +DOT : '.'; // generated as a part of Number rule +COLON : ':' ; +COMMA : ',' ; +SEMICOLON : ';' ; + +LPAREN : '(' ; +RPAREN : ')' ; +LSQUARE : '[' ; +RSQUARE : ']' ; +LCURLY : '{'; +RCURLY : '}'; + +EQUAL : '=' | '=='; +EQUAL_NS : '<=>'; +NOTEQUAL : '<>' | '!='; +LESSTHANOREQUALTO : '<='; +LESSTHAN : '<'; +GREATERTHANOREQUALTO : '>='; +GREATERTHAN : '>'; + +DIVIDE : '/'; +PLUS : '+'; +MINUS : '-'; +STAR : '*'; +MOD : '%'; +DIV : 'DIV'; + +AMPERSAND : '&'; +TILDE : '~'; +BITWISEOR : '|'; +BITWISEXOR : '^'; +QUESTION : '?'; +DOLLAR : '$'; + +// LITERALS +fragment +Letter + : 'a'..'z' | 'A'..'Z' + ; + +fragment +HexDigit + : 'a'..'f' | 'A'..'F' + ; + +fragment +Digit + : + '0'..'9' + ; + +fragment +Exponent + : + ('e' | 'E') ( PLUS|MINUS )? (Digit)+ + ; + +fragment +RegexComponent + : 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' + | PLUS | STAR | QUESTION | MINUS | DOT + | LPAREN | RPAREN | LSQUARE | RSQUARE | LCURLY | RCURLY + | BITWISEXOR | BITWISEOR | DOLLAR | '!' + ; + +StringLiteral + : + ( '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '\"' ( ~('\"'|'\\') | ('\\' .) )* '\"' + )+ + ; + +CharSetLiteral + : + StringLiteral + | '0' 'X' (HexDigit|Digit)+ + ; + +BigintLiteral + : + (Digit)+ 'L' + ; + +SmallintLiteral + : + (Digit)+ 'S' + ; + +TinyintLiteral + : + (Digit)+ 'Y' + ; + +DecimalLiteral + : + Number 'B' 'D' + ; + +ByteLengthLiteral + : + (Digit)+ ('b' | 'B' | 'k' | 'K' | 'm' | 'M' | 'g' | 'G') + ; + +Number + : + (Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)? + ; + +/* +An Identifier can be: +- tableName +- columnName +- select expr alias +- lateral view aliases +- database name +- view name +- subquery alias +- function name +- ptf argument identifier +- index name +- property name for: db,tbl,partition... +- fileFormat +- role name +- privilege name +- principal name +- macro name +- hint name +- window name +*/ +Identifier + : + (Letter | Digit) (Letter | Digit | '_')* + | {allowQuotedId()}? QuotedIdentifier /* though at the language level we allow all Identifiers to be QuotedIdentifiers; + at the API level only columns are allowed to be of this form */ + | '`' RegexComponent+ '`' + ; + +fragment +QuotedIdentifier + : + '`' ( '``' | ~('`') )* '`' { setText(getText().substring(1, getText().length() -1 ).replaceAll("``", "`")); } + ; + +CharSetName + : + '_' (Letter | Digit | '_' | '-' | '.' | ':' )+ + ; + +WS : (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;} + ; + +COMMENT + : '--' (~('\n'|'\r'))* + { $channel=HIDDEN; } + ; + diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g new file mode 100644 index 000000000000..a1bd3d10c957 --- /dev/null +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g @@ -0,0 +1,2456 @@ +/** + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ +parser grammar SparkSqlParser; + +options +{ +tokenVocab=SparkSqlLexer; +output=AST; +ASTLabelType=CommonTree; +backtrack=false; +k=3; +} +import SelectClauseParser, FromClauseParser, IdentifiersParser; + +tokens { +TOK_INSERT; +TOK_QUERY; +TOK_SELECT; +TOK_SELECTDI; +TOK_SELEXPR; +TOK_FROM; +TOK_TAB; +TOK_PARTSPEC; +TOK_PARTVAL; +TOK_DIR; +TOK_TABREF; +TOK_SUBQUERY; +TOK_INSERT_INTO; +TOK_DESTINATION; +TOK_ALLCOLREF; +TOK_TABLE_OR_COL; +TOK_FUNCTION; +TOK_FUNCTIONDI; +TOK_FUNCTIONSTAR; +TOK_WHERE; +TOK_OP_EQ; +TOK_OP_NE; +TOK_OP_LE; +TOK_OP_LT; +TOK_OP_GE; +TOK_OP_GT; +TOK_OP_DIV; +TOK_OP_ADD; +TOK_OP_SUB; +TOK_OP_MUL; +TOK_OP_MOD; +TOK_OP_BITAND; +TOK_OP_BITNOT; +TOK_OP_BITOR; +TOK_OP_BITXOR; +TOK_OP_AND; +TOK_OP_OR; +TOK_OP_NOT; +TOK_OP_LIKE; +TOK_TRUE; +TOK_FALSE; +TOK_TRANSFORM; +TOK_SERDE; +TOK_SERDENAME; +TOK_SERDEPROPS; +TOK_EXPLIST; +TOK_ALIASLIST; +TOK_GROUPBY; +TOK_ROLLUP_GROUPBY; +TOK_CUBE_GROUPBY; +TOK_GROUPING_SETS; +TOK_GROUPING_SETS_EXPRESSION; +TOK_HAVING; +TOK_ORDERBY; +TOK_CLUSTERBY; +TOK_DISTRIBUTEBY; +TOK_SORTBY; +TOK_UNIONALL; +TOK_UNIONDISTINCT; +TOK_JOIN; +TOK_LEFTOUTERJOIN; +TOK_RIGHTOUTERJOIN; +TOK_FULLOUTERJOIN; +TOK_UNIQUEJOIN; +TOK_CROSSJOIN; +TOK_LOAD; +TOK_EXPORT; +TOK_IMPORT; +TOK_REPLICATION; +TOK_METADATA; +TOK_NULL; +TOK_ISNULL; +TOK_ISNOTNULL; +TOK_TINYINT; +TOK_SMALLINT; +TOK_INT; +TOK_BIGINT; +TOK_BOOLEAN; +TOK_FLOAT; +TOK_DOUBLE; +TOK_DATE; +TOK_DATELITERAL; +TOK_DATETIME; +TOK_TIMESTAMP; +TOK_TIMESTAMPLITERAL; +TOK_INTERVAL_YEAR_MONTH; +TOK_INTERVAL_YEAR_MONTH_LITERAL; +TOK_INTERVAL_DAY_TIME; +TOK_INTERVAL_DAY_TIME_LITERAL; +TOK_INTERVAL_YEAR_LITERAL; +TOK_INTERVAL_MONTH_LITERAL; +TOK_INTERVAL_DAY_LITERAL; +TOK_INTERVAL_HOUR_LITERAL; +TOK_INTERVAL_MINUTE_LITERAL; +TOK_INTERVAL_SECOND_LITERAL; +TOK_STRING; +TOK_CHAR; +TOK_VARCHAR; +TOK_BINARY; +TOK_DECIMAL; +TOK_LIST; +TOK_STRUCT; +TOK_MAP; +TOK_UNIONTYPE; +TOK_COLTYPELIST; +TOK_CREATEDATABASE; +TOK_CREATETABLE; +TOK_TRUNCATETABLE; +TOK_CREATEINDEX; +TOK_CREATEINDEX_INDEXTBLNAME; +TOK_DEFERRED_REBUILDINDEX; +TOK_DROPINDEX; +TOK_LIKETABLE; +TOK_DESCTABLE; +TOK_DESCFUNCTION; +TOK_ALTERTABLE; +TOK_ALTERTABLE_RENAME; +TOK_ALTERTABLE_ADDCOLS; +TOK_ALTERTABLE_RENAMECOL; +TOK_ALTERTABLE_RENAMEPART; +TOK_ALTERTABLE_REPLACECOLS; +TOK_ALTERTABLE_ADDPARTS; +TOK_ALTERTABLE_DROPPARTS; +TOK_ALTERTABLE_PARTCOLTYPE; +TOK_ALTERTABLE_MERGEFILES; +TOK_ALTERTABLE_TOUCH; +TOK_ALTERTABLE_ARCHIVE; +TOK_ALTERTABLE_UNARCHIVE; +TOK_ALTERTABLE_SERDEPROPERTIES; +TOK_ALTERTABLE_SERIALIZER; +TOK_ALTERTABLE_UPDATECOLSTATS; +TOK_TABLE_PARTITION; +TOK_ALTERTABLE_FILEFORMAT; +TOK_ALTERTABLE_LOCATION; +TOK_ALTERTABLE_PROPERTIES; +TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION; +TOK_ALTERTABLE_DROPPROPERTIES; +TOK_ALTERTABLE_SKEWED; +TOK_ALTERTABLE_EXCHANGEPARTITION; +TOK_ALTERTABLE_SKEWED_LOCATION; +TOK_ALTERTABLE_BUCKETS; +TOK_ALTERTABLE_CLUSTER_SORT; +TOK_ALTERTABLE_COMPACT; +TOK_ALTERINDEX_REBUILD; +TOK_ALTERINDEX_PROPERTIES; +TOK_MSCK; +TOK_SHOWDATABASES; +TOK_SHOWTABLES; +TOK_SHOWCOLUMNS; +TOK_SHOWFUNCTIONS; +TOK_SHOWPARTITIONS; +TOK_SHOW_CREATEDATABASE; +TOK_SHOW_CREATETABLE; +TOK_SHOW_TABLESTATUS; +TOK_SHOW_TBLPROPERTIES; +TOK_SHOWLOCKS; +TOK_SHOWCONF; +TOK_LOCKTABLE; +TOK_UNLOCKTABLE; +TOK_LOCKDB; +TOK_UNLOCKDB; +TOK_SWITCHDATABASE; +TOK_DROPDATABASE; +TOK_DROPTABLE; +TOK_DATABASECOMMENT; +TOK_TABCOLLIST; +TOK_TABCOL; +TOK_TABLECOMMENT; +TOK_TABLEPARTCOLS; +TOK_TABLEROWFORMAT; +TOK_TABLEROWFORMATFIELD; 
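As the token list continues below, it is worth noting what these imaginary TOK_* tokens are for: ANTLR 3 turns each of them into a public int constant on the generated SparkSqlParser, and the tree rewrites elsewhere in the grammar use them as the node types of the resulting AST, which HiveQl.scala then consumes. A rough walking sketch, assuming ASTNode extends ANTLR's CommonTree as it does in Hive and that ParseDriver exposes a parse(String) entry point (both are assumptions, not shown in this hunk):

    import org.antlr.runtime.tree.Tree;
    import org.apache.spark.sql.parser.ASTNode;
    import org.apache.spark.sql.parser.ParseDriver;
    import org.apache.spark.sql.parser.ParseException;
    import org.apache.spark.sql.parser.SparkSqlParser;

    public class TokenWalkExample {
      // Recursively print each node's text and numeric token type.
      static void dump(Tree node, String indent) {
        System.out.println(indent + node.getText() + " (type " + node.getType() + ")");
        for (int i = 0; i < node.getChildCount(); i++) {
          dump(node.getChild(i), indent + "  ");
        }
      }

      public static void main(String[] args) throws ParseException {
        ASTNode ast = new ParseDriver().parse("SELECT 1 FROM t");
        dump(ast, "");
        // Imaginary tokens are ordinary generated int constants, e.g.:
        System.out.println("TOK_QUERY = " + SparkSqlParser.TOK_QUERY);
      }
    }
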
+TOK_TABLEROWFORMATCOLLITEMS; +TOK_TABLEROWFORMATMAPKEYS; +TOK_TABLEROWFORMATLINES; +TOK_TABLEROWFORMATNULL; +TOK_TABLEFILEFORMAT; +TOK_FILEFORMAT_GENERIC; +TOK_OFFLINE; +TOK_ENABLE; +TOK_DISABLE; +TOK_READONLY; +TOK_NO_DROP; +TOK_STORAGEHANDLER; +TOK_NOT_CLUSTERED; +TOK_NOT_SORTED; +TOK_TABCOLNAME; +TOK_TABLELOCATION; +TOK_PARTITIONLOCATION; +TOK_TABLEBUCKETSAMPLE; +TOK_TABLESPLITSAMPLE; +TOK_PERCENT; +TOK_LENGTH; +TOK_ROWCOUNT; +TOK_TMP_FILE; +TOK_TABSORTCOLNAMEASC; +TOK_TABSORTCOLNAMEDESC; +TOK_STRINGLITERALSEQUENCE; +TOK_CHARSETLITERAL; +TOK_CREATEFUNCTION; +TOK_DROPFUNCTION; +TOK_RELOADFUNCTION; +TOK_CREATEMACRO; +TOK_DROPMACRO; +TOK_TEMPORARY; +TOK_CREATEVIEW; +TOK_DROPVIEW; +TOK_ALTERVIEW; +TOK_ALTERVIEW_PROPERTIES; +TOK_ALTERVIEW_DROPPROPERTIES; +TOK_ALTERVIEW_ADDPARTS; +TOK_ALTERVIEW_DROPPARTS; +TOK_ALTERVIEW_RENAME; +TOK_VIEWPARTCOLS; +TOK_EXPLAIN; +TOK_EXPLAIN_SQ_REWRITE; +TOK_TABLESERIALIZER; +TOK_TABLEPROPERTIES; +TOK_TABLEPROPLIST; +TOK_INDEXPROPERTIES; +TOK_INDEXPROPLIST; +TOK_TABTYPE; +TOK_LIMIT; +TOK_TABLEPROPERTY; +TOK_IFEXISTS; +TOK_IFNOTEXISTS; +TOK_ORREPLACE; +TOK_HINTLIST; +TOK_HINT; +TOK_MAPJOIN; +TOK_STREAMTABLE; +TOK_HINTARGLIST; +TOK_USERSCRIPTCOLNAMES; +TOK_USERSCRIPTCOLSCHEMA; +TOK_RECORDREADER; +TOK_RECORDWRITER; +TOK_LEFTSEMIJOIN; +TOK_LATERAL_VIEW; +TOK_LATERAL_VIEW_OUTER; +TOK_TABALIAS; +TOK_ANALYZE; +TOK_CREATEROLE; +TOK_DROPROLE; +TOK_GRANT; +TOK_REVOKE; +TOK_SHOW_GRANT; +TOK_PRIVILEGE_LIST; +TOK_PRIVILEGE; +TOK_PRINCIPAL_NAME; +TOK_USER; +TOK_GROUP; +TOK_ROLE; +TOK_RESOURCE_ALL; +TOK_GRANT_WITH_OPTION; +TOK_GRANT_WITH_ADMIN_OPTION; +TOK_ADMIN_OPTION_FOR; +TOK_GRANT_OPTION_FOR; +TOK_PRIV_ALL; +TOK_PRIV_ALTER_METADATA; +TOK_PRIV_ALTER_DATA; +TOK_PRIV_DELETE; +TOK_PRIV_DROP; +TOK_PRIV_INDEX; +TOK_PRIV_INSERT; +TOK_PRIV_LOCK; +TOK_PRIV_SELECT; +TOK_PRIV_SHOW_DATABASE; +TOK_PRIV_CREATE; +TOK_PRIV_OBJECT; +TOK_PRIV_OBJECT_COL; +TOK_GRANT_ROLE; +TOK_REVOKE_ROLE; +TOK_SHOW_ROLE_GRANT; +TOK_SHOW_ROLES; +TOK_SHOW_SET_ROLE; +TOK_SHOW_ROLE_PRINCIPALS; +TOK_SHOWINDEXES; +TOK_SHOWDBLOCKS; +TOK_INDEXCOMMENT; +TOK_DESCDATABASE; +TOK_DATABASEPROPERTIES; +TOK_DATABASELOCATION; +TOK_DBPROPLIST; +TOK_ALTERDATABASE_PROPERTIES; +TOK_ALTERDATABASE_OWNER; +TOK_TABNAME; +TOK_TABSRC; +TOK_RESTRICT; +TOK_CASCADE; +TOK_TABLESKEWED; +TOK_TABCOLVALUE; +TOK_TABCOLVALUE_PAIR; +TOK_TABCOLVALUES; +TOK_SKEWED_LOCATIONS; +TOK_SKEWED_LOCATION_LIST; +TOK_SKEWED_LOCATION_MAP; +TOK_STOREDASDIRS; +TOK_PARTITIONINGSPEC; +TOK_PTBLFUNCTION; +TOK_WINDOWDEF; +TOK_WINDOWSPEC; +TOK_WINDOWVALUES; +TOK_WINDOWRANGE; +TOK_SUBQUERY_EXPR; +TOK_SUBQUERY_OP; +TOK_SUBQUERY_OP_NOTIN; +TOK_SUBQUERY_OP_NOTEXISTS; +TOK_DB_TYPE; +TOK_TABLE_TYPE; +TOK_CTE; +TOK_ARCHIVE; +TOK_FILE; +TOK_JAR; +TOK_RESOURCE_URI; +TOK_RESOURCE_LIST; +TOK_SHOW_COMPACTIONS; +TOK_SHOW_TRANSACTIONS; +TOK_DELETE_FROM; +TOK_UPDATE_TABLE; +TOK_SET_COLUMNS_CLAUSE; +TOK_VALUE_ROW; +TOK_VALUES_TABLE; +TOK_VIRTUAL_TABLE; +TOK_VIRTUAL_TABREF; +TOK_ANONYMOUS; +TOK_COL_NAME; +TOK_URI_TYPE; +TOK_SERVER_TYPE; +TOK_START_TRANSACTION; +TOK_ISOLATION_LEVEL; +TOK_ISOLATION_SNAPSHOT; +TOK_TXN_ACCESS_MODE; +TOK_TXN_READ_ONLY; +TOK_TXN_READ_WRITE; +TOK_COMMIT; +TOK_ROLLBACK; +TOK_SET_AUTOCOMMIT; +} + + +// Package headers +@header { +package org.apache.spark.sql.parser; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +} + + +@members { + ArrayList errors = new ArrayList(); + Stack msgs = new Stack(); + + private static HashMap 
xlateMap; + static { + //this is used to support auto completion in CLI + xlateMap = new HashMap(); + + // Keywords + xlateMap.put("KW_TRUE", "TRUE"); + xlateMap.put("KW_FALSE", "FALSE"); + xlateMap.put("KW_ALL", "ALL"); + xlateMap.put("KW_NONE", "NONE"); + xlateMap.put("KW_AND", "AND"); + xlateMap.put("KW_OR", "OR"); + xlateMap.put("KW_NOT", "NOT"); + xlateMap.put("KW_LIKE", "LIKE"); + + xlateMap.put("KW_ASC", "ASC"); + xlateMap.put("KW_DESC", "DESC"); + xlateMap.put("KW_ORDER", "ORDER"); + xlateMap.put("KW_BY", "BY"); + xlateMap.put("KW_GROUP", "GROUP"); + xlateMap.put("KW_WHERE", "WHERE"); + xlateMap.put("KW_FROM", "FROM"); + xlateMap.put("KW_AS", "AS"); + xlateMap.put("KW_SELECT", "SELECT"); + xlateMap.put("KW_DISTINCT", "DISTINCT"); + xlateMap.put("KW_INSERT", "INSERT"); + xlateMap.put("KW_OVERWRITE", "OVERWRITE"); + xlateMap.put("KW_OUTER", "OUTER"); + xlateMap.put("KW_JOIN", "JOIN"); + xlateMap.put("KW_LEFT", "LEFT"); + xlateMap.put("KW_RIGHT", "RIGHT"); + xlateMap.put("KW_FULL", "FULL"); + xlateMap.put("KW_ON", "ON"); + xlateMap.put("KW_PARTITION", "PARTITION"); + xlateMap.put("KW_PARTITIONS", "PARTITIONS"); + xlateMap.put("KW_TABLE", "TABLE"); + xlateMap.put("KW_TABLES", "TABLES"); + xlateMap.put("KW_TBLPROPERTIES", "TBLPROPERTIES"); + xlateMap.put("KW_SHOW", "SHOW"); + xlateMap.put("KW_MSCK", "MSCK"); + xlateMap.put("KW_DIRECTORY", "DIRECTORY"); + xlateMap.put("KW_LOCAL", "LOCAL"); + xlateMap.put("KW_TRANSFORM", "TRANSFORM"); + xlateMap.put("KW_USING", "USING"); + xlateMap.put("KW_CLUSTER", "CLUSTER"); + xlateMap.put("KW_DISTRIBUTE", "DISTRIBUTE"); + xlateMap.put("KW_SORT", "SORT"); + xlateMap.put("KW_UNION", "UNION"); + xlateMap.put("KW_LOAD", "LOAD"); + xlateMap.put("KW_DATA", "DATA"); + xlateMap.put("KW_INPATH", "INPATH"); + xlateMap.put("KW_IS", "IS"); + xlateMap.put("KW_NULL", "NULL"); + xlateMap.put("KW_CREATE", "CREATE"); + xlateMap.put("KW_EXTERNAL", "EXTERNAL"); + xlateMap.put("KW_ALTER", "ALTER"); + xlateMap.put("KW_DESCRIBE", "DESCRIBE"); + xlateMap.put("KW_DROP", "DROP"); + xlateMap.put("KW_RENAME", "RENAME"); + xlateMap.put("KW_TO", "TO"); + xlateMap.put("KW_COMMENT", "COMMENT"); + xlateMap.put("KW_BOOLEAN", "BOOLEAN"); + xlateMap.put("KW_TINYINT", "TINYINT"); + xlateMap.put("KW_SMALLINT", "SMALLINT"); + xlateMap.put("KW_INT", "INT"); + xlateMap.put("KW_BIGINT", "BIGINT"); + xlateMap.put("KW_FLOAT", "FLOAT"); + xlateMap.put("KW_DOUBLE", "DOUBLE"); + xlateMap.put("KW_DATE", "DATE"); + xlateMap.put("KW_DATETIME", "DATETIME"); + xlateMap.put("KW_TIMESTAMP", "TIMESTAMP"); + xlateMap.put("KW_STRING", "STRING"); + xlateMap.put("KW_BINARY", "BINARY"); + xlateMap.put("KW_ARRAY", "ARRAY"); + xlateMap.put("KW_MAP", "MAP"); + xlateMap.put("KW_REDUCE", "REDUCE"); + xlateMap.put("KW_PARTITIONED", "PARTITIONED"); + xlateMap.put("KW_CLUSTERED", "CLUSTERED"); + xlateMap.put("KW_SORTED", "SORTED"); + xlateMap.put("KW_INTO", "INTO"); + xlateMap.put("KW_BUCKETS", "BUCKETS"); + xlateMap.put("KW_ROW", "ROW"); + xlateMap.put("KW_FORMAT", "FORMAT"); + xlateMap.put("KW_DELIMITED", "DELIMITED"); + xlateMap.put("KW_FIELDS", "FIELDS"); + xlateMap.put("KW_TERMINATED", "TERMINATED"); + xlateMap.put("KW_COLLECTION", "COLLECTION"); + xlateMap.put("KW_ITEMS", "ITEMS"); + xlateMap.put("KW_KEYS", "KEYS"); + xlateMap.put("KW_KEY_TYPE", "\$KEY\$"); + xlateMap.put("KW_LINES", "LINES"); + xlateMap.put("KW_STORED", "STORED"); + xlateMap.put("KW_SEQUENCEFILE", "SEQUENCEFILE"); + xlateMap.put("KW_TEXTFILE", "TEXTFILE"); + xlateMap.put("KW_INPUTFORMAT", "INPUTFORMAT"); + xlateMap.put("KW_OUTPUTFORMAT", 
"OUTPUTFORMAT"); + xlateMap.put("KW_LOCATION", "LOCATION"); + xlateMap.put("KW_TABLESAMPLE", "TABLESAMPLE"); + xlateMap.put("KW_BUCKET", "BUCKET"); + xlateMap.put("KW_OUT", "OUT"); + xlateMap.put("KW_OF", "OF"); + xlateMap.put("KW_CAST", "CAST"); + xlateMap.put("KW_ADD", "ADD"); + xlateMap.put("KW_REPLACE", "REPLACE"); + xlateMap.put("KW_COLUMNS", "COLUMNS"); + xlateMap.put("KW_RLIKE", "RLIKE"); + xlateMap.put("KW_REGEXP", "REGEXP"); + xlateMap.put("KW_TEMPORARY", "TEMPORARY"); + xlateMap.put("KW_FUNCTION", "FUNCTION"); + xlateMap.put("KW_EXPLAIN", "EXPLAIN"); + xlateMap.put("KW_EXTENDED", "EXTENDED"); + xlateMap.put("KW_SERDE", "SERDE"); + xlateMap.put("KW_WITH", "WITH"); + xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES"); + xlateMap.put("KW_LIMIT", "LIMIT"); + xlateMap.put("KW_SET", "SET"); + xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES"); + xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$"); + xlateMap.put("KW_ELEM_TYPE", "\$ELEM\$"); + xlateMap.put("KW_DEFINED", "DEFINED"); + xlateMap.put("KW_SUBQUERY", "SUBQUERY"); + xlateMap.put("KW_REWRITE", "REWRITE"); + xlateMap.put("KW_UPDATE", "UPDATE"); + xlateMap.put("KW_VALUES", "VALUES"); + xlateMap.put("KW_PURGE", "PURGE"); + + + // Operators + xlateMap.put("DOT", "."); + xlateMap.put("COLON", ":"); + xlateMap.put("COMMA", ","); + xlateMap.put("SEMICOLON", ");"); + + xlateMap.put("LPAREN", "("); + xlateMap.put("RPAREN", ")"); + xlateMap.put("LSQUARE", "["); + xlateMap.put("RSQUARE", "]"); + + xlateMap.put("EQUAL", "="); + xlateMap.put("NOTEQUAL", "<>"); + xlateMap.put("EQUAL_NS", "<=>"); + xlateMap.put("LESSTHANOREQUALTO", "<="); + xlateMap.put("LESSTHAN", "<"); + xlateMap.put("GREATERTHANOREQUALTO", ">="); + xlateMap.put("GREATERTHAN", ">"); + + xlateMap.put("DIVIDE", "/"); + xlateMap.put("PLUS", "+"); + xlateMap.put("MINUS", "-"); + xlateMap.put("STAR", "*"); + xlateMap.put("MOD", "\%"); + + xlateMap.put("AMPERSAND", "&"); + xlateMap.put("TILDE", "~"); + xlateMap.put("BITWISEOR", "|"); + xlateMap.put("BITWISEXOR", "^"); + xlateMap.put("CharSetLiteral", "\\'"); + } + + public static Collection getKeywords() { + return xlateMap.values(); + } + + private static String xlate(String name) { + + String ret = xlateMap.get(name); + if (ret == null) { + ret = name; + } + + return ret; + } + + @Override + public Object recoverFromMismatchedSet(IntStream input, + RecognitionException re, BitSet follow) throws RecognitionException { + throw re; + } + + @Override + public void displayRecognitionError(String[] tokenNames, + RecognitionException e) { + errors.add(new ParseError(this, e, tokenNames)); + } + + @Override + public String getErrorHeader(RecognitionException e) { + String header = null; + if (e.charPositionInLine < 0 && input.LT(-1) != null) { + Token t = input.LT(-1); + header = "line " + t.getLine() + ":" + t.getCharPositionInLine(); + } else { + header = super.getErrorHeader(e); + } + + return header; + } + + @Override + public String getErrorMessage(RecognitionException e, String[] tokenNames) { + String msg = null; + + // Translate the token names to something that the user can understand + String[] xlateNames = new String[tokenNames.length]; + for (int i = 0; i < tokenNames.length; ++i) { + xlateNames[i] = SparkSqlParser.xlate(tokenNames[i]); + } + + if (e instanceof NoViableAltException) { + @SuppressWarnings("unused") + NoViableAltException nvae = (NoViableAltException) e; + // for development, can add + // "decision=<<"+nvae.grammarDecisionDescription+">>" + // and "(decision="+nvae.decisionNumber+") and + // "state "+nvae.stateNumber + 
msg = "cannot recognize input near"
+                + (input.LT(1) != null ? " " + getTokenErrorDisplay(input.LT(1)) : "")
+                + (input.LT(2) != null ? " " + getTokenErrorDisplay(input.LT(2)) : "")
+                + (input.LT(3) != null ? " " + getTokenErrorDisplay(input.LT(3)) : "");
+    } else if (e instanceof MismatchedTokenException) {
+      MismatchedTokenException mte = (MismatchedTokenException) e;
+      msg = super.getErrorMessage(e, xlateNames) + (input.LT(-1) == null ? "":" near '" + input.LT(-1).getText()) + "'";
+    } else if (e instanceof FailedPredicateException) {
+      FailedPredicateException fpe = (FailedPredicateException) e;
+      msg = "Failed to recognize predicate '" + fpe.token.getText() + "'. Failed rule: '" + fpe.ruleName + "'";
+    } else {
+      msg = super.getErrorMessage(e, xlateNames);
+    }
+
+    if (msgs.size() > 0) {
+      msg = msg + " in " + msgs.peek();
+    }
+    return msg;
+  }
+
+  public void pushMsg(String msg, RecognizerSharedState state) {
+    // ANTLR generated code does not wrap the @init code with this backtracking check,
+    // even if the matching @after has it. If we have parser rules that are doing
+    // some lookahead with syntactic predicates this can cause the push() and pop() calls
+    // to become unbalanced, so make sure both push/pop check the backtracking state.
+    if (state.backtracking == 0) {
+      msgs.push(msg);
+    }
+  }
+
+  public void popMsg(RecognizerSharedState state) {
+    if (state.backtracking == 0) {
+      Object o = msgs.pop();
+    }
+  }
+
+  // counter to generate unique union aliases
+  private int aliasCounter;
+  private String generateUnionAlias() {
+    return "_u" + (++aliasCounter);
+  }
+  private char [] excludedCharForColumnName = {'.', ':'};
+  private boolean containExcludedCharForCreateTableColumnName(String input) {
+    for(char c : excludedCharForColumnName) {
+      if(input.indexOf(c)>-1) {
+        return true;
+      }
+    }
+    return false;
+  }
+  private CommonTree throwSetOpException() throws RecognitionException {
+    throw new FailedPredicateException(input, "orderByClause clusterByClause distributeByClause sortByClause limitClause can only be applied to the whole union.", "");
+  }
+  private CommonTree throwColumnNameException() throws RecognitionException {
+    throw new FailedPredicateException(input, Arrays.toString(excludedCharForColumnName) + " can not be used in column name in create table statement.", "");
+  }
+  private Configuration hiveConf;
+  public void setHiveConf(Configuration hiveConf) {
+    this.hiveConf = hiveConf;
+  }
+  protected boolean useSQL11ReservedKeywordsForIdentifier() {
+    if(hiveConf==null){
+      return false;
+    }
+    return !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS);
+  }
+}
+
+@rulecatch {
+catch (RecognitionException e) {
+  reportError(e);
+  throw e;
+}
+}
+
+// starting rule
+statement
+    : explainStatement EOF
+    | execStatement EOF
+    ;
+
+explainStatement
+@init { pushMsg("explain statement", state); }
+@after { popMsg(state); }
+    : KW_EXPLAIN (
+        explainOption* execStatement -> ^(TOK_EXPLAIN execStatement explainOption*)
+        |
+        KW_REWRITE queryStatementExpression[true] -> ^(TOK_EXPLAIN_SQ_REWRITE queryStatementExpression))
+    ;
+
+explainOption
+@init { msgs.push("explain option"); }
+@after { msgs.pop(); }
+    : KW_EXTENDED|KW_FORMATTED|KW_DEPENDENCY|KW_LOGICAL|KW_AUTHORIZATION
+    ;
+
+execStatement
+@init { pushMsg("statement", state); }
+@after { popMsg(state); }
+    : queryStatementExpression[true]
+    | loadStatement
+    | exportStatement
+    | importStatement
+    | ddlStatement
+    | deleteStatement
+    | updateStatement
+    | sqlTransactionStatement
+    ;
+
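+// Illustrative example (not part of the original Hive grammar; the path and table name
+// below are made up): an input such as
+//   LOAD DATA LOCAL INPATH '/tmp/kv1.txt' OVERWRITE INTO TABLE page_view
+// matches the loadStatement rule that follows, and its rewrite builds an AST of roughly
+// the form (TOK_LOAD '/tmp/kv1.txt' [tableOrPartition subtree] LOCAL OVERWRITE); the
+// table subtree itself comes from the imported tableOrPartition rule.
+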
+loadStatement +@init { pushMsg("load statement", state); } +@after { popMsg(state); } + : KW_LOAD KW_DATA (islocal=KW_LOCAL)? KW_INPATH (path=StringLiteral) (isoverwrite=KW_OVERWRITE)? KW_INTO KW_TABLE (tab=tableOrPartition) + -> ^(TOK_LOAD $path $tab $islocal? $isoverwrite?) + ; + +replicationClause +@init { pushMsg("replication clause", state); } +@after { popMsg(state); } + : KW_FOR (isMetadataOnly=KW_METADATA)? KW_REPLICATION LPAREN (replId=StringLiteral) RPAREN + -> ^(TOK_REPLICATION $replId $isMetadataOnly?) + ; + +exportStatement +@init { pushMsg("export statement", state); } +@after { popMsg(state); } + : KW_EXPORT + KW_TABLE (tab=tableOrPartition) + KW_TO (path=StringLiteral) + replicationClause? + -> ^(TOK_EXPORT $tab $path replicationClause?) + ; + +importStatement +@init { pushMsg("import statement", state); } +@after { popMsg(state); } + : KW_IMPORT + ((ext=KW_EXTERNAL)? KW_TABLE (tab=tableOrPartition))? + KW_FROM (path=StringLiteral) + tableLocation? + -> ^(TOK_IMPORT $path $tab? $ext? tableLocation?) + ; + +ddlStatement +@init { pushMsg("ddl statement", state); } +@after { popMsg(state); } + : createDatabaseStatement + | switchDatabaseStatement + | dropDatabaseStatement + | createTableStatement + | dropTableStatement + | truncateTableStatement + | alterStatement + | descStatement + | showStatement + | metastoreCheck + | createViewStatement + | dropViewStatement + | createFunctionStatement + | createMacroStatement + | createIndexStatement + | dropIndexStatement + | dropFunctionStatement + | reloadFunctionStatement + | dropMacroStatement + | analyzeStatement + | lockStatement + | unlockStatement + | lockDatabase + | unlockDatabase + | createRoleStatement + | dropRoleStatement + | (grantPrivileges) => grantPrivileges + | (revokePrivileges) => revokePrivileges + | showGrants + | showRoleGrants + | showRolePrincipals + | showRoles + | grantRole + | revokeRole + | setRole + | showCurrentRole + ; + +ifExists +@init { pushMsg("if exists clause", state); } +@after { popMsg(state); } + : KW_IF KW_EXISTS + -> ^(TOK_IFEXISTS) + ; + +restrictOrCascade +@init { pushMsg("restrict or cascade clause", state); } +@after { popMsg(state); } + : KW_RESTRICT + -> ^(TOK_RESTRICT) + | KW_CASCADE + -> ^(TOK_CASCADE) + ; + +ifNotExists +@init { pushMsg("if not exists clause", state); } +@after { popMsg(state); } + : KW_IF KW_NOT KW_EXISTS + -> ^(TOK_IFNOTEXISTS) + ; + +storedAsDirs +@init { pushMsg("stored as directories", state); } +@after { popMsg(state); } + : KW_STORED KW_AS KW_DIRECTORIES + -> ^(TOK_STOREDASDIRS) + ; + +orReplace +@init { pushMsg("or replace clause", state); } +@after { popMsg(state); } + : KW_OR KW_REPLACE + -> ^(TOK_ORREPLACE) + ; + +createDatabaseStatement +@init { pushMsg("create database statement", state); } +@after { popMsg(state); } + : KW_CREATE (KW_DATABASE|KW_SCHEMA) + ifNotExists? + name=identifier + databaseComment? + dbLocation? + (KW_WITH KW_DBPROPERTIES dbprops=dbProperties)? + -> ^(TOK_CREATEDATABASE $name ifNotExists? dbLocation? databaseComment? $dbprops?) 
+ ; + +dbLocation +@init { pushMsg("database location specification", state); } +@after { popMsg(state); } + : + KW_LOCATION locn=StringLiteral -> ^(TOK_DATABASELOCATION $locn) + ; + +dbProperties +@init { pushMsg("dbproperties", state); } +@after { popMsg(state); } + : + LPAREN dbPropertiesList RPAREN -> ^(TOK_DATABASEPROPERTIES dbPropertiesList) + ; + +dbPropertiesList +@init { pushMsg("database properties list", state); } +@after { popMsg(state); } + : + keyValueProperty (COMMA keyValueProperty)* -> ^(TOK_DBPROPLIST keyValueProperty+) + ; + + +switchDatabaseStatement +@init { pushMsg("switch database statement", state); } +@after { popMsg(state); } + : KW_USE identifier + -> ^(TOK_SWITCHDATABASE identifier) + ; + +dropDatabaseStatement +@init { pushMsg("drop database statement", state); } +@after { popMsg(state); } + : KW_DROP (KW_DATABASE|KW_SCHEMA) ifExists? identifier restrictOrCascade? + -> ^(TOK_DROPDATABASE identifier ifExists? restrictOrCascade?) + ; + +databaseComment +@init { pushMsg("database's comment", state); } +@after { popMsg(state); } + : KW_COMMENT comment=StringLiteral + -> ^(TOK_DATABASECOMMENT $comment) + ; + +createTableStatement +@init { pushMsg("create table statement", state); } +@after { popMsg(state); } + : KW_CREATE (temp=KW_TEMPORARY)? (ext=KW_EXTERNAL)? KW_TABLE ifNotExists? name=tableName + ( like=KW_LIKE likeName=tableName + tableRowFormat? + tableFileFormat? + tableLocation? + tablePropertiesPrefixed? + | (LPAREN columnNameTypeList RPAREN)? + tableComment? + tablePartition? + tableBuckets? + tableSkewed? + tableRowFormat? + tableFileFormat? + tableLocation? + tablePropertiesPrefixed? + (KW_AS selectStatementWithCTE)? + ) + -> ^(TOK_CREATETABLE $name $temp? $ext? ifNotExists? + ^(TOK_LIKETABLE $likeName?) + columnNameTypeList? + tableComment? + tablePartition? + tableBuckets? + tableSkewed? + tableRowFormat? + tableFileFormat? + tableLocation? + tablePropertiesPrefixed? + selectStatementWithCTE? + ) + ; + +truncateTableStatement +@init { pushMsg("truncate table statement", state); } +@after { popMsg(state); } + : KW_TRUNCATE KW_TABLE tablePartitionPrefix (KW_COLUMNS LPAREN columnNameList RPAREN)? -> ^(TOK_TRUNCATETABLE tablePartitionPrefix columnNameList?); + +createIndexStatement +@init { pushMsg("create index statement", state);} +@after {popMsg(state);} + : KW_CREATE KW_INDEX indexName=identifier + KW_ON KW_TABLE tab=tableName LPAREN indexedCols=columnNameList RPAREN + KW_AS typeName=StringLiteral + autoRebuild? + indexPropertiesPrefixed? + indexTblName? + tableRowFormat? + tableFileFormat? + tableLocation? + tablePropertiesPrefixed? + indexComment? + ->^(TOK_CREATEINDEX $indexName $typeName $tab $indexedCols + autoRebuild? + indexPropertiesPrefixed? + indexTblName? + tableRowFormat? + tableFileFormat? + tableLocation? + tablePropertiesPrefixed? + indexComment?) + ; + +indexComment +@init { pushMsg("comment on an index", state);} +@after {popMsg(state);} + : + KW_COMMENT comment=StringLiteral -> ^(TOK_INDEXCOMMENT $comment) + ; + +autoRebuild +@init { pushMsg("auto rebuild index", state);} +@after {popMsg(state);} + : KW_WITH KW_DEFERRED KW_REBUILD + ->^(TOK_DEFERRED_REBUILDINDEX) + ; + +indexTblName +@init { pushMsg("index table name", state);} +@after {popMsg(state);} + : KW_IN KW_TABLE indexTbl=tableName + ->^(TOK_CREATEINDEX_INDEXTBLNAME $indexTbl) + ; + +indexPropertiesPrefixed +@init { pushMsg("table properties with prefix", state); } +@after { popMsg(state); } + : + KW_IDXPROPERTIES! 
indexProperties + ; + +indexProperties +@init { pushMsg("index properties", state); } +@after { popMsg(state); } + : + LPAREN indexPropertiesList RPAREN -> ^(TOK_INDEXPROPERTIES indexPropertiesList) + ; + +indexPropertiesList +@init { pushMsg("index properties list", state); } +@after { popMsg(state); } + : + keyValueProperty (COMMA keyValueProperty)* -> ^(TOK_INDEXPROPLIST keyValueProperty+) + ; + +dropIndexStatement +@init { pushMsg("drop index statement", state);} +@after {popMsg(state);} + : KW_DROP KW_INDEX ifExists? indexName=identifier KW_ON tab=tableName + ->^(TOK_DROPINDEX $indexName $tab ifExists?) + ; + +dropTableStatement +@init { pushMsg("drop statement", state); } +@after { popMsg(state); } + : KW_DROP KW_TABLE ifExists? tableName KW_PURGE? replicationClause? + -> ^(TOK_DROPTABLE tableName ifExists? KW_PURGE? replicationClause?) + ; + +alterStatement +@init { pushMsg("alter statement", state); } +@after { popMsg(state); } + : KW_ALTER KW_TABLE tableName alterTableStatementSuffix -> ^(TOK_ALTERTABLE tableName alterTableStatementSuffix) + | KW_ALTER KW_VIEW tableName KW_AS? alterViewStatementSuffix -> ^(TOK_ALTERVIEW tableName alterViewStatementSuffix) + | KW_ALTER KW_INDEX alterIndexStatementSuffix -> alterIndexStatementSuffix + | KW_ALTER (KW_DATABASE|KW_SCHEMA) alterDatabaseStatementSuffix -> alterDatabaseStatementSuffix + ; + +alterTableStatementSuffix +@init { pushMsg("alter table statement", state); } +@after { popMsg(state); } + : (alterStatementSuffixRename[true]) => alterStatementSuffixRename[true] + | alterStatementSuffixDropPartitions[true] + | alterStatementSuffixAddPartitions[true] + | alterStatementSuffixTouch + | alterStatementSuffixArchive + | alterStatementSuffixUnArchive + | alterStatementSuffixProperties + | alterStatementSuffixSkewedby + | alterStatementSuffixExchangePartition + | alterStatementPartitionKeyType + | partitionSpec? alterTblPartitionStatementSuffix -> alterTblPartitionStatementSuffix partitionSpec? + ; + +alterTblPartitionStatementSuffix +@init {pushMsg("alter table partition statement suffix", state);} +@after {popMsg(state);} + : alterStatementSuffixFileFormat + | alterStatementSuffixLocation + | alterStatementSuffixMergeFiles + | alterStatementSuffixSerdeProperties + | alterStatementSuffixRenamePart + | alterStatementSuffixBucketNum + | alterTblPartitionStatementSuffixSkewedLocation + | alterStatementSuffixClusterbySortby + | alterStatementSuffixCompact + | alterStatementSuffixUpdateStatsCol + | alterStatementSuffixRenameCol + | alterStatementSuffixAddCol + ; + +alterStatementPartitionKeyType +@init {msgs.push("alter partition key type"); } +@after {msgs.pop();} + : KW_PARTITION KW_COLUMN LPAREN columnNameType RPAREN + -> ^(TOK_ALTERTABLE_PARTCOLTYPE columnNameType) + ; + +alterViewStatementSuffix +@init { pushMsg("alter view statement", state); } +@after { popMsg(state); } + : alterViewSuffixProperties + | alterStatementSuffixRename[false] + | alterStatementSuffixAddPartitions[false] + | alterStatementSuffixDropPartitions[false] + | selectStatementWithCTE + ; + +alterIndexStatementSuffix +@init { pushMsg("alter index statement", state); } +@after { popMsg(state); } + : indexName=identifier KW_ON tableName partitionSpec? + ( + KW_REBUILD + ->^(TOK_ALTERINDEX_REBUILD tableName $indexName partitionSpec?) 
+ | + KW_SET KW_IDXPROPERTIES + indexProperties + ->^(TOK_ALTERINDEX_PROPERTIES tableName $indexName indexProperties) + ) + ; + +alterDatabaseStatementSuffix +@init { pushMsg("alter database statement", state); } +@after { popMsg(state); } + : alterDatabaseSuffixProperties + | alterDatabaseSuffixSetOwner + ; + +alterDatabaseSuffixProperties +@init { pushMsg("alter database properties statement", state); } +@after { popMsg(state); } + : name=identifier KW_SET KW_DBPROPERTIES dbProperties + -> ^(TOK_ALTERDATABASE_PROPERTIES $name dbProperties) + ; + +alterDatabaseSuffixSetOwner +@init { pushMsg("alter database set owner", state); } +@after { popMsg(state); } + : dbName=identifier KW_SET KW_OWNER principalName + -> ^(TOK_ALTERDATABASE_OWNER $dbName principalName) + ; + +alterStatementSuffixRename[boolean table] +@init { pushMsg("rename statement", state); } +@after { popMsg(state); } + : KW_RENAME KW_TO tableName + -> { table }? ^(TOK_ALTERTABLE_RENAME tableName) + -> ^(TOK_ALTERVIEW_RENAME tableName) + ; + +alterStatementSuffixAddCol +@init { pushMsg("add column statement", state); } +@after { popMsg(state); } + : (add=KW_ADD | replace=KW_REPLACE) KW_COLUMNS LPAREN columnNameTypeList RPAREN restrictOrCascade? + -> {$add != null}? ^(TOK_ALTERTABLE_ADDCOLS columnNameTypeList restrictOrCascade?) + -> ^(TOK_ALTERTABLE_REPLACECOLS columnNameTypeList restrictOrCascade?) + ; + +alterStatementSuffixRenameCol +@init { pushMsg("rename column name", state); } +@after { popMsg(state); } + : KW_CHANGE KW_COLUMN? oldName=identifier newName=identifier colType (KW_COMMENT comment=StringLiteral)? alterStatementChangeColPosition? restrictOrCascade? + ->^(TOK_ALTERTABLE_RENAMECOL $oldName $newName colType $comment? alterStatementChangeColPosition? restrictOrCascade?) + ; + +alterStatementSuffixUpdateStatsCol +@init { pushMsg("update column statistics", state); } +@after { popMsg(state); } + : KW_UPDATE KW_STATISTICS KW_FOR KW_COLUMN? colName=identifier KW_SET tableProperties (KW_COMMENT comment=StringLiteral)? + ->^(TOK_ALTERTABLE_UPDATECOLSTATS $colName tableProperties $comment?) + ; + +alterStatementChangeColPosition + : first=KW_FIRST|KW_AFTER afterCol=identifier + ->{$first != null}? ^(TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION ) + -> ^(TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION $afterCol) + ; + +alterStatementSuffixAddPartitions[boolean table] +@init { pushMsg("add partition statement", state); } +@after { popMsg(state); } + : KW_ADD ifNotExists? alterStatementSuffixAddPartitionsElement+ + -> { table }? ^(TOK_ALTERTABLE_ADDPARTS ifNotExists? alterStatementSuffixAddPartitionsElement+) + -> ^(TOK_ALTERVIEW_ADDPARTS ifNotExists? alterStatementSuffixAddPartitionsElement+) + ; + +alterStatementSuffixAddPartitionsElement + : partitionSpec partitionLocation? 
+ ; + +alterStatementSuffixTouch +@init { pushMsg("touch statement", state); } +@after { popMsg(state); } + : KW_TOUCH (partitionSpec)* + -> ^(TOK_ALTERTABLE_TOUCH (partitionSpec)*) + ; + +alterStatementSuffixArchive +@init { pushMsg("archive statement", state); } +@after { popMsg(state); } + : KW_ARCHIVE (partitionSpec)* + -> ^(TOK_ALTERTABLE_ARCHIVE (partitionSpec)*) + ; + +alterStatementSuffixUnArchive +@init { pushMsg("unarchive statement", state); } +@after { popMsg(state); } + : KW_UNARCHIVE (partitionSpec)* + -> ^(TOK_ALTERTABLE_UNARCHIVE (partitionSpec)*) + ; + +partitionLocation +@init { pushMsg("partition location", state); } +@after { popMsg(state); } + : + KW_LOCATION locn=StringLiteral -> ^(TOK_PARTITIONLOCATION $locn) + ; + +alterStatementSuffixDropPartitions[boolean table] +@init { pushMsg("drop partition statement", state); } +@after { popMsg(state); } + : KW_DROP ifExists? dropPartitionSpec (COMMA dropPartitionSpec)* KW_PURGE? replicationClause? + -> { table }? ^(TOK_ALTERTABLE_DROPPARTS dropPartitionSpec+ ifExists? KW_PURGE? replicationClause?) + -> ^(TOK_ALTERVIEW_DROPPARTS dropPartitionSpec+ ifExists? replicationClause?) + ; + +alterStatementSuffixProperties +@init { pushMsg("alter properties statement", state); } +@after { popMsg(state); } + : KW_SET KW_TBLPROPERTIES tableProperties + -> ^(TOK_ALTERTABLE_PROPERTIES tableProperties) + | KW_UNSET KW_TBLPROPERTIES ifExists? tableProperties + -> ^(TOK_ALTERTABLE_DROPPROPERTIES tableProperties ifExists?) + ; + +alterViewSuffixProperties +@init { pushMsg("alter view properties statement", state); } +@after { popMsg(state); } + : KW_SET KW_TBLPROPERTIES tableProperties + -> ^(TOK_ALTERVIEW_PROPERTIES tableProperties) + | KW_UNSET KW_TBLPROPERTIES ifExists? tableProperties + -> ^(TOK_ALTERVIEW_DROPPROPERTIES tableProperties ifExists?) + ; + +alterStatementSuffixSerdeProperties +@init { pushMsg("alter serdes statement", state); } +@after { popMsg(state); } + : KW_SET KW_SERDE serdeName=StringLiteral (KW_WITH KW_SERDEPROPERTIES tableProperties)? + -> ^(TOK_ALTERTABLE_SERIALIZER $serdeName tableProperties?) + | KW_SET KW_SERDEPROPERTIES tableProperties + -> ^(TOK_ALTERTABLE_SERDEPROPERTIES tableProperties) + ; + +tablePartitionPrefix +@init {pushMsg("table partition prefix", state);} +@after {popMsg(state);} + : tableName partitionSpec? + ->^(TOK_TABLE_PARTITION tableName partitionSpec?) 
+ ; + +alterStatementSuffixFileFormat +@init {pushMsg("alter fileformat statement", state); } +@after {popMsg(state);} + : KW_SET KW_FILEFORMAT fileFormat + -> ^(TOK_ALTERTABLE_FILEFORMAT fileFormat) + ; + +alterStatementSuffixClusterbySortby +@init {pushMsg("alter partition cluster by sort by statement", state);} +@after {popMsg(state);} + : KW_NOT KW_CLUSTERED -> ^(TOK_ALTERTABLE_CLUSTER_SORT TOK_NOT_CLUSTERED) + | KW_NOT KW_SORTED -> ^(TOK_ALTERTABLE_CLUSTER_SORT TOK_NOT_SORTED) + | tableBuckets -> ^(TOK_ALTERTABLE_CLUSTER_SORT tableBuckets) + ; + +alterTblPartitionStatementSuffixSkewedLocation +@init {pushMsg("alter partition skewed location", state);} +@after {popMsg(state);} + : KW_SET KW_SKEWED KW_LOCATION skewedLocations + -> ^(TOK_ALTERTABLE_SKEWED_LOCATION skewedLocations) + ; + +skewedLocations +@init { pushMsg("skewed locations", state); } +@after { popMsg(state); } + : + LPAREN skewedLocationsList RPAREN -> ^(TOK_SKEWED_LOCATIONS skewedLocationsList) + ; + +skewedLocationsList +@init { pushMsg("skewed locations list", state); } +@after { popMsg(state); } + : + skewedLocationMap (COMMA skewedLocationMap)* -> ^(TOK_SKEWED_LOCATION_LIST skewedLocationMap+) + ; + +skewedLocationMap +@init { pushMsg("specifying skewed location map", state); } +@after { popMsg(state); } + : + key=skewedValueLocationElement EQUAL value=StringLiteral -> ^(TOK_SKEWED_LOCATION_MAP $key $value) + ; + +alterStatementSuffixLocation +@init {pushMsg("alter location", state);} +@after {popMsg(state);} + : KW_SET KW_LOCATION newLoc=StringLiteral + -> ^(TOK_ALTERTABLE_LOCATION $newLoc) + ; + + +alterStatementSuffixSkewedby +@init {pushMsg("alter skewed by statement", state);} +@after{popMsg(state);} + : tableSkewed + ->^(TOK_ALTERTABLE_SKEWED tableSkewed) + | + KW_NOT KW_SKEWED + ->^(TOK_ALTERTABLE_SKEWED) + | + KW_NOT storedAsDirs + ->^(TOK_ALTERTABLE_SKEWED storedAsDirs) + ; + +alterStatementSuffixExchangePartition +@init {pushMsg("alter exchange partition", state);} +@after{popMsg(state);} + : KW_EXCHANGE partitionSpec KW_WITH KW_TABLE exchangename=tableName + -> ^(TOK_ALTERTABLE_EXCHANGEPARTITION partitionSpec $exchangename) + ; + +alterStatementSuffixRenamePart +@init { pushMsg("alter table rename partition statement", state); } +@after { popMsg(state); } + : KW_RENAME KW_TO partitionSpec + ->^(TOK_ALTERTABLE_RENAMEPART partitionSpec) + ; + +alterStatementSuffixStatsPart +@init { pushMsg("alter table stats partition statement", state); } +@after { popMsg(state); } + : KW_UPDATE KW_STATISTICS KW_FOR KW_COLUMN? colName=identifier KW_SET tableProperties (KW_COMMENT comment=StringLiteral)? + ->^(TOK_ALTERTABLE_UPDATECOLSTATS $colName tableProperties $comment?) + ; + +alterStatementSuffixMergeFiles +@init { pushMsg("", state); } +@after { popMsg(state); } + : KW_CONCATENATE + -> ^(TOK_ALTERTABLE_MERGEFILES) + ; + +alterStatementSuffixBucketNum +@init { pushMsg("", state); } +@after { popMsg(state); } + : KW_INTO num=Number KW_BUCKETS + -> ^(TOK_ALTERTABLE_BUCKETS $num) + ; + +alterStatementSuffixCompact +@init { msgs.push("compaction request"); } +@after { msgs.pop(); } + : KW_COMPACT compactType=StringLiteral + -> ^(TOK_ALTERTABLE_COMPACT $compactType) + ; + + +fileFormat +@init { pushMsg("file format specification", state); } +@after { popMsg(state); } + : KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral KW_SERDE serdeCls=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? + -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $serdeCls $inDriver? 
$outDriver?) + | genericSpec=identifier -> ^(TOK_FILEFORMAT_GENERIC $genericSpec) + ; + +tabTypeExpr +@init { pushMsg("specifying table types", state); } +@after { popMsg(state); } + : identifier (DOT^ identifier)? + (identifier (DOT^ + ( + (KW_ELEM_TYPE) => KW_ELEM_TYPE + | + (KW_KEY_TYPE) => KW_KEY_TYPE + | + (KW_VALUE_TYPE) => KW_VALUE_TYPE + | identifier + ))* + )? + ; + +partTypeExpr +@init { pushMsg("specifying table partitions", state); } +@after { popMsg(state); } + : tabTypeExpr partitionSpec? -> ^(TOK_TABTYPE tabTypeExpr partitionSpec?) + ; + +tabPartColTypeExpr +@init { pushMsg("specifying table partitions columnName", state); } +@after { popMsg(state); } + : tableName partitionSpec? extColumnName? -> ^(TOK_TABTYPE tableName partitionSpec? extColumnName?) + ; + +descStatement +@init { pushMsg("describe statement", state); } +@after { popMsg(state); } + : + (KW_DESCRIBE|KW_DESC) + ( + (KW_DATABASE|KW_SCHEMA) => (KW_DATABASE|KW_SCHEMA) KW_EXTENDED? (dbName=identifier) -> ^(TOK_DESCDATABASE $dbName KW_EXTENDED?) + | + (KW_FUNCTION) => KW_FUNCTION KW_EXTENDED? (name=descFuncNames) -> ^(TOK_DESCFUNCTION $name KW_EXTENDED?) + | + (KW_FORMATTED|KW_EXTENDED|KW_PRETTY) => ((descOptions=KW_FORMATTED|descOptions=KW_EXTENDED|descOptions=KW_PRETTY) parttype=tabPartColTypeExpr) -> ^(TOK_DESCTABLE $parttype $descOptions) + | + parttype=tabPartColTypeExpr -> ^(TOK_DESCTABLE $parttype) + ) + ; + +analyzeStatement +@init { pushMsg("analyze statement", state); } +@after { popMsg(state); } + : KW_ANALYZE KW_TABLE (parttype=tableOrPartition) KW_COMPUTE KW_STATISTICS ((noscan=KW_NOSCAN) | (partialscan=KW_PARTIALSCAN) + | (KW_FOR KW_COLUMNS (statsColumnName=columnNameList)?))? + -> ^(TOK_ANALYZE $parttype $noscan? $partialscan? KW_COLUMNS? $statsColumnName?) + ; + +showStatement +@init { pushMsg("show statement", state); } +@after { popMsg(state); } + : KW_SHOW (KW_DATABASES|KW_SCHEMAS) (KW_LIKE showStmtIdentifier)? -> ^(TOK_SHOWDATABASES showStmtIdentifier?) + | KW_SHOW KW_TABLES ((KW_FROM|KW_IN) db_name=identifier)? (KW_LIKE showStmtIdentifier|showStmtIdentifier)? -> ^(TOK_SHOWTABLES (TOK_FROM $db_name)? showStmtIdentifier?) + | KW_SHOW KW_COLUMNS (KW_FROM|KW_IN) tableName ((KW_FROM|KW_IN) db_name=identifier)? + -> ^(TOK_SHOWCOLUMNS tableName $db_name?) + | KW_SHOW KW_FUNCTIONS (KW_LIKE showFunctionIdentifier|showFunctionIdentifier)? -> ^(TOK_SHOWFUNCTIONS KW_LIKE? showFunctionIdentifier?) + | KW_SHOW KW_PARTITIONS tabName=tableName partitionSpec? -> ^(TOK_SHOWPARTITIONS $tabName partitionSpec?) + | KW_SHOW KW_CREATE ( + (KW_DATABASE|KW_SCHEMA) => (KW_DATABASE|KW_SCHEMA) db_name=identifier -> ^(TOK_SHOW_CREATEDATABASE $db_name) + | + KW_TABLE tabName=tableName -> ^(TOK_SHOW_CREATETABLE $tabName) + ) + | KW_SHOW KW_TABLE KW_EXTENDED ((KW_FROM|KW_IN) db_name=identifier)? KW_LIKE showStmtIdentifier partitionSpec? + -> ^(TOK_SHOW_TABLESTATUS showStmtIdentifier $db_name? partitionSpec?) + | KW_SHOW KW_TBLPROPERTIES tableName (LPAREN prptyName=StringLiteral RPAREN)? -> ^(TOK_SHOW_TBLPROPERTIES tableName $prptyName?) + | KW_SHOW KW_LOCKS + ( + (KW_DATABASE|KW_SCHEMA) => (KW_DATABASE|KW_SCHEMA) (dbName=Identifier) (isExtended=KW_EXTENDED)? -> ^(TOK_SHOWDBLOCKS $dbName $isExtended?) + | + (parttype=partTypeExpr)? (isExtended=KW_EXTENDED)? -> ^(TOK_SHOWLOCKS $parttype? $isExtended?) + ) + | KW_SHOW (showOptions=KW_FORMATTED)? (KW_INDEX|KW_INDEXES) KW_ON showStmtIdentifier ((KW_FROM|KW_IN) db_name=identifier)? + -> ^(TOK_SHOWINDEXES showStmtIdentifier $showOptions? $db_name?) 
+    | KW_SHOW KW_COMPACTIONS -> ^(TOK_SHOW_COMPACTIONS)
+    | KW_SHOW KW_TRANSACTIONS -> ^(TOK_SHOW_TRANSACTIONS)
+    | KW_SHOW KW_CONF StringLiteral -> ^(TOK_SHOWCONF StringLiteral)
+    ;
+
+lockStatement
+@init { pushMsg("lock statement", state); }
+@after { popMsg(state); }
+    : KW_LOCK KW_TABLE tableName partitionSpec? lockMode -> ^(TOK_LOCKTABLE tableName lockMode partitionSpec?)
+    ;
+
+lockDatabase
+@init { pushMsg("lock database statement", state); }
+@after { popMsg(state); }
+    : KW_LOCK (KW_DATABASE|KW_SCHEMA) (dbName=Identifier) lockMode -> ^(TOK_LOCKDB $dbName lockMode)
+    ;
+
+lockMode
+@init { pushMsg("lock mode", state); }
+@after { popMsg(state); }
+    : KW_SHARED | KW_EXCLUSIVE
+    ;
+
+unlockStatement
+@init { pushMsg("unlock statement", state); }
+@after { popMsg(state); }
+    : KW_UNLOCK KW_TABLE tableName partitionSpec? -> ^(TOK_UNLOCKTABLE tableName partitionSpec?)
+    ;
+
+unlockDatabase
+@init { pushMsg("unlock database statement", state); }
+@after { popMsg(state); }
+    : KW_UNLOCK (KW_DATABASE|KW_SCHEMA) (dbName=Identifier) -> ^(TOK_UNLOCKDB $dbName)
+    ;
+
+createRoleStatement
+@init { pushMsg("create role", state); }
+@after { popMsg(state); }
+    : KW_CREATE KW_ROLE roleName=identifier
+    -> ^(TOK_CREATEROLE $roleName)
+    ;
+
+dropRoleStatement
+@init {pushMsg("drop role", state);}
+@after {popMsg(state);}
+    : KW_DROP KW_ROLE roleName=identifier
+    -> ^(TOK_DROPROLE $roleName)
+    ;
+
+grantPrivileges
+@init {pushMsg("grant privileges", state);}
+@after {popMsg(state);}
+    : KW_GRANT privList=privilegeList
+      privilegeObject?
+      KW_TO principalSpecification
+      withGrantOption?
+    -> ^(TOK_GRANT $privList principalSpecification privilegeObject? withGrantOption?)
+    ;
+
+revokePrivileges
+@init {pushMsg("revoke privileges", state);}
+@after {popMsg(state);}
+    : KW_REVOKE grantOptionFor? privilegeList privilegeObject? KW_FROM principalSpecification
+    -> ^(TOK_REVOKE privilegeList principalSpecification privilegeObject? grantOptionFor?)
+    ;
+
+grantRole
+@init {pushMsg("grant role", state);}
+@after {popMsg(state);}
+    : KW_GRANT KW_ROLE? identifier (COMMA identifier)* KW_TO principalSpecification withAdminOption?
+    -> ^(TOK_GRANT_ROLE principalSpecification withAdminOption? identifier+)
+    ;
+
+revokeRole
+@init {pushMsg("revoke role", state);}
+@after {popMsg(state);}
+    : KW_REVOKE adminOptionFor? KW_ROLE? identifier (COMMA identifier)* KW_FROM principalSpecification
+    -> ^(TOK_REVOKE_ROLE principalSpecification adminOptionFor? identifier+)
+    ;
+
+showRoleGrants
+@init {pushMsg("show role grants", state);}
+@after {popMsg(state);}
+    : KW_SHOW KW_ROLE KW_GRANT principalName
+    -> ^(TOK_SHOW_ROLE_GRANT principalName)
+    ;
+
+
+showRoles
+@init {pushMsg("show roles", state);}
+@after {popMsg(state);}
+    : KW_SHOW KW_ROLES
+    -> ^(TOK_SHOW_ROLES)
+    ;
+
+showCurrentRole
+@init {pushMsg("show current role", state);}
+@after {popMsg(state);}
+    : KW_SHOW KW_CURRENT KW_ROLES
+    -> ^(TOK_SHOW_SET_ROLE)
+    ;
+
+setRole
+@init {pushMsg("set role", state);}
+@after {popMsg(state);}
+    : KW_SET KW_ROLE
+    (
+    (KW_ALL) => (all=KW_ALL) -> ^(TOK_SHOW_SET_ROLE Identifier[$all.text])
+    |
+    (KW_NONE) => (none=KW_NONE) -> ^(TOK_SHOW_SET_ROLE Identifier[$none.text])
+    |
+    identifier -> ^(TOK_SHOW_SET_ROLE identifier)
+    )
+    ;
+
+showGrants
+@init {pushMsg("show grants", state);}
+@after {popMsg(state);}
+    : KW_SHOW KW_GRANT principalName? (KW_ON privilegeIncludeColObject)?
+    -> ^(TOK_SHOW_GRANT principalName? privilegeIncludeColObject?)
+ ; + +showRolePrincipals +@init {pushMsg("show role principals", state);} +@after {popMsg(state);} + : KW_SHOW KW_PRINCIPALS roleName=identifier + -> ^(TOK_SHOW_ROLE_PRINCIPALS $roleName) + ; + + +privilegeIncludeColObject +@init {pushMsg("privilege object including columns", state);} +@after {popMsg(state);} + : (KW_ALL) => KW_ALL -> ^(TOK_RESOURCE_ALL) + | privObjectCols -> ^(TOK_PRIV_OBJECT_COL privObjectCols) + ; + +privilegeObject +@init {pushMsg("privilege object", state);} +@after {popMsg(state);} + : KW_ON privObject -> ^(TOK_PRIV_OBJECT privObject) + ; + +// database or table type. Type is optional, default type is table +privObject + : (KW_DATABASE|KW_SCHEMA) identifier -> ^(TOK_DB_TYPE identifier) + | KW_TABLE? tableName partitionSpec? -> ^(TOK_TABLE_TYPE tableName partitionSpec?) + | KW_URI (path=StringLiteral) -> ^(TOK_URI_TYPE $path) + | KW_SERVER identifier -> ^(TOK_SERVER_TYPE identifier) + ; + +privObjectCols + : (KW_DATABASE|KW_SCHEMA) identifier -> ^(TOK_DB_TYPE identifier) + | KW_TABLE? tableName (LPAREN cols=columnNameList RPAREN)? partitionSpec? -> ^(TOK_TABLE_TYPE tableName $cols? partitionSpec?) + | KW_URI (path=StringLiteral) -> ^(TOK_URI_TYPE $path) + | KW_SERVER identifier -> ^(TOK_SERVER_TYPE identifier) + ; + +privilegeList +@init {pushMsg("grant privilege list", state);} +@after {popMsg(state);} + : privlegeDef (COMMA privlegeDef)* + -> ^(TOK_PRIVILEGE_LIST privlegeDef+) + ; + +privlegeDef +@init {pushMsg("grant privilege", state);} +@after {popMsg(state);} + : privilegeType (LPAREN cols=columnNameList RPAREN)? + -> ^(TOK_PRIVILEGE privilegeType $cols?) + ; + +privilegeType +@init {pushMsg("privilege type", state);} +@after {popMsg(state);} + : KW_ALL -> ^(TOK_PRIV_ALL) + | KW_ALTER -> ^(TOK_PRIV_ALTER_METADATA) + | KW_UPDATE -> ^(TOK_PRIV_ALTER_DATA) + | KW_CREATE -> ^(TOK_PRIV_CREATE) + | KW_DROP -> ^(TOK_PRIV_DROP) + | KW_INDEX -> ^(TOK_PRIV_INDEX) + | KW_LOCK -> ^(TOK_PRIV_LOCK) + | KW_SELECT -> ^(TOK_PRIV_SELECT) + | KW_SHOW_DATABASE -> ^(TOK_PRIV_SHOW_DATABASE) + | KW_INSERT -> ^(TOK_PRIV_INSERT) + | KW_DELETE -> ^(TOK_PRIV_DELETE) + ; + +principalSpecification +@init { pushMsg("user/group/role name list", state); } +@after { popMsg(state); } + : principalName (COMMA principalName)* -> ^(TOK_PRINCIPAL_NAME principalName+) + ; + +principalName +@init {pushMsg("user|group|role name", state);} +@after {popMsg(state);} + : KW_USER principalIdentifier -> ^(TOK_USER principalIdentifier) + | KW_GROUP principalIdentifier -> ^(TOK_GROUP principalIdentifier) + | KW_ROLE identifier -> ^(TOK_ROLE identifier) + ; + +withGrantOption +@init {pushMsg("with grant option", state);} +@after {popMsg(state);} + : KW_WITH KW_GRANT KW_OPTION + -> ^(TOK_GRANT_WITH_OPTION) + ; + +grantOptionFor +@init {pushMsg("grant option for", state);} +@after {popMsg(state);} + : KW_GRANT KW_OPTION KW_FOR + -> ^(TOK_GRANT_OPTION_FOR) +; + +adminOptionFor +@init {pushMsg("admin option for", state);} +@after {popMsg(state);} + : KW_ADMIN KW_OPTION KW_FOR + -> ^(TOK_ADMIN_OPTION_FOR) +; + +withAdminOption +@init {pushMsg("with admin option", state);} +@after {popMsg(state);} + : KW_WITH KW_ADMIN KW_OPTION + -> ^(TOK_GRANT_WITH_ADMIN_OPTION) + ; + +metastoreCheck +@init { pushMsg("metastore check statement", state); } +@after { popMsg(state); } + : KW_MSCK (repair=KW_REPAIR)? (KW_TABLE tableName partitionSpec? (COMMA partitionSpec)*)? + -> ^(TOK_MSCK $repair? (tableName partitionSpec*)?) 
+ ; + +resourceList +@init { pushMsg("resource list", state); } +@after { popMsg(state); } + : + resource (COMMA resource)* -> ^(TOK_RESOURCE_LIST resource+) + ; + +resource +@init { pushMsg("resource", state); } +@after { popMsg(state); } + : + resType=resourceType resPath=StringLiteral -> ^(TOK_RESOURCE_URI $resType $resPath) + ; + +resourceType +@init { pushMsg("resource type", state); } +@after { popMsg(state); } + : + KW_JAR -> ^(TOK_JAR) + | + KW_FILE -> ^(TOK_FILE) + | + KW_ARCHIVE -> ^(TOK_ARCHIVE) + ; + +createFunctionStatement +@init { pushMsg("create function statement", state); } +@after { popMsg(state); } + : KW_CREATE (temp=KW_TEMPORARY)? KW_FUNCTION functionIdentifier KW_AS StringLiteral + (KW_USING rList=resourceList)? + -> {$temp != null}? ^(TOK_CREATEFUNCTION functionIdentifier StringLiteral $rList? TOK_TEMPORARY) + -> ^(TOK_CREATEFUNCTION functionIdentifier StringLiteral $rList?) + ; + +dropFunctionStatement +@init { pushMsg("drop function statement", state); } +@after { popMsg(state); } + : KW_DROP (temp=KW_TEMPORARY)? KW_FUNCTION ifExists? functionIdentifier + -> {$temp != null}? ^(TOK_DROPFUNCTION functionIdentifier ifExists? TOK_TEMPORARY) + -> ^(TOK_DROPFUNCTION functionIdentifier ifExists?) + ; + +reloadFunctionStatement +@init { pushMsg("reload function statement", state); } +@after { popMsg(state); } + : KW_RELOAD KW_FUNCTION -> ^(TOK_RELOADFUNCTION); + +createMacroStatement +@init { pushMsg("create macro statement", state); } +@after { popMsg(state); } + : KW_CREATE KW_TEMPORARY KW_MACRO Identifier + LPAREN columnNameTypeList? RPAREN expression + -> ^(TOK_CREATEMACRO Identifier columnNameTypeList? expression) + ; + +dropMacroStatement +@init { pushMsg("drop macro statement", state); } +@after { popMsg(state); } + : KW_DROP KW_TEMPORARY KW_MACRO ifExists? Identifier + -> ^(TOK_DROPMACRO Identifier ifExists?) + ; + +createViewStatement +@init { + pushMsg("create view statement", state); +} +@after { popMsg(state); } + : KW_CREATE (orReplace)? KW_VIEW (ifNotExists)? name=tableName + (LPAREN columnNameCommentList RPAREN)? tableComment? viewPartition? + tablePropertiesPrefixed? + KW_AS + selectStatementWithCTE + -> ^(TOK_CREATEVIEW $name orReplace? + ifNotExists? + columnNameCommentList? + tableComment? + viewPartition? + tablePropertiesPrefixed? + selectStatementWithCTE + ) + ; + +viewPartition +@init { pushMsg("view partition specification", state); } +@after { popMsg(state); } + : KW_PARTITIONED KW_ON LPAREN columnNameList RPAREN + -> ^(TOK_VIEWPARTCOLS columnNameList) + ; + +dropViewStatement +@init { pushMsg("drop view statement", state); } +@after { popMsg(state); } + : KW_DROP KW_VIEW ifExists? viewName -> ^(TOK_DROPVIEW viewName ifExists?) 
+ ; + +showFunctionIdentifier +@init { pushMsg("identifier for show function statement", state); } +@after { popMsg(state); } + : functionIdentifier + | StringLiteral + ; + +showStmtIdentifier +@init { pushMsg("identifier for show statement", state); } +@after { popMsg(state); } + : identifier + | StringLiteral + ; + +tableComment +@init { pushMsg("table's comment", state); } +@after { popMsg(state); } + : + KW_COMMENT comment=StringLiteral -> ^(TOK_TABLECOMMENT $comment) + ; + +tablePartition +@init { pushMsg("table partition specification", state); } +@after { popMsg(state); } + : KW_PARTITIONED KW_BY LPAREN columnNameTypeList RPAREN + -> ^(TOK_TABLEPARTCOLS columnNameTypeList) + ; + +tableBuckets +@init { pushMsg("table buckets specification", state); } +@after { popMsg(state); } + : + KW_CLUSTERED KW_BY LPAREN bucketCols=columnNameList RPAREN (KW_SORTED KW_BY LPAREN sortCols=columnNameOrderList RPAREN)? KW_INTO num=Number KW_BUCKETS + -> ^(TOK_ALTERTABLE_BUCKETS $bucketCols $sortCols? $num) + ; + +tableSkewed +@init { pushMsg("table skewed specification", state); } +@after { popMsg(state); } + : + KW_SKEWED KW_BY LPAREN skewedCols=columnNameList RPAREN KW_ON LPAREN (skewedValues=skewedValueElement) RPAREN ((storedAsDirs) => storedAsDirs)? + -> ^(TOK_TABLESKEWED $skewedCols $skewedValues storedAsDirs?) + ; + +rowFormat +@init { pushMsg("serde specification", state); } +@after { popMsg(state); } + : rowFormatSerde -> ^(TOK_SERDE rowFormatSerde) + | rowFormatDelimited -> ^(TOK_SERDE rowFormatDelimited) + | -> ^(TOK_SERDE) + ; + +recordReader +@init { pushMsg("record reader specification", state); } +@after { popMsg(state); } + : KW_RECORDREADER StringLiteral -> ^(TOK_RECORDREADER StringLiteral) + | -> ^(TOK_RECORDREADER) + ; + +recordWriter +@init { pushMsg("record writer specification", state); } +@after { popMsg(state); } + : KW_RECORDWRITER StringLiteral -> ^(TOK_RECORDWRITER StringLiteral) + | -> ^(TOK_RECORDWRITER) + ; + +rowFormatSerde +@init { pushMsg("serde format specification", state); } +@after { popMsg(state); } + : KW_ROW KW_FORMAT KW_SERDE name=StringLiteral (KW_WITH KW_SERDEPROPERTIES serdeprops=tableProperties)? + -> ^(TOK_SERDENAME $name $serdeprops?) + ; + +rowFormatDelimited +@init { pushMsg("serde properties specification", state); } +@after { popMsg(state); } + : + KW_ROW KW_FORMAT KW_DELIMITED tableRowFormatFieldIdentifier? tableRowFormatCollItemsIdentifier? tableRowFormatMapKeysIdentifier? tableRowFormatLinesIdentifier? tableRowNullFormat? + -> ^(TOK_SERDEPROPS tableRowFormatFieldIdentifier? tableRowFormatCollItemsIdentifier? tableRowFormatMapKeysIdentifier? tableRowFormatLinesIdentifier? tableRowNullFormat?) + ; + +tableRowFormat +@init { pushMsg("table row format specification", state); } +@after { popMsg(state); } + : + rowFormatDelimited + -> ^(TOK_TABLEROWFORMAT rowFormatDelimited) + | rowFormatSerde + -> ^(TOK_TABLESERIALIZER rowFormatSerde) + ; + +tablePropertiesPrefixed +@init { pushMsg("table properties with prefix", state); } +@after { popMsg(state); } + : + KW_TBLPROPERTIES! 
tableProperties + ; + +tableProperties +@init { pushMsg("table properties", state); } +@after { popMsg(state); } + : + LPAREN tablePropertiesList RPAREN -> ^(TOK_TABLEPROPERTIES tablePropertiesList) + ; + +tablePropertiesList +@init { pushMsg("table properties list", state); } +@after { popMsg(state); } + : + keyValueProperty (COMMA keyValueProperty)* -> ^(TOK_TABLEPROPLIST keyValueProperty+) + | + keyProperty (COMMA keyProperty)* -> ^(TOK_TABLEPROPLIST keyProperty+) + ; + +keyValueProperty +@init { pushMsg("specifying key/value property", state); } +@after { popMsg(state); } + : + key=StringLiteral EQUAL value=StringLiteral -> ^(TOK_TABLEPROPERTY $key $value) + ; + +keyProperty +@init { pushMsg("specifying key property", state); } +@after { popMsg(state); } + : + key=StringLiteral -> ^(TOK_TABLEPROPERTY $key TOK_NULL) + ; + +tableRowFormatFieldIdentifier +@init { pushMsg("table row format's field separator", state); } +@after { popMsg(state); } + : + KW_FIELDS KW_TERMINATED KW_BY fldIdnt=StringLiteral (KW_ESCAPED KW_BY fldEscape=StringLiteral)? + -> ^(TOK_TABLEROWFORMATFIELD $fldIdnt $fldEscape?) + ; + +tableRowFormatCollItemsIdentifier +@init { pushMsg("table row format's column separator", state); } +@after { popMsg(state); } + : + KW_COLLECTION KW_ITEMS KW_TERMINATED KW_BY collIdnt=StringLiteral + -> ^(TOK_TABLEROWFORMATCOLLITEMS $collIdnt) + ; + +tableRowFormatMapKeysIdentifier +@init { pushMsg("table row format's map key separator", state); } +@after { popMsg(state); } + : + KW_MAP KW_KEYS KW_TERMINATED KW_BY mapKeysIdnt=StringLiteral + -> ^(TOK_TABLEROWFORMATMAPKEYS $mapKeysIdnt) + ; + +tableRowFormatLinesIdentifier +@init { pushMsg("table row format's line separator", state); } +@after { popMsg(state); } + : + KW_LINES KW_TERMINATED KW_BY linesIdnt=StringLiteral + -> ^(TOK_TABLEROWFORMATLINES $linesIdnt) + ; + +tableRowNullFormat +@init { pushMsg("table row format's null specifier", state); } +@after { popMsg(state); } + : + KW_NULL KW_DEFINED KW_AS nullIdnt=StringLiteral + -> ^(TOK_TABLEROWFORMATNULL $nullIdnt) + ; +tableFileFormat +@init { pushMsg("table file format specification", state); } +@after { popMsg(state); } + : + (KW_STORED KW_AS KW_INPUTFORMAT) => KW_STORED KW_AS KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? + -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $inDriver? $outDriver?) + | KW_STORED KW_BY storageHandler=StringLiteral + (KW_WITH KW_SERDEPROPERTIES serdeprops=tableProperties)? + -> ^(TOK_STORAGEHANDLER $storageHandler $serdeprops?) 
+ | KW_STORED KW_AS genericSpec=identifier + -> ^(TOK_FILEFORMAT_GENERIC $genericSpec) + ; + +tableLocation +@init { pushMsg("table location specification", state); } +@after { popMsg(state); } + : + KW_LOCATION locn=StringLiteral -> ^(TOK_TABLELOCATION $locn) + ; + +columnNameTypeList +@init { pushMsg("column name type list", state); } +@after { popMsg(state); } + : columnNameType (COMMA columnNameType)* -> ^(TOK_TABCOLLIST columnNameType+) + ; + +columnNameColonTypeList +@init { pushMsg("column name type list", state); } +@after { popMsg(state); } + : columnNameColonType (COMMA columnNameColonType)* -> ^(TOK_TABCOLLIST columnNameColonType+) + ; + +columnNameList +@init { pushMsg("column name list", state); } +@after { popMsg(state); } + : columnName (COMMA columnName)* -> ^(TOK_TABCOLNAME columnName+) + ; + +columnName +@init { pushMsg("column name", state); } +@after { popMsg(state); } + : + identifier + ; + +extColumnName +@init { pushMsg("column name for complex types", state); } +@after { popMsg(state); } + : + identifier (DOT^ ((KW_ELEM_TYPE) => KW_ELEM_TYPE | (KW_KEY_TYPE) => KW_KEY_TYPE | (KW_VALUE_TYPE) => KW_VALUE_TYPE | identifier))* + ; + +columnNameOrderList +@init { pushMsg("column name order list", state); } +@after { popMsg(state); } + : columnNameOrder (COMMA columnNameOrder)* -> ^(TOK_TABCOLNAME columnNameOrder+) + ; + +skewedValueElement +@init { pushMsg("skewed value element", state); } +@after { popMsg(state); } + : + skewedColumnValues + | skewedColumnValuePairList + ; + +skewedColumnValuePairList +@init { pushMsg("column value pair list", state); } +@after { popMsg(state); } + : skewedColumnValuePair (COMMA skewedColumnValuePair)* -> ^(TOK_TABCOLVALUE_PAIR skewedColumnValuePair+) + ; + +skewedColumnValuePair +@init { pushMsg("column value pair", state); } +@after { popMsg(state); } + : + LPAREN colValues=skewedColumnValues RPAREN + -> ^(TOK_TABCOLVALUES $colValues) + ; + +skewedColumnValues +@init { pushMsg("column values", state); } +@after { popMsg(state); } + : skewedColumnValue (COMMA skewedColumnValue)* -> ^(TOK_TABCOLVALUE skewedColumnValue+) + ; + +skewedColumnValue +@init { pushMsg("column value", state); } +@after { popMsg(state); } + : + constant + ; + +skewedValueLocationElement +@init { pushMsg("skewed value location element", state); } +@after { popMsg(state); } + : + skewedColumnValue + | skewedColumnValuePair + ; + +columnNameOrder +@init { pushMsg("column name order", state); } +@after { popMsg(state); } + : identifier (asc=KW_ASC | desc=KW_DESC)? + -> {$desc == null}? ^(TOK_TABSORTCOLNAMEASC identifier) + -> ^(TOK_TABSORTCOLNAMEDESC identifier) + ; + +columnNameCommentList +@init { pushMsg("column name comment list", state); } +@after { popMsg(state); } + : columnNameComment (COMMA columnNameComment)* -> ^(TOK_TABCOLNAME columnNameComment+) + ; + +columnNameComment +@init { pushMsg("column name comment", state); } +@after { popMsg(state); } + : colName=identifier (KW_COMMENT comment=StringLiteral)? + -> ^(TOK_TABCOL $colName TOK_NULL $comment?) + ; + +columnRefOrder +@init { pushMsg("column order", state); } +@after { popMsg(state); } + : expression (asc=KW_ASC | desc=KW_DESC)? + -> {$desc == null}? ^(TOK_TABSORTCOLNAMEASC expression) + -> ^(TOK_TABSORTCOLNAMEDESC expression) + ; + +columnNameType +@init { pushMsg("column specification", state); } +@after { popMsg(state); } + : colName=identifier colType (KW_COMMENT comment=StringLiteral)? + -> {containExcludedCharForCreateTableColumnName($colName.text)}? 
{throwColumnNameException()}
+    -> {$comment == null}? ^(TOK_TABCOL $colName colType)
+    -> ^(TOK_TABCOL $colName colType $comment)
+    ;
+
+columnNameColonType
+@init { pushMsg("column specification", state); }
+@after { popMsg(state); }
+    : colName=identifier COLON colType (KW_COMMENT comment=StringLiteral)?
+    -> {$comment == null}? ^(TOK_TABCOL $colName colType)
+    -> ^(TOK_TABCOL $colName colType $comment)
+    ;
+
+colType
+@init { pushMsg("column type", state); }
+@after { popMsg(state); }
+    : type
+    ;
+
+colTypeList
+@init { pushMsg("column type list", state); }
+@after { popMsg(state); }
+    : colType (COMMA colType)* -> ^(TOK_COLTYPELIST colType+)
+    ;
+
+type
+    : primitiveType
+    | listType
+    | structType
+    | mapType
+    | unionType;
+
+primitiveType
+@init { pushMsg("primitive type specification", state); }
+@after { popMsg(state); }
+    : KW_TINYINT -> TOK_TINYINT
+    | KW_SMALLINT -> TOK_SMALLINT
+    | KW_INT -> TOK_INT
+    | KW_BIGINT -> TOK_BIGINT
+    | KW_BOOLEAN -> TOK_BOOLEAN
+    | KW_FLOAT -> TOK_FLOAT
+    | KW_DOUBLE -> TOK_DOUBLE
+    | KW_DATE -> TOK_DATE
+    | KW_DATETIME -> TOK_DATETIME
+    | KW_TIMESTAMP -> TOK_TIMESTAMP
+    // Uncomment to allow intervals as table column types
+    //| KW_INTERVAL KW_YEAR KW_TO KW_MONTH -> TOK_INTERVAL_YEAR_MONTH
+    //| KW_INTERVAL KW_DAY KW_TO KW_SECOND -> TOK_INTERVAL_DAY_TIME
+    | KW_STRING -> TOK_STRING
+    | KW_BINARY -> TOK_BINARY
+    | KW_DECIMAL (LPAREN prec=Number (COMMA scale=Number)? RPAREN)? -> ^(TOK_DECIMAL $prec? $scale?)
+    | KW_VARCHAR LPAREN length=Number RPAREN -> ^(TOK_VARCHAR $length)
+    | KW_CHAR LPAREN length=Number RPAREN -> ^(TOK_CHAR $length)
+    ;
+
+listType
+@init { pushMsg("list type", state); }
+@after { popMsg(state); }
+    : KW_ARRAY LESSTHAN type GREATERTHAN -> ^(TOK_LIST type)
+    ;
+
+structType
+@init { pushMsg("struct type", state); }
+@after { popMsg(state); }
+    : KW_STRUCT LESSTHAN columnNameColonTypeList GREATERTHAN -> ^(TOK_STRUCT columnNameColonTypeList)
+    ;
+
+mapType
+@init { pushMsg("map type", state); }
+@after { popMsg(state); }
+    : KW_MAP LESSTHAN left=primitiveType COMMA right=type GREATERTHAN
+    -> ^(TOK_MAP $left $right)
+    ;
+
+unionType
+@init { pushMsg("uniontype type", state); }
+@after { popMsg(state); }
+    : KW_UNIONTYPE LESSTHAN colTypeList GREATERTHAN -> ^(TOK_UNIONTYPE colTypeList)
+    ;
+
+setOperator
+@init { pushMsg("set operator", state); }
+@after { popMsg(state); }
+    : KW_UNION KW_ALL -> ^(TOK_UNIONALL)
+    | KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT)
+    ;
+
+queryStatementExpression[boolean topLevel]
+    :
+    /* Would be nice to do this as a gated semantic predicate
+       But the predicate gets pushed as a lookahead decision.
+       Calling rule does not know about topLevel
+    */
+    (w=withClause {topLevel}?)?
+    queryStatementExpressionBody[topLevel] {
+      if ($w.tree != null) {
+      $queryStatementExpressionBody.tree.insertChild(0, $w.tree);
+      }
+    }
+    -> queryStatementExpressionBody
+    ;
+
+queryStatementExpressionBody[boolean topLevel]
+    :
+    fromStatement[topLevel]
+    | regularBody[topLevel]
+    ;
+
+withClause
+  :
+  KW_WITH cteStatement (COMMA cteStatement)* -> ^(TOK_CTE cteStatement+)
+;
+
+cteStatement
+   :
+   identifier KW_AS LPAREN queryStatementExpression[false] RPAREN
+   -> ^(TOK_SUBQUERY queryStatementExpression identifier)
+;
+
+fromStatement[boolean topLevel]
+: (singleFromStatement -> singleFromStatement)
+  (u=setOperator r=singleFromStatement
+    -> ^($u {$fromStatement.tree} $r)
+  )*
+  -> {u != null && topLevel}?
^(TOK_QUERY + ^(TOK_FROM + ^(TOK_SUBQUERY + {$fromStatement.tree} + {adaptor.create(Identifier, generateUnionAlias())} + ) + ) + ^(TOK_INSERT + ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF)) + ) + ) + -> {$fromStatement.tree} + ; + + +singleFromStatement + : + fromClause + ( b+=body )+ -> ^(TOK_QUERY fromClause body+) + ; + +/* +The valuesClause rule below ensures that the parse tree for +"insert into table FOO values (1,2),(3,4)" looks the same as +"insert into table FOO select a,b from (values(1,2),(3,4)) as BAR(a,b)" which itself is made to look +very similar to the tree for "insert into table FOO select a,b from BAR". Since virtual table name +is implicit, it's represented as TOK_ANONYMOUS. +*/ +regularBody[boolean topLevel] + : + i=insertClause + ( + s=selectStatement[topLevel] + {$s.tree.getFirstChildWithType(TOK_INSERT).replaceChildren(0, 0, $i.tree);} -> {$s.tree} + | + valuesClause + -> ^(TOK_QUERY + ^(TOK_FROM + ^(TOK_VIRTUAL_TABLE ^(TOK_VIRTUAL_TABREF ^(TOK_ANONYMOUS)) valuesClause) + ) + ^(TOK_INSERT {$i.tree} ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF))) + ) + ) + | + selectStatement[topLevel] + ; + +selectStatement[boolean topLevel] + : + ( + s=selectClause + f=fromClause? + w=whereClause? + g=groupByClause? + h=havingClause? + o=orderByClause? + c=clusterByClause? + d=distributeByClause? + sort=sortByClause? + win=window_clause? + l=limitClause? + -> ^(TOK_QUERY $f? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + $s $w? $g? $h? $o? $c? + $d? $sort? $win? $l?)) + ) + (set=setOpSelectStatement[$selectStatement.tree, topLevel])? + -> {set == null}? + {$selectStatement.tree} + -> {o==null && c==null && d==null && sort==null && l==null}? + {$set.tree} + -> {throwSetOpException()} + ; + +setOpSelectStatement[CommonTree t, boolean topLevel] + : + (u=setOperator b=simpleSelectStatement + -> {$setOpSelectStatement.tree != null && u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}? + ^(TOK_QUERY + ^(TOK_FROM + ^(TOK_SUBQUERY + ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b) + {adaptor.create(Identifier, generateUnionAlias())} + ) + ) + ^(TOK_INSERT + ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF)) + ) + ) + -> {$setOpSelectStatement.tree != null && u.tree.getType()!=SparkSqlParser.TOK_UNIONDISTINCT}? + ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b) + -> {$setOpSelectStatement.tree == null && u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}? + ^(TOK_QUERY + ^(TOK_FROM + ^(TOK_SUBQUERY + ^(TOK_UNIONALL {$t} $b) + {adaptor.create(Identifier, generateUnionAlias())} + ) + ) + ^(TOK_INSERT + ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF)) + ) + ) + -> ^(TOK_UNIONALL {$t} $b) + )+ + o=orderByClause? + c=clusterByClause? + d=distributeByClause? + sort=sortByClause? + win=window_clause? + l=limitClause? + -> {o==null && c==null && d==null && sort==null && win==null && l==null && !topLevel}? + {$setOpSelectStatement.tree} + -> ^(TOK_QUERY + ^(TOK_FROM + ^(TOK_SUBQUERY + {$setOpSelectStatement.tree} + {adaptor.create(Identifier, generateUnionAlias())} + ) + ) + ^(TOK_INSERT + ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF)) + $o? $c? $d? $sort? $win? $l? + ) + ) + ; + +simpleSelectStatement + : + selectClause + fromClause? + whereClause? + groupByClause? + havingClause? + ((window_clause) => window_clause)? + -> ^(TOK_QUERY fromClause? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + selectClause whereClause? 
groupByClause? havingClause? window_clause?)) + ; + +selectStatementWithCTE + : + (w=withClause)? + selectStatement[true] { + if ($w.tree != null) { + $selectStatement.tree.insertChild(0, $w.tree); + } + } + -> selectStatement + ; + +body + : + insertClause + selectClause + lateralView? + whereClause? + groupByClause? + havingClause? + orderByClause? + clusterByClause? + distributeByClause? + sortByClause? + window_clause? + limitClause? -> ^(TOK_INSERT insertClause + selectClause lateralView? whereClause? groupByClause? havingClause? orderByClause? clusterByClause? + distributeByClause? sortByClause? window_clause? limitClause?) + | + selectClause + lateralView? + whereClause? + groupByClause? + havingClause? + orderByClause? + clusterByClause? + distributeByClause? + sortByClause? + window_clause? + limitClause? -> ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + selectClause lateralView? whereClause? groupByClause? havingClause? orderByClause? clusterByClause? + distributeByClause? sortByClause? window_clause? limitClause?) + ; + +insertClause +@init { pushMsg("insert clause", state); } +@after { popMsg(state); } + : + KW_INSERT KW_OVERWRITE destination ifNotExists? -> ^(TOK_DESTINATION destination ifNotExists?) + | KW_INSERT KW_INTO KW_TABLE? tableOrPartition (LPAREN targetCols=columnNameList RPAREN)? + -> ^(TOK_INSERT_INTO tableOrPartition $targetCols?) + ; + +destination +@init { pushMsg("destination specification", state); } +@after { popMsg(state); } + : + (local = KW_LOCAL)? KW_DIRECTORY StringLiteral tableRowFormat? tableFileFormat? + -> ^(TOK_DIR StringLiteral $local? tableRowFormat? tableFileFormat?) + | KW_TABLE tableOrPartition -> tableOrPartition + ; + +limitClause +@init { pushMsg("limit clause", state); } +@after { popMsg(state); } + : + KW_LIMIT num=Number -> ^(TOK_LIMIT $num) + ; + +//DELETE FROM WHERE ...; +deleteStatement +@init { pushMsg("delete statement", state); } +@after { popMsg(state); } + : + KW_DELETE KW_FROM tableName (whereClause)? -> ^(TOK_DELETE_FROM tableName whereClause?) + ; + +/*SET = (3 + col2)*/ +columnAssignmentClause + : + tableOrColumn EQUAL^ precedencePlusExpression + ; + +/*SET col1 = 5, col2 = (4 + col4), ...*/ +setColumnsClause + : + KW_SET columnAssignmentClause (COMMA columnAssignmentClause)* -> ^(TOK_SET_COLUMNS_CLAUSE columnAssignmentClause* ) + ; + +/* + UPDATE
+ SET col1 = val1, col2 = val2... WHERE ... +*/ +updateStatement +@init { pushMsg("update statement", state); } +@after { popMsg(state); } + : + KW_UPDATE tableName setColumnsClause whereClause? -> ^(TOK_UPDATE_TABLE tableName setColumnsClause whereClause?) + ; + +/* +BEGIN user defined transaction boundaries; follows SQL 2003 standard exactly except for addition of +"setAutoCommitStatement" which is not in the standard doc but is supported by most SQL engines. +*/ +sqlTransactionStatement +@init { pushMsg("transaction statement", state); } +@after { popMsg(state); } + : + startTransactionStatement + | commitStatement + | rollbackStatement + | setAutoCommitStatement + ; + +startTransactionStatement + : + KW_START KW_TRANSACTION ( transactionMode ( COMMA transactionMode )* )? -> ^(TOK_START_TRANSACTION transactionMode*) + ; + +transactionMode + : + isolationLevel + | transactionAccessMode -> ^(TOK_TXN_ACCESS_MODE transactionAccessMode) + ; + +transactionAccessMode + : + KW_READ KW_ONLY -> TOK_TXN_READ_ONLY + | KW_READ KW_WRITE -> TOK_TXN_READ_WRITE + ; + +isolationLevel + : + KW_ISOLATION KW_LEVEL levelOfIsolation -> ^(TOK_ISOLATION_LEVEL levelOfIsolation) + ; + +/*READ UNCOMMITTED | READ COMMITTED | REPEATABLE READ | SERIALIZABLE may be supported later*/ +levelOfIsolation + : + KW_SNAPSHOT -> TOK_ISOLATION_SNAPSHOT + ; + +commitStatement + : + KW_COMMIT ( KW_WORK )? -> TOK_COMMIT + ; + +rollbackStatement + : + KW_ROLLBACK ( KW_WORK )? -> TOK_ROLLBACK + ; +setAutoCommitStatement + : + KW_SET KW_AUTOCOMMIT booleanValueTok -> ^(TOK_SET_AUTOCOMMIT booleanValueTok) + ; +/* +END user defined transaction boundaries +*/ diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTErrorNode.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTErrorNode.java new file mode 100644 index 000000000000..35ecdc5ad10a --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTErrorNode.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.parser; + +import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.Token; +import org.antlr.runtime.TokenStream; +import org.antlr.runtime.tree.CommonErrorNode; + +public class ASTErrorNode extends ASTNode { + + /** + * + */ + private static final long serialVersionUID = 1L; + CommonErrorNode delegate; + + public ASTErrorNode(TokenStream input, Token start, Token stop, + RecognitionException e){ + delegate = new CommonErrorNode(input,start,stop,e); + } + + @Override + public boolean isNil() { return delegate.isNil(); } + + @Override + public int getType() { return delegate.getType(); } + + @Override + public String getText() { return delegate.getText(); } + @Override + public String toString() { return delegate.toString(); } +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNode.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNode.java new file mode 100644 index 000000000000..8e5ca5883d49 --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNode.java @@ -0,0 +1,263 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import org.antlr.runtime.Token; +import org.antlr.runtime.tree.CommonTree; +import org.antlr.runtime.tree.Tree; +import org.apache.hadoop.hive.ql.lib.Node; + +public class ASTNode extends CommonTree implements Node, Serializable { + private static final long serialVersionUID = 1L; + private transient StringBuffer astStr; + private transient ASTNodeOrigin origin; + private transient int startIndx = -1; + private transient int endIndx = -1; + private transient ASTNode rootNode; + private transient boolean isValidASTStr; + + public ASTNode() { + } + + /** + * Constructor. 
+ * + * @param t + * Token for the CommonTree Node + */ + public ASTNode(Token t) { + super(t); + } + + public ASTNode(ASTNode node) { + super(node); + this.origin = node.origin; + } + + @Override + public Tree dupNode() { + return new ASTNode(this); + } + + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.hive.ql.lib.Node#getChildren() + */ + @Override + public ArrayList getChildren() { + if (super.getChildCount() == 0) { + return null; + } + + ArrayList ret_vec = new ArrayList(); + for (int i = 0; i < super.getChildCount(); ++i) { + ret_vec.add((Node) super.getChild(i)); + } + + return ret_vec; + } + + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.hive.ql.lib.Node#getName() + */ + @Override + public String getName() { + return (Integer.valueOf(super.getToken().getType())).toString(); + } + + /** + * @return information about the object from which this ASTNode originated, or + * null if this ASTNode was not expanded from an object reference + */ + public ASTNodeOrigin getOrigin() { + return origin; + } + + /** + * Tag this ASTNode with information about the object from which this node + * originated. + */ + public void setOrigin(ASTNodeOrigin origin) { + this.origin = origin; + } + + public String dump() { + StringBuilder sb = new StringBuilder("\n"); + dump(sb, ""); + return sb.toString(); + } + + private StringBuilder dump(StringBuilder sb, String ws) { + sb.append(ws); + sb.append(toString()); + sb.append("\n"); + + ArrayList children = getChildren(); + if (children != null) { + for (Node node : getChildren()) { + if (node instanceof ASTNode) { + ((ASTNode) node).dump(sb, ws + " "); + } else { + sb.append(ws); + sb.append(" NON-ASTNODE!!"); + sb.append("\n"); + } + } + } + return sb; + } + + private ASTNode getRootNodeWithValidASTStr(boolean useMemoizedRoot) { + if (useMemoizedRoot && rootNode != null && rootNode.parent == null && + rootNode.hasValidMemoizedString()) { + return rootNode; + } + ASTNode retNode = this; + while (retNode.parent != null) { + retNode = (ASTNode) retNode.parent; + } + rootNode=retNode; + if (!rootNode.isValidASTStr) { + rootNode.astStr = new StringBuffer(); + rootNode.toStringTree(rootNode); + rootNode.isValidASTStr = true; + } + return retNode; + } + + private boolean hasValidMemoizedString() { + return isValidASTStr && astStr != null; + } + + private void resetRootInformation() { + // Reset the previously stored rootNode string + if (rootNode != null) { + rootNode.astStr = null; + rootNode.isValidASTStr = false; + } + } + + private int getMemoizedStringLen() { + return astStr == null ? 0 : astStr.length(); + } + + private String getMemoizedSubString(int start, int end) { + return (astStr == null || start < 0 || end > astStr.length() || start >= end) ? 
null : + astStr.subSequence(start, end).toString(); + } + + private void addtoMemoizedString(String string) { + if (astStr == null) { + astStr = new StringBuffer(); + } + astStr.append(string); + } + + @Override + public void setParent(Tree t) { + super.setParent(t); + resetRootInformation(); + } + + @Override + public void addChild(Tree t) { + super.addChild(t); + resetRootInformation(); + } + + @Override + public void addChildren(List kids) { + super.addChildren(kids); + resetRootInformation(); + } + + @Override + public void setChild(int i, Tree t) { + super.setChild(i, t); + resetRootInformation(); + } + + @Override + public void insertChild(int i, Object t) { + super.insertChild(i, t); + resetRootInformation(); + } + + @Override + public Object deleteChild(int i) { + Object ret = super.deleteChild(i); + resetRootInformation(); + return ret; + } + + @Override + public void replaceChildren(int startChildIndex, int stopChildIndex, Object t) { + super.replaceChildren(startChildIndex, stopChildIndex, t); + resetRootInformation(); + } + + @Override + public String toStringTree() { + + // The root might have changed because of tree modifications. + // Compute the new root for this tree and set the astStr. + getRootNodeWithValidASTStr(true); + + // If rootNotModified is false, then startIndx and endIndx will be stale. + if (startIndx >= 0 && endIndx <= rootNode.getMemoizedStringLen()) { + return rootNode.getMemoizedSubString(startIndx, endIndx); + } + return toStringTree(rootNode); + } + + private String toStringTree(ASTNode rootNode) { + this.rootNode = rootNode; + startIndx = rootNode.getMemoizedStringLen(); + // Leaf node + if ( children==null || children.size()==0 ) { + rootNode.addtoMemoizedString(this.toString()); + endIndx = rootNode.getMemoizedStringLen(); + return this.toString(); + } + if ( !isNil() ) { + rootNode.addtoMemoizedString("("); + rootNode.addtoMemoizedString(this.toString()); + rootNode.addtoMemoizedString(" "); + } + for (int i = 0; children!=null && i < children.size(); i++) { + ASTNode t = (ASTNode)children.get(i); + if ( i>0 ) { + rootNode.addtoMemoizedString(" "); + } + t.toStringTree(rootNode); + } + if ( !isNil() ) { + rootNode.addtoMemoizedString(")"); + } + endIndx = rootNode.getMemoizedStringLen(); + return rootNode.getMemoizedSubString(startIndx, endIndx); + } +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNodeOrigin.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNodeOrigin.java new file mode 100644 index 000000000000..596787d2dd55 --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNodeOrigin.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.parser; + +/** + * ASTNodeOrigin contains contextual information about the object from whose + * definition a particular ASTNode originated. For example, suppose a view v is + * defined as select x+1 as y from t, and we're processing a query + * select v1.y from v as v1, and there's a type-checking problem + * with the expression x+1 due to an ALTER TABLE on t subsequent to + * the creation of v. Then, when reporting the error, we want to provide the + * parser location with respect to the definition of v (rather than with respect + * to the top-level query, since that represents a completely different + * "parser coordinate system"). + * + *
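+ * (The constructor below captures exactly this context. A hypothetical caller that is
+ * expanding the view could attach it to each expanded node roughly like
+ * node.setOrigin(new ASTNodeOrigin("VIEW", "v", "select x+1 as y from t", "v1", usageNode));
+ * where setOrigin is defined on ASTNode and usageNode is the reference being expanded.)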

+ * + * So, when expanding the definition of v while analyzing the top-level query, + * we tag each ASTNode with a reference to an ASTNodeOrign describing v and its + * usage within the query. + */ +public class ASTNodeOrigin { + private final String objectType; + private final String objectName; + private final String objectDefinition; + private final String usageAlias; + private final ASTNode usageNode; + + public ASTNodeOrigin(String objectType, String objectName, + String objectDefinition, String usageAlias, ASTNode usageNode) { + this.objectType = objectType; + this.objectName = objectName; + this.objectDefinition = objectDefinition; + this.usageAlias = usageAlias; + this.usageNode = usageNode; + } + + /** + * @return the type of the object from which an ASTNode originated, e.g. + * "view". + */ + public String getObjectType() { + return objectType; + } + + /** + * @return the name of the object from which an ASTNode originated, e.g. "v". + */ + public String getObjectName() { + return objectName; + } + + /** + * @return the definition of the object from which an ASTNode originated, e.g. + * select x+1 as y from t. + */ + public String getObjectDefinition() { + return objectDefinition; + } + + /** + * @return the alias of the object from which an ASTNode originated, e.g. "v1" + * (this can help with debugging context-dependent expansions) + */ + public String getUsageAlias() { + return usageAlias; + } + + /** + * @return the expression node triggering usage of an object from which an + * ASTNode originated, e.g. v as v1 (this can help with + * debugging context-dependent expansions) + */ + public ASTNode getUsageNode() { + return usageNode; + } +} + +// End ASTNodeOrigin.java diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java new file mode 100644 index 000000000000..2e968c3c475c --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java @@ -0,0 +1,261 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser; + +import java.util.ArrayList; +import org.antlr.runtime.ANTLRStringStream; +import org.antlr.runtime.CharStream; +import org.antlr.runtime.NoViableAltException; +import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.Token; +import org.antlr.runtime.TokenRewriteStream; +import org.antlr.runtime.TokenStream; +import org.antlr.runtime.tree.CommonTree; +import org.antlr.runtime.tree.CommonTreeAdaptor; +import org.antlr.runtime.tree.TreeAdaptor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.Context; + +/** + * ParseDriver. 
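+ *
+ * A minimal usage sketch (illustrative only; the SQL text and variable names are made up,
+ * the methods are the ones defined below):
+ *
+ *   ParseDriver pd = new ParseDriver();
+ *   ASTNode tree = pd.parse("SELECT 1");   // throws ParseException on lexer/parser errors
+ *   System.out.println(tree.dump());       // indented, one-node-per-line rendering of the AST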
+ * + */ +public class ParseDriver { + + private static final Logger LOG = LoggerFactory.getLogger("hive.ql.parse.ParseDriver"); + + /** + * ANTLRNoCaseStringStream. + * + */ + //This class provides and implementation for a case insensitive token checker + //for the lexical analysis part of antlr. By converting the token stream into + //upper case at the time when lexical rules are checked, this class ensures that the + //lexical rules need to just match the token with upper case letters as opposed to + //combination of upper case and lower case characteres. This is purely used for matching lexical + //rules. The actual token text is stored in the same way as the user input without + //actually converting it into an upper case. The token values are generated by the consume() + //function of the super class ANTLRStringStream. The LA() function is the lookahead funtion + //and is purely used for matching lexical rules. This also means that the grammar will only + //accept capitalized tokens in case it is run from other tools like antlrworks which + //do not have the ANTLRNoCaseStringStream implementation. + public class ANTLRNoCaseStringStream extends ANTLRStringStream { + + public ANTLRNoCaseStringStream(String input) { + super(input); + } + + @Override + public int LA(int i) { + + int returnChar = super.LA(i); + if (returnChar == CharStream.EOF) { + return returnChar; + } else if (returnChar == 0) { + return returnChar; + } + + return Character.toUpperCase((char) returnChar); + } + } + + /** + * HiveLexerX. + * + */ + public class HiveLexerX extends SparkSqlLexer { + + private final ArrayList errors; + + public HiveLexerX() { + super(); + errors = new ArrayList(); + } + + public HiveLexerX(CharStream input) { + super(input); + errors = new ArrayList(); + } + + @Override + public void displayRecognitionError(String[] tokenNames, + RecognitionException e) { + + errors.add(new ParseError(this, e, tokenNames)); + } + + @Override + public String getErrorMessage(RecognitionException e, String[] tokenNames) { + String msg = null; + + if (e instanceof NoViableAltException) { + @SuppressWarnings("unused") + NoViableAltException nvae = (NoViableAltException) e; + // for development, can add + // "decision=<<"+nvae.grammarDecisionDescription+">>" + // and "(decision="+nvae.decisionNumber+") and + // "state "+nvae.stateNumber + msg = "character " + getCharErrorDisplay(e.c) + " not supported here"; + } else { + msg = super.getErrorMessage(e, tokenNames); + } + + return msg; + } + + public ArrayList getErrors() { + return errors; + } + + } + + /** + * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes + * so that the graph walking algorithms and the rules framework defined in + * ql.lib can be used with the AST Nodes. + */ + public static final TreeAdaptor adaptor = new CommonTreeAdaptor() { + /** + * Creates an ASTNode for the given token. The ASTNode is a wrapper around + * antlr's CommonTree class that implements the Node interface. + * + * @param payload + * The token. + * @return Object (which is actually an ASTNode) for the token. 
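+ *
+ * (Illustrative wiring, mirroring parse() below: a SparkSqlParser built over a token
+ * stream is pointed at this adaptor via parser.setTreeAdaptor(ParseDriver.adaptor),
+ * after which every tree node the parser builds is an ASTNode or ASTErrorNode.)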
+ */ + @Override + public Object create(Token payload) { + return new ASTNode(payload); + } + + @Override + public Object dupNode(Object t) { + + return create(((CommonTree)t).token); + }; + + @Override + public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) { + return new ASTErrorNode(input, start, stop, e); + }; + }; + + public ASTNode parse(String command) throws ParseException { + return parse(command, null); + } + + public ASTNode parse(String command, Context ctx) + throws ParseException { + return parse(command, ctx, true); + } + + /** + * Parses a command, optionally assigning the parser's token stream to the + * given context. + * + * @param command + * command to parse + * + * @param ctx + * context with which to associate this parser's token stream, or + * null if either no context is available or the context already has + * an existing stream + * + * @return parsed AST + */ + public ASTNode parse(String command, Context ctx, boolean setTokenRewriteStream) + throws ParseException { + System.out.println("Parsing!!!"); + LOG.info("Parsing command: " + command); + + HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); + TokenRewriteStream tokens = new TokenRewriteStream(lexer); + if (ctx != null) { + if ( setTokenRewriteStream) { + ctx.setTokenRewriteStream(tokens); + } + lexer.setHiveConf(ctx.getConf()); + } + SparkSqlParser parser = new SparkSqlParser(tokens); + if (ctx != null) { + parser.setHiveConf(ctx.getConf()); + } + parser.setTreeAdaptor(adaptor); + SparkSqlParser.statement_return r = null; + try { + r = parser.statement(); + } catch (RecognitionException e) { + e.printStackTrace(); + throw new ParseException(parser.errors); + } + + if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) { + LOG.info("Parse Completed"); + } else if (lexer.getErrors().size() != 0) { + throw new ParseException(lexer.getErrors()); + } else { + throw new ParseException(parser.errors); + } + + ASTNode tree = (ASTNode) r.getTree(); + tree.setUnknownTokenBoundaries(); + return tree; + } + + + /* + * parse a String as a Select List. This allows table functions to be passed expression Strings + * that are translated in + * the context they define at invocation time. Currently used by NPath to allow users to specify + * what output they want. + * NPath allows expressions n 'tpath' a column that represents the matched set of rows. This + * column doesn't exist in + * the input schema and hence the Result Expression cannot be analyzed by the regular Hive + * translation process. 
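+ *
+ * Rough usage sketch (hypothetical select text; a null Context is accepted, as in parse()):
+ *   ASTNode selectTree = new ParseDriver().parseSelect("select tpath.col1, count(*)", null);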
+ */ + public ASTNode parseSelect(String command, Context ctx) throws ParseException { + LOG.info("Parsing command: " + command); + + HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); + TokenRewriteStream tokens = new TokenRewriteStream(lexer); + if (ctx != null) { + ctx.setTokenRewriteStream(tokens); + } + SparkSqlParser parser = new SparkSqlParser(tokens); + parser.setTreeAdaptor(adaptor); + SparkSqlParser_SelectClauseParser.selectClause_return r = null; + try { + r = parser.selectClause(); + } catch (RecognitionException e) { + e.printStackTrace(); + throw new ParseException(parser.errors); + } + + if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) { + LOG.info("Parse Completed"); + } else if (lexer.getErrors().size() != 0) { + throw new ParseException(lexer.getErrors()); + } else { + throw new ParseException(parser.errors); + } + + return (ASTNode) r.getTree(); + } +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseError.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseError.java new file mode 100644 index 000000000000..b47bcfb2914d --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseError.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser; + +import org.antlr.runtime.BaseRecognizer; +import org.antlr.runtime.RecognitionException; + +/** + * + */ +public class ParseError { + private final BaseRecognizer br; + private final RecognitionException re; + private final String[] tokenNames; + + ParseError(BaseRecognizer br, RecognitionException re, String[] tokenNames) { + this.br = br; + this.re = re; + this.tokenNames = tokenNames; + } + + BaseRecognizer getBaseRecognizer() { + return br; + } + + RecognitionException getRecognitionException() { + return re; + } + + String[] getTokenNames() { + return tokenNames; + } + + String getMessage() { + return br.getErrorHeader(re) + " " + br.getErrorMessage(re, tokenNames); + } + +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseException.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseException.java new file mode 100644 index 000000000000..fff891ced555 --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseException.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser; + +import java.util.ArrayList; + +/** + * ParseException. + * + */ +public class ParseException extends Exception { + + private static final long serialVersionUID = 1L; + ArrayList errors; + + public ParseException(ArrayList errors) { + super(); + this.errors = errors; + } + + @Override + public String getMessage() { + + StringBuilder sb = new StringBuilder(); + for (ParseError err : errors) { + if (sb.length() > 0) { + sb.append('\n'); + } + sb.append(err.getMessage()); + } + + return sb.toString(); + } + +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java new file mode 100644 index 000000000000..d8840e75fd3d --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + + +/** + * Library of utility functions used in the parse code. + * + */ +public final class ParseUtils { + /** + * Performs a descent of the leftmost branch of a tree, stopping when either a + * node with a non-null token is found or the leaf level is encountered. 
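+ *
+ * Typical use (hypothetical caller): descend past token-less wrapper nodes, such as an
+ * ANTLR "nil" root, to reach the real statement node:
+ *   ASTNode stmt = ParseUtils.findRootNonNullToken(parseDriver.parse(sql));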
+ * + * @param tree + * candidate node from which to start searching + * + * @return node at which descent stopped + */ + public static ASTNode findRootNonNullToken(ASTNode tree) { + while ((tree.getToken() == null) && (tree.getChildCount() > 0)) { + tree = (org.apache.spark.sql.parser.ASTNode) tree.getChild(0); + } + return tree; + } + + private ParseUtils() { + // prevent instantiation + } + + public static VarcharTypeInfo getVarcharTypeInfo(org.apache.spark.sql.parser.ASTNode node) + throws SemanticException { + if (node.getChildCount() != 1) { + throw new SemanticException("Bad params for type varchar"); + } + + String lengthStr = node.getChild(0).getText(); + return TypeInfoFactory.getVarcharTypeInfo(Integer.valueOf(lengthStr)); + } + + public static CharTypeInfo getCharTypeInfo(org.apache.spark.sql.parser.ASTNode node) + throws SemanticException { + if (node.getChildCount() != 1) { + throw new SemanticException("Bad params for type char"); + } + + String lengthStr = node.getChild(0).getText(); + return TypeInfoFactory.getCharTypeInfo(Integer.valueOf(lengthStr)); + } + + static int getIndex(String[] list, String elem) { + for(int i=0; i < list.length; i++) { + if (list[i] != null && list[i].toLowerCase().equals(elem)) { + return i; + } + } + return -1; + } + + public static DecimalTypeInfo getDecimalTypeTypeInfo(org.apache.spark.sql.parser.ASTNode node) + throws SemanticException { + if (node.getChildCount() > 2) { + throw new SemanticException("Bad params for type decimal"); + } + + int precision = HiveDecimal.USER_DEFAULT_PRECISION; + int scale = HiveDecimal.USER_DEFAULT_SCALE; + + if (node.getChildCount() >= 1) { + String precStr = node.getChild(0).getText(); + precision = Integer.valueOf(precStr); + } + + if (node.getChildCount() == 2) { + String scaleStr = node.getChild(1).getText(); + scale = Integer.valueOf(scaleStr); + } + + return TypeInfoFactory.getDecimalTypeInfo(precision, scale); + } + + public static String ensureClassExists(String className) + throws SemanticException { + if (className == null) { + return null; + } + try { + Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader()); + } catch (ClassNotFoundException e) { + throw new SemanticException("Cannot find class '" + className + "'", e); + } + return className; + } + + public static String unparseIdentifier(String identifier) { + return unparseIdentifier(identifier, (Configuration)null); + } + + public static String unparseIdentifier(String identifier, Configuration conf) { + String qIdSupport = conf == null?null: HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); + if(qIdSupport != null && !"none".equals(qIdSupport)) { + identifier = identifier.replaceAll("`", "``"); + } + + return "`" + identifier + "`"; + } +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/RowResolver.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/RowResolver.java new file mode 100644 index 000000000000..4c5bc87473f0 --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/RowResolver.java @@ -0,0 +1,388 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implementation of the Row Resolver. + * + */ +public class RowResolver implements Serializable{ + private static final long serialVersionUID = 1L; + private RowSchema rowSchema; + private HashMap> rslvMap; + + private HashMap invRslvMap; + /* + * now a Column can have an alternate mapping. + * This captures the alternate mapping. + * The primary(first) mapping is still only held in + * invRslvMap. + */ + private final Map altInvRslvMap; + private Map expressionMap; + + // TODO: Refactor this and do in a more object oriented manner + private boolean isExprResolver; + + private static final Logger LOG = LoggerFactory.getLogger(RowResolver.class.getName()); + + public RowResolver() { + rowSchema = new RowSchema(); + rslvMap = new HashMap>(); + invRslvMap = new HashMap(); + altInvRslvMap = new HashMap(); + expressionMap = new HashMap(); + isExprResolver = false; + } + + /** + * Puts a resolver entry corresponding to a source expression which is to be + * used for identical expression recognition (e.g. for matching expressions + * in the SELECT list with the GROUP BY clause). The convention for such + * entries is an empty-string ("") as the table alias together with the + * string rendering of the ASTNode as the column alias. + */ + public void putExpression(ASTNode node, ColumnInfo colInfo) { + String treeAsString = node.toStringTree(); + expressionMap.put(treeAsString, node); + put("", treeAsString, colInfo); + } + + /** + * Retrieves the ColumnInfo corresponding to a source expression which + * exactly matches the string rendering of the given ASTNode. + */ + public ColumnInfo getExpression(ASTNode node) throws SemanticException { + return get("", node.toStringTree()); + } + + /** + * Retrieves the source expression matching a given ASTNode's + * string rendering exactly. + */ + public ASTNode getExpressionSource(ASTNode node) { + return expressionMap.get(node.toStringTree()); + } + + public void put(String tab_alias, String col_alias, ColumnInfo colInfo) { + if (!addMappingOnly(tab_alias, col_alias, colInfo)) { + //Make sure that the table alias and column alias are stored + //in the column info + if (tab_alias != null) { + colInfo.setTabAlias(tab_alias.toLowerCase()); + } + if (col_alias != null) { + colInfo.setAlias(col_alias.toLowerCase()); + } + rowSchema.getSignature().add(colInfo); + } + } + + public boolean addMappingOnly(String tab_alias, String col_alias, ColumnInfo colInfo) { + if (tab_alias != null) { + tab_alias = tab_alias.toLowerCase(); + } + col_alias = col_alias.toLowerCase(); + + /* + * allow multiple mappings to the same ColumnInfo. 
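+ * (For example, the same underlying column can be reachable both through an unaliased
+ * lookup and through a table-qualified alias; see the note in get() below.)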
+ * When a ColumnInfo is mapped multiple times, only the + * first inverse mapping is captured. + */ + boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName()); + + LinkedHashMap f_map = rslvMap.get(tab_alias); + if (f_map == null) { + f_map = new LinkedHashMap(); + rslvMap.put(tab_alias, f_map); + } + ColumnInfo oldColInfo = f_map.put(col_alias, colInfo); + if (oldColInfo != null) { + LOG.warn("Duplicate column info for " + tab_alias + "." + col_alias + + " was overwritten in RowResolver map: " + oldColInfo + " by " + colInfo); + } + + String[] qualifiedAlias = new String[2]; + qualifiedAlias[0] = tab_alias; + qualifiedAlias[1] = col_alias; + if ( !colPresent ) { + invRslvMap.put(colInfo.getInternalName(), qualifiedAlias); + } else { + altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias); + } + + return colPresent; + } + + public boolean hasTableAlias(String tab_alias) { + return rslvMap.get(tab_alias.toLowerCase()) != null; + } + + /** + * Gets the column Info to tab_alias.col_alias type of a column reference. I + * the tab_alias is not provided as can be the case with an non aliased + * column, this function looks up the column in all the table aliases in this + * row resolver and returns the match. It also throws an exception if the + * column is found in multiple table aliases. If no match is found a null + * values is returned. + * + * This allows us to interpret both select t.c1 type of references and select + * c1 kind of references. The later kind are what we call non aliased column + * references in the query. + * + * @param tab_alias + * The table alias to match (this is null if the column reference is + * non aliased) + * @param col_alias + * The column name that is being searched for + * @return ColumnInfo + * @throws SemanticException + */ + public ColumnInfo get(String tab_alias, String col_alias) throws SemanticException { + col_alias = col_alias.toLowerCase(); + ColumnInfo ret = null; + + if (tab_alias != null) { + tab_alias = tab_alias.toLowerCase(); + HashMap f_map = rslvMap.get(tab_alias); + if (f_map == null) { + return null; + } + ret = f_map.get(col_alias); + } else { + boolean found = false; + String foundTbl = null; + for (Map.Entry> rslvEntry: rslvMap.entrySet()) { + String rslvKey = rslvEntry.getKey(); + LinkedHashMap cmap = rslvEntry.getValue(); + for (Map.Entry cmapEnt : cmap.entrySet()) { + if (col_alias.equalsIgnoreCase(cmapEnt.getKey())) { + /* + * We can have an unaliased and one aliased mapping to a Column. + */ + if (found && foundTbl != null && rslvKey != null) { + throw new SemanticException("Column " + col_alias + + " Found in more than One Tables/Subqueries"); + } + found = true; + foundTbl = rslvKey == null ? 
foundTbl : rslvKey; + ret = cmapEnt.getValue(); + } + } + } + } + + return ret; + } + + public ArrayList getColumnInfos() { + return rowSchema.getSignature(); + } + + /** + * Get a list of aliases for non-hidden columns + * @param max the maximum number of columns to return + * @return a list of non-hidden column names no greater in size than max + */ + public List getReferenceableColumnAliases(String tableAlias, int max) { + int count = 0; + Set columnNames = new LinkedHashSet(); + + int tables = rslvMap.size(); + + Map mapping = rslvMap.get(tableAlias); + if (mapping != null) { + for (Map.Entry entry : mapping.entrySet()) { + if (max > 0 && count >= max) { + break; + } + ColumnInfo columnInfo = entry.getValue(); + if (!columnInfo.isHiddenVirtualCol()) { + columnNames.add(entry.getKey()); + count++; + } + } + } else { + for (ColumnInfo columnInfo : getColumnInfos()) { + if (max > 0 && count >= max) { + break; + } + if (!columnInfo.isHiddenVirtualCol()) { + String[] inverse = !isExprResolver ? reverseLookup(columnInfo.getInternalName()) : null; + if (inverse != null) { + columnNames.add(inverse[0] == null || tables <= 1 ? inverse[1] : + inverse[0] + "." + inverse[1]); + } else { + columnNames.add(columnInfo.getAlias()); + } + count++; + } + } + } + return new ArrayList(columnNames); + } + + public HashMap getFieldMap(String tabAlias) { + if (tabAlias == null) { + return rslvMap.get(null); + } else { + return rslvMap.get(tabAlias.toLowerCase()); + } + } + + public String[] reverseLookup(String internalName) { + return invRslvMap.get(internalName); + } + + public boolean getIsExprResolver() { + return isExprResolver; + } + + public String[] getAlternateMappings(String internalName) { + return altInvRslvMap.get(internalName); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + for (Map.Entry> e : rslvMap + .entrySet()) { + String tab = e.getKey(); + sb.append(tab + "{"); + HashMap f_map = e.getValue(); + if (f_map != null) { + for (Map.Entry entry : f_map.entrySet()) { + sb.append("(" + entry.getKey() + "," + entry.getValue().toString() + + ")"); + } + } + sb.append("} "); + } + return sb.toString(); + } + + public RowSchema getRowSchema() { + return rowSchema; + } + + private static class IntRef { + public int val = 0; + } + + public static boolean add(RowResolver rrToAddTo, RowResolver rrToAddFrom, int numColumns) + throws SemanticException { + return add(rrToAddTo, rrToAddFrom, null, numColumns); + } + + // TODO: 1) How to handle collisions? 2) Should we be cloning ColumnInfo or not? + private static boolean add(RowResolver rrToAddTo, RowResolver rrToAddFrom, + IntRef outputColPosRef, int numColumns) throws SemanticException { + boolean hasDuplicates = false; + String tabAlias; + String colAlias; + String[] qualifiedColName; + int i = 0; + + int outputColPos = outputColPosRef == null ? 
0 : outputColPosRef.val; + for (ColumnInfo cInfoFrmInput : rrToAddFrom.getRowSchema().getSignature()) { + if ( numColumns >= 0 && i == numColumns ) { + break; + } + ColumnInfo newCI = null; + String internalName = cInfoFrmInput.getInternalName(); + qualifiedColName = rrToAddFrom.reverseLookup(internalName); + tabAlias = qualifiedColName[0]; + colAlias = qualifiedColName[1]; + + newCI = new ColumnInfo(cInfoFrmInput); + newCI.setInternalName(SemanticAnalyzer.getColumnInternalName(outputColPos)); + + outputColPos++; + + boolean isUnique = rrToAddTo.putWithCheck(tabAlias, colAlias, internalName, newCI); + hasDuplicates |= (!isUnique); + + qualifiedColName = rrToAddFrom.getAlternateMappings(internalName); + if (qualifiedColName != null) { + tabAlias = qualifiedColName[0]; + colAlias = qualifiedColName[1]; + rrToAddTo.put(tabAlias, colAlias, newCI); + } + i++; + } + + if (outputColPosRef != null) { + outputColPosRef.val = outputColPos; + } + return !hasDuplicates; + } + + /** + * Adds column to RR, checking for duplicate columns. Needed because CBO cannot handle the Hive + * behavior of blindly overwriting old mapping in RR and still somehow working after that. + * @return True if mapping was added without duplicates. + */ + public boolean putWithCheck(String tabAlias, String colAlias, + String internalName, ColumnInfo newCI) throws SemanticException { + ColumnInfo existing = get(tabAlias, colAlias); + // Hive adds the same mapping twice... I wish we could fix stuff like that. + if (existing == null) { + put(tabAlias, colAlias, newCI); + return true; + } else if (existing.isSameColumnForRR(newCI)) { + return true; + } + LOG.warn("Found duplicate column alias in RR: " + + existing.toMappingString(tabAlias, colAlias) + " adding " + + newCI.toMappingString(tabAlias, colAlias)); + if (internalName != null) { + existing = get(tabAlias, internalName); + if (existing == null) { + put(tabAlias, internalName, newCI); + return true; + } else if (existing.isSameColumnForRR(newCI)) { + return true; + } + LOG.warn("Failed to use internal name after finding a duplicate: " + + existing.toMappingString(tabAlias, internalName)); + } + return false; + } + + public static boolean add(RowResolver rrToAddTo, RowResolver rrToAddFrom) + throws SemanticException { + return add(rrToAddTo, rrToAddFrom, null, -1); + } +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java new file mode 100644 index 000000000000..cfdd3cf684b0 --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java @@ -0,0 +1,732 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.parser; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.antlr.runtime.tree.Tree; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * SemanticAnalyzer. + * + */ +public abstract class SemanticAnalyzer { + protected static final Logger STATIC_LOG = LoggerFactory.getLogger(SemanticAnalyzer.class.getName()); + protected final Hive db; + protected final HiveConf conf; + protected final Logger LOG; + + public SemanticAnalyzer(HiveConf conf, Hive db) throws SemanticException { + try { + this.conf = conf; + this.db = db; + LOG = LoggerFactory.getLogger(this.getClass().getName()); + } catch (Exception e) { + throw new SemanticException(e); + } + } + + public static String stripIdentifierQuotes(String val) { + if ((val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`')) { + val = val.substring(1, val.length() - 1); + } + return val; + } + + public static String stripQuotes(String val) { + return PlanUtils.stripQuotes(val); + } + + public static String charSetString(String charSetName, String charSetString) + throws SemanticException { + try { + // The character set name starts with a _, so strip that + charSetName = charSetName.substring(1); + if (charSetString.charAt(0) == '\'') { + return new String(unescapeSQLString(charSetString).getBytes(), + charSetName); + } else // hex input is also supported + { + assert charSetString.charAt(0) == '0'; + assert charSetString.charAt(1) == 'x'; + charSetString = charSetString.substring(2); + + byte[] bArray = new byte[charSetString.length() / 2]; + int j = 0; + for (int i = 0; i < charSetString.length(); i += 2) { + int val = Character.digit(charSetString.charAt(i), 16) * 16 + + Character.digit(charSetString.charAt(i + 1), 16); + if (val > 127) { + val = val - 256; + } + bArray[j++] = (byte)val; + } + + String res = new String(bArray, charSetName); + return res; + } + } catch (UnsupportedEncodingException e) { + throw new SemanticException(e); + } + } + + /** + * Get dequoted name from a table/column node. + * @param tableOrColumnNode the table or column node + * @return for table node, db.tab or tab. for column node column. 
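+ *
+ * For example (illustrative): a TOK_TABNAME node with children (db, tab) yields "db.tab",
+ * a single-child TOK_TABNAME yields "tab" (or "currentDb.tab" in the overload that takes
+ * a current database), and a plain column node is simply unescaped.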
+ */ + public static String getUnescapedName(ASTNode tableOrColumnNode) { + return getUnescapedName(tableOrColumnNode, null); + } + + public static Map.Entry getDbTableNamePair(ASTNode tableNameNode) { + assert(tableNameNode.getToken().getType() == SparkSqlParser.TOK_TABNAME); + if (tableNameNode.getChildCount() == 2) { + String dbName = unescapeIdentifier(tableNameNode.getChild(0).getText()); + String tableName = unescapeIdentifier(tableNameNode.getChild(1).getText()); + return Pair.of(dbName, tableName); + } else { + String tableName = unescapeIdentifier(tableNameNode.getChild(0).getText()); + return Pair.of(null,tableName); + } + } + + public static String getUnescapedName(ASTNode tableOrColumnNode, String currentDatabase) { + int tokenType = tableOrColumnNode.getToken().getType(); + if (tokenType == SparkSqlParser.TOK_TABNAME) { + // table node + Map.Entry dbTablePair = getDbTableNamePair(tableOrColumnNode); + String dbName = dbTablePair.getKey(); + String tableName = dbTablePair.getValue(); + if (dbName != null){ + return dbName + "." + tableName; + } + if (currentDatabase != null) { + return currentDatabase + "." + tableName; + } + return tableName; + } else if (tokenType == SparkSqlParser.StringLiteral) { + return unescapeSQLString(tableOrColumnNode.getText()); + } + // column node + return unescapeIdentifier(tableOrColumnNode.getText()); + } + + /** + * Remove the encapsulating "`" pair from the identifier. We allow users to + * use "`" to escape identifier for table names, column names and aliases, in + * case that coincide with Hive language keywords. + */ + public static String unescapeIdentifier(String val) { + if (val == null) { + return null; + } + if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') { + val = val.substring(1, val.length() - 1); + } + return val; + } + + /** + * Converts parsed key/value properties pairs into a map. + * + * @param prop ASTNode parent of the key/value pairs + * + * @param mapProp property map which receives the mappings + */ + public static void readProps( + ASTNode prop, Map mapProp) { + + for (int propChild = 0; propChild < prop.getChildCount(); propChild++) { + String key = unescapeSQLString(prop.getChild(propChild).getChild(0) + .getText()); + String value = null; + if (prop.getChild(propChild).getChild(1) != null) { + value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); + } + mapProp.put(key, value); + } + } + + private static final int[] multiplier = new int[] {1000, 100, 10, 1}; + + @SuppressWarnings("nls") + public static String unescapeSQLString(String b) { + Character enclosure = null; + + // Some of the strings can be passed in as unicode. 
For example, the + // delimiter can be passed in as \002 - So, we first check if the + // string is a unicode number, else go back to the old behavior + StringBuilder sb = new StringBuilder(b.length()); + for (int i = 0; i < b.length(); i++) { + + char currentChar = b.charAt(i); + if (enclosure == null) { + if (currentChar == '\'' || b.charAt(i) == '\"') { + enclosure = currentChar; + } + // ignore all other chars outside the enclosure + continue; + } + + if (enclosure.equals(currentChar)) { + enclosure = null; + continue; + } + + if (currentChar == '\\' && (i + 6 < b.length()) && b.charAt(i + 1) == 'u') { + int code = 0; + int base = i + 2; + for (int j = 0; j < 4; j++) { + int digit = Character.digit(b.charAt(j + base), 16); + code += digit * multiplier[j]; + } + sb.append((char)code); + i += 5; + continue; + } + + if (currentChar == '\\' && (i + 4 < b.length())) { + char i1 = b.charAt(i + 1); + char i2 = b.charAt(i + 2); + char i3 = b.charAt(i + 3); + if ((i1 >= '0' && i1 <= '1') && (i2 >= '0' && i2 <= '7') + && (i3 >= '0' && i3 <= '7')) { + byte bVal = (byte) ((i3 - '0') + ((i2 - '0') * 8) + ((i1 - '0') * 8 * 8)); + byte[] bValArr = new byte[1]; + bValArr[0] = bVal; + String tmp = new String(bValArr); + sb.append(tmp); + i += 3; + continue; + } + } + + if (currentChar == '\\' && (i + 2 < b.length())) { + char n = b.charAt(i + 1); + switch (n) { + case '0': + sb.append("\0"); + break; + case '\'': + sb.append("'"); + break; + case '"': + sb.append("\""); + break; + case 'b': + sb.append("\b"); + break; + case 'n': + sb.append("\n"); + break; + case 'r': + sb.append("\r"); + break; + case 't': + sb.append("\t"); + break; + case 'Z': + sb.append("\u001A"); + break; + case '\\': + sb.append("\\"); + break; + // The following 2 lines are exactly what MySQL does TODO: why do we do this? + case '%': + sb.append("\\%"); + break; + case '_': + sb.append("\\_"); + break; + default: + sb.append(n); + } + i++; + } else { + sb.append(currentChar); + } + } + return sb.toString(); + } + + /** + * Escapes the string for AST; doesn't enclose it in quotes, however. + */ + public static String escapeSQLString(String b) { + // There's usually nothing to escape so we will be optimistic. + String result = b; + for (int i = 0; i < result.length(); ++i) { + char currentChar = result.charAt(i); + if (currentChar == '\\' && ((i + 1) < result.length())) { + // TODO: do we need to handle the "this is what MySQL does" here? 
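+ // The next check leaves "\%" and "\_" untouched, mirroring the '%'/'_' cases in
+ // unescapeSQLString above, presumably so LIKE-style escapes survive a round trip.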
+ char nextChar = result.charAt(i + 1); + if (nextChar == '%' || nextChar == '_') { + ++i; + continue; + } + } + switch (currentChar) { + case '\0': + result = spliceString(result, i, "\\0"); + ++i; + break; + case '\'': + result = spliceString(result, i, "\\'"); + ++i; + break; + case '\"': + result = spliceString(result, i, "\\\""); + ++i; + break; + case '\b': + result = spliceString(result, i, "\\b"); + ++i; + break; + case '\n': + result = spliceString(result, i, "\\n"); + ++i; + break; + case '\r': + result = spliceString(result, i, "\\r"); + ++i; + break; + case '\t': + result = spliceString(result, i, "\\t"); + ++i; + break; + case '\\': + result = spliceString(result, i, "\\\\"); + ++i; + break; + case '\u001A': + result = spliceString(result, i, "\\Z"); + ++i; + break; + default: { + if (currentChar < ' ') { + String hex = Integer.toHexString(currentChar); + String unicode = "\\u"; + for (int j = 4; j > hex.length(); --j) { + unicode += '0'; + } + unicode += hex; + result = spliceString(result, i, unicode); + i += (unicode.length() - 1); + } + break; // if not a control character, do nothing + } + } + } + return result; + } + + private static String spliceString(String str, int i, String replacement) { + return spliceString(str, i, 1, replacement); + } + + private static String spliceString(String str, int i, int length, String replacement) { + return str.substring(0, i) + replacement + str.substring(i + length); + } + + /** + * Get the list of FieldSchema out of the ASTNode. + */ + public static List getColumns(ASTNode ast, boolean lowerCase) throws SemanticException { + List colList = new ArrayList(); + int numCh = ast.getChildCount(); + for (int i = 0; i < numCh; i++) { + FieldSchema col = new FieldSchema(); + ASTNode child = (ASTNode) ast.getChild(i); + Tree grandChild = child.getChild(0); + if(grandChild != null) { + String name = grandChild.getText(); + if(lowerCase) { + name = name.toLowerCase(); + } + // child 0 is the name of the column + col.setName(unescapeIdentifier(name)); + // child 1 is the type of the column + ASTNode typeChild = (ASTNode) (child.getChild(1)); + col.setType(getTypeStringFromAST(typeChild)); + + // child 2 is the optional comment of the column + if (child.getChildCount() == 3) { + col.setComment(unescapeSQLString(child.getChild(2).getText())); + } + } + colList.add(col); + } + return colList; + } + + protected static String getTypeStringFromAST(ASTNode typeNode) + throws SemanticException { + switch (typeNode.getType()) { + case SparkSqlParser.TOK_LIST: + return serdeConstants.LIST_TYPE_NAME + "<" + + getTypeStringFromAST((ASTNode) typeNode.getChild(0)) + ">"; + case SparkSqlParser.TOK_MAP: + return serdeConstants.MAP_TYPE_NAME + "<" + + getTypeStringFromAST((ASTNode) typeNode.getChild(0)) + "," + + getTypeStringFromAST((ASTNode) typeNode.getChild(1)) + ">"; + case SparkSqlParser.TOK_STRUCT: + return getStructTypeStringFromAST(typeNode); + case SparkSqlParser.TOK_UNIONTYPE: + return getUnionTypeStringFromAST(typeNode); + default: + return getTypeName(typeNode); + } + } + + private static String getStructTypeStringFromAST(ASTNode typeNode) + throws SemanticException { + String typeStr = serdeConstants.STRUCT_TYPE_NAME + "<"; + typeNode = (ASTNode) typeNode.getChild(0); + int children = typeNode.getChildCount(); + if (children <= 0) { + throw new SemanticException("empty struct not allowed."); + } + StringBuilder buffer = new StringBuilder(typeStr); + for (int i = 0; i < children; i++) { + ASTNode child = (ASTNode) typeNode.getChild(i); + 
buffer.append(unescapeIdentifier(child.getChild(0).getText())).append(":"); + buffer.append(getTypeStringFromAST((ASTNode) child.getChild(1))); + if (i < children - 1) { + buffer.append(","); + } + } + + buffer.append(">"); + return buffer.toString(); + } + + private static String getUnionTypeStringFromAST(ASTNode typeNode) + throws SemanticException { + String typeStr = serdeConstants.UNION_TYPE_NAME + "<"; + typeNode = (ASTNode) typeNode.getChild(0); + int children = typeNode.getChildCount(); + if (children <= 0) { + throw new SemanticException("empty union not allowed."); + } + StringBuilder buffer = new StringBuilder(typeStr); + for (int i = 0; i < children; i++) { + buffer.append(getTypeStringFromAST((ASTNode) typeNode.getChild(i))); + if (i < children - 1) { + buffer.append(","); + } + } + buffer.append(">"); + typeStr = buffer.toString(); + return typeStr; + } + + public Hive getDb() { + return db; + } + + /** + * Given a ASTNode, return list of values. + * + * use case: + * create table xyz list bucketed (col1) with skew (1,2,5) + * AST Node is for (1,2,5) + * @param ast + * @return + */ + public static List getSkewedValueFromASTNode(ASTNode ast) { + List colList = new ArrayList(); + int numCh = ast.getChildCount(); + for (int i = 0; i < numCh; i++) { + ASTNode child = (ASTNode) ast.getChild(i); + colList.add(stripQuotes(child.getText()).toLowerCase()); + } + return colList; + } + + /** + * Retrieve skewed values from ASTNode. + * + * @param node + * @return + * @throws SemanticException + */ + public static List getSkewedValuesFromASTNode(Node node) throws SemanticException { + List result = null; + Tree leafVNode = ((ASTNode) node).getChild(0); + if (leafVNode == null) { + throw new SemanticException( + ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg()); + } else { + ASTNode lVAstNode = (ASTNode) leafVNode; + if (lVAstNode.getToken().getType() != SparkSqlParser.TOK_TABCOLVALUE) { + throw new SemanticException( + ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg()); + } else { + result = new ArrayList(getSkewedValueFromASTNode(lVAstNode)); + } + } + return result; + } + + private static boolean getPartExprNodeDesc(ASTNode astNode, HiveConf conf, + Map astExprNodeMap) throws SemanticException { + + if (astNode == null) { + return true; + } else if ((astNode.getChildren() == null) || (astNode.getChildren().size() == 0)) { + return astNode.getType() != SparkSqlParser.TOK_PARTVAL; + } + + TypeCheckCtx typeCheckCtx = new TypeCheckCtx(null); + String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME); + boolean result = true; + for (Node childNode : astNode.getChildren()) { + ASTNode childASTNode = (ASTNode)childNode; + + if (childASTNode.getType() != SparkSqlParser.TOK_PARTVAL) { + result = getPartExprNodeDesc(childASTNode, conf, astExprNodeMap) && result; + } else { + boolean isDynamicPart = childASTNode.getChildren().size() <= 1; + result = !isDynamicPart && result; + if (!isDynamicPart) { + ASTNode partVal = (ASTNode)childASTNode.getChildren().get(1); + if (!defaultPartitionName.equalsIgnoreCase(unescapeSQLString(partVal.getText()))) { + astExprNodeMap.put((ASTNode)childASTNode.getChildren().get(0), + TypeCheckProcFactory.genExprNode(partVal, typeCheckCtx).get(partVal)); + } + } + } + } + return result; + } + + public static void validatePartSpec(Table tbl, Map partSpec, + ASTNode astNode, HiveConf conf, boolean shouldBeFull) throws SemanticException { + tbl.validatePartColumnNames(partSpec, shouldBeFull); + validatePartColumnType(tbl, partSpec, astNode, 
conf); + } + + public static void validatePartColumnType(Table tbl, Map partSpec, + ASTNode astNode, HiveConf conf) throws SemanticException { + if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) { + return; + } + + Map astExprNodeMap = new HashMap(); + if (!getPartExprNodeDesc(astNode, conf, astExprNodeMap)) { + STATIC_LOG.warn("Dynamic partitioning is used; only validating " + + astExprNodeMap.size() + " columns"); + } + + if (astExprNodeMap.isEmpty()) { + return; // All columns are dynamic, nothing to do. + } + + List parts = tbl.getPartitionKeys(); + Map partCols = new HashMap(parts.size()); + for (FieldSchema col : parts) { + partCols.put(col.getName(), col.getType().toLowerCase()); + } + for (Entry astExprNodePair : astExprNodeMap.entrySet()) { + String astKeyName = astExprNodePair.getKey().toString().toLowerCase(); + if (astExprNodePair.getKey().getType() == SparkSqlParser.Identifier) { + astKeyName = stripIdentifierQuotes(astKeyName); + } + String colType = partCols.get(astKeyName); + ObjectInspector inputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( + astExprNodePair.getValue().getTypeInfo()); + + TypeInfo expectedType = + TypeInfoUtils.getTypeInfoFromTypeString(colType); + ObjectInspector outputOI = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType); + // Since partVal is a constant, it is safe to cast ExprNodeDesc to ExprNodeConstantDesc. + // Its value should be in normalized format (e.g. no leading zero in integer, date is in + // format of YYYY-MM-DD etc) + Object value = ((ExprNodeConstantDesc)astExprNodePair.getValue()).getValue(); + Object convertedValue = value; + if (!inputOI.getTypeName().equals(outputOI.getTypeName())) { + convertedValue = ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value); + if (convertedValue == null) { + throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH, astKeyName, + inputOI.getTypeName(), outputOI.getTypeName()); + } + + if (!convertedValue.toString().equals(value.toString())) { + // value might have been changed because of the normalization in conversion + STATIC_LOG.warn("Partition " + astKeyName + " expects type " + outputOI.getTypeName() + + " but input value is in type " + inputOI.getTypeName() + ". Convert " + + value.toString() + " to " + convertedValue.toString()); + } + } + + if (!convertedValue.toString().equals(partSpec.get(astKeyName))) { + STATIC_LOG.warn("Partition Spec " + astKeyName + "=" + partSpec.get(astKeyName) + + " has been changed to " + astKeyName + "=" + convertedValue.toString()); + } + partSpec.put(astKeyName, convertedValue.toString()); + } + } + + private Path tryQualifyPath(Path path) throws IOException { + try { + return path.getFileSystem(conf).makeQualified(path); + } catch (IOException e) { + return path; // some tests expected to pass invalid schema + } + } + + protected String toMessage(ErrorMsg message, Object detail) { + return detail == null ? message.getMsg() : message.getMsg(detail.toString()); + } + + public static String getAstNodeText(ASTNode tree) { + return tree.getChildCount() == 0?tree.getText() : + getAstNodeText((ASTNode)tree.getChild(tree.getChildCount() - 1)); + } + + public static String generateErrorMessage(ASTNode ast, String message) { + StringBuilder sb = new StringBuilder(); + if (ast == null) { + sb.append(message).append(". 
Cannot tell the position of null AST."); + return sb.toString(); + } + sb.append(ast.getLine()); + sb.append(":"); + sb.append(ast.getCharPositionInLine()); + sb.append(" "); + sb.append(message); + sb.append(". Error encountered near token '"); + sb.append(getAstNodeText(ast)); + sb.append("'"); + return sb.toString(); + } + + public static String getColumnInternalName(int pos) { + return HiveConf.getColumnInternalName(pos); + } + + private static final Map TokenToTypeName = new HashMap(); + + static { + TokenToTypeName.put(SparkSqlParser.TOK_BOOLEAN, serdeConstants.BOOLEAN_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_TINYINT, serdeConstants.TINYINT_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_SMALLINT, serdeConstants.SMALLINT_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_INT, serdeConstants.INT_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_BIGINT, serdeConstants.BIGINT_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_FLOAT, serdeConstants.FLOAT_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_DOUBLE, serdeConstants.DOUBLE_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_CHAR, serdeConstants.CHAR_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_DATETIME, serdeConstants.DATETIME_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_INTERVAL_YEAR_MONTH, serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_INTERVAL_DAY_TIME, serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME); + TokenToTypeName.put(SparkSqlParser.TOK_DECIMAL, serdeConstants.DECIMAL_TYPE_NAME); + } + + public static String getTypeName(ASTNode node) throws SemanticException { + int token = node.getType(); + String typeName; + + // datetime type isn't currently supported + if (token == SparkSqlParser.TOK_DATETIME) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_TYPE.getMsg()); + } + + switch (token) { + case SparkSqlParser.TOK_CHAR: + CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(node); + typeName = charTypeInfo.getQualifiedName(); + break; + case SparkSqlParser.TOK_VARCHAR: + VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(node); + typeName = varcharTypeInfo.getQualifiedName(); + break; + case SparkSqlParser.TOK_DECIMAL: + DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(node); + typeName = decTypeInfo.getQualifiedName(); + break; + default: + typeName = TokenToTypeName.get(token); + } + return typeName; + } + + public static String relativeToAbsolutePath(HiveConf conf, String location) throws SemanticException { + boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); + if (testMode) { + URI uri = new Path(location).toUri(); + String scheme = uri.getScheme(); + String authority = uri.getAuthority(); + String path = uri.getPath(); + if (!path.startsWith("/")) { + path = (new Path(System.getProperty("test.tmp.dir"), + path)).toUri().getPath(); + } + if (StringUtils.isEmpty(scheme)) { + scheme = "pfile"; + } + try { + uri = new URI(scheme, authority, path, null, null); + } catch (URISyntaxException e) { + throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e); + } + return 
uri.toString(); + } else { + //no-op for non-test mode for now + return location; + } + } +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckCtx.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckCtx.java new file mode 100644 index 000000000000..5f185efa85bc --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckCtx.java @@ -0,0 +1,212 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; + +/** + * This class implements the context information that is used for typechecking + * phase in query compilation. + */ +public class TypeCheckCtx implements NodeProcessorCtx { + protected static final Logger LOG = LoggerFactory.getLogger(TypeCheckCtx.class); + + /** + * The row resolver of the previous operator. This field is used to generate + * expression descriptors from the expression ASTs. + */ + private RowResolver inputRR; + + private final boolean useCaching; + + /** + * Receives translations which will need to be applied during unparse. + */ + private UnparseTranslator unparseTranslator; + + /** + * Potential typecheck error reason. + */ + private String error; + + /** + * The node that generated the potential typecheck error + */ + private ASTNode errorSrcNode; + + /** + * Whether to allow stateful UDF invocations. + */ + private boolean allowStatefulFunctions; + + private boolean allowDistinctFunctions; + + private final boolean allowGBExprElimination; + + private final boolean allowAllColRef; + + private final boolean allowFunctionStar; + + private final boolean allowWindowing; + + // "[]" : LSQUARE/INDEX Expression + private final boolean allowIndexExpr; + + private final boolean allowSubQueryExpr; + + /** + * Constructor. + * + * @param inputRR + * The input row resolver of the previous operator. 
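+   *          May be null when there is no input schema, as in
+   *          SemanticAnalyzer.getPartExprNodeDesc. This constructor enables
+   *          expression caching; all other options take the defaults of the
+   *          full constructor below.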
+ */ + public TypeCheckCtx(RowResolver inputRR) { + this(inputRR, true); + } + + public TypeCheckCtx(RowResolver inputRR, boolean useCaching) { + this(inputRR, useCaching, false, true, true, true, true, true, true, true); + } + + public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean allowStatefulFunctions, + boolean allowDistinctFunctions, boolean allowGBExprElimination, boolean allowAllColRef, + boolean allowFunctionStar, boolean allowWindowing, + boolean allowIndexExpr, boolean allowSubQueryExpr) { + setInputRR(inputRR); + error = null; + this.useCaching = useCaching; + this.allowStatefulFunctions = allowStatefulFunctions; + this.allowDistinctFunctions = allowDistinctFunctions; + this.allowGBExprElimination = allowGBExprElimination; + this.allowAllColRef = allowAllColRef; + this.allowFunctionStar = allowFunctionStar; + this.allowWindowing = allowWindowing; + this.allowIndexExpr = allowIndexExpr; + this.allowSubQueryExpr = allowSubQueryExpr; + } + + /** + * @param inputRR + * the inputRR to set + */ + public void setInputRR(RowResolver inputRR) { + this.inputRR = inputRR; + } + + /** + * @return the inputRR + */ + public RowResolver getInputRR() { + return inputRR; + } + + /** + * @param unparseTranslator + * the unparseTranslator to set + */ + public void setUnparseTranslator(UnparseTranslator unparseTranslator) { + this.unparseTranslator = unparseTranslator; + } + + /** + * @return the unparseTranslator + */ + public UnparseTranslator getUnparseTranslator() { + return unparseTranslator; + } + + /** + * @param allowStatefulFunctions + * whether to allow stateful UDF invocations + */ + public void setAllowStatefulFunctions(boolean allowStatefulFunctions) { + this.allowStatefulFunctions = allowStatefulFunctions; + } + + /** + * @return whether to allow stateful UDF invocations + */ + public boolean getAllowStatefulFunctions() { + return allowStatefulFunctions; + } + + /** + * @param error + * the error to set + * + */ + public void setError(String error, ASTNode errorSrcNode) { + if (LOG.isDebugEnabled()) { + // Logger the callstack from which the error has been set. + LOG.debug("Setting error: [" + error + "] from " + + ((errorSrcNode == null) ? 
"null" : errorSrcNode.toStringTree()), new Exception()); + } + this.error = error; + this.errorSrcNode = errorSrcNode; + } + + /** + * @return the error + */ + public String getError() { + return error; + } + + public ASTNode getErrorSrcNode() { + return errorSrcNode; + } + + public void setAllowDistinctFunctions(boolean allowDistinctFunctions) { + this.allowDistinctFunctions = allowDistinctFunctions; + } + + public boolean getAllowDistinctFunctions() { + return allowDistinctFunctions; + } + + public boolean getAllowGBExprElimination() { + return allowGBExprElimination; + } + + public boolean getallowAllColRef() { + return allowAllColRef; + } + + public boolean getallowFunctionStar() { + return allowFunctionStar; + } + + public boolean getallowWindowing() { + return allowWindowing; + } + + public boolean getallowIndexExpr() { + return allowIndexExpr; + } + + public boolean getallowSubQueryExpr() { + return allowSubQueryExpr; + } + + public boolean isUseCaching() { + return useCaching; + } +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckProcFactory.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckProcFactory.java new file mode 100644 index 000000000000..b951a9ee12b8 --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckProcFactory.java @@ -0,0 +1,1394 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.parser; + +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import com.google.common.collect.Lists; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.SettableUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.NullWritable; +import org.apache.hive.common.util.DateUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * The Factory for creating typecheck processors. 
The typecheck processors are + * used to processes the syntax trees for expressions and convert them into + * expression Node Descriptor trees. They also introduce the correct conversion + * functions to do proper implicit conversion. + */ +public class TypeCheckProcFactory { + + protected static final Logger LOG = LoggerFactory.getLogger(TypeCheckProcFactory.class + .getName()); + + protected TypeCheckProcFactory() { + // prevent instantiation + } + + /** + * Function to do groupby subexpression elimination. This is called by all the + * processors initially. As an example, consider the query select a+b, + * count(1) from T group by a+b; Then a+b is already precomputed in the group + * by operators key, so we substitute a+b in the select list with the internal + * column name of the a+b expression that appears in the in input row + * resolver. + * + * @param nd + * The node that is being inspected. + * @param procCtx + * The processor context. + * + * @return exprNodeColumnDesc. + */ + public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) + throws SemanticException { + // We recursively create the exprNodeDesc. Base cases: when we encounter + // a column ref, we convert that into an exprNodeColumnDesc; when we + // encounter + // a constant, we convert that into an exprNodeConstantDesc. For others we + // just + // build the exprNodeFuncDesc with recursively built children. + ASTNode expr = (ASTNode) nd; + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + + if (!ctx.isUseCaching()) { + return null; + } + + RowResolver input = ctx.getInputRR(); + ExprNodeDesc desc = null; + + if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) { + return null; + } + + // If the current subExpression is pre-calculated, as in Group-By etc. + ColumnInfo colInfo = input.getExpression(expr); + if (colInfo != null) { + desc = new ExprNodeColumnDesc(colInfo); + ASTNode source = input.getExpressionSource(expr); + if (source != null) { + ctx.getUnparseTranslator().addCopyTranslation(expr, source); + } + return desc; + } + return desc; + } + + public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) + throws SemanticException { + return genExprNode(expr, tcCtx, new TypeCheckProcFactory()); + } + + protected static Map genExprNode(ASTNode expr, + TypeCheckCtx tcCtx, TypeCheckProcFactory tf) throws SemanticException { + // Create the walker, the rules dispatcher and the context. + // create a walker which walks the tree in a DFS manner while maintaining + // the operator stack. 
The dispatcher + // generates the plan from the operator tree + Map opRules = new LinkedHashMap(); + + opRules.put(new RuleRegExp("R1", SparkSqlParser.TOK_NULL + "%"), + tf.getNullExprProcessor()); + opRules.put(new RuleRegExp("R2", SparkSqlParser.Number + "%|" + + SparkSqlParser.TinyintLiteral + "%|" + + SparkSqlParser.SmallintLiteral + "%|" + + SparkSqlParser.BigintLiteral + "%|" + + SparkSqlParser.DecimalLiteral + "%"), + tf.getNumExprProcessor()); + opRules + .put(new RuleRegExp("R3", SparkSqlParser.Identifier + "%|" + + SparkSqlParser.StringLiteral + "%|" + SparkSqlParser.TOK_CHARSETLITERAL + "%|" + + SparkSqlParser.TOK_STRINGLITERALSEQUENCE + "%|" + + "%|" + SparkSqlParser.KW_IF + "%|" + SparkSqlParser.KW_CASE + "%|" + + SparkSqlParser.KW_WHEN + "%|" + SparkSqlParser.KW_IN + "%|" + + SparkSqlParser.KW_ARRAY + "%|" + SparkSqlParser.KW_MAP + "%|" + + SparkSqlParser.KW_STRUCT + "%|" + SparkSqlParser.KW_EXISTS + "%|" + + SparkSqlParser.TOK_SUBQUERY_OP_NOTIN + "%"), + tf.getStrExprProcessor()); + opRules.put(new RuleRegExp("R4", SparkSqlParser.KW_TRUE + "%|" + + SparkSqlParser.KW_FALSE + "%"), tf.getBoolExprProcessor()); + opRules.put(new RuleRegExp("R5", SparkSqlParser.TOK_DATELITERAL + "%|" + + SparkSqlParser.TOK_TIMESTAMPLITERAL + "%"), tf.getDateTimeExprProcessor()); + opRules.put(new RuleRegExp("R6", + SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|" + + SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|" + + SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL + "%|" + + SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL + "%|" + + SparkSqlParser.TOK_INTERVAL_DAY_LITERAL + "%|" + + SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL + "%|" + + SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL + "%|" + + SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor()); + opRules.put(new RuleRegExp("R7", SparkSqlParser.TOK_TABLE_OR_COL + "%"), + tf.getColumnExprProcessor()); + opRules.put(new RuleRegExp("R8", SparkSqlParser.TOK_SUBQUERY_OP + "%"), + tf.getSubQueryExprProcessor()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), + opRules, tcCtx); + GraphWalker ogw = new DefaultGraphWalker(disp); + + // Create a list of top nodes + ArrayList topNodes = Lists.newArrayList(expr); + HashMap nodeOutputs = new LinkedHashMap(); + + ogw.startWalking(topNodes, nodeOutputs); + + return convert(nodeOutputs); + } + + // temporary type-safe casting + private static Map convert(Map outputs) { + Map converted = new LinkedHashMap(); + for (Map.Entry entry : outputs.entrySet()) { + if (entry.getKey() instanceof ASTNode && + (entry.getValue() == null || entry.getValue() instanceof ExprNodeDesc)) { + converted.put((ASTNode)entry.getKey(), (ExprNodeDesc)entry.getValue()); + } else { + LOG.warn("Invalid type entry " + entry); + } + } + return converted; + } + + /** + * Processor for processing NULL expression. + */ + public static class NullExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + return new ExprNodeConstantDesc(TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class), null); + } + + } + + /** + * Factory method to get NullExprProcessor. + * + * @return NullExprProcessor. + */ + public NullExprProcessor getNullExprProcessor() { + return new NullExprProcessor(); + } + + /** + * Processor for processing numeric constants. + */ + public static class NumExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + Number v = null; + ASTNode expr = (ASTNode) nd; + // The expression can be any one of Double, Long and Integer. We + // try to parse the expression in that order to ensure that the + // most specific type is used for conversion. + try { + if (expr.getText().endsWith("L")) { + // Literal bigint. + v = Long.valueOf(expr.getText().substring( + 0, expr.getText().length() - 1)); + } else if (expr.getText().endsWith("S")) { + // Literal smallint. + v = Short.valueOf(expr.getText().substring( + 0, expr.getText().length() - 1)); + } else if (expr.getText().endsWith("Y")) { + // Literal tinyint. + v = Byte.valueOf(expr.getText().substring( + 0, expr.getText().length() - 1)); + } else if (expr.getText().endsWith("BD")) { + // Literal decimal + String strVal = expr.getText().substring(0, expr.getText().length() - 2); + HiveDecimal hd = HiveDecimal.create(strVal); + int prec = 1; + int scale = 0; + if (hd != null) { + prec = hd.precision(); + scale = hd.scale(); + } + DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(prec, scale); + return new ExprNodeConstantDesc(typeInfo, hd); + } else { + v = Double.valueOf(expr.getText()); + v = Long.valueOf(expr.getText()); + v = Integer.valueOf(expr.getText()); + } + } catch (NumberFormatException e) { + // do nothing here, we will throw an exception in the following block + } + if (v == null) { + throw new SemanticException(ErrorMsg.INVALID_NUMERICAL_CONSTANT + .getMsg(expr)); + } + return new ExprNodeConstantDesc(v); + } + + } + + /** + * Factory method to get NumExprProcessor. + * + * @return NumExprProcessor. + */ + public NumExprProcessor getNumExprProcessor() { + return new NumExprProcessor(); + } + + /** + * Processor for processing string constants. + */ + public static class StrExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + ASTNode expr = (ASTNode) nd; + String str = null; + + switch (expr.getToken().getType()) { + case SparkSqlParser.StringLiteral: + str = SemanticAnalyzer.unescapeSQLString(expr.getText()); + break; + case SparkSqlParser.TOK_STRINGLITERALSEQUENCE: + StringBuilder sb = new StringBuilder(); + for (Node n : expr.getChildren()) { + sb.append( + SemanticAnalyzer.unescapeSQLString(((ASTNode)n).getText())); + } + str = sb.toString(); + break; + case SparkSqlParser.TOK_CHARSETLITERAL: + str = SemanticAnalyzer.charSetString(expr.getChild(0).getText(), + expr.getChild(1).getText()); + break; + default: + // SparkSqlParser.identifier | HiveParse.KW_IF | HiveParse.KW_LEFT | + // HiveParse.KW_RIGHT + str = SemanticAnalyzer.unescapeIdentifier(expr.getText()); + break; + } + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str); + } + + } + + /** + * Factory method to get StrExprProcessor. + * + * @return StrExprProcessor. + */ + public StrExprProcessor getStrExprProcessor() { + return new StrExprProcessor(); + } + + /** + * Processor for boolean constants. + */ + public static class BoolExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + ASTNode expr = (ASTNode) nd; + Boolean bool = null; + + switch (expr.getToken().getType()) { + case SparkSqlParser.KW_TRUE: + bool = Boolean.TRUE; + break; + case SparkSqlParser.KW_FALSE: + bool = Boolean.FALSE; + break; + default: + assert false; + } + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, bool); + } + + } + + /** + * Factory method to get BoolExprProcessor. + * + * @return BoolExprProcessor. + */ + public BoolExprProcessor getBoolExprProcessor() { + return new BoolExprProcessor(); + } + + /** + * Processor for date constants. + */ + public static class DateTimeExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + ASTNode expr = (ASTNode) nd; + String timeString = SemanticAnalyzer.stripQuotes(expr.getText()); + + // Get the string value and convert to a Date value. 
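+      // java.sql.Date.valueOf / Timestamp.valueOf expect the JDBC escape format,
+      // e.g. '2011-01-01' and '2011-01-01 12:34:56[.fffffffff]'; anything else is
+      // caught and wrapped in the SemanticException below.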
+ try { + // todo replace below with joda-time, which supports timezone + if (expr.getType() == SparkSqlParser.TOK_DATELITERAL) { + PrimitiveTypeInfo typeInfo = TypeInfoFactory.dateTypeInfo; + return new ExprNodeConstantDesc(typeInfo, + Date.valueOf(timeString)); + } + if (expr.getType() == SparkSqlParser.TOK_TIMESTAMPLITERAL) { + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, + Timestamp.valueOf(timeString)); + } + throw new IllegalArgumentException("Invalid time literal type " + expr.getType()); + } catch (Exception err) { + throw new SemanticException( + "Unable to convert time literal '" + timeString + "' to time value.", err); + } + } + } + + /** + * Factory method to get DateExprProcessor. + * + * @return DateExprProcessor. + */ + public DateTimeExprProcessor getDateTimeExprProcessor() { + return new DateTimeExprProcessor(); + } + + /** + * Processor for interval constants. + */ + public static class IntervalExprProcessor implements NodeProcessor { + + private static final BigDecimal NANOS_PER_SEC_BD = new BigDecimal(DateUtils.NANOS_PER_SEC); + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + ASTNode expr = (ASTNode) nd; + String intervalString = SemanticAnalyzer.stripQuotes(expr.getText()); + + // Get the string value and convert to a Interval value. + try { + switch (expr.getType()) { + case SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, + HiveIntervalYearMonth.valueOf(intervalString)); + case SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + HiveIntervalDayTime.valueOf(intervalString)); + case SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, + new HiveIntervalYearMonth(Integer.parseInt(intervalString), 0)); + case SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, + new HiveIntervalYearMonth(0, Integer.parseInt(intervalString))); + case SparkSqlParser.TOK_INTERVAL_DAY_LITERAL: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + new HiveIntervalDayTime(Integer.parseInt(intervalString), 0, 0, 0, 0)); + case SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + new HiveIntervalDayTime(0, Integer.parseInt(intervalString), 0, 0, 0)); + case SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + new HiveIntervalDayTime(0, 0, Integer.parseInt(intervalString), 0, 0)); + case SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL: + BigDecimal bd = new BigDecimal(intervalString); + BigDecimal bdSeconds = new BigDecimal(bd.toBigInteger()); + BigDecimal bdNanos = bd.subtract(bdSeconds); + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + new HiveIntervalDayTime(0, 0, 0, bdSeconds.intValueExact(), + bdNanos.multiply(NANOS_PER_SEC_BD).intValue())); + default: + throw new IllegalArgumentException("Invalid time literal type " + expr.getType()); + } + } catch (Exception err) { + throw new SemanticException( + "Unable to convert 
interval literal '" + intervalString + "' to interval value.", err); + } + } + } + + /** + * Factory method to get IntervalExprProcessor. + * + * @return IntervalExprProcessor. + */ + public IntervalExprProcessor getIntervalExprProcessor() { + return new IntervalExprProcessor(); + } + + /** + * Processor for table columns. + */ + public static class ColumnExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + ASTNode expr = (ASTNode) nd; + ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) : null; + RowResolver input = ctx.getInputRR(); + + if (expr.getType() != SparkSqlParser.TOK_TABLE_OR_COL) { + ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr); + return null; + } + + assert (expr.getChildCount() == 1); + String tableOrCol = SemanticAnalyzer.unescapeIdentifier(expr + .getChild(0).getText()); + + boolean isTableAlias = input.hasTableAlias(tableOrCol); + ColumnInfo colInfo = input.get(null, tableOrCol); + + if (isTableAlias) { + if (colInfo != null) { + if (parent != null && parent.getType() == SparkSqlParser.DOT) { + // It's a table alias. + return null; + } + // It's a column. + return toExprNodeDesc(colInfo); + } else { + // It's a table alias. + // We will process that later in DOT. + return null; + } + } else { + if (colInfo == null) { + // It's not a column or a table alias. + if (input.getIsExprResolver()) { + ASTNode exprNode = expr; + if (!stack.empty()) { + ASTNode tmp = (ASTNode) stack.pop(); + if (!stack.empty()) { + exprNode = (ASTNode) stack.peek(); + } + stack.push(tmp); + } + ctx.setError(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(exprNode), expr); + return null; + } else { + List possibleColumnNames = input.getReferenceableColumnAliases(tableOrCol, -1); + String reason = String.format("(possible column names are: %s)", + StringUtils.join(possibleColumnNames, ", ")); + ctx.setError(ErrorMsg.INVALID_TABLE_OR_COLUMN.getMsg(expr.getChild(0), reason), + expr); + LOG.debug(ErrorMsg.INVALID_TABLE_OR_COLUMN.toString() + ":" + + input.toString()); + return null; + } + } else { + // It's a column. + return toExprNodeDesc(colInfo); + } + } + + } + + } + + private static ExprNodeDesc toExprNodeDesc(ColumnInfo colInfo) { + ObjectInspector inspector = colInfo.getObjectInspector(); + if (inspector instanceof ConstantObjectInspector && + inspector instanceof PrimitiveObjectInspector) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector; + Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue(); + return new ExprNodeConstantDesc(colInfo.getType(), poi.getPrimitiveJavaObject(constant)); + } + // non-constant or non-primitive constants + ExprNodeColumnDesc column = new ExprNodeColumnDesc(colInfo); + column.setSkewedCol(colInfo.isSkewedCol()); + return column; + } + + /** + * Factory method to get ColumnExprProcessor. + * + * @return ColumnExprProcessor. + */ + public ColumnExprProcessor getColumnExprProcessor() { + return new ColumnExprProcessor(); + } + + /** + * The default processor for typechecking. 
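+   * It handles whatever the literal and column processors above did not match:
+   * unary and binary operators, function calls (including parameterized casts to
+   * char/varchar/decimal), "." field access, "[]" indexing and windowing tokens.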
+ */ + public static class DefaultExprProcessor implements NodeProcessor { + + static HashMap specialUnaryOperatorTextHashMap; + static HashMap specialFunctionTextHashMap; + static HashMap conversionFunctionTextHashMap; + static HashSet windowingTokens; + static { + specialUnaryOperatorTextHashMap = new HashMap(); + specialUnaryOperatorTextHashMap.put(SparkSqlParser.PLUS, "positive"); + specialUnaryOperatorTextHashMap.put(SparkSqlParser.MINUS, "negative"); + specialFunctionTextHashMap = new HashMap(); + specialFunctionTextHashMap.put(SparkSqlParser.TOK_ISNULL, "isnull"); + specialFunctionTextHashMap.put(SparkSqlParser.TOK_ISNOTNULL, "isnotnull"); + conversionFunctionTextHashMap = new HashMap(); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_BOOLEAN, + serdeConstants.BOOLEAN_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_TINYINT, + serdeConstants.TINYINT_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_SMALLINT, + serdeConstants.SMALLINT_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_INT, + serdeConstants.INT_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_BIGINT, + serdeConstants.BIGINT_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_FLOAT, + serdeConstants.FLOAT_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_DOUBLE, + serdeConstants.DOUBLE_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_STRING, + serdeConstants.STRING_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_CHAR, + serdeConstants.CHAR_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_VARCHAR, + serdeConstants.VARCHAR_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_BINARY, + serdeConstants.BINARY_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_DATE, + serdeConstants.DATE_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_TIMESTAMP, + serdeConstants.TIMESTAMP_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_INTERVAL_YEAR_MONTH, + serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_INTERVAL_DAY_TIME, + serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME); + conversionFunctionTextHashMap.put(SparkSqlParser.TOK_DECIMAL, + serdeConstants.DECIMAL_TYPE_NAME); + + windowingTokens = new HashSet(); + windowingTokens.add(SparkSqlParser.KW_OVER); + windowingTokens.add(SparkSqlParser.TOK_PARTITIONINGSPEC); + windowingTokens.add(SparkSqlParser.TOK_DISTRIBUTEBY); + windowingTokens.add(SparkSqlParser.TOK_SORTBY); + windowingTokens.add(SparkSqlParser.TOK_CLUSTERBY); + windowingTokens.add(SparkSqlParser.TOK_WINDOWSPEC); + windowingTokens.add(SparkSqlParser.TOK_WINDOWRANGE); + windowingTokens.add(SparkSqlParser.TOK_WINDOWVALUES); + windowingTokens.add(SparkSqlParser.KW_UNBOUNDED); + windowingTokens.add(SparkSqlParser.KW_PRECEDING); + windowingTokens.add(SparkSqlParser.KW_FOLLOWING); + windowingTokens.add(SparkSqlParser.KW_CURRENT); + windowingTokens.add(SparkSqlParser.TOK_TABSORTCOLNAMEASC); + windowingTokens.add(SparkSqlParser.TOK_TABSORTCOLNAMEDESC); + } + + protected static boolean isRedundantConversionFunction(ASTNode expr, + boolean isFunction, ArrayList children) { + if (!isFunction) { + return false; + } + // conversion functions take a single parameter + if (children.size() != 1) { + return false; + } + String funcText = conversionFunctionTextHashMap.get(((ASTNode) expr + .getChild(0)).getType()); + // not a conversion function + if (funcText == null) { + 
return false; + } + // return true when the child type and the conversion target type is the + // same + return ((PrimitiveTypeInfo) children.get(0).getTypeInfo()).getTypeName() + .equalsIgnoreCase(funcText); + } + + public static String getFunctionText(ASTNode expr, boolean isFunction) { + String funcText = null; + if (!isFunction) { + // For operator, the function name is the operator text, unless it's in + // our special dictionary + if (expr.getChildCount() == 1) { + funcText = specialUnaryOperatorTextHashMap.get(expr.getType()); + } + if (funcText == null) { + funcText = expr.getText(); + } + } else { + // For TOK_FUNCTION, the function name is stored in the first child, + // unless it's in our + // special dictionary. + assert (expr.getChildCount() >= 1); + int funcType = ((ASTNode) expr.getChild(0)).getType(); + funcText = specialFunctionTextHashMap.get(funcType); + if (funcText == null) { + funcText = conversionFunctionTextHashMap.get(funcType); + } + if (funcText == null) { + funcText = ((ASTNode) expr.getChild(0)).getText(); + } + } + return SemanticAnalyzer.unescapeIdentifier(funcText); + } + + /** + * This function create an ExprNodeDesc for a UDF function given the + * children (arguments). It will insert implicit type conversion functions + * if necessary. + * + * @throws UDFArgumentException + */ + static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, TypeInfo typeInfo, + ExprNodeDesc... children) throws UDFArgumentException { + + FunctionInfo fi; + try { + fi = FunctionRegistry.getFunctionInfo(udfName); + } catch (SemanticException e) { + throw new UDFArgumentException(e); + } + if (fi == null) { + throw new UDFArgumentException(udfName + " not found."); + } + + GenericUDF genericUDF = fi.getGenericUDF(); + if (genericUDF == null) { + throw new UDFArgumentException(udfName + + " is an aggregation function or a table function."); + } + + // Add udfData to UDF if necessary + if (typeInfo != null) { + if (genericUDF instanceof SettableUDF) { + ((SettableUDF)genericUDF).setTypeInfo(typeInfo); + } + } + + List childrenList = new ArrayList(children.length); + + childrenList.addAll(Arrays.asList(children)); + return ExprNodeGenericFuncDesc.newInstance(genericUDF, + childrenList); + } + + public static ExprNodeDesc getFuncExprNodeDesc(String udfName, + ExprNodeDesc... children) throws UDFArgumentException { + return getFuncExprNodeDescWithUdfData(udfName, null, children); + } + + protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi, + List children, GenericUDF genericUDF) throws SemanticException { + // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't + // supported + if (fi.getGenericUDTF() != null) { + throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); + } + // UDAF in filter condition, group-by caluse, param of funtion, etc. 
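+      // For an explicit TOK_FUNCTION call the error is reported against the
+      // function-name child; for an operator it is reported against the node itself.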
+ if (fi.getGenericUDAFResolver() != null) { + if (isFunction) { + throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg((ASTNode) expr + .getChild(0))); + } else { + throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr)); + } + } + if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) { + if (FunctionRegistry.isStateful(genericUDF)) { + throw new SemanticException(ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); + } + } + } + + protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, + boolean isFunction, ArrayList children, TypeCheckCtx ctx) + throws SemanticException, UDFArgumentException { + // return the child directly if the conversion is redundant. + if (isRedundantConversionFunction(expr, isFunction, children)) { + assert (children.size() == 1); + assert (children.get(0) != null); + return children.get(0); + } + String funcText = getFunctionText(expr, isFunction); + ExprNodeDesc desc; + if (funcText.equals(".")) { + // "." : FIELD Expression + + assert (children.size() == 2); + // Only allow constant field name for now + assert (children.get(1) instanceof ExprNodeConstantDesc); + ExprNodeDesc object = children.get(0); + ExprNodeConstantDesc fieldName = (ExprNodeConstantDesc) children.get(1); + assert (fieldName.getValue() instanceof String); + + // Calculate result TypeInfo + String fieldNameString = (String) fieldName.getValue(); + TypeInfo objectTypeInfo = object.getTypeInfo(); + + // Allow accessing a field of list element structs directly from a list + boolean isList = (object.getTypeInfo().getCategory() == ObjectInspector.Category.LIST); + if (isList) { + objectTypeInfo = ((ListTypeInfo) objectTypeInfo).getListElementTypeInfo(); + } + if (objectTypeInfo.getCategory() != Category.STRUCT) { + throw new SemanticException(ErrorMsg.INVALID_DOT.getMsg(expr)); + } + TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString); + if (isList) { + t = TypeInfoFactory.getListTypeInfo(t); + } + + desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, isList); + } else if (funcText.equals("[")) { + // "[]" : LSQUARE/INDEX Expression + if (!ctx.getallowIndexExpr()) { + throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); + } + + assert (children.size() == 2); + + // Check whether this is a list or a map + TypeInfo myt = children.get(0).getTypeInfo(); + + if (myt.getCategory() == Category.LIST) { + // Only allow integer index for now + if (!(children.get(1) instanceof ExprNodeConstantDesc) + || !(((ExprNodeConstantDesc) children.get(1)).getTypeInfo() + .equals(TypeInfoFactory.intTypeInfo))) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage( + expr, + ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg())); + } + + // Calculate TypeInfo + TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo(); + desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children); + } else if (myt.getCategory() == Category.MAP) { + if (!(children.get(1) instanceof ExprNodeConstantDesc)) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage( + expr, + ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg())); + } + // Calculate TypeInfo + TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo(); + desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children); + } else { + throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, myt.getTypeName())); + } + } else { + // other operators or functions + FunctionInfo fi = 
FunctionRegistry.getFunctionInfo(funcText); + + if (fi == null) { + if (isFunction) { + throw new SemanticException(ErrorMsg.INVALID_FUNCTION + .getMsg((ASTNode) expr.getChild(0))); + } else { + throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); + } + } + + // getGenericUDF() actually clones the UDF. Just call it once and reuse. + GenericUDF genericUDF = fi.getGenericUDF(); + + if (!fi.isNative()) { + ctx.getUnparseTranslator().addIdentifierTranslation( + (ASTNode) expr.getChild(0)); + } + + // Handle type casts that may contain type parameters + if (isFunction) { + ASTNode funcNameNode = (ASTNode)expr.getChild(0); + switch (funcNameNode.getType()) { + case SparkSqlParser.TOK_CHAR: + // Add type params + CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(funcNameNode); + if (genericUDF != null) { + ((SettableUDF)genericUDF).setTypeInfo(charTypeInfo); + } + break; + case SparkSqlParser.TOK_VARCHAR: + VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(funcNameNode); + if (genericUDF != null) { + ((SettableUDF)genericUDF).setTypeInfo(varcharTypeInfo); + } + break; + case SparkSqlParser.TOK_DECIMAL: + DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(funcNameNode); + if (genericUDF != null) { + ((SettableUDF)genericUDF).setTypeInfo(decTypeInfo); + } + break; + default: + // Do nothing + break; + } + } + + validateUDF(expr, isFunction, ctx, fi, children, genericUDF); + + // Try to infer the type of the constant only if there are two + // nodes, one of them is column and the other is numeric const + if (genericUDF instanceof GenericUDFBaseCompare + && children.size() == 2 + && ((children.get(0) instanceof ExprNodeConstantDesc + && children.get(1) instanceof ExprNodeColumnDesc) + || (children.get(0) instanceof ExprNodeColumnDesc + && children.get(1) instanceof ExprNodeConstantDesc))) { + int constIdx = + children.get(0) instanceof ExprNodeConstantDesc ? 
0 : 1; + + Set inferTypes = new HashSet(Arrays.asList( + serdeConstants.TINYINT_TYPE_NAME.toLowerCase(), + serdeConstants.SMALLINT_TYPE_NAME.toLowerCase(), + serdeConstants.INT_TYPE_NAME.toLowerCase(), + serdeConstants.BIGINT_TYPE_NAME.toLowerCase(), + serdeConstants.FLOAT_TYPE_NAME.toLowerCase(), + serdeConstants.DOUBLE_TYPE_NAME.toLowerCase(), + serdeConstants.STRING_TYPE_NAME.toLowerCase() + )); + + String constType = children.get(constIdx).getTypeString().toLowerCase(); + String columnType = children.get(1 - constIdx).getTypeString().toLowerCase(); + + if (inferTypes.contains(constType) && inferTypes.contains(columnType) + && !columnType.equalsIgnoreCase(constType)) { + Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue(); + String constValue = originalValue.toString(); + boolean triedDouble = false; + Number value = null; + try { + if (columnType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)) { + value = new Byte(constValue); + } else if (columnType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)) { + value = new Short(constValue); + } else if (columnType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) { + value = new Integer(constValue); + } else if (columnType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + value = new Long(constValue); + } else if (columnType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + value = new Float(constValue); + } else if (columnType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + triedDouble = true; + value = new Double(constValue); + } else if (columnType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + // Don't scramble the const type information if comparing to a string column, + // It's not useful to do so; as of now, there is also a hack in + // SemanticAnalyzer#genTablePlan that causes every column to look like a string + // a string down here, so number type information is always lost otherwise. + boolean isNumber = (originalValue instanceof Number); + triedDouble = !isNumber; + value = isNumber ? (Number)originalValue : new Double(constValue); + } + } catch (NumberFormatException nfe) { + // this exception suggests the precise type inference did not succeed + // we'll try again to convert it to double + // however, if we already tried this, or the column is NUMBER type and + // the operator is EQUAL, return false due to the type mismatch + if (triedDouble && + (genericUDF instanceof GenericUDFOPEqual + && !columnType.equals(serdeConstants.STRING_TYPE_NAME))) { + return new ExprNodeConstantDesc(false); + } + + try { + value = new Double(constValue); + } catch (NumberFormatException ex) { + return new ExprNodeConstantDesc(false); + } + } + + if (value != null) { + children.set(constIdx, new ExprNodeConstantDesc(value)); + } + } + + // if column type is char and constant type is string, then convert the constant to char + // type with padded spaces. 
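+        // e.g. comparing a char(10) column to the string constant 'abc' rewrites the
+        // constant to HiveChar('abc', 10) so both sides compare under char semantics.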
+ final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory + .getPrimitiveTypeInfo(columnType); + if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) && + colTypeInfo instanceof CharTypeInfo) { + final Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue(); + final String constValue = originalValue.toString(); + final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo); + final HiveChar newValue = new HiveChar(constValue, length); + children.set(constIdx, new ExprNodeConstantDesc(colTypeInfo, newValue)); + } + } + if (genericUDF instanceof GenericUDFOPOr) { + // flatten OR + List childrenList = new ArrayList( + children.size()); + for (ExprNodeDesc child : children) { + if (FunctionRegistry.isOpOr(child)) { + childrenList.addAll(child.getChildren()); + } else { + childrenList.add(child); + } + } + desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, + childrenList); + } else if (genericUDF instanceof GenericUDFOPAnd) { + // flatten AND + List childrenList = new ArrayList( + children.size()); + for (ExprNodeDesc child : children) { + if (FunctionRegistry.isOpAnd(child)) { + childrenList.addAll(child.getChildren()); + } else { + childrenList.add(child); + } + } + desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, + childrenList); + } else { + desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, + children); + } + } + // UDFOPPositive is a no-op. + // However, we still create it, and then remove it here, to make sure we + // only allow + // "+" for numeric types. + if (FunctionRegistry.isOpPositive(desc)) { + assert (desc.getChildren().size() == 1); + desc = desc.getChildren().get(0); + } + assert (desc != null); + return desc; + } + + /** + * Returns true if des is a descendant of ans (ancestor) + */ + private boolean isDescendant(Node ans, Node des) { + if (ans.getChildren() == null) { + return false; + } + for (Node c : ans.getChildren()) { + if (c == des) { + return true; + } + if (isDescendant(c, des)) { + return true; + } + } + return false; + } + + protected ExprNodeDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, + Object... nodeOutputs) throws SemanticException { + RowResolver input = ctx.getInputRR(); + String tableAlias = SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) + .getText()); + // NOTE: tableAlias must be a valid non-ambiguous table alias, + // because we've checked that in TOK_TABLE_OR_COL's process method. + String colName; + if (nodeOutputs[1] instanceof ExprNodeConstantDesc) { + colName = ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(); + } else if (nodeOutputs[1] instanceof ExprNodeColumnDesc) { + colName = ((ExprNodeColumnDesc)nodeOutputs[1]).getColumn(); + } else { + throw new SemanticException("Unexpected ExprNode : " + nodeOutputs[1]); + } + ColumnInfo colInfo = input.get(tableAlias, colName); + + if (colInfo == null) { + ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); + return null; + } + return toExprNodeDesc(colInfo); + } + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + // Here we know nd represents a group by expression. + + // During the DFS traversal of the AST, a descendant of nd likely set an + // error because a sub-tree of nd is unlikely to also be a group by + // expression. 
For example, in a query such as + // SELECT *concat(key)* FROM src GROUP BY concat(key), 'key' will be + // processed before 'concat(key)' and since 'key' is not a group by + // expression, an error will be set in ctx by ColumnExprProcessor. + + // We can clear the global error when we see that it was set in a + // descendant node of a group by expression because + // processGByExpr() returns a ExprNodeDesc that effectively ignores + // its children. Although the error can be set multiple times by + // descendant nodes, DFS traversal ensures that the error only needs to + // be cleared once. Also, for a case like + // SELECT concat(value, concat(value))... the logic still works as the + // error is only set with the first 'value'; all node processors quit + // early if the global error is set. + + if (isDescendant(nd, ctx.getErrorSrcNode())) { + ctx.setError(null, null); + } + return desc; + } + + if (ctx.getError() != null) { + return null; + } + + ASTNode expr = (ASTNode) nd; + + /* + * A Windowing specification get added as a child to a UDAF invocation to distinguish it + * from similar UDAFs but on different windows. + * The UDAF is translated to a WindowFunction invocation in the PTFTranslator. + * So here we just return null for tokens that appear in a Window Specification. + * When the traversal reaches up to the UDAF invocation its ExprNodeDesc is build using the + * ColumnInfo in the InputRR. This is similar to how UDAFs are handled in Select lists. + * The difference is that there is translation for Window related tokens, so we just + * return null; + */ + if (windowingTokens.contains(expr.getType())) { + if (!ctx.getallowWindowing()) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, + ErrorMsg.INVALID_FUNCTION.getMsg("Windowing is not supported in the context"))); + } + return null; + } + + if (expr.getType() == SparkSqlParser.TOK_TABNAME) { + return null; + } + + if (expr.getType() == SparkSqlParser.TOK_ALLCOLREF) { + if (!ctx.getallowAllColRef()) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, + ErrorMsg.INVALID_COLUMN + .getMsg("All column reference is not supported in the context"))); + } + + RowResolver input = ctx.getInputRR(); + ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc(); + assert expr.getChildCount() <= 1; + if (expr.getChildCount() == 1) { + // table aliased (select a.*, for example) + ASTNode child = (ASTNode) expr.getChild(0); + assert child.getType() == SparkSqlParser.TOK_TABNAME; + assert child.getChildCount() == 1; + String tableAlias = SemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText()); + HashMap columns = input.getFieldMap(tableAlias); + if (columns == null) { + throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(child)); + } + for (Map.Entry colMap : columns.entrySet()) { + ColumnInfo colInfo = colMap.getValue(); + if (!colInfo.getIsVirtualCol()) { + columnList.addColumn(toExprNodeDesc(colInfo)); + } + } + } else { + // all columns (select *, for example) + for (ColumnInfo colInfo : input.getColumnInfos()) { + if (!colInfo.getIsVirtualCol()) { + columnList.addColumn(toExprNodeDesc(colInfo)); + } + } + } + return columnList; + } + + // If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL, + // and the operator is a DOT, then it's a table column reference. 
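+    // For example, for "t.key" the TOK_TABLE_OR_COL child ("t") produces a null node
+    // output because "t" is a table alias rather than a column, so the DOT is resolved
+    // as a qualified column reference through processQualifiedColRef().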
+ if (expr.getType() == SparkSqlParser.DOT + && expr.getChild(0).getType() == SparkSqlParser.TOK_TABLE_OR_COL + && nodeOutputs[0] == null) { + return processQualifiedColRef(ctx, expr, nodeOutputs); + } + + // Return nulls for conversion operators + if (conversionFunctionTextHashMap.keySet().contains(expr.getType()) + || specialFunctionTextHashMap.keySet().contains(expr.getType()) + || expr.getToken().getType() == SparkSqlParser.CharSetName + || expr.getToken().getType() == SparkSqlParser.CharSetLiteral) { + return null; + } + + boolean isFunction = (expr.getType() == SparkSqlParser.TOK_FUNCTION || + expr.getType() == SparkSqlParser.TOK_FUNCTIONSTAR || + expr.getType() == SparkSqlParser.TOK_FUNCTIONDI); + + if (!ctx.getAllowDistinctFunctions() && expr.getType() == SparkSqlParser.TOK_FUNCTIONDI) { + throw new SemanticException( + SemanticAnalyzer.generateErrorMessage(expr, ErrorMsg.DISTINCT_NOT_SUPPORTED.getMsg())); + } + + // Create all children + int childrenBegin = (isFunction ? 1 : 0); + ArrayList children = new ArrayList( + expr.getChildCount() - childrenBegin); + for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) { + if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) { + children.addAll(((ExprNodeColumnListDesc) nodeOutputs[ci]).getChildren()); + } else { + children.add((ExprNodeDesc) nodeOutputs[ci]); + } + } + + if (expr.getType() == SparkSqlParser.TOK_FUNCTIONSTAR) { + if (!ctx.getallowFunctionStar()) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, + ErrorMsg.INVALID_COLUMN + .getMsg(".* reference is not supported in the context"))); + } + + RowResolver input = ctx.getInputRR(); + for (ColumnInfo colInfo : input.getColumnInfos()) { + if (!colInfo.getIsVirtualCol()) { + children.add(toExprNodeDesc(colInfo)); + } + } + } + + // If any of the children contains null, then return a null + // this is a hack for now to handle the group by case + if (children.contains(null)) { + List possibleColumnNames = getReferenceableColumnAliases(ctx); + String reason = String.format("(possible column names are: %s)", + StringUtils.join(possibleColumnNames, ", ")); + ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0), reason), + expr); + return null; + } + + // Create function desc + try { + return getXpathOrFuncExprNodeDesc(expr, isFunction, children, ctx); + } catch (UDFArgumentTypeException e) { + throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_TYPE.getMsg(expr + .getChild(childrenBegin + e.getArgumentId()), e.getMessage())); + } catch (UDFArgumentLengthException e) { + throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_LENGTH.getMsg( + expr, e.getMessage())); + } catch (UDFArgumentException e) { + throw new SemanticException(ErrorMsg.INVALID_ARGUMENT.getMsg(expr, e + .getMessage())); + } + } + + protected List getReferenceableColumnAliases(TypeCheckCtx ctx) { + return ctx.getInputRR().getReferenceableColumnAliases(null, -1); + } + } + + /** + * Factory method to get DefaultExprProcessor. + * + * @return DefaultExprProcessor. + */ + public DefaultExprProcessor getDefaultExprProcessor() { + return new DefaultExprProcessor(); + } + + /** + * Processor for subquery expressions.. + */ + public static class SubQueryExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ASTNode expr = (ASTNode) nd; + ASTNode sqNode = (ASTNode) expr.getParent().getChild(1); + + if (!ctx.getallowSubQueryExpr()) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(sqNode, + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg())); + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + /* + * Restriction.1.h :: SubQueries only supported in the SQL Where Clause. + */ + ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode, + "Currently SubQuery expressions are only allowed as Where Clause predicates"), + sqNode); + return null; + } + } + + /** + * Factory method to get SubQueryExprProcessor. + * + * @return DateExprProcessor. + */ + public SubQueryExprProcessor getSubQueryExprProcessor() { + return new SubQueryExprProcessor(); + } +} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/UnparseTranslator.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/UnparseTranslator.java new file mode 100644 index 000000000000..defbeb93ab3d --- /dev/null +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/UnparseTranslator.java @@ -0,0 +1,274 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeMap; + +import org.antlr.runtime.TokenRewriteStream; +import org.apache.hadoop.conf.Configuration; + +/** + * UnparseTranslator is used to "unparse" objects such as views when their + * definition is stored. It has a translations map where its possible to replace all the + * text with the appropriate escaped version [say invites.ds will be replaced with + * `invites`.`ds` and the entire query is processed like this and stored as + * Extended text in table's metadata]. This holds all individual translations and + * where they apply in the stream. The unparse is lazy and happens when + * SemanticAnalyzer.saveViewDefinition() calls TokenRewriteStream.toString(). + * + */ +public class UnparseTranslator { + // key is token start index + private final NavigableMap translations; + private final List copyTranslations; + private boolean enabled; + private Configuration conf; + + public UnparseTranslator(Configuration conf) { + this.conf = conf; + translations = new TreeMap(); + copyTranslations = new ArrayList(); + } + + /** + * Enable this translator. 
+ */ + void enable() { + enabled = true; + } + + /** + * @return whether this translator has been enabled + */ + boolean isEnabled() { + return enabled; + } + + /** + * Register a translation to be performed as part of unparse. ANTLR imposes + * strict conditions on the translations and errors out during + * TokenRewriteStream.toString() if there is an overlap. It expects all + * the translations to be disjoint (See HIVE-2439). + * If the translation overlaps with any previously + * registered translation, then it must be either + * identical or a prefix (in which cases it is ignored), + * or else it must extend the existing translation (i.e. + * the existing translation must be a prefix/suffix of the new translation). + * All other overlap cases result in assertion failures. + * + * @param node + * target node whose subtree is to be replaced + * + * @param replacementText + * text to use as replacement + */ + void addTranslation(ASTNode node, String replacementText) { + if (!enabled) { + return; + } + + if (node.getOrigin() != null) { + // This node was parsed while loading the definition of another view + // being referenced by the one being created, and we don't want + // to track any expansions for the underlying view. + return; + } + + int tokenStartIndex = node.getTokenStartIndex(); + int tokenStopIndex = node.getTokenStopIndex(); + if (tokenStopIndex < 0) { + // this is for artificially added tokens + return; + } + Translation translation = new Translation(); + translation.tokenStopIndex = tokenStopIndex; + translation.replacementText = replacementText; + + // Sanity check for overlap with regions already being expanded + assert (tokenStopIndex >= tokenStartIndex); + + List subsetEntries = new ArrayList(); + // Is the existing entry and newer entry are subset of one another ? + for (Map.Entry existingEntry : + translations.headMap(tokenStopIndex, true).entrySet()) { + // check if the new entry contains the existing + if (existingEntry.getValue().tokenStopIndex <= tokenStopIndex && + existingEntry.getKey() >= tokenStartIndex) { + // Collect newer entry is if a super-set of existing entry, + assert (replacementText.contains(existingEntry.getValue().replacementText)); + subsetEntries.add(existingEntry.getKey()); + // check if the existing entry contains the new + } else if (existingEntry.getValue().tokenStopIndex >= tokenStopIndex && + existingEntry.getKey() <= tokenStartIndex) { + assert (existingEntry.getValue().replacementText.contains(replacementText)); + // we don't need to add this new entry since there's already an overlapping one + return; + } + } + // remove any existing entries that are contained by the new one + for (Integer index : subsetEntries) { + translations.remove(index); + } + + // It's all good: create a new entry in the map (or update existing one) + translations.put(tokenStartIndex, translation); + } + + /** + * Register a translation for an tabName. 
+ * + * @param tableName + * source node (which must be an tabName) to be replaced + */ + void addTableNameTranslation(ASTNode tableName, String currentDatabaseName) { + if (!enabled) { + return; + } + if (tableName.getToken().getType() == SparkSqlParser.Identifier) { + addIdentifierTranslation(tableName); + return; + } + assert (tableName.getToken().getType() == SparkSqlParser.TOK_TABNAME); + assert (tableName.getChildCount() <= 2); + + if (tableName.getChildCount() == 2) { + addIdentifierTranslation((ASTNode)tableName.getChild(0)); + addIdentifierTranslation((ASTNode)tableName.getChild(1)); + } + else { + // transform the table reference to an absolute reference (i.e., "db.table") + StringBuilder replacementText = new StringBuilder(); + replacementText.append(ParseUtils.unparseIdentifier(currentDatabaseName, conf)); + replacementText.append('.'); + + ASTNode identifier = (ASTNode)tableName.getChild(0); + String identifierText = SemanticAnalyzer.unescapeIdentifier(identifier.getText()); + replacementText.append(ParseUtils.unparseIdentifier(identifierText, conf)); + + addTranslation(identifier, replacementText.toString()); + } + } + + /** + * Register a translation for an identifier. + * + * @param identifier + * source node (which must be an identifier) to be replaced + */ + void addIdentifierTranslation(ASTNode identifier) { + if (!enabled) { + return; + } + assert (identifier.getToken().getType() == SparkSqlParser.Identifier); + String replacementText = identifier.getText(); + replacementText = SemanticAnalyzer.unescapeIdentifier(replacementText); + replacementText = ParseUtils.unparseIdentifier(replacementText, conf); + addTranslation(identifier, replacementText); + } + + /** + * Register a "copy" translation in which a node will be translated into + * whatever the translation turns out to be for another node (after + * previously registered translations have already been performed). Deferred + * translations are performed in the order they are registered, and follow + * the same rules regarding overlap as non-copy translations. + * + * @param targetNode node whose subtree is to be replaced + * + * @param sourceNode the node providing the replacement text + * + */ + void addCopyTranslation(ASTNode targetNode, ASTNode sourceNode) { + if (!enabled) { + return; + } + + if (targetNode.getOrigin() != null) { + return; + } + + CopyTranslation copyTranslation = new CopyTranslation(); + copyTranslation.targetNode = targetNode; + copyTranslation.sourceNode = sourceNode; + copyTranslations.add(copyTranslation); + } + + /** + * Apply all translations on the given token stream. 
+ * + * @param tokenRewriteStream + * rewrite-capable stream + */ + void applyTranslations(TokenRewriteStream tokenRewriteStream) { + for (Map.Entry entry : translations.entrySet()) { + if (entry.getKey() > 0) { + // negative means the key didn't exist in the original stream (i.e.: we changed the tree) + tokenRewriteStream.replace( + entry.getKey(), + entry.getValue().tokenStopIndex, + entry.getValue().replacementText); + } + } + for (CopyTranslation copyTranslation : copyTranslations) { + String replacementText = tokenRewriteStream.toString( + copyTranslation.sourceNode.getTokenStartIndex(), + copyTranslation.sourceNode.getTokenStopIndex()); + String currentText = tokenRewriteStream.toString( + copyTranslation.targetNode.getTokenStartIndex(), + copyTranslation.targetNode.getTokenStopIndex()); + if (currentText.equals(replacementText)) { + // copy is a nop, so skip it--this is important for avoiding + // spurious overlap assertions + continue; + } + // Call addTranslation just to get the assertions for overlap + // checking. + addTranslation(copyTranslation.targetNode, replacementText); + tokenRewriteStream.replace( + copyTranslation.targetNode.getTokenStartIndex(), + copyTranslation.targetNode.getTokenStopIndex(), + replacementText); + } + } + + private static class Translation { + int tokenStopIndex; + String replacementText; + + @Override + public String toString() { + return "" + tokenStopIndex + " -> " + replacementText; + } + } + + private static class CopyTranslation { + ASTNode targetNode; + ASTNode sourceNode; + } + + public void clear() { + translations.clear(); + copyTranslations.clear(); + enabled = false; + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 0e89928cb636..8781355f4cd9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -27,28 +27,28 @@ import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry} import org.apache.hadoop.hive.ql.lib.Node -import org.apache.hadoop.hive.ql.parse._ +import org.apache.hadoop.hive.ql.parse.SemanticException import org.apache.hadoop.hive.ql.plan.PlanUtils import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.ql.{Context, ErrorMsg} import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe - import org.apache.spark.Logging -import org.apache.spark.sql.{AnalysisException, catalyst} +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.plans.{logical, _} import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.{logical, _} import org.apache.spark.sql.catalyst.trees.CurrentOrigin -import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.ExplainCommand import org.apache.spark.sql.execution.datasources.DescribeCommand import org.apache.spark.sql.hive.HiveShim._ import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{AnalyzeTable, DropTable, HiveNativeCommand, HiveScriptIOSchema} +import org.apache.spark.sql.parser._ import org.apache.spark.sql.types._ +import 
org.apache.spark.sql.{AnalysisException, catalyst} import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.random.RandomSampler @@ -273,7 +273,8 @@ private[hive] object HiveQl extends Logging { private def createContext(): Context = new Context(hiveConf) private def getAst(sql: String, context: Context) = - ParseUtils.findRootNonNullToken((new ParseDriver).parse(sql, context)) + ParseUtils.findRootNonNullToken( + (new ParseDriver).parse(sql, context)) /** * Returns the HiveConf @@ -337,7 +338,8 @@ private[hive] object HiveQl extends Logging { val tree = try { ParseUtils.findRootNonNullToken( - (new ParseDriver).parse(ddl, null /* no context required for parsing alone */)) + (new ParseDriver) + .parse(ddl, null /* no context required for parsing alone */)) } catch { case pe: org.apache.hadoop.hive.ql.parse.ParseException => throw new RuntimeException(s"Failed to parse ddl: '$ddl'", pe) @@ -662,7 +664,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C NativePlaceholder } else { val schema = maybeColumns.map { cols => - BaseSemanticAnalyzer.getColumns(cols, true).asScala.map { field => + SemanticAnalyzer.getColumns(cols, true).asScala.map { field => // We can't specify column types when create view, so fill it with null first, and // update it after the schema has been resolved later. HiveColumn(field.getName, null, field.getComment) @@ -678,7 +680,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C maybeComment.foreach { case Token("TOK_TABLECOMMENT", child :: Nil) => - val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + val comment = SemanticAnalyzer.unescapeSQLString(child.getText) if (comment ne null) { properties += ("comment" -> comment) } @@ -750,7 +752,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C children.collect { case list @ Token("TOK_TABCOLLIST", _) => - val cols = BaseSemanticAnalyzer.getColumns(list, true) + val cols = SemanticAnalyzer.getColumns(list, true) if (cols != null) { tableDesc = tableDesc.copy( schema = cols.asScala.map { field => @@ -758,11 +760,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C }) } case Token("TOK_TABLECOMMENT", child :: Nil) => - val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + val comment = SemanticAnalyzer.unescapeSQLString(child.getText) // TODO support the sql text tableDesc = tableDesc.copy(viewText = Option(comment)) case Token("TOK_TABLEPARTCOLS", list @ Token("TOK_TABCOLLIST", _) :: Nil) => - val cols = BaseSemanticAnalyzer.getColumns(list(0), false) + val cols = SemanticAnalyzer.getColumns(list(0), false) if (cols != null) { tableDesc = tableDesc.copy( partitionColumns = cols.asScala.map { field => @@ -773,21 +775,21 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val serdeParams = new java.util.HashMap[String, String]() child match { case Token("TOK_TABLEROWFORMATFIELD", rowChild1 :: rowChild2) => - val fieldDelim = BaseSemanticAnalyzer.unescapeSQLString (rowChild1.getText()) + val fieldDelim = SemanticAnalyzer.unescapeSQLString (rowChild1.getText()) serdeParams.put(serdeConstants.FIELD_DELIM, fieldDelim) serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, fieldDelim) if (rowChild2.length > 1) { - val fieldEscape = BaseSemanticAnalyzer.unescapeSQLString (rowChild2(0).getText) + val fieldEscape = SemanticAnalyzer.unescapeSQLString (rowChild2(0).getText) 
serdeParams.put(serdeConstants.ESCAPE_CHAR, fieldEscape) } case Token("TOK_TABLEROWFORMATCOLLITEMS", rowChild :: Nil) => - val collItemDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + val collItemDelim = SemanticAnalyzer.unescapeSQLString(rowChild.getText) serdeParams.put(serdeConstants.COLLECTION_DELIM, collItemDelim) case Token("TOK_TABLEROWFORMATMAPKEYS", rowChild :: Nil) => - val mapKeyDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + val mapKeyDelim = SemanticAnalyzer.unescapeSQLString(rowChild.getText) serdeParams.put(serdeConstants.MAPKEY_DELIM, mapKeyDelim) case Token("TOK_TABLEROWFORMATLINES", rowChild :: Nil) => - val lineDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + val lineDelim = SemanticAnalyzer.unescapeSQLString(rowChild.getText) if (!(lineDelim == "\n") && !(lineDelim == "10")) { throw new AnalysisException( SemanticAnalyzer.generateErrorMessage( @@ -796,22 +798,22 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C } serdeParams.put(serdeConstants.LINE_DELIM, lineDelim) case Token("TOK_TABLEROWFORMATNULL", rowChild :: Nil) => - val nullFormat = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + val nullFormat = SemanticAnalyzer.unescapeSQLString(rowChild.getText) // TODO support the nullFormat case _ => assert(false) } tableDesc = tableDesc.copy( serdeProperties = tableDesc.serdeProperties ++ serdeParams.asScala) case Token("TOK_TABLELOCATION", child :: Nil) => - var location = BaseSemanticAnalyzer.unescapeSQLString(child.getText) - location = EximUtil.relativeToAbsolutePath(hiveConf, location) + var location = SemanticAnalyzer.unescapeSQLString(child.getText) + location = SemanticAnalyzer.relativeToAbsolutePath(hiveConf, location) tableDesc = tableDesc.copy(location = Option(location)) case Token("TOK_TABLESERIALIZER", child :: Nil) => tableDesc = tableDesc.copy( - serde = Option(BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText))) + serde = Option(SemanticAnalyzer.unescapeSQLString(child.getChild(0).getText))) if (child.getChildCount == 2) { val serdeParams = new java.util.HashMap[String, String]() - BaseSemanticAnalyzer.readProps( + SemanticAnalyzer.readProps( (child.getChild(1).getChild(0)).asInstanceOf[ASTNode], serdeParams) tableDesc = tableDesc.copy( serdeProperties = tableDesc.serdeProperties ++ serdeParams.asScala) @@ -891,9 +893,9 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case list @ Token("TOK_TABLEFILEFORMAT", children) => tableDesc = tableDesc.copy( inputFormat = - Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)), + Option(SemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)), outputFormat = - Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(1).getText))) + Option(SemanticAnalyzer.unescapeSQLString(list.getChild(1).getText))) case Token("TOK_STORAGEHANDLER", _) => throw new AnalysisException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg()) case _ => // Unsupport features @@ -1025,20 +1027,20 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C (rowFormat, None, Nil, false) case Token("TOK_SERDENAME", Token(serdeClass, Nil) :: Nil) :: Nil => - (Nil, Some(BaseSemanticAnalyzer.unescapeSQLString(serdeClass)), Nil, false) + (Nil, Some(SemanticAnalyzer.unescapeSQLString(serdeClass)), Nil, false) case Token("TOK_SERDENAME", Token(serdeClass, Nil) :: Token("TOK_TABLEPROPERTIES", Token("TOK_TABLEPROPLIST", propsClause) :: Nil) :: Nil) :: Nil => val 
serdeProps = propsClause.map { case Token("TOK_TABLEPROPERTY", Token(name, Nil) :: Token(value, Nil) :: Nil) => - (BaseSemanticAnalyzer.unescapeSQLString(name), - BaseSemanticAnalyzer.unescapeSQLString(value)) + (SemanticAnalyzer.unescapeSQLString(name), + SemanticAnalyzer.unescapeSQLString(value)) } // SPARK-10310: Special cases LazySimpleSerDe // TODO Fully supports user-defined record reader/writer classes - val unescapedSerDeClass = BaseSemanticAnalyzer.unescapeSQLString(serdeClass) + val unescapedSerDeClass = SemanticAnalyzer.unescapeSQLString(serdeClass) val useDefaultRecordReaderWriter = unescapedSerDeClass == classOf[LazySimpleSerDe].getCanonicalName (Nil, Some(unescapedSerDeClass), serdeProps, useDefaultRecordReaderWriter) @@ -1055,7 +1057,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val (outRowFormat, outSerdeClass, outSerdeProps, useDefaultRecordWriter) = matchSerDe(outputSerdeClause) - val unescapedScript = BaseSemanticAnalyzer.unescapeSQLString(script) + val unescapedScript = SemanticAnalyzer.unescapeSQLString(script) // TODO Adds support for user-defined record reader/writer classes val recordReaderClass = if (useDefaultRecordReader) { @@ -1475,11 +1477,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C } val numericAstTypes = Seq( - HiveParser.Number, - HiveParser.TinyintLiteral, - HiveParser.SmallintLiteral, - HiveParser.BigintLiteral, - HiveParser.DecimalLiteral) + SparkSqlParser.Number, + SparkSqlParser.TinyintLiteral, + SparkSqlParser.SmallintLiteral, + SparkSqlParser.BigintLiteral, + SparkSqlParser.DecimalLiteral) /* Case insensitive matches */ val COUNT = "(?i)COUNT".r @@ -1649,7 +1651,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token(TRUE(), Nil) => Literal.create(true, BooleanType) case Token(FALSE(), Nil) => Literal.create(false, BooleanType) case Token("TOK_STRINGLITERALSEQUENCE", strings) => - Literal(strings.map(s => BaseSemanticAnalyzer.unescapeSQLString(s.getText)).mkString) + Literal(strings.map(s => SemanticAnalyzer.unescapeSQLString(s.getText)).mkString) // This code is adapted from // /ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java#L223 @@ -1684,37 +1686,37 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C v } - case ast: ASTNode if ast.getType == HiveParser.StringLiteral => - Literal(BaseSemanticAnalyzer.unescapeSQLString(ast.getText)) + case ast: ASTNode if ast.getType == SparkSqlParser.StringLiteral => + Literal(SemanticAnalyzer.unescapeSQLString(ast.getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_DATELITERAL => + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_DATELITERAL => Literal(Date.valueOf(ast.getText.substring(1, ast.getText.length - 1))) - case ast: ASTNode if ast.getType == HiveParser.TOK_CHARSETLITERAL => - Literal(BaseSemanticAnalyzer.charSetString(ast.getChild(0).getText, ast.getChild(1).getText)) + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_CHARSETLITERAL => + Literal(SemanticAnalyzer.charSetString(ast.getChild(0).getText, ast.getChild(1).getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL => + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL => Literal(CalendarInterval.fromYearMonthString(ast.getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL => + case ast: ASTNode if ast.getType == 
SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL => Literal(CalendarInterval.fromDayTimeString(ast.getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_YEAR_LITERAL => + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL => Literal(CalendarInterval.fromSingleUnitString("year", ast.getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_MONTH_LITERAL => + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL => Literal(CalendarInterval.fromSingleUnitString("month", ast.getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_DAY_LITERAL => + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_DAY_LITERAL => Literal(CalendarInterval.fromSingleUnitString("day", ast.getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_HOUR_LITERAL => + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL => Literal(CalendarInterval.fromSingleUnitString("hour", ast.getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_MINUTE_LITERAL => + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL => Literal(CalendarInterval.fromSingleUnitString("minute", ast.getText)) - case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_SECOND_LITERAL => + case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL => Literal(CalendarInterval.fromSingleUnitString("second", ast.getText)) case a: ASTNode => From 0cbf502356ca70d2455d385c4fb0540c38ef9301 Mon Sep 17 00:00:00 2001 From: Nong Li Date: Mon, 21 Dec 2015 11:57:18 -0800 Subject: [PATCH 02/14] Updates to support antlr 3.5.2 and SBT build. --- pom.xml | 2 +- project/SparkBuild.scala | 2 +- project/plugins.sbt | 4 +++- .../org/apache/spark/sql/parser/IdentifiersParser.g | 2 +- .../antlr3/org/apache/spark/sql/parser/SparkSqlParser.g | 8 ++++---- .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 2 +- 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index 3f5fb02f7e59..d03d2ec8d3bc 100644 --- a/pom.xml +++ b/pom.xml @@ -1954,7 +1954,7 @@ org.antlr antlr3-maven-plugin - 3.4 + 3.5.2 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index c3d53f835f39..df21d3eb636f 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -415,7 +415,7 @@ object Hive { // in order to generate golden files. This is only required for developers who are adding new // new query tests. 
fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") } - ) + ) ++ sbtantlr.SbtAntlrPlugin.antlrSettings } diff --git a/project/plugins.sbt b/project/plugins.sbt index e671b61d9181..f172dc9c1f0e 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -4,6 +4,8 @@ resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/release resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/" +resolvers += "stefri" at "http://stefri.github.io/repo/releases" + addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0") @@ -24,7 +26,7 @@ addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2") addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2") -addSbtPlugin("org.antlr" % "antlr4-maven-plugin" % "3.4") +addSbtPlugin("com.github.stefri" % "sbt-antlr" % "0.5.3") libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3" diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g index bac0d2254c17..e5d9f2eb1a02 100644 --- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g @@ -297,7 +297,7 @@ intervalLiteral : KW_INTERVAL StringLiteral qualifiers=intervalQualifiers -> { - adaptor.create(qualifiers.tree.token.getType(), $StringLiteral.text) + adaptor.create($qualifiers.tree.token.getType(), $StringLiteral.text) } ; diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g index a1bd3d10c957..6f1cd41e6593 100644 --- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g @@ -2110,7 +2110,7 @@ unionType @after { popMsg(state); } : KW_UNIONTYPE LESSTHAN colTypeList GREATERTHAN -> ^(TOK_UNIONTYPE colTypeList) ; - + setOperator @init { pushMsg("set operator", state); } @after { popMsg(state); } @@ -2232,7 +2232,7 @@ selectStatement[boolean topLevel] setOpSelectStatement[CommonTree t, boolean topLevel] : (u=setOperator b=simpleSelectStatement - -> {$setOpSelectStatement.tree != null && u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}? + -> {$setOpSelectStatement.tree != null && $u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}? ^(TOK_QUERY ^(TOK_FROM ^(TOK_SUBQUERY @@ -2245,9 +2245,9 @@ setOpSelectStatement[CommonTree t, boolean topLevel] ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF)) ) ) - -> {$setOpSelectStatement.tree != null && u.tree.getType()!=SparkSqlParser.TOK_UNIONDISTINCT}? + -> {$setOpSelectStatement.tree != null && $u.tree.getType()!=SparkSqlParser.TOK_UNIONDISTINCT}? ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b) - -> {$setOpSelectStatement.tree == null && u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}? + -> {$setOpSelectStatement.tree == null && $u.tree.getType()==SparkSqlParser.TOK_UNIONDISTINCT}? 
^(TOK_QUERY ^(TOK_FROM ^(TOK_SUBQUERY diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 8781355f4cd9..46a6150c3a58 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -227,7 +227,7 @@ private[hive] object HiveQl extends Logging { */ def withChildren(newChildren: Seq[ASTNode]): ASTNode = { (1 to n.getChildCount).foreach(_ => n.deleteChild(0)) - n.addChildren(newChildren.asJava) + newChildren.foreach(n.addChild(_)) n } From 7e1a14582fc32fda2016072138b4a431c7ba9333 Mon Sep 17 00:00:00 2001 From: Nong Li Date: Tue, 15 Dec 2015 21:59:26 -0800 Subject: [PATCH 03/14] Add anti join to grammar as an example. --- .../main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g | 1 + .../main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g | 1 + .../src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g | 1 + .../src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g | 1 + .../src/main/java/org/apache/spark/sql/parser/ParseDriver.java | 1 - sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 1 + 6 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g index 084c4213da37..e4a80f0ce8eb 100644 --- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/FromClauseParser.g @@ -123,6 +123,7 @@ joinToken | KW_RIGHT (KW_OUTER)? KW_JOIN -> TOK_RIGHTOUTERJOIN | KW_FULL (KW_OUTER)? KW_JOIN -> TOK_FULLOUTERJOIN | KW_LEFT KW_SEMI KW_JOIN -> TOK_LEFTSEMIJOIN + | KW_ANTI KW_JOIN -> TOK_ANTIJOIN ; lateralView diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g index e5d9f2eb1a02..5c3d7ef86624 100644 --- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/IdentifiersParser.g @@ -670,6 +670,7 @@ nonReserved | KW_LEVEL | KW_SNAPSHOT | KW_AUTOCOMMIT + | KW_ANTI ; //The following SQL2011 reserved keywords are used as cast function name only, but not as identifiers. 
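A sketch of how the new rule can be exercised through ParseDriver (class name and query text are illustrative; the grammar accepts ANTI JOIN here, while planning support is not wired up yet):

import org.apache.spark.sql.parser.ASTNode;
import org.apache.spark.sql.parser.ParseDriver;

public class AntiJoinParseSketch {
  public static void main(String[] args) throws Exception {
    // No Context is required for parsing alone, so null is passed, as HiveQl does for DDL.
    ASTNode ast = new ParseDriver().parse(
        "SELECT a.key FROM src a ANTI JOIN other b ON a.key = b.key", null);
    // The join subtree should be rooted at the new TOK_ANTIJOIN token.
    System.out.println(ast.toStringTree());
  }
}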
diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g index 35cf7fd6797c..ee1b8989b5af 100644 --- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlLexer.g @@ -70,6 +70,7 @@ KW_JOIN : 'JOIN'; KW_LEFT : 'LEFT'; KW_RIGHT : 'RIGHT'; KW_FULL : 'FULL'; +KW_ANTI : 'ANTI'; KW_ON : 'ON'; KW_PARTITION : 'PARTITION'; KW_PARTITIONS : 'PARTITIONS'; diff --git a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g index 6f1cd41e6593..69574d713d0b 100644 --- a/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g +++ b/sql/hive/src/main/antlr3/org/apache/spark/sql/parser/SparkSqlParser.g @@ -263,6 +263,7 @@ TOK_USERSCRIPTCOLSCHEMA; TOK_RECORDREADER; TOK_RECORDWRITER; TOK_LEFTSEMIJOIN; +TOK_ANTIJOIN; TOK_LATERAL_VIEW; TOK_LATERAL_VIEW_OUTER; TOK_TABALIAS; diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java index 2e968c3c475c..a8cab05f6a4f 100644 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java @@ -182,7 +182,6 @@ public ASTNode parse(String command, Context ctx) */ public ASTNode parse(String command, Context ctx, boolean setTokenRewriteStream) throws ParseException { - System.out.println("Parsing!!!"); LOG.info("Parsing command: " + command); HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 46a6150c3a58..41f2f42ad0b1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -1363,6 +1363,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case "TOK_LEFTOUTERJOIN" => LeftOuter case "TOK_FULLOUTERJOIN" => FullOuter case "TOK_LEFTSEMIJOIN" => LeftSemi + case "TOK_ANTIJOIN" => throw new NotImplementedError("Anti join not supported") } Join(nodeToRelation(relation1, context), nodeToRelation(relation2, context), From cd07d7f1391af8b3f777c56b4017c71a5a77c725 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Mon, 28 Dec 2015 15:23:48 +0100 Subject: [PATCH 04/14] Remove dead code from the parser. 
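
For context, the type helpers that ParseUtils still exposes after this cleanup (getCharTypeInfo, getVarcharTypeInfo, getDecimalTypeTypeInfo) read their parameters straight off the AST. A minimal, illustrative sketch (it assumes the copied ASTNode keeps Hive's Token-based constructor):

import org.antlr.runtime.CommonToken;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.spark.sql.parser.ASTNode;
import org.apache.spark.sql.parser.ParseUtils;
import org.apache.spark.sql.parser.SparkSqlParser;

public class CharTypeInfoSketch {
  public static void main(String[] args) throws Exception {
    // Shape of the AST produced for a cast such as CAST(x AS CHAR(10)):
    // a TOK_CHAR node with a single child carrying the length.
    ASTNode charNode = new ASTNode(new CommonToken(SparkSqlParser.TOK_CHAR, "TOK_CHAR"));
    charNode.addChild(new ASTNode(new CommonToken(SparkSqlParser.Number, "10")));
    CharTypeInfo info = ParseUtils.getCharTypeInfo(charNode);
    System.out.println(info.getTypeName());  // char(10)
  }
}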
--- .../apache/spark/sql/parser/ParseDriver.java | 57 +- .../apache/spark/sql/parser/ParseUtils.java | 43 +- .../apache/spark/sql/parser/RowResolver.java | 388 ----- .../spark/sql/parser/SemanticAnalyzer.java | 326 ---- .../apache/spark/sql/parser/TypeCheckCtx.java | 212 --- .../sql/parser/TypeCheckProcFactory.java | 1394 ----------------- .../spark/sql/parser/UnparseTranslator.java | 274 ---- .../org/apache/spark/sql/hive/HiveQl.scala | 2 +- 8 files changed, 9 insertions(+), 2687 deletions(-) delete mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/RowResolver.java delete mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckCtx.java delete mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckProcFactory.java delete mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/UnparseTranslator.java diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java index a8cab05f6a4f..c77198b087cb 100644 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java @@ -49,10 +49,10 @@ public class ParseDriver { //for the lexical analysis part of antlr. By converting the token stream into //upper case at the time when lexical rules are checked, this class ensures that the //lexical rules need to just match the token with upper case letters as opposed to - //combination of upper case and lower case characteres. This is purely used for matching lexical + //combination of upper case and lower case characters. This is purely used for matching lexical //rules. The actual token text is stored in the same way as the user input without //actually converting it into an upper case. The token values are generated by the consume() - //function of the super class ANTLRStringStream. The LA() function is the lookahead funtion + //function of the super class ANTLRStringStream. The LA() function is the lookahead function //and is purely used for matching lexical rules. This also means that the grammar will only //accept capitalized tokens in case it is run from other tools like antlrworks which //do not have the ANTLRNoCaseStringStream implementation. @@ -84,20 +84,13 @@ public class HiveLexerX extends SparkSqlLexer { private final ArrayList errors; - public HiveLexerX() { - super(); - errors = new ArrayList(); - } - public HiveLexerX(CharStream input) { super(input); errors = new ArrayList(); } @Override - public void displayRecognitionError(String[] tokenNames, - RecognitionException e) { - + public void displayRecognitionError(String[] tokenNames, RecognitionException e) { errors.add(new ParseError(this, e, tokenNames)); } @@ -106,8 +99,8 @@ public String getErrorMessage(RecognitionException e, String[] tokenNames) { String msg = null; if (e instanceof NoViableAltException) { - @SuppressWarnings("unused") - NoViableAltException nvae = (NoViableAltException) e; + // @SuppressWarnings("unused") + // NoViableAltException nvae = (NoViableAltException) e; // for development, can add // "decision=<<"+nvae.grammarDecisionDescription+">>" // and "(decision="+nvae.decisionNumber+") and @@ -217,44 +210,4 @@ public ASTNode parse(String command, Context ctx, boolean setTokenRewriteStream) tree.setUnknownTokenBoundaries(); return tree; } - - - /* - * parse a String as a Select List. 
This allows table functions to be passed expression Strings - * that are translated in - * the context they define at invocation time. Currently used by NPath to allow users to specify - * what output they want. - * NPath allows expressions n 'tpath' a column that represents the matched set of rows. This - * column doesn't exist in - * the input schema and hence the Result Expression cannot be analyzed by the regular Hive - * translation process. - */ - public ASTNode parseSelect(String command, Context ctx) throws ParseException { - LOG.info("Parsing command: " + command); - - HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command)); - TokenRewriteStream tokens = new TokenRewriteStream(lexer); - if (ctx != null) { - ctx.setTokenRewriteStream(tokens); - } - SparkSqlParser parser = new SparkSqlParser(tokens); - parser.setTreeAdaptor(adaptor); - SparkSqlParser_SelectClauseParser.selectClause_return r = null; - try { - r = parser.selectClause(); - } catch (RecognitionException e) { - e.printStackTrace(); - throw new ParseException(parser.errors); - } - - if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) { - LOG.info("Parse Completed"); - } else if (lexer.getErrors().size() != 0) { - throw new ParseException(lexer.getErrors()); - } else { - throw new ParseException(parser.errors); - } - - return (ASTNode) r.getTree(); - } } diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java index d8840e75fd3d..a5c2998f86cc 100644 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java @@ -18,10 +18,7 @@ package org.apache.spark.sql.parser; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -54,7 +51,7 @@ private ParseUtils() { // prevent instantiation } - public static VarcharTypeInfo getVarcharTypeInfo(org.apache.spark.sql.parser.ASTNode node) + public static VarcharTypeInfo getVarcharTypeInfo(ASTNode node) throws SemanticException { if (node.getChildCount() != 1) { throw new SemanticException("Bad params for type varchar"); @@ -64,7 +61,7 @@ public static VarcharTypeInfo getVarcharTypeInfo(org.apache.spark.sql.parser.AST return TypeInfoFactory.getVarcharTypeInfo(Integer.valueOf(lengthStr)); } - public static CharTypeInfo getCharTypeInfo(org.apache.spark.sql.parser.ASTNode node) + public static CharTypeInfo getCharTypeInfo(ASTNode node) throws SemanticException { if (node.getChildCount() != 1) { throw new SemanticException("Bad params for type char"); @@ -74,16 +71,7 @@ public static CharTypeInfo getCharTypeInfo(org.apache.spark.sql.parser.ASTNode n return TypeInfoFactory.getCharTypeInfo(Integer.valueOf(lengthStr)); } - static int getIndex(String[] list, String elem) { - for(int i=0; i < list.length; i++) { - if (list[i] != null && list[i].toLowerCase().equals(elem)) { - return i; - } - } - return -1; - } - - public static DecimalTypeInfo getDecimalTypeTypeInfo(org.apache.spark.sql.parser.ASTNode node) + public static DecimalTypeInfo getDecimalTypeTypeInfo(ASTNode node) throws SemanticException { if (node.getChildCount() > 2) { throw new SemanticException("Bad params for 
type decimal"); @@ -105,29 +93,4 @@ public static DecimalTypeInfo getDecimalTypeTypeInfo(org.apache.spark.sql.parser return TypeInfoFactory.getDecimalTypeInfo(precision, scale); } - public static String ensureClassExists(String className) - throws SemanticException { - if (className == null) { - return null; - } - try { - Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader()); - } catch (ClassNotFoundException e) { - throw new SemanticException("Cannot find class '" + className + "'", e); - } - return className; - } - - public static String unparseIdentifier(String identifier) { - return unparseIdentifier(identifier, (Configuration)null); - } - - public static String unparseIdentifier(String identifier, Configuration conf) { - String qIdSupport = conf == null?null: HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT); - if(qIdSupport != null && !"none".equals(qIdSupport)) { - identifier = identifier.replaceAll("`", "``"); - } - - return "`" + identifier + "`"; - } } diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/RowResolver.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/RowResolver.java deleted file mode 100644 index 4c5bc87473f0..000000000000 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/RowResolver.java +++ /dev/null @@ -1,388 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.parser; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.RowSchema; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Implementation of the Row Resolver. - * - */ -public class RowResolver implements Serializable{ - private static final long serialVersionUID = 1L; - private RowSchema rowSchema; - private HashMap> rslvMap; - - private HashMap invRslvMap; - /* - * now a Column can have an alternate mapping. - * This captures the alternate mapping. - * The primary(first) mapping is still only held in - * invRslvMap. 
- */ - private final Map altInvRslvMap; - private Map expressionMap; - - // TODO: Refactor this and do in a more object oriented manner - private boolean isExprResolver; - - private static final Logger LOG = LoggerFactory.getLogger(RowResolver.class.getName()); - - public RowResolver() { - rowSchema = new RowSchema(); - rslvMap = new HashMap>(); - invRslvMap = new HashMap(); - altInvRslvMap = new HashMap(); - expressionMap = new HashMap(); - isExprResolver = false; - } - - /** - * Puts a resolver entry corresponding to a source expression which is to be - * used for identical expression recognition (e.g. for matching expressions - * in the SELECT list with the GROUP BY clause). The convention for such - * entries is an empty-string ("") as the table alias together with the - * string rendering of the ASTNode as the column alias. - */ - public void putExpression(ASTNode node, ColumnInfo colInfo) { - String treeAsString = node.toStringTree(); - expressionMap.put(treeAsString, node); - put("", treeAsString, colInfo); - } - - /** - * Retrieves the ColumnInfo corresponding to a source expression which - * exactly matches the string rendering of the given ASTNode. - */ - public ColumnInfo getExpression(ASTNode node) throws SemanticException { - return get("", node.toStringTree()); - } - - /** - * Retrieves the source expression matching a given ASTNode's - * string rendering exactly. - */ - public ASTNode getExpressionSource(ASTNode node) { - return expressionMap.get(node.toStringTree()); - } - - public void put(String tab_alias, String col_alias, ColumnInfo colInfo) { - if (!addMappingOnly(tab_alias, col_alias, colInfo)) { - //Make sure that the table alias and column alias are stored - //in the column info - if (tab_alias != null) { - colInfo.setTabAlias(tab_alias.toLowerCase()); - } - if (col_alias != null) { - colInfo.setAlias(col_alias.toLowerCase()); - } - rowSchema.getSignature().add(colInfo); - } - } - - public boolean addMappingOnly(String tab_alias, String col_alias, ColumnInfo colInfo) { - if (tab_alias != null) { - tab_alias = tab_alias.toLowerCase(); - } - col_alias = col_alias.toLowerCase(); - - /* - * allow multiple mappings to the same ColumnInfo. - * When a ColumnInfo is mapped multiple times, only the - * first inverse mapping is captured. - */ - boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName()); - - LinkedHashMap f_map = rslvMap.get(tab_alias); - if (f_map == null) { - f_map = new LinkedHashMap(); - rslvMap.put(tab_alias, f_map); - } - ColumnInfo oldColInfo = f_map.put(col_alias, colInfo); - if (oldColInfo != null) { - LOG.warn("Duplicate column info for " + tab_alias + "." + col_alias - + " was overwritten in RowResolver map: " + oldColInfo + " by " + colInfo); - } - - String[] qualifiedAlias = new String[2]; - qualifiedAlias[0] = tab_alias; - qualifiedAlias[1] = col_alias; - if ( !colPresent ) { - invRslvMap.put(colInfo.getInternalName(), qualifiedAlias); - } else { - altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias); - } - - return colPresent; - } - - public boolean hasTableAlias(String tab_alias) { - return rslvMap.get(tab_alias.toLowerCase()) != null; - } - - /** - * Gets the column Info to tab_alias.col_alias type of a column reference. I - * the tab_alias is not provided as can be the case with an non aliased - * column, this function looks up the column in all the table aliases in this - * row resolver and returns the match. It also throws an exception if the - * column is found in multiple table aliases. 
If no match is found a null - * values is returned. - * - * This allows us to interpret both select t.c1 type of references and select - * c1 kind of references. The later kind are what we call non aliased column - * references in the query. - * - * @param tab_alias - * The table alias to match (this is null if the column reference is - * non aliased) - * @param col_alias - * The column name that is being searched for - * @return ColumnInfo - * @throws SemanticException - */ - public ColumnInfo get(String tab_alias, String col_alias) throws SemanticException { - col_alias = col_alias.toLowerCase(); - ColumnInfo ret = null; - - if (tab_alias != null) { - tab_alias = tab_alias.toLowerCase(); - HashMap f_map = rslvMap.get(tab_alias); - if (f_map == null) { - return null; - } - ret = f_map.get(col_alias); - } else { - boolean found = false; - String foundTbl = null; - for (Map.Entry> rslvEntry: rslvMap.entrySet()) { - String rslvKey = rslvEntry.getKey(); - LinkedHashMap cmap = rslvEntry.getValue(); - for (Map.Entry cmapEnt : cmap.entrySet()) { - if (col_alias.equalsIgnoreCase(cmapEnt.getKey())) { - /* - * We can have an unaliased and one aliased mapping to a Column. - */ - if (found && foundTbl != null && rslvKey != null) { - throw new SemanticException("Column " + col_alias - + " Found in more than One Tables/Subqueries"); - } - found = true; - foundTbl = rslvKey == null ? foundTbl : rslvKey; - ret = cmapEnt.getValue(); - } - } - } - } - - return ret; - } - - public ArrayList getColumnInfos() { - return rowSchema.getSignature(); - } - - /** - * Get a list of aliases for non-hidden columns - * @param max the maximum number of columns to return - * @return a list of non-hidden column names no greater in size than max - */ - public List getReferenceableColumnAliases(String tableAlias, int max) { - int count = 0; - Set columnNames = new LinkedHashSet(); - - int tables = rslvMap.size(); - - Map mapping = rslvMap.get(tableAlias); - if (mapping != null) { - for (Map.Entry entry : mapping.entrySet()) { - if (max > 0 && count >= max) { - break; - } - ColumnInfo columnInfo = entry.getValue(); - if (!columnInfo.isHiddenVirtualCol()) { - columnNames.add(entry.getKey()); - count++; - } - } - } else { - for (ColumnInfo columnInfo : getColumnInfos()) { - if (max > 0 && count >= max) { - break; - } - if (!columnInfo.isHiddenVirtualCol()) { - String[] inverse = !isExprResolver ? reverseLookup(columnInfo.getInternalName()) : null; - if (inverse != null) { - columnNames.add(inverse[0] == null || tables <= 1 ? inverse[1] : - inverse[0] + "." 
+ inverse[1]); - } else { - columnNames.add(columnInfo.getAlias()); - } - count++; - } - } - } - return new ArrayList(columnNames); - } - - public HashMap getFieldMap(String tabAlias) { - if (tabAlias == null) { - return rslvMap.get(null); - } else { - return rslvMap.get(tabAlias.toLowerCase()); - } - } - - public String[] reverseLookup(String internalName) { - return invRslvMap.get(internalName); - } - - public boolean getIsExprResolver() { - return isExprResolver; - } - - public String[] getAlternateMappings(String internalName) { - return altInvRslvMap.get(internalName); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - - for (Map.Entry> e : rslvMap - .entrySet()) { - String tab = e.getKey(); - sb.append(tab + "{"); - HashMap f_map = e.getValue(); - if (f_map != null) { - for (Map.Entry entry : f_map.entrySet()) { - sb.append("(" + entry.getKey() + "," + entry.getValue().toString() - + ")"); - } - } - sb.append("} "); - } - return sb.toString(); - } - - public RowSchema getRowSchema() { - return rowSchema; - } - - private static class IntRef { - public int val = 0; - } - - public static boolean add(RowResolver rrToAddTo, RowResolver rrToAddFrom, int numColumns) - throws SemanticException { - return add(rrToAddTo, rrToAddFrom, null, numColumns); - } - - // TODO: 1) How to handle collisions? 2) Should we be cloning ColumnInfo or not? - private static boolean add(RowResolver rrToAddTo, RowResolver rrToAddFrom, - IntRef outputColPosRef, int numColumns) throws SemanticException { - boolean hasDuplicates = false; - String tabAlias; - String colAlias; - String[] qualifiedColName; - int i = 0; - - int outputColPos = outputColPosRef == null ? 0 : outputColPosRef.val; - for (ColumnInfo cInfoFrmInput : rrToAddFrom.getRowSchema().getSignature()) { - if ( numColumns >= 0 && i == numColumns ) { - break; - } - ColumnInfo newCI = null; - String internalName = cInfoFrmInput.getInternalName(); - qualifiedColName = rrToAddFrom.reverseLookup(internalName); - tabAlias = qualifiedColName[0]; - colAlias = qualifiedColName[1]; - - newCI = new ColumnInfo(cInfoFrmInput); - newCI.setInternalName(SemanticAnalyzer.getColumnInternalName(outputColPos)); - - outputColPos++; - - boolean isUnique = rrToAddTo.putWithCheck(tabAlias, colAlias, internalName, newCI); - hasDuplicates |= (!isUnique); - - qualifiedColName = rrToAddFrom.getAlternateMappings(internalName); - if (qualifiedColName != null) { - tabAlias = qualifiedColName[0]; - colAlias = qualifiedColName[1]; - rrToAddTo.put(tabAlias, colAlias, newCI); - } - i++; - } - - if (outputColPosRef != null) { - outputColPosRef.val = outputColPos; - } - return !hasDuplicates; - } - - /** - * Adds column to RR, checking for duplicate columns. Needed because CBO cannot handle the Hive - * behavior of blindly overwriting old mapping in RR and still somehow working after that. - * @return True if mapping was added without duplicates. - */ - public boolean putWithCheck(String tabAlias, String colAlias, - String internalName, ColumnInfo newCI) throws SemanticException { - ColumnInfo existing = get(tabAlias, colAlias); - // Hive adds the same mapping twice... I wish we could fix stuff like that. 
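// (Hedged summary of the checks below: if the (tabAlias, colAlias) slot is already occupied by a
// different column, the method retries the mapping under the column's internal name before
// giving up and returning false.)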
- if (existing == null) { - put(tabAlias, colAlias, newCI); - return true; - } else if (existing.isSameColumnForRR(newCI)) { - return true; - } - LOG.warn("Found duplicate column alias in RR: " - + existing.toMappingString(tabAlias, colAlias) + " adding " - + newCI.toMappingString(tabAlias, colAlias)); - if (internalName != null) { - existing = get(tabAlias, internalName); - if (existing == null) { - put(tabAlias, internalName, newCI); - return true; - } else if (existing.isSameColumnForRR(newCI)) { - return true; - } - LOG.warn("Failed to use internal name after finding a duplicate: " - + existing.toMappingString(tabAlias, internalName)); - } - return false; - } - - public static boolean add(RowResolver rrToAddTo, RowResolver rrToAddFrom) - throws SemanticException { - return add(rrToAddTo, rrToAddFrom, null, -1); - } -} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java index cfdd3cf684b0..4b2015e0df84 100644 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java @@ -18,7 +18,6 @@ package org.apache.spark.sql.parser; -import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; @@ -26,64 +25,24 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import org.antlr.runtime.tree.Tree; import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * SemanticAnalyzer. 
* */ public abstract class SemanticAnalyzer { - protected static final Logger STATIC_LOG = LoggerFactory.getLogger(SemanticAnalyzer.class.getName()); - protected final Hive db; - protected final HiveConf conf; - protected final Logger LOG; - - public SemanticAnalyzer(HiveConf conf, Hive db) throws SemanticException { - try { - this.conf = conf; - this.db = db; - LOG = LoggerFactory.getLogger(this.getClass().getName()); - } catch (Exception e) { - throw new SemanticException(e); - } - } - - public static String stripIdentifierQuotes(String val) { - if ((val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`')) { - val = val.substring(1, val.length() - 1); - } - return val; - } - - public static String stripQuotes(String val) { - return PlanUtils.stripQuotes(val); - } - public static String charSetString(String charSetName, String charSetString) throws SemanticException { try { @@ -117,48 +76,6 @@ public static String charSetString(String charSetName, String charSetString) } } - /** - * Get dequoted name from a table/column node. - * @param tableOrColumnNode the table or column node - * @return for table node, db.tab or tab. for column node column. - */ - public static String getUnescapedName(ASTNode tableOrColumnNode) { - return getUnescapedName(tableOrColumnNode, null); - } - - public static Map.Entry getDbTableNamePair(ASTNode tableNameNode) { - assert(tableNameNode.getToken().getType() == SparkSqlParser.TOK_TABNAME); - if (tableNameNode.getChildCount() == 2) { - String dbName = unescapeIdentifier(tableNameNode.getChild(0).getText()); - String tableName = unescapeIdentifier(tableNameNode.getChild(1).getText()); - return Pair.of(dbName, tableName); - } else { - String tableName = unescapeIdentifier(tableNameNode.getChild(0).getText()); - return Pair.of(null,tableName); - } - } - - public static String getUnescapedName(ASTNode tableOrColumnNode, String currentDatabase) { - int tokenType = tableOrColumnNode.getToken().getType(); - if (tokenType == SparkSqlParser.TOK_TABNAME) { - // table node - Map.Entry dbTablePair = getDbTableNamePair(tableOrColumnNode); - String dbName = dbTablePair.getKey(); - String tableName = dbTablePair.getValue(); - if (dbName != null){ - return dbName + "." + tableName; - } - if (currentDatabase != null) { - return currentDatabase + "." + tableName; - } - return tableName; - } else if (tokenType == SparkSqlParser.StringLiteral) { - return unescapeSQLString(tableOrColumnNode.getText()); - } - // column node - return unescapeIdentifier(tableOrColumnNode.getText()); - } - /** * Remove the encapsulating "`" pair from the identifier. We allow users to * use "`" to escape identifier for table names, column names and aliases, in @@ -297,85 +214,6 @@ public static String unescapeSQLString(String b) { return sb.toString(); } - /** - * Escapes the string for AST; doesn't enclose it in quotes, however. - */ - public static String escapeSQLString(String b) { - // There's usually nothing to escape so we will be optimistic. - String result = b; - for (int i = 0; i < result.length(); ++i) { - char currentChar = result.charAt(i); - if (currentChar == '\\' && ((i + 1) < result.length())) { - // TODO: do we need to handle the "this is what MySQL does" here? 
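// A hedged illustration of what the loop below produces (characters shown literally):
//   a single quote '      becomes \'
//   a newline              becomes \n
//   a control byte 0x01    becomes \u0001   (4-digit, zero-padded unicode escape)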
- char nextChar = result.charAt(i + 1); - if (nextChar == '%' || nextChar == '_') { - ++i; - continue; - } - } - switch (currentChar) { - case '\0': - result = spliceString(result, i, "\\0"); - ++i; - break; - case '\'': - result = spliceString(result, i, "\\'"); - ++i; - break; - case '\"': - result = spliceString(result, i, "\\\""); - ++i; - break; - case '\b': - result = spliceString(result, i, "\\b"); - ++i; - break; - case '\n': - result = spliceString(result, i, "\\n"); - ++i; - break; - case '\r': - result = spliceString(result, i, "\\r"); - ++i; - break; - case '\t': - result = spliceString(result, i, "\\t"); - ++i; - break; - case '\\': - result = spliceString(result, i, "\\\\"); - ++i; - break; - case '\u001A': - result = spliceString(result, i, "\\Z"); - ++i; - break; - default: { - if (currentChar < ' ') { - String hex = Integer.toHexString(currentChar); - String unicode = "\\u"; - for (int j = 4; j > hex.length(); --j) { - unicode += '0'; - } - unicode += hex; - result = spliceString(result, i, unicode); - i += (unicode.length() - 1); - } - break; // if not a control character, do nothing - } - } - } - return result; - } - - private static String spliceString(String str, int i, String replacement) { - return spliceString(str, i, 1, replacement); - } - - private static String spliceString(String str, int i, int length, String replacement) { - return str.substring(0, i) + replacement + str.substring(i + length); - } - /** * Get the list of FieldSchema out of the ASTNode. */ @@ -468,166 +306,6 @@ private static String getUnionTypeStringFromAST(ASTNode typeNode) return typeStr; } - public Hive getDb() { - return db; - } - - /** - * Given a ASTNode, return list of values. - * - * use case: - * create table xyz list bucketed (col1) with skew (1,2,5) - * AST Node is for (1,2,5) - * @param ast - * @return - */ - public static List getSkewedValueFromASTNode(ASTNode ast) { - List colList = new ArrayList(); - int numCh = ast.getChildCount(); - for (int i = 0; i < numCh; i++) { - ASTNode child = (ASTNode) ast.getChild(i); - colList.add(stripQuotes(child.getText()).toLowerCase()); - } - return colList; - } - - /** - * Retrieve skewed values from ASTNode. 
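 * For example (hedged, mirroring the create-table example above): for skewed values
 * (1,2,5) the TOK_TABCOLVALUE child yields the list ["1", "2", "5"].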
- * - * @param node - * @return - * @throws SemanticException - */ - public static List getSkewedValuesFromASTNode(Node node) throws SemanticException { - List result = null; - Tree leafVNode = ((ASTNode) node).getChild(0); - if (leafVNode == null) { - throw new SemanticException( - ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg()); - } else { - ASTNode lVAstNode = (ASTNode) leafVNode; - if (lVAstNode.getToken().getType() != SparkSqlParser.TOK_TABCOLVALUE) { - throw new SemanticException( - ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg()); - } else { - result = new ArrayList(getSkewedValueFromASTNode(lVAstNode)); - } - } - return result; - } - - private static boolean getPartExprNodeDesc(ASTNode astNode, HiveConf conf, - Map astExprNodeMap) throws SemanticException { - - if (astNode == null) { - return true; - } else if ((astNode.getChildren() == null) || (astNode.getChildren().size() == 0)) { - return astNode.getType() != SparkSqlParser.TOK_PARTVAL; - } - - TypeCheckCtx typeCheckCtx = new TypeCheckCtx(null); - String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME); - boolean result = true; - for (Node childNode : astNode.getChildren()) { - ASTNode childASTNode = (ASTNode)childNode; - - if (childASTNode.getType() != SparkSqlParser.TOK_PARTVAL) { - result = getPartExprNodeDesc(childASTNode, conf, astExprNodeMap) && result; - } else { - boolean isDynamicPart = childASTNode.getChildren().size() <= 1; - result = !isDynamicPart && result; - if (!isDynamicPart) { - ASTNode partVal = (ASTNode)childASTNode.getChildren().get(1); - if (!defaultPartitionName.equalsIgnoreCase(unescapeSQLString(partVal.getText()))) { - astExprNodeMap.put((ASTNode)childASTNode.getChildren().get(0), - TypeCheckProcFactory.genExprNode(partVal, typeCheckCtx).get(partVal)); - } - } - } - } - return result; - } - - public static void validatePartSpec(Table tbl, Map partSpec, - ASTNode astNode, HiveConf conf, boolean shouldBeFull) throws SemanticException { - tbl.validatePartColumnNames(partSpec, shouldBeFull); - validatePartColumnType(tbl, partSpec, astNode, conf); - } - - public static void validatePartColumnType(Table tbl, Map partSpec, - ASTNode astNode, HiveConf conf) throws SemanticException { - if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) { - return; - } - - Map astExprNodeMap = new HashMap(); - if (!getPartExprNodeDesc(astNode, conf, astExprNodeMap)) { - STATIC_LOG.warn("Dynamic partitioning is used; only validating " - + astExprNodeMap.size() + " columns"); - } - - if (astExprNodeMap.isEmpty()) { - return; // All columns are dynamic, nothing to do. - } - - List parts = tbl.getPartitionKeys(); - Map partCols = new HashMap(parts.size()); - for (FieldSchema col : parts) { - partCols.put(col.getName(), col.getType().toLowerCase()); - } - for (Entry astExprNodePair : astExprNodeMap.entrySet()) { - String astKeyName = astExprNodePair.getKey().toString().toLowerCase(); - if (astExprNodePair.getKey().getType() == SparkSqlParser.Identifier) { - astKeyName = stripIdentifierQuotes(astKeyName); - } - String colType = partCols.get(astKeyName); - ObjectInspector inputOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( - astExprNodePair.getValue().getTypeInfo()); - - TypeInfo expectedType = - TypeInfoUtils.getTypeInfoFromTypeString(colType); - ObjectInspector outputOI = - TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType); - // Since partVal is a constant, it is safe to cast ExprNodeDesc to ExprNodeConstantDesc. 
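// (Hedged example of the normalization described here: a spec such as PARTITION (hr='007') on an
// INT partition column is stored in partSpec as hr=7, and a warning is logged about the change.)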
- // Its value should be in normalized format (e.g. no leading zero in integer, date is in - // format of YYYY-MM-DD etc) - Object value = ((ExprNodeConstantDesc)astExprNodePair.getValue()).getValue(); - Object convertedValue = value; - if (!inputOI.getTypeName().equals(outputOI.getTypeName())) { - convertedValue = ObjectInspectorConverters.getConverter(inputOI, outputOI).convert(value); - if (convertedValue == null) { - throw new SemanticException(ErrorMsg.PARTITION_SPEC_TYPE_MISMATCH, astKeyName, - inputOI.getTypeName(), outputOI.getTypeName()); - } - - if (!convertedValue.toString().equals(value.toString())) { - // value might have been changed because of the normalization in conversion - STATIC_LOG.warn("Partition " + astKeyName + " expects type " + outputOI.getTypeName() - + " but input value is in type " + inputOI.getTypeName() + ". Convert " - + value.toString() + " to " + convertedValue.toString()); - } - } - - if (!convertedValue.toString().equals(partSpec.get(astKeyName))) { - STATIC_LOG.warn("Partition Spec " + astKeyName + "=" + partSpec.get(astKeyName) - + " has been changed to " + astKeyName + "=" + convertedValue.toString()); - } - partSpec.put(astKeyName, convertedValue.toString()); - } - } - - private Path tryQualifyPath(Path path) throws IOException { - try { - return path.getFileSystem(conf).makeQualified(path); - } catch (IOException e) { - return path; // some tests expected to pass invalid schema - } - } - - protected String toMessage(ErrorMsg message, Object detail) { - return detail == null ? message.getMsg() : message.getMsg(detail.toString()); - } - public static String getAstNodeText(ASTNode tree) { return tree.getChildCount() == 0?tree.getText() : getAstNodeText((ASTNode)tree.getChild(tree.getChildCount() - 1)); @@ -650,10 +328,6 @@ public static String generateErrorMessage(ASTNode ast, String message) { return sb.toString(); } - public static String getColumnInternalName(int pos) { - return HiveConf.getColumnInternalName(pos); - } - private static final Map TokenToTypeName = new HashMap(); static { diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckCtx.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckCtx.java deleted file mode 100644 index 5f185efa85bc..000000000000 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckCtx.java +++ /dev/null @@ -1,212 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.parser; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; - -/** - * This class implements the context information that is used for typechecking - * phase in query compilation. 
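 * A minimal usage sketch, assuming expr and inputRR are supplied by the caller (this mirrors
 * how getPartExprNodeDesc uses it above):
 *   TypeCheckCtx ctx = new TypeCheckCtx(inputRR);
 *   ExprNodeDesc desc = TypeCheckProcFactory.genExprNode(expr, ctx).get(expr);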
- */ -public class TypeCheckCtx implements NodeProcessorCtx { - protected static final Logger LOG = LoggerFactory.getLogger(TypeCheckCtx.class); - - /** - * The row resolver of the previous operator. This field is used to generate - * expression descriptors from the expression ASTs. - */ - private RowResolver inputRR; - - private final boolean useCaching; - - /** - * Receives translations which will need to be applied during unparse. - */ - private UnparseTranslator unparseTranslator; - - /** - * Potential typecheck error reason. - */ - private String error; - - /** - * The node that generated the potential typecheck error - */ - private ASTNode errorSrcNode; - - /** - * Whether to allow stateful UDF invocations. - */ - private boolean allowStatefulFunctions; - - private boolean allowDistinctFunctions; - - private final boolean allowGBExprElimination; - - private final boolean allowAllColRef; - - private final boolean allowFunctionStar; - - private final boolean allowWindowing; - - // "[]" : LSQUARE/INDEX Expression - private final boolean allowIndexExpr; - - private final boolean allowSubQueryExpr; - - /** - * Constructor. - * - * @param inputRR - * The input row resolver of the previous operator. - */ - public TypeCheckCtx(RowResolver inputRR) { - this(inputRR, true); - } - - public TypeCheckCtx(RowResolver inputRR, boolean useCaching) { - this(inputRR, useCaching, false, true, true, true, true, true, true, true); - } - - public TypeCheckCtx(RowResolver inputRR, boolean useCaching, boolean allowStatefulFunctions, - boolean allowDistinctFunctions, boolean allowGBExprElimination, boolean allowAllColRef, - boolean allowFunctionStar, boolean allowWindowing, - boolean allowIndexExpr, boolean allowSubQueryExpr) { - setInputRR(inputRR); - error = null; - this.useCaching = useCaching; - this.allowStatefulFunctions = allowStatefulFunctions; - this.allowDistinctFunctions = allowDistinctFunctions; - this.allowGBExprElimination = allowGBExprElimination; - this.allowAllColRef = allowAllColRef; - this.allowFunctionStar = allowFunctionStar; - this.allowWindowing = allowWindowing; - this.allowIndexExpr = allowIndexExpr; - this.allowSubQueryExpr = allowSubQueryExpr; - } - - /** - * @param inputRR - * the inputRR to set - */ - public void setInputRR(RowResolver inputRR) { - this.inputRR = inputRR; - } - - /** - * @return the inputRR - */ - public RowResolver getInputRR() { - return inputRR; - } - - /** - * @param unparseTranslator - * the unparseTranslator to set - */ - public void setUnparseTranslator(UnparseTranslator unparseTranslator) { - this.unparseTranslator = unparseTranslator; - } - - /** - * @return the unparseTranslator - */ - public UnparseTranslator getUnparseTranslator() { - return unparseTranslator; - } - - /** - * @param allowStatefulFunctions - * whether to allow stateful UDF invocations - */ - public void setAllowStatefulFunctions(boolean allowStatefulFunctions) { - this.allowStatefulFunctions = allowStatefulFunctions; - } - - /** - * @return whether to allow stateful UDF invocations - */ - public boolean getAllowStatefulFunctions() { - return allowStatefulFunctions; - } - - /** - * @param error - * the error to set - * - */ - public void setError(String error, ASTNode errorSrcNode) { - if (LOG.isDebugEnabled()) { - // Logger the callstack from which the error has been set. - LOG.debug("Setting error: [" + error + "] from " - + ((errorSrcNode == null) ? 
"null" : errorSrcNode.toStringTree()), new Exception()); - } - this.error = error; - this.errorSrcNode = errorSrcNode; - } - - /** - * @return the error - */ - public String getError() { - return error; - } - - public ASTNode getErrorSrcNode() { - return errorSrcNode; - } - - public void setAllowDistinctFunctions(boolean allowDistinctFunctions) { - this.allowDistinctFunctions = allowDistinctFunctions; - } - - public boolean getAllowDistinctFunctions() { - return allowDistinctFunctions; - } - - public boolean getAllowGBExprElimination() { - return allowGBExprElimination; - } - - public boolean getallowAllColRef() { - return allowAllColRef; - } - - public boolean getallowFunctionStar() { - return allowFunctionStar; - } - - public boolean getallowWindowing() { - return allowWindowing; - } - - public boolean getallowIndexExpr() { - return allowIndexExpr; - } - - public boolean getallowSubQueryExpr() { - return allowSubQueryExpr; - } - - public boolean isUseCaching() { - return useCaching; - } -} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckProcFactory.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckProcFactory.java deleted file mode 100644 index b951a9ee12b8..000000000000 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/TypeCheckProcFactory.java +++ /dev/null @@ -1,1394 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.parser; - -import java.math.BigDecimal; -import java.sql.Date; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Stack; - -import com.google.common.collect.Lists; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.udf.SettableUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.io.NullWritable; -import org.apache.hive.common.util.DateUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * The Factory for creating typecheck processors. 
The typecheck processors are - * used to processes the syntax trees for expressions and convert them into - * expression Node Descriptor trees. They also introduce the correct conversion - * functions to do proper implicit conversion. - */ -public class TypeCheckProcFactory { - - protected static final Logger LOG = LoggerFactory.getLogger(TypeCheckProcFactory.class - .getName()); - - protected TypeCheckProcFactory() { - // prevent instantiation - } - - /** - * Function to do groupby subexpression elimination. This is called by all the - * processors initially. As an example, consider the query select a+b, - * count(1) from T group by a+b; Then a+b is already precomputed in the group - * by operators key, so we substitute a+b in the select list with the internal - * column name of the a+b expression that appears in the in input row - * resolver. - * - * @param nd - * The node that is being inspected. - * @param procCtx - * The processor context. - * - * @return exprNodeColumnDesc. - */ - public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) - throws SemanticException { - // We recursively create the exprNodeDesc. Base cases: when we encounter - // a column ref, we convert that into an exprNodeColumnDesc; when we - // encounter - // a constant, we convert that into an exprNodeConstantDesc. For others we - // just - // build the exprNodeFuncDesc with recursively built children. - ASTNode expr = (ASTNode) nd; - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - - if (!ctx.isUseCaching()) { - return null; - } - - RowResolver input = ctx.getInputRR(); - ExprNodeDesc desc = null; - - if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) { - return null; - } - - // If the current subExpression is pre-calculated, as in Group-By etc. - ColumnInfo colInfo = input.getExpression(expr); - if (colInfo != null) { - desc = new ExprNodeColumnDesc(colInfo); - ASTNode source = input.getExpressionSource(expr); - if (source != null) { - ctx.getUnparseTranslator().addCopyTranslation(expr, source); - } - return desc; - } - return desc; - } - - public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) - throws SemanticException { - return genExprNode(expr, tcCtx, new TypeCheckProcFactory()); - } - - protected static Map genExprNode(ASTNode expr, - TypeCheckCtx tcCtx, TypeCheckProcFactory tf) throws SemanticException { - // Create the walker, the rules dispatcher and the context. - // create a walker which walks the tree in a DFS manner while maintaining - // the operator stack. 
The dispatcher - // generates the plan from the operator tree - Map opRules = new LinkedHashMap(); - - opRules.put(new RuleRegExp("R1", SparkSqlParser.TOK_NULL + "%"), - tf.getNullExprProcessor()); - opRules.put(new RuleRegExp("R2", SparkSqlParser.Number + "%|" + - SparkSqlParser.TinyintLiteral + "%|" + - SparkSqlParser.SmallintLiteral + "%|" + - SparkSqlParser.BigintLiteral + "%|" + - SparkSqlParser.DecimalLiteral + "%"), - tf.getNumExprProcessor()); - opRules - .put(new RuleRegExp("R3", SparkSqlParser.Identifier + "%|" - + SparkSqlParser.StringLiteral + "%|" + SparkSqlParser.TOK_CHARSETLITERAL + "%|" - + SparkSqlParser.TOK_STRINGLITERALSEQUENCE + "%|" - + "%|" + SparkSqlParser.KW_IF + "%|" + SparkSqlParser.KW_CASE + "%|" - + SparkSqlParser.KW_WHEN + "%|" + SparkSqlParser.KW_IN + "%|" - + SparkSqlParser.KW_ARRAY + "%|" + SparkSqlParser.KW_MAP + "%|" - + SparkSqlParser.KW_STRUCT + "%|" + SparkSqlParser.KW_EXISTS + "%|" - + SparkSqlParser.TOK_SUBQUERY_OP_NOTIN + "%"), - tf.getStrExprProcessor()); - opRules.put(new RuleRegExp("R4", SparkSqlParser.KW_TRUE + "%|" - + SparkSqlParser.KW_FALSE + "%"), tf.getBoolExprProcessor()); - opRules.put(new RuleRegExp("R5", SparkSqlParser.TOK_DATELITERAL + "%|" - + SparkSqlParser.TOK_TIMESTAMPLITERAL + "%"), tf.getDateTimeExprProcessor()); - opRules.put(new RuleRegExp("R6", - SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|" - + SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|" - + SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL + "%|" - + SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL + "%|" - + SparkSqlParser.TOK_INTERVAL_DAY_LITERAL + "%|" - + SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL + "%|" - + SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL + "%|" - + SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor()); - opRules.put(new RuleRegExp("R7", SparkSqlParser.TOK_TABLE_OR_COL + "%"), - tf.getColumnExprProcessor()); - opRules.put(new RuleRegExp("R8", SparkSqlParser.TOK_SUBQUERY_OP + "%"), - tf.getSubQueryExprProcessor()); - - // The dispatcher fires the processor corresponding to the closest matching - // rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), - opRules, tcCtx); - GraphWalker ogw = new DefaultGraphWalker(disp); - - // Create a list of top nodes - ArrayList topNodes = Lists.newArrayList(expr); - HashMap nodeOutputs = new LinkedHashMap(); - - ogw.startWalking(topNodes, nodeOutputs); - - return convert(nodeOutputs); - } - - // temporary type-safe casting - private static Map convert(Map outputs) { - Map converted = new LinkedHashMap(); - for (Map.Entry entry : outputs.entrySet()) { - if (entry.getKey() instanceof ASTNode && - (entry.getValue() == null || entry.getValue() instanceof ExprNodeDesc)) { - converted.put((ASTNode)entry.getKey(), (ExprNodeDesc)entry.getValue()); - } else { - LOG.warn("Invalid type entry " + entry); - } - } - return converted; - } - - /** - * Processor for processing NULL expression. - */ - public static class NullExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - return desc; - } - - return new ExprNodeConstantDesc(TypeInfoFactory.getPrimitiveTypeInfoFromPrimitiveWritable(NullWritable.class), null); - } - - } - - /** - * Factory method to get NullExprProcessor. - * - * @return NullExprProcessor. - */ - public NullExprProcessor getNullExprProcessor() { - return new NullExprProcessor(); - } - - /** - * Processor for processing numeric constants. - */ - public static class NumExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - return desc; - } - - Number v = null; - ASTNode expr = (ASTNode) nd; - // The expression can be any one of Double, Long and Integer. We - // try to parse the expression in that order to ensure that the - // most specific type is used for conversion. - try { - if (expr.getText().endsWith("L")) { - // Literal bigint. - v = Long.valueOf(expr.getText().substring( - 0, expr.getText().length() - 1)); - } else if (expr.getText().endsWith("S")) { - // Literal smallint. - v = Short.valueOf(expr.getText().substring( - 0, expr.getText().length() - 1)); - } else if (expr.getText().endsWith("Y")) { - // Literal tinyint. - v = Byte.valueOf(expr.getText().substring( - 0, expr.getText().length() - 1)); - } else if (expr.getText().endsWith("BD")) { - // Literal decimal - String strVal = expr.getText().substring(0, expr.getText().length() - 2); - HiveDecimal hd = HiveDecimal.create(strVal); - int prec = 1; - int scale = 0; - if (hd != null) { - prec = hd.precision(); - scale = hd.scale(); - } - DecimalTypeInfo typeInfo = TypeInfoFactory.getDecimalTypeInfo(prec, scale); - return new ExprNodeConstantDesc(typeInfo, hd); - } else { - v = Double.valueOf(expr.getText()); - v = Long.valueOf(expr.getText()); - v = Integer.valueOf(expr.getText()); - } - } catch (NumberFormatException e) { - // do nothing here, we will throw an exception in the following block - } - if (v == null) { - throw new SemanticException(ErrorMsg.INVALID_NUMERICAL_CONSTANT - .getMsg(expr)); - } - return new ExprNodeConstantDesc(v); - } - - } - - /** - * Factory method to get NumExprProcessor. - * - * @return NumExprProcessor. - */ - public NumExprProcessor getNumExprProcessor() { - return new NumExprProcessor(); - } - - /** - * Processor for processing string constants. - */ - public static class StrExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - return desc; - } - - ASTNode expr = (ASTNode) nd; - String str = null; - - switch (expr.getToken().getType()) { - case SparkSqlParser.StringLiteral: - str = SemanticAnalyzer.unescapeSQLString(expr.getText()); - break; - case SparkSqlParser.TOK_STRINGLITERALSEQUENCE: - StringBuilder sb = new StringBuilder(); - for (Node n : expr.getChildren()) { - sb.append( - SemanticAnalyzer.unescapeSQLString(((ASTNode)n).getText())); - } - str = sb.toString(); - break; - case SparkSqlParser.TOK_CHARSETLITERAL: - str = SemanticAnalyzer.charSetString(expr.getChild(0).getText(), - expr.getChild(1).getText()); - break; - default: - // SparkSqlParser.identifier | HiveParse.KW_IF | HiveParse.KW_LEFT | - // HiveParse.KW_RIGHT - str = SemanticAnalyzer.unescapeIdentifier(expr.getText()); - break; - } - return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str); - } - - } - - /** - * Factory method to get StrExprProcessor. - * - * @return StrExprProcessor. - */ - public StrExprProcessor getStrExprProcessor() { - return new StrExprProcessor(); - } - - /** - * Processor for boolean constants. - */ - public static class BoolExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - return desc; - } - - ASTNode expr = (ASTNode) nd; - Boolean bool = null; - - switch (expr.getToken().getType()) { - case SparkSqlParser.KW_TRUE: - bool = Boolean.TRUE; - break; - case SparkSqlParser.KW_FALSE: - bool = Boolean.FALSE; - break; - default: - assert false; - } - return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, bool); - } - - } - - /** - * Factory method to get BoolExprProcessor. - * - * @return BoolExprProcessor. - */ - public BoolExprProcessor getBoolExprProcessor() { - return new BoolExprProcessor(); - } - - /** - * Processor for date constants. - */ - public static class DateTimeExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - return desc; - } - - ASTNode expr = (ASTNode) nd; - String timeString = SemanticAnalyzer.stripQuotes(expr.getText()); - - // Get the string value and convert to a Date value. 
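// For example (hedged): DATE '2015-12-15' becomes an ExprNodeConstantDesc of dateTypeInfo
// wrapping java.sql.Date.valueOf("2015-12-15"); TIMESTAMP literals are handled the same way
// via java.sql.Timestamp.valueOf.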
- try { - // todo replace below with joda-time, which supports timezone - if (expr.getType() == SparkSqlParser.TOK_DATELITERAL) { - PrimitiveTypeInfo typeInfo = TypeInfoFactory.dateTypeInfo; - return new ExprNodeConstantDesc(typeInfo, - Date.valueOf(timeString)); - } - if (expr.getType() == SparkSqlParser.TOK_TIMESTAMPLITERAL) { - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, - Timestamp.valueOf(timeString)); - } - throw new IllegalArgumentException("Invalid time literal type " + expr.getType()); - } catch (Exception err) { - throw new SemanticException( - "Unable to convert time literal '" + timeString + "' to time value.", err); - } - } - } - - /** - * Factory method to get DateExprProcessor. - * - * @return DateExprProcessor. - */ - public DateTimeExprProcessor getDateTimeExprProcessor() { - return new DateTimeExprProcessor(); - } - - /** - * Processor for interval constants. - */ - public static class IntervalExprProcessor implements NodeProcessor { - - private static final BigDecimal NANOS_PER_SEC_BD = new BigDecimal(DateUtils.NANOS_PER_SEC); - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - return desc; - } - - ASTNode expr = (ASTNode) nd; - String intervalString = SemanticAnalyzer.stripQuotes(expr.getText()); - - // Get the string value and convert to a Interval value. - try { - switch (expr.getType()) { - case SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, - HiveIntervalYearMonth.valueOf(intervalString)); - case SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - HiveIntervalDayTime.valueOf(intervalString)); - case SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, - new HiveIntervalYearMonth(Integer.parseInt(intervalString), 0)); - case SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, - new HiveIntervalYearMonth(0, Integer.parseInt(intervalString))); - case SparkSqlParser.TOK_INTERVAL_DAY_LITERAL: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - new HiveIntervalDayTime(Integer.parseInt(intervalString), 0, 0, 0, 0)); - case SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - new HiveIntervalDayTime(0, Integer.parseInt(intervalString), 0, 0, 0)); - case SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL: - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - new HiveIntervalDayTime(0, 0, Integer.parseInt(intervalString), 0, 0)); - case SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL: - BigDecimal bd = new BigDecimal(intervalString); - BigDecimal bdSeconds = new BigDecimal(bd.toBigInteger()); - BigDecimal bdNanos = bd.subtract(bdSeconds); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - new HiveIntervalDayTime(0, 0, 0, bdSeconds.intValueExact(), - bdNanos.multiply(NANOS_PER_SEC_BD).intValue())); - default: - throw new IllegalArgumentException("Invalid time literal type " + expr.getType()); - } - } catch (Exception err) { - throw new SemanticException( - "Unable to convert 
interval literal '" + intervalString + "' to interval value.", err); - } - } - } - - /** - * Factory method to get IntervalExprProcessor. - * - * @return IntervalExprProcessor. - */ - public IntervalExprProcessor getIntervalExprProcessor() { - return new IntervalExprProcessor(); - } - - /** - * Processor for table columns. - */ - public static class ColumnExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - return desc; - } - - ASTNode expr = (ASTNode) nd; - ASTNode parent = stack.size() > 1 ? (ASTNode) stack.get(stack.size() - 2) : null; - RowResolver input = ctx.getInputRR(); - - if (expr.getType() != SparkSqlParser.TOK_TABLE_OR_COL) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr), expr); - return null; - } - - assert (expr.getChildCount() == 1); - String tableOrCol = SemanticAnalyzer.unescapeIdentifier(expr - .getChild(0).getText()); - - boolean isTableAlias = input.hasTableAlias(tableOrCol); - ColumnInfo colInfo = input.get(null, tableOrCol); - - if (isTableAlias) { - if (colInfo != null) { - if (parent != null && parent.getType() == SparkSqlParser.DOT) { - // It's a table alias. - return null; - } - // It's a column. - return toExprNodeDesc(colInfo); - } else { - // It's a table alias. - // We will process that later in DOT. - return null; - } - } else { - if (colInfo == null) { - // It's not a column or a table alias. - if (input.getIsExprResolver()) { - ASTNode exprNode = expr; - if (!stack.empty()) { - ASTNode tmp = (ASTNode) stack.pop(); - if (!stack.empty()) { - exprNode = (ASTNode) stack.peek(); - } - stack.push(tmp); - } - ctx.setError(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(exprNode), expr); - return null; - } else { - List possibleColumnNames = input.getReferenceableColumnAliases(tableOrCol, -1); - String reason = String.format("(possible column names are: %s)", - StringUtils.join(possibleColumnNames, ", ")); - ctx.setError(ErrorMsg.INVALID_TABLE_OR_COLUMN.getMsg(expr.getChild(0), reason), - expr); - LOG.debug(ErrorMsg.INVALID_TABLE_OR_COLUMN.toString() + ":" - + input.toString()); - return null; - } - } else { - // It's a column. - return toExprNodeDesc(colInfo); - } - } - - } - - } - - private static ExprNodeDesc toExprNodeDesc(ColumnInfo colInfo) { - ObjectInspector inspector = colInfo.getObjectInspector(); - if (inspector instanceof ConstantObjectInspector && - inspector instanceof PrimitiveObjectInspector) { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector; - Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue(); - return new ExprNodeConstantDesc(colInfo.getType(), poi.getPrimitiveJavaObject(constant)); - } - // non-constant or non-primitive constants - ExprNodeColumnDesc column = new ExprNodeColumnDesc(colInfo); - column.setSkewedCol(colInfo.isSkewedCol()); - return column; - } - - /** - * Factory method to get ColumnExprProcessor. - * - * @return ColumnExprProcessor. - */ - public ColumnExprProcessor getColumnExprProcessor() { - return new ColumnExprProcessor(); - } - - /** - * The default processor for typechecking. 
- */ - public static class DefaultExprProcessor implements NodeProcessor { - - static HashMap specialUnaryOperatorTextHashMap; - static HashMap specialFunctionTextHashMap; - static HashMap conversionFunctionTextHashMap; - static HashSet windowingTokens; - static { - specialUnaryOperatorTextHashMap = new HashMap(); - specialUnaryOperatorTextHashMap.put(SparkSqlParser.PLUS, "positive"); - specialUnaryOperatorTextHashMap.put(SparkSqlParser.MINUS, "negative"); - specialFunctionTextHashMap = new HashMap(); - specialFunctionTextHashMap.put(SparkSqlParser.TOK_ISNULL, "isnull"); - specialFunctionTextHashMap.put(SparkSqlParser.TOK_ISNOTNULL, "isnotnull"); - conversionFunctionTextHashMap = new HashMap(); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_BOOLEAN, - serdeConstants.BOOLEAN_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_TINYINT, - serdeConstants.TINYINT_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_SMALLINT, - serdeConstants.SMALLINT_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_INT, - serdeConstants.INT_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_BIGINT, - serdeConstants.BIGINT_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_FLOAT, - serdeConstants.FLOAT_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_DOUBLE, - serdeConstants.DOUBLE_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_STRING, - serdeConstants.STRING_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_CHAR, - serdeConstants.CHAR_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_VARCHAR, - serdeConstants.VARCHAR_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_BINARY, - serdeConstants.BINARY_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_DATE, - serdeConstants.DATE_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_TIMESTAMP, - serdeConstants.TIMESTAMP_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_INTERVAL_YEAR_MONTH, - serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_INTERVAL_DAY_TIME, - serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME); - conversionFunctionTextHashMap.put(SparkSqlParser.TOK_DECIMAL, - serdeConstants.DECIMAL_TYPE_NAME); - - windowingTokens = new HashSet(); - windowingTokens.add(SparkSqlParser.KW_OVER); - windowingTokens.add(SparkSqlParser.TOK_PARTITIONINGSPEC); - windowingTokens.add(SparkSqlParser.TOK_DISTRIBUTEBY); - windowingTokens.add(SparkSqlParser.TOK_SORTBY); - windowingTokens.add(SparkSqlParser.TOK_CLUSTERBY); - windowingTokens.add(SparkSqlParser.TOK_WINDOWSPEC); - windowingTokens.add(SparkSqlParser.TOK_WINDOWRANGE); - windowingTokens.add(SparkSqlParser.TOK_WINDOWVALUES); - windowingTokens.add(SparkSqlParser.KW_UNBOUNDED); - windowingTokens.add(SparkSqlParser.KW_PRECEDING); - windowingTokens.add(SparkSqlParser.KW_FOLLOWING); - windowingTokens.add(SparkSqlParser.KW_CURRENT); - windowingTokens.add(SparkSqlParser.TOK_TABSORTCOLNAMEASC); - windowingTokens.add(SparkSqlParser.TOK_TABSORTCOLNAMEDESC); - } - - protected static boolean isRedundantConversionFunction(ASTNode expr, - boolean isFunction, ArrayList children) { - if (!isFunction) { - return false; - } - // conversion functions take a single parameter - if (children.size() != 1) { - return false; - } - String funcText = conversionFunctionTextHashMap.get(((ASTNode) expr - .getChild(0)).getType()); - // not a conversion function - if (funcText == null) { - 
return false; - } - // return true when the child type and the conversion target type is the - // same - return ((PrimitiveTypeInfo) children.get(0).getTypeInfo()).getTypeName() - .equalsIgnoreCase(funcText); - } - - public static String getFunctionText(ASTNode expr, boolean isFunction) { - String funcText = null; - if (!isFunction) { - // For operator, the function name is the operator text, unless it's in - // our special dictionary - if (expr.getChildCount() == 1) { - funcText = specialUnaryOperatorTextHashMap.get(expr.getType()); - } - if (funcText == null) { - funcText = expr.getText(); - } - } else { - // For TOK_FUNCTION, the function name is stored in the first child, - // unless it's in our - // special dictionary. - assert (expr.getChildCount() >= 1); - int funcType = ((ASTNode) expr.getChild(0)).getType(); - funcText = specialFunctionTextHashMap.get(funcType); - if (funcText == null) { - funcText = conversionFunctionTextHashMap.get(funcType); - } - if (funcText == null) { - funcText = ((ASTNode) expr.getChild(0)).getText(); - } - } - return SemanticAnalyzer.unescapeIdentifier(funcText); - } - - /** - * This function create an ExprNodeDesc for a UDF function given the - * children (arguments). It will insert implicit type conversion functions - * if necessary. - * - * @throws UDFArgumentException - */ - static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, TypeInfo typeInfo, - ExprNodeDesc... children) throws UDFArgumentException { - - FunctionInfo fi; - try { - fi = FunctionRegistry.getFunctionInfo(udfName); - } catch (SemanticException e) { - throw new UDFArgumentException(e); - } - if (fi == null) { - throw new UDFArgumentException(udfName + " not found."); - } - - GenericUDF genericUDF = fi.getGenericUDF(); - if (genericUDF == null) { - throw new UDFArgumentException(udfName - + " is an aggregation function or a table function."); - } - - // Add udfData to UDF if necessary - if (typeInfo != null) { - if (genericUDF instanceof SettableUDF) { - ((SettableUDF)genericUDF).setTypeInfo(typeInfo); - } - } - - List childrenList = new ArrayList(children.length); - - childrenList.addAll(Arrays.asList(children)); - return ExprNodeGenericFuncDesc.newInstance(genericUDF, - childrenList); - } - - public static ExprNodeDesc getFuncExprNodeDesc(String udfName, - ExprNodeDesc... children) throws UDFArgumentException { - return getFuncExprNodeDescWithUdfData(udfName, null, children); - } - - protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi, - List children, GenericUDF genericUDF) throws SemanticException { - // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't - // supported - if (fi.getGenericUDTF() != null) { - throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); - } - // UDAF in filter condition, group-by caluse, param of funtion, etc. 
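// (In other words: an aggregate such as count() or sum() used where only a scalar expression is
// allowed, for example inside a WHERE clause, is rejected here.)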
- if (fi.getGenericUDAFResolver() != null) { - if (isFunction) { - throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg((ASTNode) expr - .getChild(0))); - } else { - throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr)); - } - } - if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) { - if (FunctionRegistry.isStateful(genericUDF)) { - throw new SemanticException(ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); - } - } - } - - protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, - boolean isFunction, ArrayList children, TypeCheckCtx ctx) - throws SemanticException, UDFArgumentException { - // return the child directly if the conversion is redundant. - if (isRedundantConversionFunction(expr, isFunction, children)) { - assert (children.size() == 1); - assert (children.get(0) != null); - return children.get(0); - } - String funcText = getFunctionText(expr, isFunction); - ExprNodeDesc desc; - if (funcText.equals(".")) { - // "." : FIELD Expression - - assert (children.size() == 2); - // Only allow constant field name for now - assert (children.get(1) instanceof ExprNodeConstantDesc); - ExprNodeDesc object = children.get(0); - ExprNodeConstantDesc fieldName = (ExprNodeConstantDesc) children.get(1); - assert (fieldName.getValue() instanceof String); - - // Calculate result TypeInfo - String fieldNameString = (String) fieldName.getValue(); - TypeInfo objectTypeInfo = object.getTypeInfo(); - - // Allow accessing a field of list element structs directly from a list - boolean isList = (object.getTypeInfo().getCategory() == ObjectInspector.Category.LIST); - if (isList) { - objectTypeInfo = ((ListTypeInfo) objectTypeInfo).getListElementTypeInfo(); - } - if (objectTypeInfo.getCategory() != Category.STRUCT) { - throw new SemanticException(ErrorMsg.INVALID_DOT.getMsg(expr)); - } - TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString); - if (isList) { - t = TypeInfoFactory.getListTypeInfo(t); - } - - desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, isList); - } else if (funcText.equals("[")) { - // "[]" : LSQUARE/INDEX Expression - if (!ctx.getallowIndexExpr()) { - throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); - } - - assert (children.size() == 2); - - // Check whether this is a list or a map - TypeInfo myt = children.get(0).getTypeInfo(); - - if (myt.getCategory() == Category.LIST) { - // Only allow integer index for now - if (!(children.get(1) instanceof ExprNodeConstantDesc) - || !(((ExprNodeConstantDesc) children.get(1)).getTypeInfo() - .equals(TypeInfoFactory.intTypeInfo))) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage( - expr, - ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg())); - } - - // Calculate TypeInfo - TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo(); - desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children); - } else if (myt.getCategory() == Category.MAP) { - if (!(children.get(1) instanceof ExprNodeConstantDesc)) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage( - expr, - ErrorMsg.INVALID_MAPINDEX_CONSTANT.getMsg())); - } - // Calculate TypeInfo - TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo(); - desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children); - } else { - throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, myt.getTypeName())); - } - } else { - // other operators or functions - FunctionInfo fi = 
FunctionRegistry.getFunctionInfo(funcText); - - if (fi == null) { - if (isFunction) { - throw new SemanticException(ErrorMsg.INVALID_FUNCTION - .getMsg((ASTNode) expr.getChild(0))); - } else { - throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); - } - } - - // getGenericUDF() actually clones the UDF. Just call it once and reuse. - GenericUDF genericUDF = fi.getGenericUDF(); - - if (!fi.isNative()) { - ctx.getUnparseTranslator().addIdentifierTranslation( - (ASTNode) expr.getChild(0)); - } - - // Handle type casts that may contain type parameters - if (isFunction) { - ASTNode funcNameNode = (ASTNode)expr.getChild(0); - switch (funcNameNode.getType()) { - case SparkSqlParser.TOK_CHAR: - // Add type params - CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(funcNameNode); - if (genericUDF != null) { - ((SettableUDF)genericUDF).setTypeInfo(charTypeInfo); - } - break; - case SparkSqlParser.TOK_VARCHAR: - VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(funcNameNode); - if (genericUDF != null) { - ((SettableUDF)genericUDF).setTypeInfo(varcharTypeInfo); - } - break; - case SparkSqlParser.TOK_DECIMAL: - DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(funcNameNode); - if (genericUDF != null) { - ((SettableUDF)genericUDF).setTypeInfo(decTypeInfo); - } - break; - default: - // Do nothing - break; - } - } - - validateUDF(expr, isFunction, ctx, fi, children, genericUDF); - - // Try to infer the type of the constant only if there are two - // nodes, one of them is column and the other is numeric const - if (genericUDF instanceof GenericUDFBaseCompare - && children.size() == 2 - && ((children.get(0) instanceof ExprNodeConstantDesc - && children.get(1) instanceof ExprNodeColumnDesc) - || (children.get(0) instanceof ExprNodeColumnDesc - && children.get(1) instanceof ExprNodeConstantDesc))) { - int constIdx = - children.get(0) instanceof ExprNodeConstantDesc ? 
0 : 1; - - Set inferTypes = new HashSet(Arrays.asList( - serdeConstants.TINYINT_TYPE_NAME.toLowerCase(), - serdeConstants.SMALLINT_TYPE_NAME.toLowerCase(), - serdeConstants.INT_TYPE_NAME.toLowerCase(), - serdeConstants.BIGINT_TYPE_NAME.toLowerCase(), - serdeConstants.FLOAT_TYPE_NAME.toLowerCase(), - serdeConstants.DOUBLE_TYPE_NAME.toLowerCase(), - serdeConstants.STRING_TYPE_NAME.toLowerCase() - )); - - String constType = children.get(constIdx).getTypeString().toLowerCase(); - String columnType = children.get(1 - constIdx).getTypeString().toLowerCase(); - - if (inferTypes.contains(constType) && inferTypes.contains(columnType) - && !columnType.equalsIgnoreCase(constType)) { - Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue(); - String constValue = originalValue.toString(); - boolean triedDouble = false; - Number value = null; - try { - if (columnType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)) { - value = new Byte(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)) { - value = new Short(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) { - value = new Integer(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { - value = new Long(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { - value = new Float(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { - triedDouble = true; - value = new Double(constValue); - } else if (columnType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { - // Don't scramble the const type information if comparing to a string column, - // It's not useful to do so; as of now, there is also a hack in - // SemanticAnalyzer#genTablePlan that causes every column to look like a string - // a string down here, so number type information is always lost otherwise. - boolean isNumber = (originalValue instanceof Number); - triedDouble = !isNumber; - value = isNumber ? (Number)originalValue : new Double(constValue); - } - } catch (NumberFormatException nfe) { - // this exception suggests the precise type inference did not succeed - // we'll try again to convert it to double - // however, if we already tried this, or the column is NUMBER type and - // the operator is EQUAL, return false due to the type mismatch - if (triedDouble && - (genericUDF instanceof GenericUDFOPEqual - && !columnType.equals(serdeConstants.STRING_TYPE_NAME))) { - return new ExprNodeConstantDesc(false); - } - - try { - value = new Double(constValue); - } catch (NumberFormatException ex) { - return new ExprNodeConstantDesc(false); - } - } - - if (value != null) { - children.set(constIdx, new ExprNodeConstantDesc(value)); - } - } - - // if column type is char and constant type is string, then convert the constant to char - // type with padded spaces. 
- final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory - .getPrimitiveTypeInfo(columnType); - if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) && - colTypeInfo instanceof CharTypeInfo) { - final Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue(); - final String constValue = originalValue.toString(); - final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo); - final HiveChar newValue = new HiveChar(constValue, length); - children.set(constIdx, new ExprNodeConstantDesc(colTypeInfo, newValue)); - } - } - if (genericUDF instanceof GenericUDFOPOr) { - // flatten OR - List childrenList = new ArrayList( - children.size()); - for (ExprNodeDesc child : children) { - if (FunctionRegistry.isOpOr(child)) { - childrenList.addAll(child.getChildren()); - } else { - childrenList.add(child); - } - } - desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, - childrenList); - } else if (genericUDF instanceof GenericUDFOPAnd) { - // flatten AND - List childrenList = new ArrayList( - children.size()); - for (ExprNodeDesc child : children) { - if (FunctionRegistry.isOpAnd(child)) { - childrenList.addAll(child.getChildren()); - } else { - childrenList.add(child); - } - } - desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, - childrenList); - } else { - desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, funcText, - children); - } - } - // UDFOPPositive is a no-op. - // However, we still create it, and then remove it here, to make sure we - // only allow - // "+" for numeric types. - if (FunctionRegistry.isOpPositive(desc)) { - assert (desc.getChildren().size() == 1); - desc = desc.getChildren().get(0); - } - assert (desc != null); - return desc; - } - - /** - * Returns true if des is a descendant of ans (ancestor) - */ - private boolean isDescendant(Node ans, Node des) { - if (ans.getChildren() == null) { - return false; - } - for (Node c : ans.getChildren()) { - if (c == des) { - return true; - } - if (isDescendant(c, des)) { - return true; - } - } - return false; - } - - protected ExprNodeDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, - Object... nodeOutputs) throws SemanticException { - RowResolver input = ctx.getInputRR(); - String tableAlias = SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) - .getText()); - // NOTE: tableAlias must be a valid non-ambiguous table alias, - // because we've checked that in TOK_TABLE_OR_COL's process method. - String colName; - if (nodeOutputs[1] instanceof ExprNodeConstantDesc) { - colName = ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(); - } else if (nodeOutputs[1] instanceof ExprNodeColumnDesc) { - colName = ((ExprNodeColumnDesc)nodeOutputs[1]).getColumn(); - } else { - throw new SemanticException("Unexpected ExprNode : " + nodeOutputs[1]); - } - ColumnInfo colInfo = input.get(tableAlias, colName); - - if (colInfo == null) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); - return null; - } - return toExprNodeDesc(colInfo); - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - // Here we know nd represents a group by expression. - - // During the DFS traversal of the AST, a descendant of nd likely set an - // error because a sub-tree of nd is unlikely to also be a group by - // expression. 
For example, in a query such as - // SELECT *concat(key)* FROM src GROUP BY concat(key), 'key' will be - // processed before 'concat(key)' and since 'key' is not a group by - // expression, an error will be set in ctx by ColumnExprProcessor. - - // We can clear the global error when we see that it was set in a - // descendant node of a group by expression because - // processGByExpr() returns a ExprNodeDesc that effectively ignores - // its children. Although the error can be set multiple times by - // descendant nodes, DFS traversal ensures that the error only needs to - // be cleared once. Also, for a case like - // SELECT concat(value, concat(value))... the logic still works as the - // error is only set with the first 'value'; all node processors quit - // early if the global error is set. - - if (isDescendant(nd, ctx.getErrorSrcNode())) { - ctx.setError(null, null); - } - return desc; - } - - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - - /* - * A Windowing specification get added as a child to a UDAF invocation to distinguish it - * from similar UDAFs but on different windows. - * The UDAF is translated to a WindowFunction invocation in the PTFTranslator. - * So here we just return null for tokens that appear in a Window Specification. - * When the traversal reaches up to the UDAF invocation its ExprNodeDesc is build using the - * ColumnInfo in the InputRR. This is similar to how UDAFs are handled in Select lists. - * The difference is that there is translation for Window related tokens, so we just - * return null; - */ - if (windowingTokens.contains(expr.getType())) { - if (!ctx.getallowWindowing()) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, - ErrorMsg.INVALID_FUNCTION.getMsg("Windowing is not supported in the context"))); - } - return null; - } - - if (expr.getType() == SparkSqlParser.TOK_TABNAME) { - return null; - } - - if (expr.getType() == SparkSqlParser.TOK_ALLCOLREF) { - if (!ctx.getallowAllColRef()) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, - ErrorMsg.INVALID_COLUMN - .getMsg("All column reference is not supported in the context"))); - } - - RowResolver input = ctx.getInputRR(); - ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc(); - assert expr.getChildCount() <= 1; - if (expr.getChildCount() == 1) { - // table aliased (select a.*, for example) - ASTNode child = (ASTNode) expr.getChild(0); - assert child.getType() == SparkSqlParser.TOK_TABNAME; - assert child.getChildCount() == 1; - String tableAlias = SemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText()); - HashMap columns = input.getFieldMap(tableAlias); - if (columns == null) { - throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(child)); - } - for (Map.Entry colMap : columns.entrySet()) { - ColumnInfo colInfo = colMap.getValue(); - if (!colInfo.getIsVirtualCol()) { - columnList.addColumn(toExprNodeDesc(colInfo)); - } - } - } else { - // all columns (select *, for example) - for (ColumnInfo colInfo : input.getColumnInfos()) { - if (!colInfo.getIsVirtualCol()) { - columnList.addColumn(toExprNodeDesc(colInfo)); - } - } - } - return columnList; - } - - // If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL, - // and the operator is a DOT, then it's a table column reference. 
- if (expr.getType() == SparkSqlParser.DOT - && expr.getChild(0).getType() == SparkSqlParser.TOK_TABLE_OR_COL - && nodeOutputs[0] == null) { - return processQualifiedColRef(ctx, expr, nodeOutputs); - } - - // Return nulls for conversion operators - if (conversionFunctionTextHashMap.keySet().contains(expr.getType()) - || specialFunctionTextHashMap.keySet().contains(expr.getType()) - || expr.getToken().getType() == SparkSqlParser.CharSetName - || expr.getToken().getType() == SparkSqlParser.CharSetLiteral) { - return null; - } - - boolean isFunction = (expr.getType() == SparkSqlParser.TOK_FUNCTION || - expr.getType() == SparkSqlParser.TOK_FUNCTIONSTAR || - expr.getType() == SparkSqlParser.TOK_FUNCTIONDI); - - if (!ctx.getAllowDistinctFunctions() && expr.getType() == SparkSqlParser.TOK_FUNCTIONDI) { - throw new SemanticException( - SemanticAnalyzer.generateErrorMessage(expr, ErrorMsg.DISTINCT_NOT_SUPPORTED.getMsg())); - } - - // Create all children - int childrenBegin = (isFunction ? 1 : 0); - ArrayList children = new ArrayList( - expr.getChildCount() - childrenBegin); - for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) { - if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) { - children.addAll(((ExprNodeColumnListDesc) nodeOutputs[ci]).getChildren()); - } else { - children.add((ExprNodeDesc) nodeOutputs[ci]); - } - } - - if (expr.getType() == SparkSqlParser.TOK_FUNCTIONSTAR) { - if (!ctx.getallowFunctionStar()) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, - ErrorMsg.INVALID_COLUMN - .getMsg(".* reference is not supported in the context"))); - } - - RowResolver input = ctx.getInputRR(); - for (ColumnInfo colInfo : input.getColumnInfos()) { - if (!colInfo.getIsVirtualCol()) { - children.add(toExprNodeDesc(colInfo)); - } - } - } - - // If any of the children contains null, then return a null - // this is a hack for now to handle the group by case - if (children.contains(null)) { - List possibleColumnNames = getReferenceableColumnAliases(ctx); - String reason = String.format("(possible column names are: %s)", - StringUtils.join(possibleColumnNames, ", ")); - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0), reason), - expr); - return null; - } - - // Create function desc - try { - return getXpathOrFuncExprNodeDesc(expr, isFunction, children, ctx); - } catch (UDFArgumentTypeException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_TYPE.getMsg(expr - .getChild(childrenBegin + e.getArgumentId()), e.getMessage())); - } catch (UDFArgumentLengthException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT_LENGTH.getMsg( - expr, e.getMessage())); - } catch (UDFArgumentException e) { - throw new SemanticException(ErrorMsg.INVALID_ARGUMENT.getMsg(expr, e - .getMessage())); - } - } - - protected List getReferenceableColumnAliases(TypeCheckCtx ctx) { - return ctx.getInputRR().getReferenceableColumnAliases(null, -1); - } - } - - /** - * Factory method to get DefaultExprProcessor. - * - * @return DefaultExprProcessor. - */ - public DefaultExprProcessor getDefaultExprProcessor() { - return new DefaultExprProcessor(); - } - - /** - * Processor for subquery expressions.. - */ - public static class SubQueryExprProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - - TypeCheckCtx ctx = (TypeCheckCtx) procCtx; - if (ctx.getError() != null) { - return null; - } - - ASTNode expr = (ASTNode) nd; - ASTNode sqNode = (ASTNode) expr.getParent().getChild(1); - - if (!ctx.getallowSubQueryExpr()) { - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(sqNode, - ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg())); - } - - ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); - if (desc != null) { - return desc; - } - - /* - * Restriction.1.h :: SubQueries only supported in the SQL Where Clause. - */ - ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode, - "Currently SubQuery expressions are only allowed as Where Clause predicates"), - sqNode); - return null; - } - } - - /** - * Factory method to get SubQueryExprProcessor. - * - * @return DateExprProcessor. - */ - public SubQueryExprProcessor getSubQueryExprProcessor() { - return new SubQueryExprProcessor(); - } -} diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/UnparseTranslator.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/UnparseTranslator.java deleted file mode 100644 index defbeb93ab3d..000000000000 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/UnparseTranslator.java +++ /dev/null @@ -1,274 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.parser; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.NavigableMap; -import java.util.TreeMap; - -import org.antlr.runtime.TokenRewriteStream; -import org.apache.hadoop.conf.Configuration; - -/** - * UnparseTranslator is used to "unparse" objects such as views when their - * definition is stored. It has a translations map where its possible to replace all the - * text with the appropriate escaped version [say invites.ds will be replaced with - * `invites`.`ds` and the entire query is processed like this and stored as - * Extended text in table's metadata]. This holds all individual translations and - * where they apply in the stream. The unparse is lazy and happens when - * SemanticAnalyzer.saveViewDefinition() calls TokenRewriteStream.toString(). - * - */ -public class UnparseTranslator { - // key is token start index - private final NavigableMap translations; - private final List copyTranslations; - private boolean enabled; - private Configuration conf; - - public UnparseTranslator(Configuration conf) { - this.conf = conf; - translations = new TreeMap(); - copyTranslations = new ArrayList(); - } - - /** - * Enable this translator. 
- */ - void enable() { - enabled = true; - } - - /** - * @return whether this translator has been enabled - */ - boolean isEnabled() { - return enabled; - } - - /** - * Register a translation to be performed as part of unparse. ANTLR imposes - * strict conditions on the translations and errors out during - * TokenRewriteStream.toString() if there is an overlap. It expects all - * the translations to be disjoint (See HIVE-2439). - * If the translation overlaps with any previously - * registered translation, then it must be either - * identical or a prefix (in which cases it is ignored), - * or else it must extend the existing translation (i.e. - * the existing translation must be a prefix/suffix of the new translation). - * All other overlap cases result in assertion failures. - * - * @param node - * target node whose subtree is to be replaced - * - * @param replacementText - * text to use as replacement - */ - void addTranslation(ASTNode node, String replacementText) { - if (!enabled) { - return; - } - - if (node.getOrigin() != null) { - // This node was parsed while loading the definition of another view - // being referenced by the one being created, and we don't want - // to track any expansions for the underlying view. - return; - } - - int tokenStartIndex = node.getTokenStartIndex(); - int tokenStopIndex = node.getTokenStopIndex(); - if (tokenStopIndex < 0) { - // this is for artificially added tokens - return; - } - Translation translation = new Translation(); - translation.tokenStopIndex = tokenStopIndex; - translation.replacementText = replacementText; - - // Sanity check for overlap with regions already being expanded - assert (tokenStopIndex >= tokenStartIndex); - - List subsetEntries = new ArrayList(); - // Is the existing entry and newer entry are subset of one another ? - for (Map.Entry existingEntry : - translations.headMap(tokenStopIndex, true).entrySet()) { - // check if the new entry contains the existing - if (existingEntry.getValue().tokenStopIndex <= tokenStopIndex && - existingEntry.getKey() >= tokenStartIndex) { - // Collect newer entry is if a super-set of existing entry, - assert (replacementText.contains(existingEntry.getValue().replacementText)); - subsetEntries.add(existingEntry.getKey()); - // check if the existing entry contains the new - } else if (existingEntry.getValue().tokenStopIndex >= tokenStopIndex && - existingEntry.getKey() <= tokenStartIndex) { - assert (existingEntry.getValue().replacementText.contains(replacementText)); - // we don't need to add this new entry since there's already an overlapping one - return; - } - } - // remove any existing entries that are contained by the new one - for (Integer index : subsetEntries) { - translations.remove(index); - } - - // It's all good: create a new entry in the map (or update existing one) - translations.put(tokenStartIndex, translation); - } - - /** - * Register a translation for an tabName. 
- * - * @param tableName - * source node (which must be an tabName) to be replaced - */ - void addTableNameTranslation(ASTNode tableName, String currentDatabaseName) { - if (!enabled) { - return; - } - if (tableName.getToken().getType() == SparkSqlParser.Identifier) { - addIdentifierTranslation(tableName); - return; - } - assert (tableName.getToken().getType() == SparkSqlParser.TOK_TABNAME); - assert (tableName.getChildCount() <= 2); - - if (tableName.getChildCount() == 2) { - addIdentifierTranslation((ASTNode)tableName.getChild(0)); - addIdentifierTranslation((ASTNode)tableName.getChild(1)); - } - else { - // transform the table reference to an absolute reference (i.e., "db.table") - StringBuilder replacementText = new StringBuilder(); - replacementText.append(ParseUtils.unparseIdentifier(currentDatabaseName, conf)); - replacementText.append('.'); - - ASTNode identifier = (ASTNode)tableName.getChild(0); - String identifierText = SemanticAnalyzer.unescapeIdentifier(identifier.getText()); - replacementText.append(ParseUtils.unparseIdentifier(identifierText, conf)); - - addTranslation(identifier, replacementText.toString()); - } - } - - /** - * Register a translation for an identifier. - * - * @param identifier - * source node (which must be an identifier) to be replaced - */ - void addIdentifierTranslation(ASTNode identifier) { - if (!enabled) { - return; - } - assert (identifier.getToken().getType() == SparkSqlParser.Identifier); - String replacementText = identifier.getText(); - replacementText = SemanticAnalyzer.unescapeIdentifier(replacementText); - replacementText = ParseUtils.unparseIdentifier(replacementText, conf); - addTranslation(identifier, replacementText); - } - - /** - * Register a "copy" translation in which a node will be translated into - * whatever the translation turns out to be for another node (after - * previously registered translations have already been performed). Deferred - * translations are performed in the order they are registered, and follow - * the same rules regarding overlap as non-copy translations. - * - * @param targetNode node whose subtree is to be replaced - * - * @param sourceNode the node providing the replacement text - * - */ - void addCopyTranslation(ASTNode targetNode, ASTNode sourceNode) { - if (!enabled) { - return; - } - - if (targetNode.getOrigin() != null) { - return; - } - - CopyTranslation copyTranslation = new CopyTranslation(); - copyTranslation.targetNode = targetNode; - copyTranslation.sourceNode = sourceNode; - copyTranslations.add(copyTranslation); - } - - /** - * Apply all translations on the given token stream. 
- * - * @param tokenRewriteStream - * rewrite-capable stream - */ - void applyTranslations(TokenRewriteStream tokenRewriteStream) { - for (Map.Entry entry : translations.entrySet()) { - if (entry.getKey() > 0) { - // negative means the key didn't exist in the original stream (i.e.: we changed the tree) - tokenRewriteStream.replace( - entry.getKey(), - entry.getValue().tokenStopIndex, - entry.getValue().replacementText); - } - } - for (CopyTranslation copyTranslation : copyTranslations) { - String replacementText = tokenRewriteStream.toString( - copyTranslation.sourceNode.getTokenStartIndex(), - copyTranslation.sourceNode.getTokenStopIndex()); - String currentText = tokenRewriteStream.toString( - copyTranslation.targetNode.getTokenStartIndex(), - copyTranslation.targetNode.getTokenStopIndex()); - if (currentText.equals(replacementText)) { - // copy is a nop, so skip it--this is important for avoiding - // spurious overlap assertions - continue; - } - // Call addTranslation just to get the assertions for overlap - // checking. - addTranslation(copyTranslation.targetNode, replacementText); - tokenRewriteStream.replace( - copyTranslation.targetNode.getTokenStartIndex(), - copyTranslation.targetNode.getTokenStopIndex(), - replacementText); - } - } - - private static class Translation { - int tokenStopIndex; - String replacementText; - - @Override - public String toString() { - return "" + tokenStopIndex + " -> " + replacementText; - } - } - - private static class CopyTranslation { - ASTNode targetNode; - ASTNode sourceNode; - } - - public void clear() { - translations.clear(); - copyTranslations.clear(); - enabled = false; - } -} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 41f2f42ad0b1..5b7aaa270fbc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -600,7 +600,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C NativePlaceholder } else { tableType match { - case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => { + case Token("TOK_TABTYPE", Token("TOK_TABNAME", nameParts)) if nameParts.size == 1 => { nameParts.head match { case Token(".", dbName :: tableName :: Nil) => // It is describing a table with the format like "describe db.table". From 8ced9c0f7736401fb13b591b6a465aeb4501e96d Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Tue, 29 Dec 2015 14:00:51 +0100 Subject: [PATCH 05/14] Remove tests no longer supported by parser (HIVE-11145). 
--- .../sql/hive/execution/HiveCompatibilitySuite.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 2d0d7b8af358..2b0e48dbfcf2 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -308,7 +308,12 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // The difference between the double numbers generated by Hive and Spark // can be ignored (e.g., 0.6633880657639323 and 0.6633880657639322) - "udaf_corr" + "udaf_corr", + + // Feature removed in HIVE-11145 + "alter_partition_protect_mode", + "drop_partitions_ignore_protection", + "protectmode" ) /** @@ -328,7 +333,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "alter_index", "alter_merge_2", "alter_partition_format_loc", - "alter_partition_protect_mode", "alter_partition_with_whitelist", "alter_rename_partition", "alter_table_serde", @@ -460,7 +464,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "drop_partitions_filter", "drop_partitions_filter2", "drop_partitions_filter3", - "drop_partitions_ignore_protection", "drop_table", "drop_table2", "drop_table_removes_partition_dirs", @@ -778,7 +781,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "ppr_pushdown2", "ppr_pushdown3", "progress_1", - "protectmode", "push_or", "query_with_semi", "quote1", From cb60ba045ff6663ed83c308b2423bdb87152a092 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Tue, 29 Dec 2015 14:01:06 +0100 Subject: [PATCH 06/14] Remove ASTNodeOrigin --- .../org/apache/spark/sql/parser/ASTNode.java | 18 ---- .../spark/sql/parser/ASTNodeOrigin.java | 95 ------------------- 2 files changed, 113 deletions(-) delete mode 100644 sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNodeOrigin.java diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNode.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNode.java index 8e5ca5883d49..33d9322b628e 100644 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNode.java +++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNode.java @@ -30,7 +30,6 @@ public class ASTNode extends CommonTree implements Node, Serializable { private static final long serialVersionUID = 1L; private transient StringBuffer astStr; - private transient ASTNodeOrigin origin; private transient int startIndx = -1; private transient int endIndx = -1; private transient ASTNode rootNode; @@ -51,7 +50,6 @@ public ASTNode(Token t) { public ASTNode(ASTNode node) { super(node); - this.origin = node.origin; } @Override @@ -88,22 +86,6 @@ public String getName() { return (Integer.valueOf(super.getToken().getType())).toString(); } - /** - * @return information about the object from which this ASTNode originated, or - * null if this ASTNode was not expanded from an object reference - */ - public ASTNodeOrigin getOrigin() { - return origin; - } - - /** - * Tag this ASTNode with information about the object from which this node - * originated. 
- */ - public void setOrigin(ASTNodeOrigin origin) { - this.origin = origin; - } - public String dump() { StringBuilder sb = new StringBuilder("\n"); dump(sb, ""); diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNodeOrigin.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNodeOrigin.java deleted file mode 100644 index 596787d2dd55..000000000000 --- a/sql/hive/src/main/java/org/apache/spark/sql/parser/ASTNodeOrigin.java +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.parser; - -/** - * ASTNodeOrigin contains contextual information about the object from whose - * definition a particular ASTNode originated. For example, suppose a view v is - * defined as select x+1 as y from t, and we're processing a query - * select v1.y from v as v1, and there's a type-checking problem - * with the expression x+1 due to an ALTER TABLE on t subsequent to - * the creation of v. Then, when reporting the error, we want to provide the - * parser location with respect to the definition of v (rather than with respect - * to the top-level query, since that represents a completely different - * "parser coordinate system"). - * - *

- * - * So, when expanding the definition of v while analyzing the top-level query, - * we tag each ASTNode with a reference to an ASTNodeOrign describing v and its - * usage within the query. - */ -public class ASTNodeOrigin { - private final String objectType; - private final String objectName; - private final String objectDefinition; - private final String usageAlias; - private final ASTNode usageNode; - - public ASTNodeOrigin(String objectType, String objectName, - String objectDefinition, String usageAlias, ASTNode usageNode) { - this.objectType = objectType; - this.objectName = objectName; - this.objectDefinition = objectDefinition; - this.usageAlias = usageAlias; - this.usageNode = usageNode; - } - - /** - * @return the type of the object from which an ASTNode originated, e.g. - * "view". - */ - public String getObjectType() { - return objectType; - } - - /** - * @return the name of the object from which an ASTNode originated, e.g. "v". - */ - public String getObjectName() { - return objectName; - } - - /** - * @return the definition of the object from which an ASTNode originated, e.g. - * select x+1 as y from t. - */ - public String getObjectDefinition() { - return objectDefinition; - } - - /** - * @return the alias of the object from which an ASTNode originated, e.g. "v1" - * (this can help with debugging context-dependent expansions) - */ - public String getUsageAlias() { - return usageAlias; - } - - /** - * @return the expression node triggering usage of an object from which an - * ASTNode originated, e.g. v as v1 (this can help with - * debugging context-dependent expansions) - */ - public ASTNode getUsageNode() { - return usageNode; - } -} - -// End ASTNodeOrigin.java From 2e30d92c7cbe9cdb4aa4d9364eeba0a7fbed54b3 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Tue, 29 Dec 2015 23:41:25 +0100 Subject: [PATCH 07/14] Fix CTE and position errors. 
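With the inlined grammar the CTE list shows up as the first child of TOK_QUERY instead of as the
last child after the insert clauses, so the extraction in HiveQl is rewritten to match on the head
of the query arguments. A rough, REPL-style sketch of the new shape, using a simplified stand-in
Token tree rather than the real ASTNode extractor (all names below are illustrative only, not the
actual HiveQl code):

    case class Token(text: String, children: List[Token])

    // Split the children of TOK_QUERY into (from clause, insert clauses, CTE relations).
    def splitQueryArgs(queryArgs: List[Token])
        : (Option[Token], Seq[Token], Option[Map[String, Token]]) = queryArgs match {
      case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: inserts =>
        // The real code converts every CTE child to a Subquery and keys it by its alias.
        (Some(from.head), inserts, Some(ctes.map(cte => cte.text -> cte).toMap))
      case Token("TOK_FROM", from) :: inserts =>
        (Some(from.head), inserts, None)
      case Token("TOK_INSERT", _) :: Nil =>
        (None, queryArgs, None)
    }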
--- .../org/apache/spark/sql/hive/HiveQl.scala | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 5b7aaa270fbc..dfe80205d9f1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -313,7 +313,7 @@ private[hive] object HiveQl extends Logging { context.clear() plan } catch { - case pe: org.apache.hadoop.hive.ql.parse.ParseException => + case pe: ParseException => pe.getMessage match { case errorRegEx(line, start, message) => throw new AnalysisException(message, Some(line.toInt), Some(start.toInt)) @@ -911,24 +911,20 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C Token("TOK_TABLE_PARTITION", table) :: Nil) => NativePlaceholder case Token("TOK_QUERY", queryArgs) - if Seq("TOK_FROM", "TOK_INSERT").contains(queryArgs.head.getText) => + if Seq("TOK_CTE", "TOK_FROM", "TOK_INSERT").contains(queryArgs.head.getText) => val (fromClause: Option[ASTNode], insertClauses, cteRelations) = queryArgs match { - case Token("TOK_FROM", args: Seq[ASTNode]) :: insertClauses => - // check if has CTE - insertClauses.last match { - case Token("TOK_CTE", cteClauses) => - val cteRelations = cteClauses.map(node => { - val relation = nodeToRelation(node, context).asInstanceOf[Subquery] - (relation.alias, relation) - }).toMap - (Some(args.head), insertClauses.init, Some(cteRelations)) - - case _ => (Some(args.head), insertClauses, None) + case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: inserts => + val cteRelations = ctes.map { node => + val relation = nodeToRelation(node, context).asInstanceOf[Subquery] + relation.alias -> relation } - - case Token("TOK_INSERT", _) :: Nil => (None, queryArgs, None) + (Some(from.head), inserts, Some(cteRelations.toMap)) + case Token("TOK_FROM", from) :: inserts => + (Some(from.head), inserts, None) + case Token("TOK_INSERT", _) :: Nil => + (None, queryArgs, None) } // Return one query for each insert clause. From b8e76b257063db79f05a83aa4a05578ce8807c03 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Wed, 30 Dec 2015 00:26:53 +0100 Subject: [PATCH 08/14] Fix described errors. --- .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index dfe80205d9f1..b1d841d1b554 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -600,12 +600,12 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C NativePlaceholder } else { tableType match { - case Token("TOK_TABTYPE", Token("TOK_TABNAME", nameParts)) if nameParts.size == 1 => { - nameParts.head match { + case Token("TOK_TABTYPE", Token("TOK_TABNAME", nameParts :: Nil) :: Nil) => { + nameParts match { case Token(".", dbName :: tableName :: Nil) => // It is describing a table with the format like "describe db.table". // TODO: Actually, a user may mean tableName.columnName. Need to resolve this issue. 
- val tableIdent = extractTableIdent(nameParts.head) + val tableIdent = extractTableIdent(nameParts) DescribeCommand( UnresolvedRelation(tableIdent, None), isExtended = extended.isDefined) case Token(".", dbName :: tableName :: colName :: Nil) => From a7749a8870ba44ef26af6d2e6373dadef85cda06 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Wed, 30 Dec 2015 19:52:05 +0100 Subject: [PATCH 09/14] Change log level of the Hive Compilation phase to 'debug'. --- project/SparkBuild.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index df21d3eb636f..3e214ab43e82 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -411,6 +411,7 @@ object Hive { |import org.apache.spark.sql.hive.test.TestHive.implicits._ |import org.apache.spark.sql.types._""".stripMargin, cleanupCommands in console := "sparkContext.stop()", + logLevel in Compile := Level.Debug, // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce // in order to generate golden files. This is only required for developers who are adding new // new query tests. From 90c11e87fabb6afea82b7ae94fccee778421f0fd Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Wed, 30 Dec 2015 22:14:34 +0100 Subject: [PATCH 10/14] Add the generated files to the build by hand --- project/SparkBuild.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 3e214ab43e82..7a0875f48c09 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -384,7 +384,7 @@ object SQL { } object Hive { - + import sbtantlr.SbtAntlrPlugin._ lazy val settings = Seq( javaOptions += "-XX:MaxPermSize=256m", // Specially disable assertions since some Hive tests fail them @@ -412,11 +412,17 @@ object Hive { |import org.apache.spark.sql.types._""".stripMargin, cleanupCommands in console := "sparkContext.stop()", logLevel in Compile := Level.Debug, + sourceGenerators in Compile <+= Def.task { + // This is quite a hack. + val pkg = (sourceManaged in Compile).value / "org" / "apache" / "spark" / "sql" / "parser" + val names = Seq("SparkSqlLexer", "SparkSqlParser", "SparkSqlParser_FromClauseParser", "SparkSqlParser_IdentifiersParser", "SparkSqlParser_SelectClauseParser") + names.map(name => pkg / (name + ".java")) + }, // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce // in order to generate golden files. This is only required for developers who are adding new // new query tests. fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") } - ) ++ sbtantlr.SbtAntlrPlugin.antlrSettings + ) ++ antlrSettings } From 84db0f4650398cb370461d223a8eb0e760bda8b6 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Thu, 31 Dec 2015 00:42:12 +0100 Subject: [PATCH 11/14] Embed ANTLR generation code in build. 
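Rather than depending on the com.github.stefri sbt-antlr plugin, the Hive project now drives the
ANTLR 3 Tool directly from a sourceGenerators task (the org.antlr:antlr 3.5.2 artifact is added to
the build classpath in project/plugins.sbt). Stripped of the sbt plumbing, the task boils down to
something like the sketch below; the directory values are placeholders, the real task derives them
from sourceDirectory and sourceManaged:

    // Generate the lexer/parser sources from the checked-in .g grammars.
    val antlr = new org.antlr.Tool
    antlr.setInputDirectory("sql/hive/src/main/antlr3")           // placeholder path
    antlr.setOutputDirectory("sql/hive/target/src_managed/main")  // placeholder path
    antlr.setForceRelativeOutput(true)
    antlr.setMake(true)
    antlr.addGrammarFile("org/apache/spark/sql/parser/SparkSqlLexer.g")
    antlr.addGrammarFile("org/apache/spark/sql/parser/SparkSqlParser.g")
    antlr.process
    if (antlr.getNumErrors > 0) sys.error(s"ANTLR reported ${antlr.getNumErrors} errors")

The token files that ANTLR writes next to the generated sources are picked up by a separate
resourceGenerators task.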
--- project/SparkBuild.scala | 55 ++++++++++++++++++++++++++++++++-------- project/plugins.sbt | 6 ++--- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 7a0875f48c09..9b5425dfe899 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -384,7 +384,6 @@ object SQL { } object Hive { - import sbtantlr.SbtAntlrPlugin._ lazy val settings = Seq( javaOptions += "-XX:MaxPermSize=256m", // Specially disable assertions since some Hive tests fail them @@ -411,19 +410,55 @@ object Hive { |import org.apache.spark.sql.hive.test.TestHive.implicits._ |import org.apache.spark.sql.types._""".stripMargin, cleanupCommands in console := "sparkContext.stop()", - logLevel in Compile := Level.Debug, - sourceGenerators in Compile <+= Def.task { - // This is quite a hack. - val pkg = (sourceManaged in Compile).value / "org" / "apache" / "spark" / "sql" / "parser" - val names = Seq("SparkSqlLexer", "SparkSqlParser", "SparkSqlParser_FromClauseParser", "SparkSqlParser_IdentifiersParser", "SparkSqlParser_SelectClauseParser") - names.map(name => pkg / (name + ".java")) - }, // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce // in order to generate golden files. This is only required for developers who are adding new // new query tests. - fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") } - ) ++ antlrSettings + fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }, + // ANTLR code-generation step. + // + // This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of + // build errors in the current plugin. + logLevel in Compile := Level.Debug, + // Create Parser from ANTLR grammar files. + sourceGenerators in Compile <+= Def.task { + val log = streams.value.log + + val grammarFileNames = Seq( + "SparkSqlLexer.g", + "SparkSqlParser.g") + val sourceDir = (sourceDirectory in Compile).value / "antlr3" + val targetDir = (sourceManaged in Compile).value + + // Create default ANTLR Tool. + val antlr = new org.antlr.Tool + + // Setup input and output directories. + antlr.setInputDirectory(sourceDir.getPath) + antlr.setOutputDirectory(targetDir.getPath) + antlr.setForceRelativeOutput(true) + antlr.setMake(true) + + // Add grammar files. + grammarFileNames.flatMap(g => (sourceDir ** g).get).foreach { g => + val relPath = (g relativeTo sourceDir).get.getPath + log.info("ANTLR: Grammar file '%s' detected.".format(relPath)) + antlr.addGrammarFile(relPath) + } + // Generate the parser. + antlr.process + if (antlr.getNumErrors > 0) { + log.error("ANTLR: Caught %d build errors.".format(antlr.getNumErrors)) + } + + // Return all generated java files. + (targetDir ** "*.java").get.toSeq + }, + // Include ANTLR token files. 
+ resourceGenerators in Compile <+= Def.task { + ((sourceManaged in Compile).value ** "*.token").get.toSeq + } + ) } object Assembly { diff --git a/project/plugins.sbt b/project/plugins.sbt index f172dc9c1f0e..15ba3a36d51c 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -4,8 +4,6 @@ resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/release resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/" -resolvers += "stefri" at "http://stefri.github.io/repo/releases" - addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0") @@ -26,8 +24,8 @@ addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2") addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2") -addSbtPlugin("com.github.stefri" % "sbt-antlr" % "0.5.3") - libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3" libraryDependencies += "org.ow2.asm" % "asm-commons" % "5.0.3" + +libraryDependencies += "org.antlr" % "antlr" % "3.5.2" From 84b006bc92e029ff7a79e01975b96f500ddd3474 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Thu, 31 Dec 2015 01:08:59 +0100 Subject: [PATCH 12/14] Fix deps. --- dev/deps/spark-deps-hadoop-2.3 | 20 ++++++++++---------- dev/deps/spark-deps-hadoop-2.4 | 20 ++++++++++---------- project/SparkBuild.scala | 4 ++-- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 index 6014d50c6b6f..e940a328fc50 100644 --- a/dev/deps/spark-deps-hadoop-2.3 +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -1,6 +1,3 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.5.11.jar -ST4-4.0.4.jar activation-1.1.1.jar akka-actor_2.10-2.3.11.jar akka-remote_2.10-2.3.11.jar @@ -14,19 +11,19 @@ asm-3.1.jar asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7-tests.jar avro-ipc-1.7.7.jar +avro-ipc-1.7.7-tests.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar bcprov-jdk15on-1.51.jar bonecp-0.8.0.RELEASE.jar -breeze-macros_2.10-0.11.2.jar breeze_2.10-0.11.2.jar +breeze-macros_2.10-0.11.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar calcite-linq4j-1.2.0-incubating.jar -chill-java-0.5.0.jar chill_2.10-0.5.0.jar +chill-java-0.5.0.jar commons-beanutils-1.7.0.jar commons-beanutils-core-1.8.0.jar commons-cli-1.2.jar @@ -90,8 +87,9 @@ jackson-module-scala_2.10-2.4.4.jar jackson-xc-1.9.13.jar janino-2.7.8.jar jansi-1.4.jar -java-xmlbuilder-1.0.jar +JavaEWAH-0.3.2.jar javax.inject-1.jar +java-xmlbuilder-1.0.jar javax.servlet-3.0.0.v201112011016.jar javolution-5.5.1.jar jaxb-api-2.2.2.jar @@ -156,19 +154,21 @@ py4j-0.9.jar pyrolite-4.9.jar quasiquotes_2.10-2.0.0-M8.jar reflectasm-1.07-shaded.jar +RoaringBitmap-0.5.11.jar scala-compiler-2.10.5.jar scala-library-2.10.5.jar -scala-reflect-2.10.5.jar scalap-2.10.5.jar +scala-reflect-2.10.5.jar servlet-api-2.5.jar slf4j-api-1.7.10.jar slf4j-log4j12-1.7.10.jar snappy-0.2.jar snappy-java-1.1.2.jar -spire-macros_2.10-0.7.4.jar spire_2.10-0.7.4.jar -stax-api-1.0-2.jar +spire-macros_2.10-0.7.4.jar +ST4-4.0.4.jar stax-api-1.0.1.jar +stax-api-1.0-2.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 index f56e6f4393e7..e7381009bcf5 100644 --- a/dev/deps/spark-deps-hadoop-2.4 +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -1,6 +1,3 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.5.11.jar -ST4-4.0.4.jar activation-1.1.1.jar akka-actor_2.10-2.3.11.jar akka-remote_2.10-2.3.11.jar @@ -14,19 +11,19 @@ asm-3.1.jar 
asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7-tests.jar avro-ipc-1.7.7.jar +avro-ipc-1.7.7-tests.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar bcprov-jdk15on-1.51.jar bonecp-0.8.0.RELEASE.jar -breeze-macros_2.10-0.11.2.jar breeze_2.10-0.11.2.jar +breeze-macros_2.10-0.11.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar calcite-linq4j-1.2.0-incubating.jar -chill-java-0.5.0.jar chill_2.10-0.5.0.jar +chill-java-0.5.0.jar commons-beanutils-1.7.0.jar commons-beanutils-core-1.8.0.jar commons-cli-1.2.jar @@ -90,8 +87,9 @@ jackson-module-scala_2.10-2.4.4.jar jackson-xc-1.9.13.jar janino-2.7.8.jar jansi-1.4.jar -java-xmlbuilder-1.0.jar +JavaEWAH-0.3.2.jar javax.inject-1.jar +java-xmlbuilder-1.0.jar javax.servlet-3.0.0.v201112011016.jar javolution-5.5.1.jar jaxb-api-2.2.2.jar @@ -157,19 +155,21 @@ py4j-0.9.jar pyrolite-4.9.jar quasiquotes_2.10-2.0.0-M8.jar reflectasm-1.07-shaded.jar +RoaringBitmap-0.5.11.jar scala-compiler-2.10.5.jar scala-library-2.10.5.jar -scala-reflect-2.10.5.jar scalap-2.10.5.jar +scala-reflect-2.10.5.jar servlet-api-2.5.jar slf4j-api-1.7.10.jar slf4j-log4j12-1.7.10.jar snappy-0.2.jar snappy-java-1.1.2.jar -spire-macros_2.10-0.7.4.jar spire_2.10-0.7.4.jar -stax-api-1.0-2.jar +spire-macros_2.10-0.7.4.jar +ST4-4.0.4.jar stax-api-1.0.1.jar +stax-api-1.0-2.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 9b5425dfe899..43adddca894e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -454,9 +454,9 @@ object Hive { // Return all generated java files. (targetDir ** "*.java").get.toSeq }, - // Include ANTLR token files. + // Include ANTLR tokens files. resourceGenerators in Compile <+= Def.task { - ((sourceManaged in Compile).value ** "*.token").get.toSeq + ((sourceManaged in Compile).value ** "*.tokens").get.toSeq } ) } From edfbd3565faa3a8e673ae64903de2a0219578a76 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Thu, 31 Dec 2015 01:30:22 +0100 Subject: [PATCH 13/14] Revert deps. 
--- dev/deps/spark-deps-hadoop-2.3 | 20 ++++++++++---------- dev/deps/spark-deps-hadoop-2.4 | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 index e940a328fc50..6014d50c6b6f 100644 --- a/dev/deps/spark-deps-hadoop-2.3 +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -1,3 +1,6 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar activation-1.1.1.jar akka-actor_2.10-2.3.11.jar akka-remote_2.10-2.3.11.jar @@ -11,19 +14,19 @@ asm-3.1.jar asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7.jar avro-ipc-1.7.7-tests.jar +avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar bcprov-jdk15on-1.51.jar bonecp-0.8.0.RELEASE.jar -breeze_2.10-0.11.2.jar breeze-macros_2.10-0.11.2.jar +breeze_2.10-0.11.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar calcite-linq4j-1.2.0-incubating.jar -chill_2.10-0.5.0.jar chill-java-0.5.0.jar +chill_2.10-0.5.0.jar commons-beanutils-1.7.0.jar commons-beanutils-core-1.8.0.jar commons-cli-1.2.jar @@ -87,9 +90,8 @@ jackson-module-scala_2.10-2.4.4.jar jackson-xc-1.9.13.jar janino-2.7.8.jar jansi-1.4.jar -JavaEWAH-0.3.2.jar -javax.inject-1.jar java-xmlbuilder-1.0.jar +javax.inject-1.jar javax.servlet-3.0.0.v201112011016.jar javolution-5.5.1.jar jaxb-api-2.2.2.jar @@ -154,21 +156,19 @@ py4j-0.9.jar pyrolite-4.9.jar quasiquotes_2.10-2.0.0-M8.jar reflectasm-1.07-shaded.jar -RoaringBitmap-0.5.11.jar scala-compiler-2.10.5.jar scala-library-2.10.5.jar -scalap-2.10.5.jar scala-reflect-2.10.5.jar +scalap-2.10.5.jar servlet-api-2.5.jar slf4j-api-1.7.10.jar slf4j-log4j12-1.7.10.jar snappy-0.2.jar snappy-java-1.1.2.jar -spire_2.10-0.7.4.jar spire-macros_2.10-0.7.4.jar -ST4-4.0.4.jar -stax-api-1.0.1.jar +spire_2.10-0.7.4.jar stax-api-1.0-2.jar +stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 index e7381009bcf5..f56e6f4393e7 100644 --- a/dev/deps/spark-deps-hadoop-2.4 +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -1,3 +1,6 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar activation-1.1.1.jar akka-actor_2.10-2.3.11.jar akka-remote_2.10-2.3.11.jar @@ -11,19 +14,19 @@ asm-3.1.jar asm-commons-3.1.jar asm-tree-3.1.jar avro-1.7.7.jar -avro-ipc-1.7.7.jar avro-ipc-1.7.7-tests.jar +avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar bcprov-jdk15on-1.51.jar bonecp-0.8.0.RELEASE.jar -breeze_2.10-0.11.2.jar breeze-macros_2.10-0.11.2.jar +breeze_2.10-0.11.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar calcite-linq4j-1.2.0-incubating.jar -chill_2.10-0.5.0.jar chill-java-0.5.0.jar +chill_2.10-0.5.0.jar commons-beanutils-1.7.0.jar commons-beanutils-core-1.8.0.jar commons-cli-1.2.jar @@ -87,9 +90,8 @@ jackson-module-scala_2.10-2.4.4.jar jackson-xc-1.9.13.jar janino-2.7.8.jar jansi-1.4.jar -JavaEWAH-0.3.2.jar -javax.inject-1.jar java-xmlbuilder-1.0.jar +javax.inject-1.jar javax.servlet-3.0.0.v201112011016.jar javolution-5.5.1.jar jaxb-api-2.2.2.jar @@ -155,21 +157,19 @@ py4j-0.9.jar pyrolite-4.9.jar quasiquotes_2.10-2.0.0-M8.jar reflectasm-1.07-shaded.jar -RoaringBitmap-0.5.11.jar scala-compiler-2.10.5.jar scala-library-2.10.5.jar -scalap-2.10.5.jar scala-reflect-2.10.5.jar +scalap-2.10.5.jar servlet-api-2.5.jar slf4j-api-1.7.10.jar slf4j-log4j12-1.7.10.jar snappy-0.2.jar snappy-java-1.1.2.jar -spire_2.10-0.7.4.jar spire-macros_2.10-0.7.4.jar -ST4-4.0.4.jar -stax-api-1.0.1.jar 
+spire_2.10-0.7.4.jar stax-api-1.0-2.jar +stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar From e3e2f72d8080f8e0040be351032c7e3b24289c77 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Thu, 31 Dec 2015 10:07:30 +0100 Subject: [PATCH 14/14] Remove debugging code from the build code. Do not use SBT internal API: <+=. --- project/SparkBuild.scala | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 43adddca894e..588e97f64e05 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -384,6 +384,7 @@ object SQL { } object Hive { + lazy val settings = Seq( javaOptions += "-XX:MaxPermSize=256m", // Specially disable assertions since some Hive tests fail them @@ -418,9 +419,8 @@ object Hive { // // This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of // build errors in the current plugin. - logLevel in Compile := Level.Debug, // Create Parser from ANTLR grammar files. - sourceGenerators in Compile <+= Def.task { + sourceGenerators in Compile += Def.task { val log = streams.value.log val grammarFileNames = Seq( @@ -439,10 +439,10 @@ object Hive { antlr.setMake(true) // Add grammar files. - grammarFileNames.flatMap(g => (sourceDir ** g).get).foreach { g => - val relPath = (g relativeTo sourceDir).get.getPath - log.info("ANTLR: Grammar file '%s' detected.".format(relPath)) - antlr.addGrammarFile(relPath) + grammarFileNames.flatMap(gFileName => (sourceDir ** gFileName).get).foreach { gFilePath => + val relGFilePath = (gFilePath relativeTo sourceDir).get.getPath + log.info("ANTLR: Grammar file '%s' detected.".format(relGFilePath)) + antlr.addGrammarFile(relGFilePath) } // Generate the parser. @@ -453,11 +453,11 @@ object Hive { // Return all generated java files. (targetDir ** "*.java").get.toSeq - }, + }.taskValue, // Include ANTLR tokens files. - resourceGenerators in Compile <+= Def.task { + resourceGenerators in Compile += Def.task { ((sourceManaged in Compile).value ** "*.tokens").get.toSeq - } + }.taskValue ) }