@@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.expressions._
2626import org .apache .spark .sql .catalyst .expressions .aggregate .Count
2727import org .apache .spark .sql .catalyst .plans ._
2828import org .apache .spark .sql .catalyst .plans .logical ._
29- import org .apache .spark .sql .catalyst .trees .CurrentOrigin
3029import org .apache .spark .sql .types ._
3130import org .apache .spark .unsafe .types .CalendarInterval
3231import org .apache .spark .util .random .RandomSampler
@@ -36,12 +35,7 @@ import org.apache.spark.util.random.RandomSampler
3635 * This class translates SQL to Catalyst [[LogicalPlan ]]s or [[Expression ]]s.
3736 */
3837private [sql] class CatalystQl (val conf : ParserConf = SimpleParserConf ()) extends ParserInterface {
39- object Token {
40- def unapply (node : ASTNode ): Some [(String , List [ASTNode ])] = {
41- CurrentOrigin .setPosition(node.line, node.positionInLine)
42- node.pattern
43- }
44- }
38+ import ParserUtils ._
4539
4640 /**
4741 * The safeParse method allows a user to focus on the parsing/AST transformation logic. This
@@ -82,102 +76,6 @@ private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) extends
8276 def parseTableIdentifier (sql : String ): TableIdentifier =
8377 safeParse(sql, ParseDriver .parseTableName(sql, conf))(extractTableIdent)
8478
85- def parseDdl (sql : String ): Seq [Attribute ] = {
86- safeParse(sql, ParseDriver .parseExpression(sql, conf)) { ast =>
87- val Token (" TOK_CREATETABLE" , children) = ast
88- children
89- .find(_.text == " TOK_TABCOLLIST" )
90- .getOrElse(sys.error(" No columnList!" ))
91- .flatMap(_.children.map(nodeToAttribute))
92- }
93- }
94-
95- protected def getClauses (
96- clauseNames : Seq [String ],
97- nodeList : Seq [ASTNode ]): Seq [Option [ASTNode ]] = {
98- var remainingNodes = nodeList
99- val clauses = clauseNames.map { clauseName =>
100- val (matches, nonMatches) = remainingNodes.partition(_.text.toUpperCase == clauseName)
101- remainingNodes = nonMatches ++ (if (matches.nonEmpty) matches.tail else Nil )
102- matches.headOption
103- }
104-
105- if (remainingNodes.nonEmpty) {
106- sys.error(
107- s """ Unhandled clauses: ${remainingNodes.map(_.treeString).mkString(" \n " )}.
108- |You are likely trying to use an unsupported Hive feature." """ .stripMargin)
109- }
110- clauses
111- }
112-
113- protected def getClause (clauseName : String , nodeList : Seq [ASTNode ]): ASTNode =
114- getClauseOption(clauseName, nodeList).getOrElse(sys.error(
115- s " Expected clause $clauseName missing from ${nodeList.map(_.treeString).mkString(" \n " )}" ))
116-
117- protected def getClauseOption (clauseName : String , nodeList : Seq [ASTNode ]): Option [ASTNode ] = {
118- nodeList.filter { case ast : ASTNode => ast.text == clauseName } match {
119- case Seq (oneMatch) => Some (oneMatch)
120- case Seq () => None
121- case _ => sys.error(s " Found multiple instances of clause $clauseName" )
122- }
123- }
124-
125- protected def nodeToAttribute (node : ASTNode ): Attribute = node match {
126- case Token (" TOK_TABCOL" , Token (colName, Nil ) :: dataType :: Nil ) =>
127- AttributeReference (colName, nodeToDataType(dataType), nullable = true )()
128- case _ =>
129- noParseRule(" Attribute" , node)
130- }
131-
132- protected def nodeToDataType (node : ASTNode ): DataType = node match {
133- case Token (" TOK_DECIMAL" , precision :: scale :: Nil ) =>
134- DecimalType (precision.text.toInt, scale.text.toInt)
135- case Token (" TOK_DECIMAL" , precision :: Nil ) =>
136- DecimalType (precision.text.toInt, 0 )
137- case Token (" TOK_DECIMAL" , Nil ) => DecimalType .USER_DEFAULT
138- case Token (" TOK_BIGINT" , Nil ) => LongType
139- case Token (" TOK_INT" , Nil ) => IntegerType
140- case Token (" TOK_TINYINT" , Nil ) => ByteType
141- case Token (" TOK_SMALLINT" , Nil ) => ShortType
142- case Token (" TOK_BOOLEAN" , Nil ) => BooleanType
143- case Token (" TOK_STRING" , Nil ) => StringType
144- case Token (" TOK_VARCHAR" , Token (_, Nil ) :: Nil ) => StringType
145- case Token (" TOK_CHAR" , Token (_, Nil ) :: Nil ) => StringType
146- case Token (" TOK_FLOAT" , Nil ) => FloatType
147- case Token (" TOK_DOUBLE" , Nil ) => DoubleType
148- case Token (" TOK_DATE" , Nil ) => DateType
149- case Token (" TOK_TIMESTAMP" , Nil ) => TimestampType
150- case Token (" TOK_BINARY" , Nil ) => BinaryType
151- case Token (" TOK_LIST" , elementType :: Nil ) => ArrayType (nodeToDataType(elementType))
152- case Token (" TOK_STRUCT" , Token (" TOK_TABCOLLIST" , fields) :: Nil ) =>
153- StructType (fields.map(nodeToStructField))
154- case Token (" TOK_MAP" , keyType :: valueType :: Nil ) =>
155- MapType (nodeToDataType(keyType), nodeToDataType(valueType))
156- case _ =>
157- noParseRule(" DataType" , node)
158- }
159-
160- protected def nodeToStructField (node : ASTNode ): StructField = node match {
161- case Token (" TOK_TABCOL" , Token (fieldName, Nil ) :: dataType :: Nil ) =>
162- StructField (cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true )
163- case Token (" TOK_TABCOL" , Token (fieldName, Nil ) :: dataType :: comment :: Nil ) =>
164- val meta = new MetadataBuilder ().putString(" comment" , unquoteString(comment.text)).build()
165- StructField (cleanIdentifier(fieldName), nodeToDataType(dataType), nullable = true , meta)
166- case _ =>
167- noParseRule(" StructField" , node)
168- }
169-
170- protected def extractTableIdent (tableNameParts : ASTNode ): TableIdentifier = {
171- tableNameParts.children.map {
172- case Token (part, Nil ) => cleanIdentifier(part)
173- } match {
174- case Seq (tableOnly) => TableIdentifier (tableOnly)
175- case Seq (databaseName, table) => TableIdentifier (table, Some (databaseName))
176- case other => sys.error(" Hive only supports tables names like 'tableName' " +
177- s " or 'databaseName.tableName', found ' $other' " )
178- }
179- }
180-
18179 /**
18280 * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2))
18381 * is equivalent to
@@ -625,42 +523,6 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
625523 noParseRule(" Select" , node)
626524 }
627525
628- protected val escapedIdentifier = " `(.+)`" .r
629- protected val doubleQuotedString = " \" ([^\" ]+)\" " .r
630- protected val singleQuotedString = " '([^']+)'" .r
631-
632- protected def unquoteString (str : String ) = str match {
633- case singleQuotedString(s) => s
634- case doubleQuotedString(s) => s
635- case other => other
636- }
637-
638- /** Strips backticks from ident if present */
639- protected def cleanIdentifier (ident : String ): String = ident match {
640- case escapedIdentifier(i) => i
641- case plainIdent => plainIdent
642- }
643-
644- /* Case insensitive matches */
645- val COUNT = " (?i)COUNT" .r
646- val SUM = " (?i)SUM" .r
647- val AND = " (?i)AND" .r
648- val OR = " (?i)OR" .r
649- val NOT = " (?i)NOT" .r
650- val TRUE = " (?i)TRUE" .r
651- val FALSE = " (?i)FALSE" .r
652- val LIKE = " (?i)LIKE" .r
653- val RLIKE = " (?i)RLIKE" .r
654- val REGEXP = " (?i)REGEXP" .r
655- val IN = " (?i)IN" .r
656- val DIV = " (?i)DIV" .r
657- val BETWEEN = " (?i)BETWEEN" .r
658- val WHEN = " (?i)WHEN" .r
659- val CASE = " (?i)CASE" .r
660-
661- val INTEGRAL = " [+-]?\\ d+" .r
662- val DECIMAL = " [+-]?((\\ d+(\\ .\\ d*)?)|(\\ .\\ d+))" .r
663-
664526 protected def nodeToExpr (node : ASTNode ): Expression = node match {
665527 /* Attribute References */
666528 case Token (" TOK_TABLE_OR_COL" , Token (name, Nil ) :: Nil ) =>
@@ -1007,6 +869,4 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
1007869
1008870 protected def nodeToGenerator (node : ASTNode ): Generator = noParseRule(" Generator" , node)
1009871
1010- protected def noParseRule (msg : String , node : ASTNode ): Nothing = throw new NotImplementedError (
1011- s " [ $msg]: No parse rules for ASTNode type: ${node.tokenType}, tree: \n ${node.treeString}" )
1012872}
0 commit comments