diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala index 3bc8e9a5e03c..bcf125f149c9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala @@ -125,6 +125,9 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi case w: Window => windowToSQL(w) + case g: Generate => + generateToSQL(g) + case Limit(limitExpr, child) => s"${toSQL(child)} LIMIT ${limitExpr.sql}" @@ -221,6 +224,30 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi ) } + private def generateToSQL(g: Generate): String = { + val columnAliases = g.generatorOutput.map(_.sql).mkString(",") + + val childSQL = if (g.child == OneRowRelation) { + // This only happens when we put UDTF in project list and there is no FROM clause. Because we + // always generate LATERAL VIEW for `Generate`, here we use a trick to put a dummy sub-query + // after FROM clause, so that we can generate a valid LATERAL VIEW SQL string. + s"(SELECT 1) ${SQLBuilder.newSubqueryName}" + } + else { + toSQL(g.child) + } + + build( + childSQL, + "LATERAL VIEW", + if (g.outer) "OUTER" else "", + g.generator.sql, + SQLBuilder.newSubqueryName, + "AS", + columnAliases + ) + } + private def sameOutput(output1: Seq[Attribute], output2: Seq[Attribute]): Boolean = output1.size == output2.size && output1.zip(output2).forall(pair => pair._1.semanticEquals(pair._2)) @@ -333,6 +360,9 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi // operators on top of a table relation, merge the sample information into `SQLTable` of // that table relation, as we can only convert table sample to standard SQL string. ResolveSQLTable, + // Re-order operators to let them generate legal SQL string, e.g. we should push down + // `Generate` through `Filter`. We should enrich this rule in the future. + ReOrderOperators, // Insert sub queries on top of operators that need to appear after FROM clause. AddSubquery ) @@ -368,6 +398,14 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi } } + object ReOrderOperators extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + case g @ Generate(_, _, _, _, _, f: Filter) => + val newGenerate = g.copy(child = f.child) + f.copy(child = newGenerate) + } + } + object AddSubquery extends Rule[LogicalPlan] { override def apply(tree: LogicalPlan): LogicalPlan = tree transformUp { // This branch handles aggregate functions within HAVING clauses. For example: @@ -408,6 +446,7 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi case _: LocalLimit => plan case _: GlobalLimit => plan case _: SQLTable => plan + case _: Generate => plan case OneRowRelation => plan case _ => addSubquery(plan) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala index f02ecb48d53a..721d5f1ec972 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive import scala.util.control.NonFatal +import org.apache.spark.sql.Column import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SQLTestUtils @@ -45,12 +46,25 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { .select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write .saveAsTable("parquet_t2") + + def createArray(id: Column): Column = { + when(id % 3 === 0, lit(null)).otherwise(array('id, 'id + 1)) + } + + sqlContext + .range(10) + .select( + createArray('id).as("arr"), + array(array('id), createArray('id)).as("arr2"), 'id) + .write + .saveAsTable("parquet_t3") } override protected def afterAll(): Unit = { sql("DROP TABLE IF EXISTS parquet_t0") sql("DROP TABLE IF EXISTS parquet_t1") sql("DROP TABLE IF EXISTS parquet_t2") + sql("DROP TABLE IF EXISTS parquet_t3") sql("DROP TABLE IF EXISTS t0") } @@ -568,4 +582,84 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { |HAVING MAX(a.KEY) > 0 """.stripMargin) } + + test("generator in project list without FROM clause") { + checkHiveQl("SELECT EXPLODE(ARRAY(1,2,3))") + checkHiveQl("SELECT EXPLODE(ARRAY(1,2,3)) AS val") + } + + test("generator in project list with non-referenced table") { + checkHiveQl("SELECT EXPLODE(ARRAY(1,2,3)) FROM t0") + checkHiveQl("SELECT EXPLODE(ARRAY(1,2,3)) AS val FROM t0") + } + + test("generator in project list with referenced table") { + checkHiveQl("SELECT EXPLODE(arr) FROM parquet_t3") + checkHiveQl("SELECT EXPLODE(arr) AS val FROM parquet_t3") + } + + test("generator in project list with non-UDTF expressions") { + checkHiveQl("SELECT EXPLODE(arr), id FROM parquet_t3") + checkHiveQl("SELECT EXPLODE(arr) AS val, id as a FROM parquet_t3") + } + + test("generator in lateral view") { + checkHiveQl("SELECT val, id FROM parquet_t3 LATERAL VIEW EXPLODE(arr) exp AS val") + checkHiveQl("SELECT val, id FROM parquet_t3 LATERAL VIEW OUTER EXPLODE(arr) exp AS val") + } + + test("generator in lateral view with ambiguous names") { + checkHiveQl( + """ + |SELECT exp.id, parquet_t3.id + |FROM parquet_t3 + |LATERAL VIEW EXPLODE(arr) exp AS id + """.stripMargin) + checkHiveQl( + """ + |SELECT exp.id, parquet_t3.id + |FROM parquet_t3 + |LATERAL VIEW OUTER EXPLODE(arr) exp AS id + """.stripMargin) + } + + test("nested generator in lateral view") { + checkHiveQl( + """ + |SELECT val, id + |FROM parquet_t3 + |LATERAL VIEW EXPLODE(arr2) exp1 AS nested_array + |LATERAL VIEW EXPLODE(nested_array) exp1 AS val + """.stripMargin) + + checkHiveQl( + """ + |SELECT val, id + |FROM parquet_t3 + |LATERAL VIEW EXPLODE(arr2) exp1 AS nested_array + |LATERAL VIEW OUTER EXPLODE(nested_array) exp1 AS val + """.stripMargin) + } + + test("generate with other operators") { + checkHiveQl( + """ + |SELECT EXPLODE(arr) AS val, id + |FROM parquet_t3 + |WHERE id > 2 + |ORDER BY val + |LIMIT 5 + """.stripMargin) + + checkHiveQl( + """ + |SELECT val, id + |FROM parquet_t3 + |LATERAL VIEW EXPLODE(arr2) exp1 AS nested_array + |LATERAL VIEW EXPLODE(nested_array) exp1 AS val + |WHERE val > 2 + |ORDER BY val + |LIMIT 5 + """.stripMargin) + } }