Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package org.apache.spark.sql.catalyst.parser

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
Expand Down Expand Up @@ -426,4 +428,68 @@ class PlanParserSuite extends PlanTest {
"Number of aliases must match the number of fields in an inline table.")
intercept[ArrayIndexOutOfBoundsException](parsePlan("values (1, 'a'), (2, 'b', 5Y)"))
}

// Checks the logical plans parsed from UNION ALL queries whose operands are grouped with
// nested parentheses inside an aliased subquery.
test("nesting UNION") {
// Three-way UNION ALL in which the first two operands are explicitly grouped by parentheses.
val parsed = parsePlan(
"""
|SELECT `u_1`.`id` FROM (((SELECT `t0`.`id` FROM `default`.`t0`)
|UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) UNION ALL
|(SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1
""".stripMargin)

// Plan that the query above is compared against.
val expected = Project(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: @viirya could you update this to use the DSL and assertEqual? It makes this a bit easier to read.

BTW this test is very similar to the following test case: https://github.com/apache/spark/blob/master/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala#L384-L392

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm. indeed. If so, I think I can close this pr now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hvanhovell Does the new ANTLR4 parser solve this natively?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@viirya the new parser handles nested queries a lot better. This is mainly due to ANTLR4's better parsing algorithms.

"u_1.id".attr :: Nil,
SubqueryAlias("u_1",
Union(
Union(
Project(
"t0.id".attr :: Nil,
UnresolvedRelation(TableIdentifier("t0", Some("default")), None)),
Project(
"t0.id".attr :: Nil,
UnresolvedRelation(TableIdentifier("t0", Some("default")), None))),
Project(
"t0.id".attr :: Nil,
UnresolvedRelation(TableIdentifier("t0", Some("default")), None)))))

comparePlans(parsed, expected)

// The same three-way UNION ALL written without the inner grouping parentheses; it is compared
// against the same expected plan as the explicitly grouped query.
val parsedSame = parsePlan(
"""
|SELECT `u_1`.`id` FROM ((SELECT `t0`.`id` FROM `default`.`t0`)
|UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`) UNION ALL
|(SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1
""".stripMargin)

comparePlans(parsedSame, expected)

// Four-way UNION ALL where each additional operand adds one more level of parentheses.
val parsed2 = parsePlan(
"""
|SELECT `u_1`.`id` FROM ((((SELECT `t0`.`id` FROM `default`.`t0`)
|UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) UNION ALL
|(SELECT `t0`.`id` FROM `default`.`t0`))
|UNION ALL (SELECT `t0`.`id` FROM `default`.`t0`)) AS u_1
""".stripMargin)

// Expected plan: Union nodes nested on the left-hand side, one Project over default.t0 per
// SELECT, all wrapped in the u_1 subquery alias.
val expected2 = Project(
"u_1.id".attr :: Nil,
SubqueryAlias("u_1",
Union(
Union(
Union(
Project(
"t0.id".attr :: Nil,
UnresolvedRelation(TableIdentifier("t0", Some("default")), None)),
Project(
"t0.id".attr :: Nil,
UnresolvedRelation(TableIdentifier("t0", Some("default")), None))),
Project(
"t0.id".attr :: Nil,
UnresolvedRelation(TableIdentifier("t0", Some("default")), None))),
Project(
"t0.id".attr :: Nil,
UnresolvedRelation(TableIdentifier("t0", Some("default")), None)))))

comparePlans(parsed2, expected2)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,91 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
}
}

// Runs long UNION ALL chains and a parenthesized nested UNION ALL against the `src` table and
// checks the resulting row counts.
test("nested union") {
// The result of this EXPLAIN statement is discarded; the call only has to complete.
sql(
"""
| EXPLAIN
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the point of this SQL statement? See if it compiles?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous solution for the ANTLR3 parser hangs on this query.

| SELECT count(1) FROM (
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
|
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
|
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
|
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
|
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src) src
""".stripMargin)

// Row count of `src` on its own, used as the baseline for the assertions further down.
val countForSrc = sql("SELECT count(1) FROM src").first()

// Row count of a subquery that chains 25 SELECTs over `src` with UNION ALL.
val countForUnion25Src = sql(
"""
| SELECT count(1) FROM (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a bit redundant to write almost the same query as above...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ha. sure. let me update this.

| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
|
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
|
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
|
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
|
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src UNION ALL
| SELECT key, value FROM src) src
""".stripMargin).first()

// The test expects `src` to contain 500 rows; UNION ALL keeps duplicates, so the 25-way union
// is expected to return 25 times as many.
assert(countForSrc.getLong(0) == 500)
assert(countForUnion25Src.getLong(0) == 500 * 25)

// Three-way UNION ALL whose operands are grouped with nested parentheses, collected as rows.
val nested = sql(
"""
| SELECT u_1.key FROM (((SELECT key FROM src)
| UNION ALL (SELECT key FROM src)) UNION ALL
| (SELECT key FROM src)) AS u_1
""".stripMargin).collect()

// Each of the three branches contributes every row of `src`.
assert(nested.size == 500 * 3)
}

test("parse HQL set commands") {
// Adapted from its SQL counterpart.
val testKey = "spark.sql.key.usedfortestonly"
Expand Down