Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
-- List of configurations the test suite is run against:
--SET spark.sql.autoBroadcastJoinThreshold=10485760
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false

CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a);
CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a);

Expand Down
5 changes: 5 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/natural-join.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
-- List of configurations the test suite is run against:
--SET spark.sql.autoBroadcastJoinThreshold=10485760
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false

create temporary view nt1 as select * from values
("one", 1),
("two", 2),
Expand Down
5 changes: 5 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/outer-join.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
-- List of configurations the test suite is run against:
--SET spark.sql.autoBroadcastJoinThreshold=10485760
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false

-- SPARK-17099: Incorrect result when HAVING clause is added to group by query
CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES
(-234), (145), (367), (975), (298)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
-- Tests EXISTS subquery support. Tests Exists subquery
-- used in Joins (both when joins occur in outer and subquery blocks)
-- List of configurations the test suite is run against:
--SET spark.sql.autoBroadcastJoinThreshold=10485760
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false

CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
(100, "emp 1", date "2005-01-01", 100.00D, 10),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
-- A test suite for IN JOINS in parent side, subquery, and both predicate subquery
-- It includes correlated cases.
-- List of configurations the test suite is run against:
--SET spark.sql.autoBroadcastJoinThreshold=10485760
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false

create temporary view t1 as select * from values
("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
-- A test suite for not-in-joins in parent side, subquery, and both predicate subquery
-- It includes correlated cases.
-- List of configurations the test suite is run against:
--SET spark.sql.autoBroadcastJoinThreshold=10485760
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false

create temporary view t1 as select * from values
("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import org.apache.spark.sql.types.StructType
* The format for input files is simple:
* 1. A list of SQL queries separated by semicolon.
* 2. Lines starting with -- are treated as comments and ignored.
* 3. Lines starting with --SET are used to run the file with the following set of configs.
*
* For example:
* {{{
Expand Down Expand Up @@ -138,18 +139,58 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
private def runTest(testCase: TestCase): Unit = {
val input = fileToString(new File(testCase.inputFile))

val (comments, code) = input.split("\n").partition(_.startsWith("--"))
val configSets = {
val configLines = comments.filter(_.startsWith("--SET")).map(_.substring(5))
val configs = configLines.map(_.split(",").map { confAndValue =>
val (conf, value) = confAndValue.span(_ != '=')
conf.trim -> value.substring(1).trim
})
// When we are regenerating the golden files we don't need to run all the configs as they
// all need to return the same result
if (regenerateGoldenFiles && configs.nonEmpty) {
configs.take(1)
} else {
configs
}
}
// List of SQL queries to run
val queries: Seq[String] = {
val cleaned = input.split("\n").filterNot(_.startsWith("--")).mkString("\n")
// note: this is not a robust way to split queries using semicolon, but works for now.
cleaned.split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq
// note: this is not a robust way to split queries using semicolon, but works for now.
val queries = code.mkString("\n").split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq

if (configSets.isEmpty) {
runQueries(queries, testCase.resultFile, None)
} else {
configSets.foreach { configSet =>
try {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to do the try-catch inside runQueries, so that we can know which config cause a failure.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mmh, but we know which are the configs causing a failure also here (and we are logging them). Am I missing something from your comment?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah sorry I misread the code.

runQueries(queries, testCase.resultFile, Some(configSet))
} catch {
case e: Throwable =>
val configs = configSet.map {
case (k, v) => s"$k=$v"
}
logError(s"Error using configs: ${configs.mkString(",")}")
throw e
}
}
}
}

private def runQueries(
queries: Seq[String],
resultFileName: String,
configSet: Option[Seq[(String, String)]]): Unit = {
// Create a local SparkSession to have stronger isolation between different test cases.
// This does not isolate catalog changes.
val localSparkSession = spark.newSession()
loadTestData(localSparkSession)

if (configSet.isDefined) {
// Execute the list of set operations in order to add the desired configs
val setOperations = configSet.get.map { case (key, value) => s"set $key=$value" }
logInfo(s"Setting configs: ${setOperations.mkString(", ")}")
setOperations.foreach(localSparkSession.sql)
}
// Run the SQL queries preparing them for comparison.
val outputs: Seq[QueryOutput] = queries.map { sql =>
val (schema, output) = getNormalizedResult(localSparkSession, sql)
Expand All @@ -167,7 +208,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
s"-- Number of queries: ${outputs.size}\n\n\n" +
outputs.zipWithIndex.map{case (qr, i) => qr.toString(i)}.mkString("\n\n\n") + "\n"
}
val resultFile = new File(testCase.resultFile)
val resultFile = new File(resultFileName)
val parent = resultFile.getParentFile
if (!parent.exists()) {
assert(parent.mkdirs(), "Could not create directory: " + parent)
Expand All @@ -177,7 +218,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {

// Read back the golden file.
val expectedOutputs: Seq[QueryOutput] = {
val goldenOutput = fileToString(new File(testCase.resultFile))
val goldenOutput = fileToString(new File(resultFileName))
val segments = goldenOutput.split("-- !query.+\n")

// each query has 3 segments, plus the header
Expand Down