From 612396c6c16e62428338790639d8e5afb2431eb6 Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Wed, 29 Nov 2023 11:02:48 -0800
Subject: [PATCH 01/63] Initial commit

---
 .../spark/sql/crossdbms/JdbcConnection.scala  | 101 ++++++++++++++++++
 .../sql/crossdbms/SQLQueryTestRunner.scala    |  85 +++++++++++++++
 2 files changed, 186 insertions(+)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala
new file mode 100644
index 0000000000000..a9be898306987
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.crossdbms
+
+import java.sql.{DriverManager, ResultSet}
+import java.util.Properties
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.sql.{DataFrame, Row}
+import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry
+
+private[sql] trait JdbcConnection {
+  /**
+   * Runs the given query.
+   * @return A Seq[String] representing the output.
+   */
+  def runQuery(query: String): Seq[String]
+
+  /**
+   * Drop the table with the given table name.
+   */
+  def dropTable(tableName: String): Unit
+
+  /**
+   * Create a table with the given table name and schema.
+   */
+  def createTable(tableName: String, schemaString: String): Unit
+
+  /**
+   * Load data from the given Spark Dataframe into the table with given name.
+   */
+  def loadData(df: DataFrame, tableName: String): Unit
+
+  /**
+   * Close the connection.
+   */
+  def close(): Unit
+}
+
+private[sql] case class PostgresConnection(connection_url: Option[String] = None)
+  extends JdbcConnection {
+
+  DriverRegistry.register("org.postgresql.Driver")
+  private final val DEFAULT_USER = "pg"
+  private final val DEFAULT_CONNECTION_URL =
+    s"jdbc:postgresql://localhost:5432/postgres?user=$DEFAULT_USER"
+  private val url = connection_url.getOrElse(DEFAULT_CONNECTION_URL)
+  private val conn = DriverManager.getConnection(url)
+  private val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)
+
+  def runQuery(query: String): Seq[String] = {
+    try {
+      val isResultSet = stmt.execute(query)
+      val rows = ArrayBuffer[Row]()
+      if (isResultSet) {
+        val rs = stmt.getResultSet
+        val metadata = rs.getMetaData
+        while (rs.next()) {
+          val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => rs.getObject(i)))
+          rows.append(row)
+        }
+      }
+      rows.map(_.mkString("\t")).toSeq
+    } catch {
+      case e: Throwable => Seq(e.toString)
+    }
+  }
+
+  def dropTable(tableName: String): Unit = {
+    val dropTableSql = s"DROP TABLE IF EXISTS $tableName"
+    stmt.executeUpdate(dropTableSql)
+  }
+
+  def createTable(tableName: String, schemaString: String): Unit = {
+    val createTableSql = s"CREATE TABLE $tableName ($schemaString)"
+    stmt.executeUpdate(createTableSql)
+  }
+
+  def loadData(df: DataFrame, tableName: String): Unit = {
+    df.write.option("driver", "org.postgresql.Driver").mode("append")
+      .jdbc(url, tableName, new Properties())
+  }
+
+  def close(): Unit = if (!conn.isClosed) conn.close()
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala
new file mode 100644
index 0000000000000..7751818476ba6
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.crossdbms
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.planning.PhysicalOperation
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase}
+import org.apache.spark.sql.test.SQLTestUtils
+
+trait SQLQueryTestRunner {
+
+  /**
+   * Runs a given query.
+   * @return Returns a tuple with first argument being the schema and the second argument being a
+   *         Sequence of strings representing the output.
+   */
+  def runQuery(query: String): (String, Seq[String])
+
+  /**
+   * Perform clean up, such as dropping tables and closing the database connection.
+   */
+  def cleanUp(): Unit
+}
+
+/**
+ * A runner that takes a JDBC connection and uses it to execute queries. We still need the local
+ * Spark session to do some things, such as generate the schema and load test data into
+ * dataframes, before creating tables in the database.
+ */
+private[sql] case class JdbcSQLQueryTestRunner(
+    connection: JdbcConnection, spark: SparkSession) extends SQLQueryTestRunner with SQLTestUtils {
+  setUp()
+
+  def runQuery(query: String): (String, Seq[String]) = {
+    def isSorted(plan: LogicalPlan): Boolean = plan match {
+      case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
+      case _: DescribeCommandBase
+        | _: DescribeColumnCommand
+        | _: DescribeRelation
+        | _: DescribeColumn => true
+      case PhysicalOperation(_, _, Sort(_, true, _)) => true
+
+      case _ => plan.children.iterator.exists(isSorted)
+    }
+
+    val df = spark.sql(query)
+    val schema = df.schema.catalogString
+    val output = connection.runQuery(query)
+    // Use Spark analyzed plan to check if the query result is already semantically sorted.
+    val result = if (isSorted(df.queryExecution.analyzed)) {
+      output
+    } else {
+      // Sort the answer manually if it isn't sorted.
+      output.sorted
+    }
+    // Use the Spark schema for schema generation.
+    (schema, result)
+  }
+
+  def cleanUp(): Unit = {
+    connection.close()
+  }
+
+  private def setUp(): Unit = {
+    loadTestData()
+  }
+
+  override def loadTestData(): Unit = {} // TODO: Load test data
+}
\ No newline at end of file

From 83e6a518e1c32f0362686a1da77c7b01c038fff3 Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Wed, 29 Nov 2023 11:33:17 -0800
Subject: [PATCH 02/63] Add CrossDbmsQueryTestSuite

---
 .../crossdbms/CrossDbmsQueryTestSuite.scala   | 114 ++++++++++++++++++
 .../spark/sql/crossdbms/JdbcConnection.scala  |  43 +++++++
 .../sql/crossdbms/SQLQueryTestRunner.scala    |  52 +-------
 3 files changed, 162 insertions(+), 47 deletions(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
new file mode 100644
index 0000000000000..930a7293bd6a1
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.crossdbms
+
+import java.io.File
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SQLQueryTestSuite
+import org.apache.spark.sql.catalyst.planning.PhysicalOperation
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DescribeColumn, DescribeRelation, Distinct, Generate, Join, LogicalPlan, Sample, Sort}
+import org.apache.spark.sql.catalyst.util.stringToFile
+import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase}
+
+class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging {
+  private val DBMS_MAPPING = Map(
+    "postgres" ->((connection_url: Option[String]) =>
+      JdbcSQLQueryTestRunner(PostgresConnection(connection_url))))
+
+  private val crossDbmsToGenerateGoldenFiles: String = System.getenv("REF_DBMS")
+  private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL")
+
+  override def ignoreList: Set[String] = super.ignoreList ++ Set(
+    // TODO: include all test files for now.
+  )
+
+  private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match {
+    case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
+    case _: DescribeCommandBase
+      | _: DescribeColumnCommand
+      | _: DescribeRelation
+      | _: DescribeColumn => true
+    case PhysicalOperation(_, _, Sort(_, true, _)) => true
+    case _ => plan.children.iterator.exists(isSemanticallySorted)
+  }
+
+  override protected def runQueries(
+      queries: Seq[String],
+      testCase: TestCase,
+      configSet: Seq[(String, String)]): Unit = {
+    // TODO: check if testcase is an Analyzer test and ignore if so
+    val localSparkSession = spark.newSession()
+
+    var runner: Option[SQLQueryTestRunner] = None
+
+    // Run the SQL queries preparing them for comparison.
+    val outputs: Seq[QueryTestOutput] = queries.map { sql =>
+      val (schema, output) =
+        if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) {
+          if (runner.isEmpty) {
+            val connectionUrl = if (customConnectionUrl.nonEmpty) {
+              Some(customConnectionUrl)
+            } else {
+              None
+            }
+            runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl))
+          }
+          val sparkDf = spark.sql(sql)
+          val schema = sparkDf.schema.catalogString
+          val output = runner.map(_.runQuery(sql)).get
+          // Use Spark analyzed plan to check if the query result is already semantically sorted
+          val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) {
+            output.sorted
+          } else {
+            // Sort the answer manually if it isn't sorted.
+            output
+          }
+          (schema, result)
+        } else {
+          handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql))
+        }
+      // We do some query canonicalization now.
+      val executionOutput = ExecutionOutput(
+        sql = sql,
+        schema = Some(schema),
+        output = normalizeTestResults(output.mkString("\n")))
+      if (testCase.isInstanceOf[CTETest]) {
+        expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput)
+      }
+      executionOutput
+    }
+    runner.foreach(_.cleanUp())
+
+    if (regenerateGoldenFiles) {
+      // Again, we are explicitly not using multi-line string due to stripMargin removing "|".
+ val goldenOutput = { + s"-- Automatically generated by ${getClass.getSimpleName}\n" + + outputs.mkString("\n\n\n") + "\n" + } + val resultFile = new File(testCase.resultFile) + val parent = resultFile.getParentFile + if (!parent.exists()) { + assert(parent.mkdirs(), "Could not create directory: " + parent) + } + stringToFile(resultFile, goldenOutput) + } + + readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index a9be898306987..4c23cd054e743 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -99,3 +99,46 @@ private[sql] case class PostgresConnection(connection_url: Option[String] = None def close(): Unit = if (!conn.isClosed) conn.close() } + +var runner: Option[SQLQueryTestRunner] = None + + // Run the SQL queries preparing them for comparison. + val outputs: Seq[QueryTestOutput] = queries.map { sql => + testCase match { + case _: AnalyzerTest => + val (_, output) = + handleExceptions(getNormalizedQueryAnalysisResult(localSparkSession, sql)) + // We do some query canonicalization now. + AnalyzerOutput( + sql = sql, + schema = None, + output = normalizeTestResults(output.mkString("\n"))) + case _ => + val (schema, output) = + if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { + if (runner.isEmpty) { + val connectionUrl = if (customConnectionUrl.nonEmpty) { + Some(customConnectionUrl) + } else { + None + } + runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl)) + } + val output = handleExceptions(runner.map(_.runQuery(sql)).get) + val schema = spark.sql(sql).schema.catalogString + (schema, output) + } else { + handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql)) + } + // We do some query canonicalization now. + val executionOutput = ExecutionOutput( + sql = sql, + schema = Some(schema), + output = normalizeTestResults(output.mkString("\n"))) + if (testCase.isInstanceOf[CTETest]) { + expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput) + } + executionOutput + } + } + jdbcRunner.foreach(_.cleanUp()) \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index 7751818476ba6..672a943380fe4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -17,20 +17,12 @@ package org.apache.spark.sql.crossdbms -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} -import org.apache.spark.sql.test.SQLTestUtils - trait SQLQueryTestRunner { /** * Runs a given query. - * @return Returns a tuple with first argument being the schema and the second argument being a - * Sequence of strings representing the output. */ - def runQuery(query: String): (String, Seq[String]) + def runQuery(query: String): Seq[String] /** * Perform clean up, such as dropping tables and closing the database connection. 
@@ -40,46 +32,12 @@ trait SQLQueryTestRunner { /** * A runner that takes a JDBC connection and uses it to execute queries. We still need the local - * Spark session to do some things, such as generate the schema and load test data into - * dataframes, before creating tables in the database. + * Spark session to do some things.. */ private[sql] case class JdbcSQLQueryTestRunner( - connection: JdbcConnection, spark: SparkSession) extends SQLQueryTestRunner with SQLTestUtils { - setUp() - - def runQuery(query: String): (String, Seq[String]) = { - def isSorted(plan: LogicalPlan): Boolean = plan match { - case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false - case _: DescribeCommandBase - | _: DescribeColumnCommand - | _: DescribeRelation - | _: DescribeColumn => true - case PhysicalOperation(_, _, Sort(_, true, _)) => true - - case _ => plan.children.iterator.exists(isSorted) - } - - val df = spark.sql(query) - val schema = df.schema.catalogString - val output = connection.runQuery(query) - // Use Spark analyzed plan to check if the query result is already semantically sorted. - val result = if (isSorted(df.queryExecution.analyzed)) { - output - } else { - // Sort the answer manually if it isn't sorted. - output.sorted - } - // Use the Spark schema for schema generation. - (schema, result) - } - - def cleanUp(): Unit = { - connection.close() - } + connection: JdbcConnection) extends SQLQueryTestRunner { - private def setUp(): Unit = { - loadTestData() - } + def runQuery(query: String): Seq[String] = connection.runQuery(query) - override def loadTestData(): Unit = {} // TODO: Load test data + def cleanUp(): Unit = connection.close() } \ No newline at end of file From c691f839b6b35e650919656d2a7d45cf2cacc7a1 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 12:55:45 -0800 Subject: [PATCH 03/63] Small changes for niceness --- .../org/apache/spark/sql/SQLQueryTestSuite.scala | 8 ++++++-- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 11 ++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 36b572c7e8b5d..9111aa472a519 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -928,10 +928,14 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper output: String) extends QueryTestOutput { override def toString: String = { // We are explicitly not using multi-line string due to stripMargin removing "|" in output. 
+ val schemaString = if (schema.nonEmpty) { + s"-- !query schema\n" + schema.get + "\n" + } else { + "" + } s"-- !query\n" + sql + "\n" + - s"-- !query schema\n" + - schema.get + "\n" + + schemaString + s"-- !query output\n" + output } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 930a7293bd6a1..9e5bedbe6c9fb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DescribeColumn, D import org.apache.spark.sql.catalyst.util.stringToFile import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} +// TODO: Add header class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { private val DBMS_MAPPING = Map( "postgres" ->((connection_url: Option[String]) => @@ -59,7 +60,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Run the SQL queries preparing them for comparison. val outputs: Seq[QueryTestOutput] = queries.map { sql => - val (schema, output) = + val output = if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { if (runner.isEmpty) { val connectionUrl = if (customConnectionUrl.nonEmpty) { @@ -70,7 +71,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl)) } val sparkDf = spark.sql(sql) - val schema = sparkDf.schema.catalogString val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { @@ -79,14 +79,15 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Sort the answer manually if it isn't sorted. output } - (schema, result) + result } else { - handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql)) + handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql))._2 } // We do some query canonicalization now. val executionOutput = ExecutionOutput( sql = sql, - schema = Some(schema), + // Don't care about the schema for this test. Only care about correctness. 
+ schema = None, output = normalizeTestResults(output.mkString("\n"))) if (testCase.isInstanceOf[CTETest]) { expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput) From 24a02f0dac1e9f2dd828b0d033acb66943ebc22a Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 13:18:14 -0800 Subject: [PATCH 04/63] Save point --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 58 ++++++++++++++++--- .../spark/sql/crossdbms/JdbcConnection.scala | 43 -------------- .../sql/crossdbms/SQLQueryTestRunner.scala | 12 ++-- 3 files changed, 58 insertions(+), 55 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 9e5bedbe6c9fb..f62c4793abeb7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.crossdbms import java.io.File +import java.util.Locale import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite @@ -25,6 +26,7 @@ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DescribeColumn, DescribeRelation, Distinct, Generate, Join, LogicalPlan, Sample, Sort} import org.apache.spark.sql.catalyst.util.stringToFile import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} +import org.apache.spark.util.Utils // TODO: Add header class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { @@ -35,9 +37,12 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { private val crossDbmsToGenerateGoldenFiles: String = System.getenv("REF_DBMS") private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") - override def ignoreList: Set[String] = super.ignoreList ++ Set( - // TODO: include all test files for now. - ) + override protected val inputFilePath = { + val originalInputs = new File(baseResourcePath, "inputs").getAbsolutePath + new File(originalInputs, crossDbmsToGenerateGoldenFiles).getAbsolutePath + } + override protected val goldenFilePath = new File( + baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false @@ -53,12 +58,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { queries: Seq[String], testCase: TestCase, configSet: Seq[(String, String)]): Unit = { - // TODO: check if testcase is an Analyzer test and ignore if so val localSparkSession = spark.newSession() var runner: Option[SQLQueryTestRunner] = None - - // Run the SQL queries preparing them for comparison. val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { @@ -97,7 +99,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { runner.foreach(_.cleanUp()) if (regenerateGoldenFiles) { - // Again, we are explicitly not using multi-line string due to stripMargin removing "|". 
val goldenOutput = { s"-- Automatically generated by ${getClass.getSimpleName}\n" + outputs.mkString("\n\n\n") + "\n" @@ -112,4 +113,47 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) } + + override def createScalaTestCase(testCase: TestCase): Unit = { + if (ignoreList.exists(t => + testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { + ignore(testCase.name) { + /* Do nothing */ + } + } else { + testCase match { + case _: AnalyzerTestCase => + ignore(s"${testCase.name} is skipped because it is an analyzer test and this " + + s"suite is only testing for correctness.") { + /* Do nothing */ + } + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not RegularTestCases are now") { + /* Do nothing */ + } + } + } + } + + override lazy val listTestCases: Seq[TestCase] = { + listFilesRecursively(new File(inputFilePath)).flatMap { file => + var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + var analyzerResultFile = + file.getAbsolutePath.replace(inputFilePath, analyzerGoldenFilePath) + ".out" + // JDK-4511638 changes 'toString' result of Float/Double + // JDK-8282081 changes DataTimeFormatter 'F' symbol + if (Utils.isJavaVersionAtLeast21) { + if (new File(resultFile + ".java21").exists()) resultFile += ".java21" + if (new File(analyzerResultFile + ".java21").exists()) analyzerResultFile += ".java21" + } + val absPath = file.getAbsolutePath + val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) + RegularTestCase(testCaseName, absPath, resultFile) :: Nil + }.sortBy(_.name) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 4c23cd054e743..a9be898306987 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -99,46 +99,3 @@ private[sql] case class PostgresConnection(connection_url: Option[String] = None def close(): Unit = if (!conn.isClosed) conn.close() } - -var runner: Option[SQLQueryTestRunner] = None - - // Run the SQL queries preparing them for comparison. - val outputs: Seq[QueryTestOutput] = queries.map { sql => - testCase match { - case _: AnalyzerTest => - val (_, output) = - handleExceptions(getNormalizedQueryAnalysisResult(localSparkSession, sql)) - // We do some query canonicalization now. - AnalyzerOutput( - sql = sql, - schema = None, - output = normalizeTestResults(output.mkString("\n"))) - case _ => - val (schema, output) = - if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { - if (runner.isEmpty) { - val connectionUrl = if (customConnectionUrl.nonEmpty) { - Some(customConnectionUrl) - } else { - None - } - runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl)) - } - val output = handleExceptions(runner.map(_.runQuery(sql)).get) - val schema = spark.sql(sql).schema.catalogString - (schema, output) - } else { - handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql)) - } - // We do some query canonicalization now. 
- val executionOutput = ExecutionOutput( - sql = sql, - schema = Some(schema), - output = normalizeTestResults(output.mkString("\n"))) - if (testCase.isInstanceOf[CTETest]) { - expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput) - } - executionOutput - } - } - jdbcRunner.foreach(_.cleanUp()) \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index 672a943380fe4..87df28ccaaa85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -17,10 +17,13 @@ package org.apache.spark.sql.crossdbms +/** + * Trait for classes that can run SQL queries for testing. + */ trait SQLQueryTestRunner { /** - * Runs a given query. + * Runs a given query and returns a Seq[String] that represents the query result output. */ def runQuery(query: String): Seq[String] @@ -31,11 +34,10 @@ trait SQLQueryTestRunner { } /** - * A runner that takes a JDBC connection and uses it to execute queries. We still need the local - * Spark session to do some things.. + * A runner that takes a JDBC connection and uses it to execute queries. */ -private[sql] case class JdbcSQLQueryTestRunner( - connection: JdbcConnection) extends SQLQueryTestRunner { +private[sql] case class JdbcSQLQueryTestRunner(connection: JdbcConnection) + extends SQLQueryTestRunner { def runQuery(query: String): Seq[String] = connection.runQuery(query) From 315847af6ca84190beb50c12f9f529bad61f052c Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 13:23:59 -0800 Subject: [PATCH 05/63] Add sqllogictest-select1.sql in postgres-crosstest input file --- .../sqllogictest-select1.sql | 90 +++++++++++++++++++ .../crossdbms/CrossDbmsQueryTestSuite.scala | 4 +- 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql new file mode 100644 index 0000000000000..27328457ac851 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql @@ -0,0 +1,90 @@ +-- First 6 queries of the sql-logic-tests with some minor changes for compatibility, available here: +-- https://www.sqlite.org/sqllogictest/file?name=test/select1.test&ci=tip. 
+ +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245); + +SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END + FROM t1 + ORDER BY 1; + +SELECT a+b*2+c*3+d*4+e*5, + CAST((a+b+c+d+e)/5 AS INT) + FROM t1 + ORDER BY 1,2; + +SELECT a+b*2+c*3+d*4+e*5, + CASE WHEN ac OR ee + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.bc OR ec + AND c>d + ORDER BY 3,4,5,1,2,6; + +SELECT a+b*2+c*3+d*4, + CASE a+1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c>t1.c AND x.d=d+2) + ORDER BY 4,2,1,3; + +DROP VIEW IF EXISTS t1; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index f62c4793abeb7..3fa9f5c2b4c9b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -39,7 +39,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override protected val inputFilePath = { val originalInputs = new File(baseResourcePath, "inputs").getAbsolutePath - new File(originalInputs, crossDbmsToGenerateGoldenFiles).getAbsolutePath + new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath } override protected val goldenFilePath = new File( baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath @@ -133,7 +133,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { runSqlTestCase(testCase, listTestCases) } case _ => - ignore(s"Ignoring test cases that are not RegularTestCases are now") { + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { /* Do nothing */ } } From 5e087e1425c5c6764c7db033bc0c362ce5ee171e Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 13:47:26 -0800 Subject: [PATCH 06/63] Add header for class suite --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 63 +++++++++++++++---- .../sql/crossdbms/SQLQueryTestRunner.scala | 2 +- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 3fa9f5c2b4c9b..a11e3598f8ab8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -28,18 +28,65 @@ import org.apache.spark.sql.catalyst.util.stringToFile import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.util.Utils -// TODO: Add header +// scalastyle:off 
line.size.limit +/** + * See SQLQueryTestSuite.scala for the more information. This class builds off of that to allow us + * to generate golden files with other DBMS to perform cross-checking for correctness. Note that the + * input directory path is currently limited because most, if not all, of our current SQL query + * tests will not be compatible with other DBMSes. There will be more work in the future, such as + * some kind of conversion, to increase coverage. + * + * You need to have a database server up before running this test. + * For postgres: + * 1. On a mac: `brew install postgresql@13` + * 2. After installing PostgreSQL, start the database server, then create a role named pg with + * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` + * + * To run the entire test suite: + * {{{ + * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * }}} + * + * To re-generate golden files for entire suite, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * }}} + * + * To re-generate golden file for a single test, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" -- -z describe.sql" + * }}} + * + * To specify a DBMS to use (the default is postgres): + * {{{ + * REF_DBMS=mysql SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * }}} + */ +// scalastyle:on line.size.limit class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { + private val DEFAULT = "postgres" private val DBMS_MAPPING = Map( "postgres" ->((connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)))) - private val crossDbmsToGenerateGoldenFiles: String = System.getenv("REF_DBMS") + private val crossDbmsToGenerateGoldenFiles: String = { + val userInputDbms = System.getenv("REF_DBMS") + if (userInputDbms.isEmpty) { + DEFAULT + } else { + userInputDbms + } + } private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") + // Currently using a separate directory for this because the current SQL tests we have are highly + // unlikely to get compatible with the other DBMS. 
override protected val inputFilePath = { val originalInputs = new File(baseResourcePath, "inputs").getAbsolutePath - new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath + val x = new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath + log.info("HERE " + originalInputs) + log.info("HERE " + x) + x } override protected val goldenFilePath = new File( baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath @@ -63,7 +110,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { var runner: Option[SQLQueryTestRunner] = None val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = - if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { + if (regenerateGoldenFiles) { if (runner.isEmpty) { val connectionUrl = if (customConnectionUrl.nonEmpty) { Some(customConnectionUrl) @@ -122,11 +169,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } } else { testCase match { - case _: AnalyzerTestCase => - ignore(s"${testCase.name} is skipped because it is an analyzer test and this " + - s"suite is only testing for correctness.") { - /* Do nothing */ - } case _: RegularTestCase => // Create a test case to run this case. test(testCase.name) { @@ -143,13 +185,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" - var analyzerResultFile = - file.getAbsolutePath.replace(inputFilePath, analyzerGoldenFilePath) + ".out" // JDK-4511638 changes 'toString' result of Float/Double // JDK-8282081 changes DataTimeFormatter 'F' symbol if (Utils.isJavaVersionAtLeast21) { if (new File(resultFile + ".java21").exists()) resultFile += ".java21" - if (new File(analyzerResultFile + ".java21").exists()) analyzerResultFile += ".java21" } val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index 87df28ccaaa85..f600276a49e92 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -42,4 +42,4 @@ private[sql] case class JdbcSQLQueryTestRunner(connection: JdbcConnection) def runQuery(query: String): Seq[String] = connection.runQuery(query) def cleanUp(): Unit = connection.close() -} \ No newline at end of file +} From 33b52b2e2611f2cc0b0cfcfcf2c545f80fc6ae3b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 14:09:25 -0800 Subject: [PATCH 07/63] Small changes --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index a11e3598f8ab8..3621ae77277dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -37,10 +37,11 @@ import org.apache.spark.util.Utils * some kind of conversion, to increase coverage. 
* * You need to have a database server up before running this test. - * For postgres: - * 1. On a mac: `brew install postgresql@13` + * For example, for postgres: + * 1. Install PostgreSQL. + * a. On a mac: `brew install postgresql@13` * 2. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` + * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` * * To run the entire test suite: * {{{ @@ -64,17 +65,15 @@ import org.apache.spark.util.Utils */ // scalastyle:on line.size.limit class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { - private val DEFAULT = "postgres" - private val DBMS_MAPPING = Map( - "postgres" ->((connection_url: Option[String]) => - JdbcSQLQueryTestRunner(PostgresConnection(connection_url)))) private val crossDbmsToGenerateGoldenFiles: String = { val userInputDbms = System.getenv("REF_DBMS") - if (userInputDbms.isEmpty) { - DEFAULT - } else { + if (userInputDbms.nonEmpty) { + assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(userInputDbms), + s"$userInputDbms is not currently supported.") userInputDbms + } else { + CrossDbmsQueryTestSuite.DEFAULT_DBMS } } private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") @@ -82,11 +81,8 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Currently using a separate directory for this because the current SQL tests we have are highly // unlikely to get compatible with the other DBMS. override protected val inputFilePath = { - val originalInputs = new File(baseResourcePath, "inputs").getAbsolutePath - val x = new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath - log.info("HERE " + originalInputs) - log.info("HERE " + x) - x + val originalInputs = new File(baseResourcePath, "inputs") + new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath } override protected val goldenFilePath = new File( baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath @@ -117,7 +113,8 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } else { None } - runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl)) + runner = Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( + crossDbmsToGenerateGoldenFiles)(connectionUrl)) } val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get @@ -196,3 +193,14 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { }.sortBy(_.name) } } + +object CrossDbmsQueryTestSuite { + + private final val POSTGRES = "postgres" + private final val SUPPORTED_DBMS = Seq(POSTGRES) + private final val DEFAULT_DBMS = POSTGRES + private final val DBMS_TO_CONNECTION_MAPPING = Map( + POSTGRES -> ((connection_url: Option[String]) => + JdbcSQLQueryTestRunner(PostgresConnection(connection_url))) + ) +} From 81202c470079653bed7ca030902ef9625a6ca4ef Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 15:30:32 -0800 Subject: [PATCH 08/63] Trying to get the input files set up.. 
--- .../apache/spark/sql/SQLQueryTestSuite.scala | 6 ++- .../crossdbms/CrossDbmsQueryTestSuite.scala | 48 +++++++++++-------- .../ThriftServerQueryTestSuite.scala | 2 +- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 9111aa472a519..464c9dfc22428 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -612,9 +612,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } + protected def resultFileForInputFile(file: File): String = { + file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + } + protected lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + var resultFile = resultFileForInputFile(file) var analyzerResultFile = file.getAbsolutePath.replace(inputFilePath, analyzerGoldenFilePath) + ".out" // JDK-4511638 changes 'toString' result of Float/Double diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 3621ae77277dc..9f1f11eb040b4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -68,7 +68,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { private val crossDbmsToGenerateGoldenFiles: String = { val userInputDbms = System.getenv("REF_DBMS") - if (userInputDbms.nonEmpty) { + if (userInputDbms != null && userInputDbms.nonEmpty) { assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(userInputDbms), s"$userInputDbms is not currently supported.") userInputDbms @@ -78,24 +78,16 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") - // Currently using a separate directory for this because the current SQL tests we have are highly - // unlikely to get compatible with the other DBMS. 
- override protected val inputFilePath = { - val originalInputs = new File(baseResourcePath, "inputs") - new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath - } - override protected val goldenFilePath = new File( - baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath - - private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { - case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false - case _: DescribeCommandBase - | _: DescribeColumnCommand - | _: DescribeRelation - | _: DescribeColumn => true - case PhysicalOperation(_, _, Sort(_, true, _)) => true - case _ => plan.children.iterator.exists(isSemanticallySorted) - } + override def ignoreList: Set[String] = super.ignoreList ++ Set( + "postgreSQL", + "subquery", + "ansi", + "udtf", + "udf", + "timestampNTZ", + "udaf", + "typeCoercion" + ) override protected def runQueries( queries: Seq[String], @@ -179,9 +171,15 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } } + override protected def resultFileForInputFile(file: File): String = { + val goldenFilePath = new File( + baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath + file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + } + override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + var resultFile = resultFileForInputFile(file) // JDK-4511638 changes 'toString' result of Float/Double // JDK-8282081 changes DataTimeFormatter 'F' symbol if (Utils.isJavaVersionAtLeast21) { @@ -192,6 +190,16 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { RegularTestCase(testCaseName, absPath, resultFile) :: Nil }.sortBy(_.name) } + + private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { + case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false + case _: DescribeCommandBase + | _: DescribeColumnCommand + | _: DescribeRelation + | _: DescribeColumn => true + case PhysicalOperation(_, _, Sort(_, true, _)) => true + case _ => plan.children.iterator.exists(isSemanticallySorted) + } } object CrossDbmsQueryTestSuite { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index ee336b13b0b2d..8d90d47e1bf5f 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -246,7 +246,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + var resultFile = resultFileForInputFile(file) // JDK-4511638 changes 'toString' result of Float/Double // JDK-8282081 changes DataTimeFormatter 'F' symbol if (Utils.isJavaVersionAtLeast21 && (new File(resultFile + ".java21")).exists()) { From c9fba5b0f3b5fa2039fd29dfdc40abd400c0771b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 15:46:50 -0800 Subject: [PATCH 09/63] Passing test!! 
--- .../sqllogictest-select1.sql.out | 236 ++++++++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 2 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 40 +-- 3 files changed, 259 insertions(+), 19 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..1ce87f0fc54bb --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,236 @@ +-- Automatically generated by CrossDbmsQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query output + + + +-- !query +SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END + FROM t1 + ORDER BY 1 +-- !query output +354 +362 +374 +380 +398 +400 +416 +428 +436 +446 +452 +468 +474 +484 +492 +1020 +1060 +1140 +1190 +1220 +1280 +1340 +1360 +1410 +1490 +1500 +1570 +1600 +1690 +1700 + + +-- !query +SELECT a+b*2+c*3+d*4+e*5, + CAST((a+b+c+d+e)/5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query output +1531 102 +1604 107 +1683 112 +1755 117 +1824 122 +1899 127 +1975 132 +2052 137 +2129 142 +2208 147 +2286 152 +2360 157 +2436 162 +2499 167 +2589 172 +2653 177 +2739 182 +2802 187 +2880 192 +2946 197 +3037 202 +3100 207 +3176 212 +3259 217 +3331 222 +3405 227 +3477 232 +3556 237 +3627 242 +3704 247 + + +-- !query +SELECT a+b*2+c*3+d*4+e*5, + CASE WHEN ac OR ee + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.bc OR ec + AND c>d + ORDER BY 3,4,5,1,2,6 +-- !query output +1118 112 110 113 333 111 +1165 117 116 118 222 115 +1669 167 167 166 222 165 +1925 192 190 191 444 192 + + +-- !query +SELECT a+b*2+c*3+d*4, + CASE a+1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c>t1.c AND x.d=d+2) + ORDER BY 4,2,1,3 +-- !query output +1165 222 0 117 +1219 222 0 124 +1274 111 0 129 +1325 333 0 131 +1367 222 0 139 +1414 555 0 140 +1473 222 0 146 +1570 333 0 159 +1669 222 0 168 +1877 111 0 186 +1971 555 0 198 +2017 333 0 203 +2211 444 0 222 +2312 555 0 232 +2366 444 0 239 +2422 333 0 240 +2479 222 0 247 + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 464c9dfc22428..f5d51d58e1e10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -943,7 
+943,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper s"-- !query output\n" + output } - override def numSegments: Int = 3 + override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } } /** A single SQL query's analysis results. */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 9f1f11eb040b4..2533be0e12d5a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -55,7 +55,7 @@ import org.apache.spark.util.Utils * * To re-generate golden file for a single test, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" -- -z describe.sql" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite -- -z describe.sql" * }}} * * To specify a DBMS to use (the default is postgres): @@ -66,7 +66,9 @@ import org.apache.spark.util.Utils // scalastyle:on line.size.limit class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { - private val crossDbmsToGenerateGoldenFiles: String = { + // Note: the below two functions have to be functions instead of variables because the superclass + // runs the test first before the subclass variables can be instantiated. + private def crossDbmsToGenerateGoldenFiles: String = { val userInputDbms = System.getenv("REF_DBMS") if (userInputDbms != null && userInputDbms.nonEmpty) { assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(userInputDbms), @@ -76,18 +78,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { CrossDbmsQueryTestSuite.DEFAULT_DBMS } } - private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") - - override def ignoreList: Set[String] = super.ignoreList ++ Set( - "postgreSQL", - "subquery", - "ansi", - "udtf", - "udf", - "timestampNTZ", - "udaf", - "typeCoercion" - ) + private def customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") override protected def runQueries( queries: Seq[String], @@ -100,7 +91,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val output = if (regenerateGoldenFiles) { if (runner.isEmpty) { - val connectionUrl = if (customConnectionUrl.nonEmpty) { + val connectionUrl = if (customConnectionUrl != null && customConnectionUrl.nonEmpty) { Some(customConnectionUrl) } else { None @@ -112,10 +103,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { - output.sorted + output } else { // Sort the answer manually if it isn't sorted. 
- output + output.sorted } result } else { @@ -172,8 +163,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } override protected def resultFileForInputFile(file: File): String = { + val defaultResultsDir = new File(baseResourcePath, "results") val goldenFilePath = new File( - baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath + defaultResultsDir, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" } @@ -200,6 +192,18 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { case PhysicalOperation(_, _, Sort(_, true, _)) => true case _ => plan.children.iterator.exists(isSemanticallySorted) } + + // Ignore all earlier tests for now due to incompatibility. + override def ignoreList: Set[String] = super.ignoreList ++ Set( + "postgreSQL", + "subquery", + "ansi", + "udtf", + "udf", + "timestampNTZ", + "udaf", + "typeCoercion" + ) } object CrossDbmsQueryTestSuite { From 8b5ed22ab1fceb4aca95f9d8766676d5e692a6bb Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 15:53:01 -0800 Subject: [PATCH 10/63] Ignore all earlier tests first --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 128 +++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 2533be0e12d5a..07b4410fd30bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -202,7 +202,133 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { "udf", "timestampNTZ", "udaf", - "typeCoercion" + "typeCoercion", + "group-by.sql", + "natural-join.sql", + "timestamp-ltz.sql", + "csv-functions.sql", + "datetime-formatting-invalid.sql", + "except.sql", + "string-functions.sql", + "cte-command.sql", + "identifier-clause.sql", + "try_reflect.sql", + "order-by-all.sql", + "timestamp-ntz.sql", + "url-functions.sql", + "math.sql", + "random.sql", + "tablesample-negative.sql", + "date.sql", + "window.sql", + "linear-regression.sql", + "join-empty-relation.sql", + "null-propagation.sql", + "operators.sql", + "change-column.sql", + "count.sql", + "mask-functions.sql", + "decimalArithmeticOperations.sql", + "column-resolution-sort.sql", + "group-analytics.sql", + "inline-table.sql", + "comparator.sql", + "ceil-floor-with-scale-param.sql", + "show-tblproperties.sql", + "timezone.sql", + "ilike.sql", + "parse-schema-string.sql", + "charvarchar.sql", + "ignored.sql", + "cte.sql", + "selectExcept.sql", + "order-by-nulls-ordering.sql", + "query_regex_column.sql", + "show_columns.sql", + "columnresolution-views.sql", + "percentiles.sql", + "hll.sql", + "group-by-all.sql", + "like-any.sql", + "struct.sql", + "show-create-table.sql", + "try_cast.sql", + "unpivot.sql", + "describe-query.sql", + "describe.sql", + "columnresolution.sql", + "union.sql", + "order-by-ordinal.sql", + "explain-cbo.sql", + "intersect-all.sql", + "ilike-all.sql", + "try_aggregates.sql", + "try-string-functions.sql", + "show-views.sql", + "xml-functions.sql", + "datetime-parsing.sql", + "cte-nested.sql", + "columnresolution-negative.sql", + "datetime-special.sql", + "describe-table-after-alter-table.sql", + "column-resolution-aggregate.sql", + "keywords.sql", + "table-aliases.sql", + 
"bitwise.sql", + "like-all.sql", + "named-function-arguments.sql", + "try_datetime_functions.sql", + "datetime-legacy.sql", + "outer-join.sql", + "explain.sql", + "sql-compatibility-functions.sql", + "join-lateral.sql", + "pred-pushdown.sql", + "cast.sql", + "except-all.sql", + "predicate-functions.sql", + "timestamp.sql", + "group-by-filter.sql", + "pivot.sql", + "try_arithmetic.sql", + "higher-order-functions.sql", + "cte-legacy.sql", + "misc-functions.sql", + "describe-part-after-analyze.sql", + "extract.sql", + "datetime-formatting.sql", + "interval.sql", + "double-quoted-identifiers.sql", + "datetime-formatting-legacy.sql", + "group-by-ordinal.sql", + "having.sql", + "inner-join.sql", + "null-handling.sql", + "ilike-any.sql", + "show-tables.sql", + "sql-session-variables.sql", + "using-join.sql", + "subexp-elimination.sql", + "cte-nonlegacy.sql", + "group-by-all-duckdb.sql", + "transform.sql", + "map.sql", + "table-valued-functions.sql", + "comments.sql", + "regexp-functions.sql", + "datetime-parsing-legacy.sql", + "cross-join.sql", + "array.sql", + "group-by-all-mosha.sql", + "limit.sql", + "non-excludable-rule.sql", + "grouping_set.sql", + "json-functions.sql", + "datetime-parsing-invalid.sql", + "try_element_at.sql", + "explain-aqe.sql", + "current_database_catalog.sql", + "literals.sql" ) } From fea3975dcdc9c235e53799a9daa2d4dcc5769585 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 16:35:49 -0800 Subject: [PATCH 11/63] Small comment changes --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 07b4410fd30bb..dad03f08d3831 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.util.Utils // scalastyle:off line.size.limit /** - * See SQLQueryTestSuite.scala for the more information. This class builds off of that to allow us + * See SQLQueryTestSuite.scala for more information. This class builds off of that to allow us * to generate golden files with other DBMS to perform cross-checking for correctness. Note that the * input directory path is currently limited because most, if not all, of our current SQL query * tests will not be compatible with other DBMSes. There will be more work in the future, such as @@ -88,7 +88,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { var runner: Option[SQLQueryTestRunner] = None val outputs: Seq[QueryTestOutput] = queries.map { sql => - val output = + val output = { + // Use the runner when generating golden files, and Spark when running the test against + // the already generated golden files. if (regenerateGoldenFiles) { if (runner.isEmpty) { val connectionUrl = if (customConnectionUrl != null && customConnectionUrl.nonEmpty) { @@ -112,6 +114,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } else { handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql))._2 } + } // We do some query canonicalization now. 
       val executionOutput = ExecutionOutput(
         sql = sql,

From 678a0efc07662cb4b678505f371cebeffde7487d Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Wed, 29 Nov 2023 16:52:22 -0800
Subject: [PATCH 12/63] More comments

---
 .../apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
index dad03f08d3831..38335a99f8359 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
@@ -36,6 +36,9 @@ import org.apache.spark.util.Utils
  * tests will not be compatible with other DBMSes. There will be more work in the future, such as
  * some kind of conversion, to increase coverage.
  *
+ * If your SQL query test is not compatible with other DBMSes, please add it to the `ignoreList` at
+ * the bottom of this file.
+ *
  * You need to have a database server up before running this test.
  * For example, for postgres:
  * 1. Install PostgreSQL.
@@ -196,7 +199,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging {
     case _ => plan.children.iterator.exists(isSemanticallySorted)
   }
 
-  // Ignore all earlier tests for now due to incompatibility.
+  // Ignore all tests for now due to likely incompatibility.
   override def ignoreList: Set[String] = super.ignoreList ++ Set(
     "postgreSQL",
     "subquery",

From 309a7e2db06665ffef47a7544f5a07a77aca1110 Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Thu, 30 Nov 2023 10:27:53 -0800
Subject: [PATCH 13/63] Address simple comments

---
 .../sqllogictest-select1.sql                  | 48 +++++-----
 .../apache/spark/sql/SQLQueryTestHelper.scala | 26 +++---
 .../apache/spark/sql/SQLQueryTestSuite.scala  | 11 +++
 .../crossdbms/CrossDbmsQueryTestSuite.scala   | 92 ++++++++-----------
 .../spark/sql/crossdbms/JdbcConnection.scala  |  6 ++
 .../sql/crossdbms/SQLQueryTestRunner.scala    |  4 +-
 6 files changed, 98 insertions(+), 89 deletions(-)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql
index 27328457ac851..7fcc5419044d0 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql
@@ -33,58 +33,58 @@ CREATE VIEW t1(a, b, c, d, e) AS VALUES
   (242,244,240,243,241),
   (246,248,247,249,245);
 
-SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END
+SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END
   FROM t1
  ORDER BY 1;
 
-SELECT a+b*2+c*3+d*4+e*5,
-       CAST((a+b+c+d+e)/5 AS INT)
+SELECT a + b * 2 + c * 3 + d * 4 + e * 5,
+       CAST((a + b + c + d + e) / 5 AS INT)
   FROM t1
  ORDER BY 1,2;
 
-SELECT a+b*2+c*3+d*4+e*5,
-       CASE WHEN a<b-3 THEN 111 WHEN a<=b THEN 222
-        WHEN a<b+3 THEN 333 ELSE 444 END,
-       abs(b-c),
-       CAST((a+b+c+d+e)/5 AS INT),
-       a+b*2+c*3
+SELECT a + b * 2 + c * 3 + d * 4 + e * 5,
+       CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222
+        WHEN a < b + 3 THEN 333 ELSE 444 END,
+       abs(b - c),
+       CAST((a + b + c + d + e) / 5 AS INT),
+       a + b * 2 + c * 3
   FROM t1
- WHERE (e>c OR e<d)
+ WHERE (e > c OR e < d)
    AND d>e
-   AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b<t1.b)
+   AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b)
  ORDER BY 4,2,1,3,5;
 
 SELECT c,
        d-e,
-       CASE a+1 WHEN b THEN 111 WHEN c THEN 222
+       CASE a + 1 WHEN b THEN 111 WHEN c THEN 222
         WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END,
-       a+b*2+c*3+d*4,
+       a + b * 2 + c * 3 + d * 4,
        e
   FROM t1
  WHERE d NOT BETWEEN 110 AND 150
-    OR c BETWEEN b-2 AND d+2
-    OR (e>c OR e<d)
+    OR c BETWEEN b - 2 AND d + 2
+    OR (e > c OR e < d)
  ORDER BY 1,5,3,2,4;
 
-SELECT a+b*2+c*3+d*4,
-       CAST((a+b+c+d+e)/5 AS INT),
+SELECT a + b * 2 + c * 3 + d * 4,
+       CAST((a + b + c + d + e) / 5 AS INT),
        abs(a),
        e,
-       CASE a+1 WHEN b THEN 111 WHEN c THEN 222
+       CASE a + 1 WHEN b THEN 111 WHEN c THEN 222
         WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END,
        d
   FROM t1
- WHERE b>c
-   AND c>d
+ WHERE b > c
+   AND c > d
 ORDER BY 3,4,5,1,2,6;
 
-SELECT a+b*2+c*3+d*4,
-       CASE a+1 WHEN b THEN 111 WHEN c THEN 222
+SELECT a + b * 2 + c * 3 + d * 4,
+       CASE a + 1 WHEN b THEN 111 WHEN c THEN 222
         WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END,
-       (SELECT count(*) FROM t1 AS x WHERE x.c>t1.c AND x.d<t1.d),
+       (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d),
        c
   FROM t1
-  WHERE (c<=d-2 OR c>=d+2)
+  WHERE (c <= d - 2 OR c >= d + 2)
 ORDER BY 4,2,1,3;
 
 DROP VIEW IF EXISTS t1;
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
index d8956961440df..ca26d31aa161d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
@@ -98,20 +98,24 @@ trait SQLQueryTestHelper extends Logging {
     }
   }
 
+  /**
+   * Uses the Spark logical plan to determine whether the plan is semantically sorted. This is
+   * important to make non-sorted queries test cases more deterministic.
+   */
+  def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match {
+    case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
+    case _: DescribeCommandBase
+      | _: DescribeColumnCommand
+      | _: DescribeRelation
+      | _: DescribeColumn => true
+    case PhysicalOperation(_, _, Sort(_, true, _)) => true
+    case _ => plan.children.iterator.exists(isSemanticallySorted)
+  }
+
   /** Executes a query and returns the result as (schema of the output, normalized output). */
   protected def getNormalizedQueryExecutionResult(
       session: SparkSession,
       sql: String): (String, Seq[String]) = {
     // Returns true if the plan is supposed to be sorted.
-    def isSorted(plan: LogicalPlan): Boolean = plan match {
-      case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
-      case _: DescribeCommandBase
-        | _: DescribeColumnCommand
-        | _: DescribeRelation
-        | _: DescribeColumn => true
-      case PhysicalOperation(_, _, Sort(_, true, _)) => true
-      case _ => plan.children.iterator.exists(isSorted)
-    }
-
     val df = session.sql(sql)
     val schema = df.schema.catalogString
     // Get answer, but also get rid of the #1234 expression ids that show up in explain plans
@@ -120,7 +124,7 @@ trait SQLQueryTestHelper extends Logging {
     }
 
     // If the output is not pre-sorted, sort it.
-    if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)
+    if (isSemanticallySorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)
   }
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index f5d51d58e1e10..be59753f97655 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -612,6 +612,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
     }
   }
 
+  /**
+   * Returns the desired file path for results, given the input file. This is implemented as a
+   * function because different Suites extending this class may want their results files with
+   * different names or in different locations.
+   */
   protected def resultFileForInputFile(file: File): String = {
     file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out"
   }
@@ -943,6 +948,12 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
       s"-- !query output\n" + output
     }
+
+  /**
+   * Some suites extending this one, such as [[CrossDbmsQueryTestSuite]], only test for
+   * correctness and do not care about the schema.
The logic here allows passing the check for + * number of segments in the golden files when testing. + */ override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 38335a99f8359..8248c61776f96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -22,10 +22,7 @@ import java.util.Locale import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DescribeColumn, DescribeRelation, Distinct, Generate, Join, LogicalPlan, Sample, Sort} import org.apache.spark.sql.catalyst.util.stringToFile -import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.util.Utils // scalastyle:off line.size.limit @@ -72,21 +69,20 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Note: the below two functions have to be functions instead of variables because the superclass // runs the test first before the subclass variables can be instantiated. private def crossDbmsToGenerateGoldenFiles: String = { - val userInputDbms = System.getenv("REF_DBMS") - if (userInputDbms != null && userInputDbms.nonEmpty) { - assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(userInputDbms), - s"$userInputDbms is not currently supported.") - userInputDbms - } else { - CrossDbmsQueryTestSuite.DEFAULT_DBMS - } + val userInputDbms = System.getenv(CrossDbmsQueryTestSuite.REF_DBMS_ARGUMENT) + val selectedDbms = Option(userInputDbms).filter(_.nonEmpty).getOrElse( + CrossDbmsQueryTestSuite.DEFAULT_DBMS) + assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(selectedDbms), + s"$selectedDbms is not currently supported.") + selectedDbms } - private def customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") + private def customConnectionUrl: String = System.getenv( + CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) override protected def runQueries( - queries: Seq[String], - testCase: TestCase, - configSet: Seq[(String, String)]): Unit = { + queries: Seq[String], + testCase: TestCase, + configSet: Seq[(String, String)]): Unit = { val localSparkSession = spark.newSession() var runner: Option[SQLQueryTestRunner] = None @@ -95,18 +91,13 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Use the runner when generating golden files, and Spark when running the test against // the already generated golden files. 
if (regenerateGoldenFiles) { - if (runner.isEmpty) { - val connectionUrl = if (customConnectionUrl != null && customConnectionUrl.nonEmpty) { - Some(customConnectionUrl) - } else { - None - } - runner = Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( - crossDbmsToGenerateGoldenFiles)(connectionUrl)) - } + val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) + runner = runner.getOrElse( + Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( + crossDbmsToGenerateGoldenFiles)(connectionUrl))) val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get - // Use Spark analyzed plan to check if the query result is already semantically sorted + // Use Spark analyzed plan to check if the query result is already semantically sorted. val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { output } else { @@ -115,7 +106,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } result } else { - handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql))._2 + val (_, output) = handleExceptions( + getNormalizedQueryExecutionResult(localSparkSession, sql)) + output } } // We do some query canonicalization now. @@ -148,23 +141,21 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } override def createScalaTestCase(testCase: TestCase): Unit = { - if (ignoreList.exists(t => - testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { - ignore(testCase.name) { - /* Do nothing */ - } - } else { - testCase match { - case _: RegularTestCase => - // Create a test case to run this case. - test(testCase.name) { - runSqlTestCase(testCase, listTestCases) - } - case _ => - ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { - /* Do nothing */ - } - } + testCase match { + case _ if ignoreList.exists(t => + testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT))) => + ignore(s"${testCase.name} is in the ignore list.") { + log.debug(s"${testCase.name} is in the ignore list.") + } + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { + log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") + } } } @@ -189,16 +180,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { }.sortBy(_.name) } - private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { - case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false - case _: DescribeCommandBase - | _: DescribeColumnCommand - | _: DescribeRelation - | _: DescribeColumn => true - case PhysicalOperation(_, _, Sort(_, true, _)) => true - case _ => plan.children.iterator.exists(isSemanticallySorted) - } - // Ignore all tests for now due to likely incompatibility. override def ignoreList: Set[String] = super.ignoreList ++ Set( "postgreSQL", @@ -340,6 +321,11 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { object CrossDbmsQueryTestSuite { + // System argument to indicate which reference DBMS is being used. + private final val REF_DBMS_ARGUMENT = "REF_DBMS" + // System arguemnt to indicate a custom connection URL to the reference DBMS. 
+ private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" + private final val POSTGRES = "postgres" private final val SUPPORTED_DBMS = Seq(POSTGRES) private final val DEFAULT_DBMS = POSTGRES diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index a9be898306987..ee4cd7ec6eed3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -25,6 +25,9 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry +/** + * Represents a connection (session) to a database. + */ private[sql] trait JdbcConnection { /** * Runs the given query. @@ -53,6 +56,9 @@ private[sql] trait JdbcConnection { def close(): Unit } +/** + * Represents a connection (session) to a PostgreSQL database. + */ private[sql] case class PostgresConnection(connection_url: Option[String] = None) extends JdbcConnection { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index f600276a49e92..7c79b27657d1a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.crossdbms /** - * Trait for classes that can run SQL queries for testing. + * Trait for classes that can run SQL queries for testing. This is specifically used as a wrapper + * around a connection to a system that can run SQL queries, to run queries from + * [[SQLQueryTestSuite]]. */ trait SQLQueryTestRunner { From 5c52b9c4e1fc0327ccbf51fa9937530912157765 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 10:39:24 -0800 Subject: [PATCH 14/63] A little more small changes.. --- .../apache/spark/sql/SQLQueryTestHelper.scala | 6 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 277 +++++++++--------- .../spark/sql/crossdbms/JdbcConnection.scala | 3 +- .../sql/crossdbms/SQLQueryTestRunner.scala | 3 +- 4 files changed, 150 insertions(+), 139 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index ca26d31aa161d..3bda8c75d7806 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -124,7 +124,11 @@ trait SQLQueryTestHelper extends Logging { } // If the output is not pre-sorted, sort it. 
- if (isSemanticallySorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted) + if (isSemanticallySorted(df.queryExecution.analyzed)) { + (schema, answer) + } else { + (schema, answer.sorted) + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 8248c61776f96..b30820bc55378 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -181,142 +181,147 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } // Ignore all tests for now due to likely incompatibility. - override def ignoreList: Set[String] = super.ignoreList ++ Set( - "postgreSQL", - "subquery", - "ansi", - "udtf", - "udf", - "timestampNTZ", - "udaf", - "typeCoercion", - "group-by.sql", - "natural-join.sql", - "timestamp-ltz.sql", - "csv-functions.sql", - "datetime-formatting-invalid.sql", - "except.sql", - "string-functions.sql", - "cte-command.sql", - "identifier-clause.sql", - "try_reflect.sql", - "order-by-all.sql", - "timestamp-ntz.sql", - "url-functions.sql", - "math.sql", - "random.sql", - "tablesample-negative.sql", - "date.sql", - "window.sql", - "linear-regression.sql", - "join-empty-relation.sql", - "null-propagation.sql", - "operators.sql", - "change-column.sql", - "count.sql", - "mask-functions.sql", - "decimalArithmeticOperations.sql", - "column-resolution-sort.sql", - "group-analytics.sql", - "inline-table.sql", - "comparator.sql", - "ceil-floor-with-scale-param.sql", - "show-tblproperties.sql", - "timezone.sql", - "ilike.sql", - "parse-schema-string.sql", - "charvarchar.sql", - "ignored.sql", - "cte.sql", - "selectExcept.sql", - "order-by-nulls-ordering.sql", - "query_regex_column.sql", - "show_columns.sql", - "columnresolution-views.sql", - "percentiles.sql", - "hll.sql", - "group-by-all.sql", - "like-any.sql", - "struct.sql", - "show-create-table.sql", - "try_cast.sql", - "unpivot.sql", - "describe-query.sql", - "describe.sql", - "columnresolution.sql", - "union.sql", - "order-by-ordinal.sql", - "explain-cbo.sql", - "intersect-all.sql", - "ilike-all.sql", - "try_aggregates.sql", - "try-string-functions.sql", - "show-views.sql", - "xml-functions.sql", - "datetime-parsing.sql", - "cte-nested.sql", - "columnresolution-negative.sql", - "datetime-special.sql", - "describe-table-after-alter-table.sql", - "column-resolution-aggregate.sql", - "keywords.sql", - "table-aliases.sql", - "bitwise.sql", - "like-all.sql", - "named-function-arguments.sql", - "try_datetime_functions.sql", - "datetime-legacy.sql", - "outer-join.sql", - "explain.sql", - "sql-compatibility-functions.sql", - "join-lateral.sql", - "pred-pushdown.sql", - "cast.sql", - "except-all.sql", - "predicate-functions.sql", - "timestamp.sql", - "group-by-filter.sql", - "pivot.sql", - "try_arithmetic.sql", - "higher-order-functions.sql", - "cte-legacy.sql", - "misc-functions.sql", - "describe-part-after-analyze.sql", - "extract.sql", - "datetime-formatting.sql", - "interval.sql", - "double-quoted-identifiers.sql", - "datetime-formatting-legacy.sql", - "group-by-ordinal.sql", - "having.sql", - "inner-join.sql", - "null-handling.sql", - "ilike-any.sql", - "show-tables.sql", - "sql-session-variables.sql", - "using-join.sql", - "subexp-elimination.sql", - "cte-nonlegacy.sql", - "group-by-all-duckdb.sql", - "transform.sql", - "map.sql", - 
"table-valued-functions.sql", - "comments.sql", - "regexp-functions.sql", - "datetime-parsing-legacy.sql", - "cross-join.sql", - "array.sql", - "group-by-all-mosha.sql", - "limit.sql", - "non-excludable-rule.sql", - "grouping_set.sql", - "json-functions.sql", - "datetime-parsing-invalid.sql", - "try_element_at.sql", - "explain-aqe.sql", - "current_database_catalog.sql", - "literals.sql" - ) + override def ignoreList: Set[String] = super.ignoreList ++ + // Directories + Set( + "postgreSQL", + "subquery", + "ansi", + "udtf", + "udf", + "timestampNTZ", + "udaf", + "typeCoercion", + ) ++ + // Files + Set( + "group-by.sql", + "natural-join.sql", + "timestamp-ltz.sql", + "csv-functions.sql", + "datetime-formatting-invalid.sql", + "except.sql", + "string-functions.sql", + "cte-command.sql", + "identifier-clause.sql", + "try_reflect.sql", + "order-by-all.sql", + "timestamp-ntz.sql", + "url-functions.sql", + "math.sql", + "random.sql", + "tablesample-negative.sql", + "date.sql", + "window.sql", + "linear-regression.sql", + "join-empty-relation.sql", + "null-propagation.sql", + "operators.sql", + "change-column.sql", + "count.sql", + "mask-functions.sql", + "decimalArithmeticOperations.sql", + "column-resolution-sort.sql", + "group-analytics.sql", + "inline-table.sql", + "comparator.sql", + "ceil-floor-with-scale-param.sql", + "show-tblproperties.sql", + "timezone.sql", + "ilike.sql", + "parse-schema-string.sql", + "charvarchar.sql", + "ignored.sql", + "cte.sql", + "selectExcept.sql", + "order-by-nulls-ordering.sql", + "query_regex_column.sql", + "show_columns.sql", + "columnresolution-views.sql", + "percentiles.sql", + "hll.sql", + "group-by-all.sql", + "like-any.sql", + "struct.sql", + "show-create-table.sql", + "try_cast.sql", + "unpivot.sql", + "describe-query.sql", + "describe.sql", + "columnresolution.sql", + "union.sql", + "order-by-ordinal.sql", + "explain-cbo.sql", + "intersect-all.sql", + "ilike-all.sql", + "try_aggregates.sql", + "try-string-functions.sql", + "show-views.sql", + "xml-functions.sql", + "datetime-parsing.sql", + "cte-nested.sql", + "columnresolution-negative.sql", + "datetime-special.sql", + "describe-table-after-alter-table.sql", + "column-resolution-aggregate.sql", + "keywords.sql", + "table-aliases.sql", + "bitwise.sql", + "like-all.sql", + "named-function-arguments.sql", + "try_datetime_functions.sql", + "datetime-legacy.sql", + "outer-join.sql", + "explain.sql", + "sql-compatibility-functions.sql", + "join-lateral.sql", + "pred-pushdown.sql", + "cast.sql", + "except-all.sql", + "predicate-functions.sql", + "timestamp.sql", + "group-by-filter.sql", + "pivot.sql", + "try_arithmetic.sql", + "higher-order-functions.sql", + "cte-legacy.sql", + "misc-functions.sql", + "describe-part-after-analyze.sql", + "extract.sql", + "datetime-formatting.sql", + "interval.sql", + "double-quoted-identifiers.sql", + "datetime-formatting-legacy.sql", + "group-by-ordinal.sql", + "having.sql", + "inner-join.sql", + "null-handling.sql", + "ilike-any.sql", + "show-tables.sql", + "sql-session-variables.sql", + "using-join.sql", + "subexp-elimination.sql", + "cte-nonlegacy.sql", + "group-by-all-duckdb.sql", + "transform.sql", + "map.sql", + "table-valued-functions.sql", + "comments.sql", + "regexp-functions.sql", + "datetime-parsing-legacy.sql", + "cross-join.sql", + "array.sql", + "group-by-all-mosha.sql", + "limit.sql", + "non-excludable-rule.sql", + "grouping_set.sql", + "json-functions.sql", + "datetime-parsing-invalid.sql", + "try_element_at.sql", + "explain-aqe.sql", + 
"current_database_catalog.sql", + "literals.sql" + ) } object CrossDbmsQueryTestSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index ee4cd7ec6eed3..f04677fea4e8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry private[sql] trait JdbcConnection { /** * Runs the given query. - * @return A Seq[String] representing the output. + * @return A Seq[String] representing the output. This is a Seq where each element represents a + * single row. */ def runQuery(query: String): Seq[String] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index 7c79b27657d1a..d6230cc6b69a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -25,7 +25,8 @@ package org.apache.spark.sql.crossdbms trait SQLQueryTestRunner { /** - * Runs a given query and returns a Seq[String] that represents the query result output. + * Runs a given query and returns a Seq[String] that represents the query result output. This is a + * Seq where each element represents a single row. */ def runQuery(query: String): Seq[String] From ad867df8d282bd6be1b45c2340ec4bc9667e7a9f Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 10:51:40 -0800 Subject: [PATCH 15/63] More small changes and removal of redundant code --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 15 ++------------- .../spark/sql/crossdbms/JdbcConnection.scala | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index b30820bc55378..28080926e34a4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -23,7 +23,6 @@ import java.util.Locale import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite import org.apache.spark.sql.catalyst.util.stringToFile -import org.apache.spark.util.Utils // scalastyle:off line.size.limit /** @@ -111,16 +110,11 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { output } } - // We do some query canonicalization now. - val executionOutput = ExecutionOutput( + ExecutionOutput( sql = sql, // Don't care about the schema for this test. Only care about correctness. 
schema = None, output = normalizeTestResults(output.mkString("\n"))) - if (testCase.isInstanceOf[CTETest]) { - expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput) - } - executionOutput } runner.foreach(_.cleanUp()) @@ -169,11 +163,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => var resultFile = resultFileForInputFile(file) - // JDK-4511638 changes 'toString' result of Float/Double - // JDK-8282081 changes DataTimeFormatter 'F' symbol - if (Utils.isJavaVersionAtLeast21) { - if (new File(resultFile + ".java21").exists()) resultFile += ".java21" - } val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil @@ -328,7 +317,7 @@ object CrossDbmsQueryTestSuite { // System argument to indicate which reference DBMS is being used. private final val REF_DBMS_ARGUMENT = "REF_DBMS" - // System arguemnt to indicate a custom connection URL to the reference DBMS. + // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" private final val POSTGRES = "postgres" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index f04677fea4e8b..5901a523a6cad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -26,33 +26,36 @@ import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry /** - * Represents a connection (session) to a database. + * Represents a connection (session) to a database using JDBC. */ private[sql] trait JdbcConnection { + /** - * Runs the given query. - * @return A Seq[String] representing the output. This is a Seq where each element represents a - * single row. + * Executes the given SQL query and returns the result as a sequence of strings. + * @return A Seq[String] representing the output, where each element represents a single row. */ def runQuery(query: String): Seq[String] /** - * Drop the table with the given table name. + * Drops the table with the specified table name. */ def dropTable(tableName: String): Unit /** - * Create a table with the given table name and schema. + * Creates a table with the specified name and schema. + * @param schemaString The schema definition for the table. Note that this may vary depending on + * the database system. */ def createTable(tableName: String, schemaString: String): Unit /** - * Load data from the given Spark Dataframe into the table with given name. + * Loads data from the given Spark DataFrame into the table with the specified name. + * @param df The Spark DataFrame containing the data to be loaded. */ def loadData(df: DataFrame, tableName: String): Unit /** - * Close the connection. + * Closes the JDBC connection. 
*/ def close(): Unit } From bef801184d51936a81d7b2c261cb71bafbdeed3e Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:01:42 -0800 Subject: [PATCH 16/63] Generate golden files for SQLQueryTestSuite --- .../sqllogictest-select1.sql.out | 203 ++++++++++++++ .../sqllogictest-select1.sql.out | 252 ++++++++++++++++++ .../apache/spark/sql/SQLQueryTestHelper.scala | 2 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 9 +- 4 files changed, 460 insertions(+), 6 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..16d7edfbe0467 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,203 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`t1`, [(a,None), (b,None), (c,None), (d,None), (e,None)], VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245), false, false, PersistedView, true + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END + FROM t1 + ORDER BY 1 +-- !query analysis +Sort [CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x ASC NULLS FIRST], true ++- Project [CASE WHEN (cast(c#x as double) > scalar-subquery#x []) THEN (a#x * 2) ELSE (b#x * 10) END AS CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x] + : +- Aggregate [avg(c#x) AS avg(c)#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS 
a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query analysis +Sort [((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST], true ++- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 + WHEN a < b + 3 THEN 333 ELSE 444 END, + abs(b - c), + CAST((a + b + c + d + e) / 5 AS INT), + a + b * 2 + c * 3 + FROM t1 + WHERE (e > c OR e < d) + AND d>e + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) + ORDER BY 4,2,1,3,5 +-- !query analysis +Sort [CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x ASC NULLS FIRST, ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, abs((b - c))#x ASC NULLS FIRST, ((a + (b * 2)) + (c * 3))#x ASC NULLS FIRST], true ++- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, CASE WHEN (a#x < (b#x - 3)) THEN 111 WHEN (a#x <= b#x) THEN 222 WHEN (a#x < (b#x + 3)) THEN 333 ELSE 444 END AS CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x, abs((b#x - c#x)) AS abs((b - c))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, ((a#x + (b#x * 2)) + (c#x * 3)) AS ((a + (b * 2)) + (c * 3))#x] + +- Filter ((((e#x > c#x) OR (e#x < d#x)) AND (d#x > e#x)) AND exists#x [b#x]) + : +- Project [1 AS 1#x] + : +- Filter (b#x < outer(b#x)) + : +- SubqueryAlias x + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT c, + d-e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 
222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + a + b * 2 + c * 3 + d * 4, + e + FROM t1 + WHERE d NOT BETWEEN 110 AND 150 + OR c BETWEEN b - 2 AND d + 2 + OR (e > c OR e < d) + ORDER BY 1,5,3,2,4 +-- !query analysis +Sort [c#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (d - e)#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST], true ++- Project [c#x, (d#x - e#x) AS (d - e)#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, (((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, e#x] + +- Filter ((NOT ((d#x >= 110) AND (d#x <= 150)) OR ((c#x >= (b#x - 2)) AND (c#x <= (d#x + 2)))) OR ((e#x > c#x) OR (e#x < d#x))) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), + abs(a), + e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + d + FROM t1 + WHERE b > c + AND c > d + ORDER BY 3,4,5,1,2,6 +-- !query analysis +Sort [abs(a)#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, d#x ASC NULLS FIRST], true ++- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, abs(a#x) AS abs(a)#x, e#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, d#x] + +- Filter ((b#x > c#x) AND (c#x > d#x)) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), + c + FROM t1 + WHERE (c <= d - 2 OR c >= d + 2) + ORDER BY 4,2,1,3 +-- !query analysis +Sort [c#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, scalarsubquery(c, d)#xL ASC NULLS FIRST], true ++- Project [(((a#x + (b#x * 2)) + 
(c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, scalar-subquery#x [c#x && d#x] AS scalarsubquery(c, d)#xL, c#x] + : +- Aggregate [count(1) AS count(1)#xL] + : +- Filter ((c#x > outer(c#x)) AND (d#x < outer(d#x))) + : +- SubqueryAlias x + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- Filter ((c#x <= (d#x - 2)) OR (c#x >= (d#x + 2))) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..762a04c22900d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,252 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END + FROM t1 + ORDER BY 1 +-- !query schema +struct scalarsubquery()) THEN (a * 2) ELSE (b * 10) END:int> +-- !query output +354 +362 +374 +380 +398 +400 +416 +428 +436 +446 +452 +468 +474 +484 +492 +1020 +1060 +1140 +1190 +1220 +1280 +1340 +1360 +1410 +1490 +1500 +1570 +1600 +1690 +1700 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query schema +struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int> +-- !query output +1531 102 +1604 107 +1683 112 +1755 117 +1824 122 +1899 127 +1975 132 +2052 137 +2129 142 +2208 147 +2286 152 +2360 157 +2436 162 +2499 167 +2589 172 +2653 177 +2739 182 +2802 187 +2880 192 +2946 197 +3037 202 +3100 207 +3176 212 +3259 217 +3331 
222 +3405 227 +3477 232 +3556 237 +3627 242 +3704 247 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 + WHEN a < b + 3 THEN 333 ELSE 444 END, + abs(b - c), + CAST((a + b + c + d + e) / 5 AS INT), + a + b * 2 + c * 3 + FROM t1 + WHERE (e > c OR e < d) + AND d>e + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) + ORDER BY 4,2,1,3,5 +-- !query schema +struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END:int,abs((b - c)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,((a + (b * 2)) + (c * 3)):int> +-- !query output +1604 333 2 107 643 +1899 222 1 127 770 +1975 222 3 132 793 +2208 111 3 147 881 +2286 333 3 152 910 +2436 333 3 162 970 +2653 333 3 177 1066 +2802 222 2 187 1121 +2880 111 1 192 1157 +2946 333 1 197 1187 +3176 444 3 212 1273 +3405 222 1 227 1364 +3627 222 4 242 1450 +3704 222 1 247 1483 + + +-- !query +SELECT c, + d-e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + a + b * 2 + c * 3 + d * 4, + e + FROM t1 + WHERE d NOT BETWEEN 110 AND 150 + OR c BETWEEN b - 2 AND d + 2 + OR (e > c OR e < d) + ORDER BY 1,5,3,2,4 +-- !query schema +struct +-- !query output +100 -3 444 1011 104 +108 4 222 1079 105 +112 -2 333 1118 113 +117 -3 222 1165 118 +129 1 111 1274 125 +131 3 333 1325 130 +140 -1 555 1414 143 +146 1 222 1473 147 +153 2 444 1526 152 +159 -2 333 1570 158 +163 2 444 1626 162 +168 -1 222 1669 166 +172 -1 222 1719 174 +179 3 333 1778 175 +182 -1 222 1819 184 +186 4 111 1877 185 +193 1 444 1925 191 +198 1 555 1971 195 +203 -3 333 2017 204 +205 -1 111 2065 207 +213 1 555 2121 211 +216 -2 444 2164 219 +222 -4 444 2211 224 +228 4 111 2280 225 +232 -3 555 2312 233 +239 -3 444 2366 238 +240 2 333 2422 241 +247 4 222 2479 245 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), + abs(a), + e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + d + FROM t1 + WHERE b > c + AND c > d + ORDER BY 3,4,5,1,2,6 +-- !query schema +struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,abs(a):int,e:int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,d:int> +-- !query output +1118 112 110 113 333 111 +1165 117 116 118 222 115 +1669 167 167 166 222 165 +1925 192 190 191 444 192 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), + c + FROM t1 + WHERE (c <= d - 2 OR c >= d + 2) + ORDER BY 4,2,1,3 +-- !query schema +struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,scalarsubquery(c, d):bigint,c:int> +-- !query output +1165 222 0 117 +1219 222 0 124 +1274 111 0 129 +1325 333 0 131 +1367 222 0 139 +1414 555 0 140 +1473 222 0 146 +1570 333 0 159 +1669 222 0 168 +1877 111 0 186 +1971 555 0 198 +2017 333 0 203 +2211 444 0 222 +2312 555 0 232 +2366 444 0 239 +2422 333 0 240 +2479 222 0 247 + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 3bda8c75d7806..c08569150e2a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -102,7 +102,7 @@ trait SQLQueryTestHelper extends Logging { * Uses the Spark logical plan to determine whether the plan is semantically sorted. This is * important to make non-sorted queries test cases more deterministic. */ - def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { + protected def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false case _: DescribeCommandBase | _: DescribeColumnCommand diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 28080926e34a4..cbf7f9c1a536c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -91,19 +91,18 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.getOrElse( + runner = runner.orElse( Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( crossDbmsToGenerateGoldenFiles)(connectionUrl))) val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted. - val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { output } else { // Sort the answer manually if it isn't sorted. output.sorted } - result } else { val (_, output) = handleExceptions( getNormalizedQueryExecutionResult(localSparkSession, sql)) @@ -162,7 +161,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = resultFileForInputFile(file) + val resultFile = resultFileForInputFile(file) val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil @@ -180,7 +179,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { "udf", "timestampNTZ", "udaf", - "typeCoercion", + "typeCoercion" ) ++ // Files Set( From a6e7e2a57833d2c815cae1b98687ca5eb6fb444d Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:04:33 -0800 Subject: [PATCH 17/63] Revert "Generate golden files for SQLQueryTestSuite" This reverts commit bef801184d51936a81d7b2c261cb71bafbdeed3e. 
--- .../sqllogictest-select1.sql.out | 203 -------------- .../sqllogictest-select1.sql.out | 252 ------------------ .../apache/spark/sql/SQLQueryTestHelper.scala | 2 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 9 +- 4 files changed, 6 insertions(+), 460 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index 16d7edfbe0467..0000000000000 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,203 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t1`, [(a,None), (b,None), (c,None), (d,None), (e,None)], VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245), false, false, PersistedView, true - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1 --- !query analysis -Sort [CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x ASC NULLS FIRST], true -+- Project [CASE WHEN (cast(c#x as double) > scalar-subquery#x []) THEN (a#x * 2) ELSE (b#x * 10) END AS CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x] - : +- Aggregate [avg(c#x) AS avg(c)#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- SubqueryAlias 
spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query analysis -Sort [((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST], true -+- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query analysis -Sort [CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x ASC NULLS FIRST, ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, abs((b - c))#x ASC NULLS FIRST, ((a + (b * 2)) + (c * 3))#x ASC NULLS FIRST], true -+- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, CASE WHEN (a#x < (b#x - 3)) THEN 111 WHEN (a#x <= b#x) THEN 222 WHEN (a#x < (b#x + 3)) THEN 333 ELSE 444 END AS CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x, abs((b#x - c#x)) AS abs((b - c))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, ((a#x + (b#x * 2)) + (c#x * 3)) AS ((a + (b * 2)) + (c * 3))#x] - +- Filter ((((e#x > c#x) OR (e#x < d#x)) AND (d#x > e#x)) AND exists#x [b#x]) - : +- Project [1 AS 1#x] - : +- Filter (b#x < outer(b#x)) - : +- SubqueryAlias x - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 
1,5,3,2,4 --- !query analysis -Sort [c#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (d - e)#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST], true -+- Project [c#x, (d#x - e#x) AS (d - e)#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, (((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, e#x] - +- Filter ((NOT ((d#x >= 110) AND (d#x <= 150)) OR ((c#x >= (b#x - 2)) AND (c#x <= (d#x + 2)))) OR ((e#x > c#x) OR (e#x < d#x))) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query analysis -Sort [abs(a)#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, d#x ASC NULLS FIRST], true -+- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, abs(a#x) AS abs(a)#x, e#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, d#x] - +- Filter ((b#x > c#x) AND (c#x > d#x)) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query analysis -Sort [c#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, scalarsubquery(c, d)#xL ASC NULLS FIRST], true -+- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) 
THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, scalar-subquery#x [c#x && d#x] AS scalarsubquery(c, d)#xL, c#x] - : +- Aggregate [count(1) AS count(1)#xL] - : +- Filter ((c#x > outer(c#x)) AND (d#x < outer(d#x))) - : +- SubqueryAlias x - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- Filter ((c#x <= (d#x - 2)) OR (c#x >= (d#x + 2))) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -DROP VIEW IF EXISTS t1 --- !query analysis -DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index 762a04c22900d..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,252 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query schema -struct<> --- !query output - - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1 --- !query schema -struct scalarsubquery()) THEN (a * 2) ELSE (b * 10) END:int> --- !query output -354 -362 -374 -380 -398 -400 -416 -428 -436 -446 -452 -468 -474 -484 -492 -1020 -1060 -1140 -1190 -1220 -1280 -1340 -1360 -1410 -1490 -1500 -1570 -1600 -1690 -1700 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query schema -struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int> --- !query output -1531 102 -1604 107 -1683 112 -1755 117 -1824 122 -1899 127 -1975 132 -2052 137 -2129 142 -2208 147 -2286 152 -2360 157 -2436 162 -2499 167 -2589 172 -2653 177 -2739 182 -2802 187 -2880 192 -2946 197 -3037 202 -3100 207 -3176 212 -3259 217 -3331 222 -3405 227 -3477 232 -3556 237 -3627 242 -3704 247 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 
333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query schema -struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END:int,abs((b - c)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,((a + (b * 2)) + (c * 3)):int> --- !query output -1604 333 2 107 643 -1899 222 1 127 770 -1975 222 3 132 793 -2208 111 3 147 881 -2286 333 3 152 910 -2436 333 3 162 970 -2653 333 3 177 1066 -2802 222 2 187 1121 -2880 111 1 192 1157 -2946 333 1 197 1187 -3176 444 3 212 1273 -3405 222 1 227 1364 -3627 222 4 242 1450 -3704 222 1 247 1483 - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4 --- !query schema -struct --- !query output -100 -3 444 1011 104 -108 4 222 1079 105 -112 -2 333 1118 113 -117 -3 222 1165 118 -129 1 111 1274 125 -131 3 333 1325 130 -140 -1 555 1414 143 -146 1 222 1473 147 -153 2 444 1526 152 -159 -2 333 1570 158 -163 2 444 1626 162 -168 -1 222 1669 166 -172 -1 222 1719 174 -179 3 333 1778 175 -182 -1 222 1819 184 -186 4 111 1877 185 -193 1 444 1925 191 -198 1 555 1971 195 -203 -3 333 2017 204 -205 -1 111 2065 207 -213 1 555 2121 211 -216 -2 444 2164 219 -222 -4 444 2211 224 -228 4 111 2280 225 -232 -3 555 2312 233 -239 -3 444 2366 238 -240 2 333 2422 241 -247 4 222 2479 245 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query schema -struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,abs(a):int,e:int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,d:int> --- !query output -1118 112 110 113 333 111 -1165 117 116 118 222 115 -1669 167 167 166 222 165 -1925 192 190 191 444 192 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query schema -struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,scalarsubquery(c, d):bigint,c:int> --- !query output -1165 222 0 117 -1219 222 0 124 -1274 111 0 129 -1325 333 0 131 -1367 222 0 139 -1414 555 0 140 -1473 222 0 146 -1570 333 0 159 -1669 222 0 168 -1877 111 0 186 -1971 555 0 198 -2017 333 0 203 -2211 444 0 222 -2312 555 0 232 -2366 444 0 239 -2422 333 0 240 -2479 222 0 247 - - --- !query -DROP VIEW IF EXISTS t1 --- !query schema -struct<> --- !query output - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index c08569150e2a9..3bda8c75d7806 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -102,7 +102,7 @@ trait SQLQueryTestHelper extends Logging { * Uses the Spark logical plan to determine whether the plan is semantically sorted. This is * important to make non-sorted queries test cases more deterministic. */ - protected def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { + def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false case _: DescribeCommandBase | _: DescribeColumnCommand diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index cbf7f9c1a536c..28080926e34a4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -91,18 +91,19 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.orElse( + runner = runner.getOrElse( Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( crossDbmsToGenerateGoldenFiles)(connectionUrl))) val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted. - if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { output } else { // Sort the answer manually if it isn't sorted. output.sorted } + result } else { val (_, output) = handleExceptions( getNormalizedQueryExecutionResult(localSparkSession, sql)) @@ -161,7 +162,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - val resultFile = resultFileForInputFile(file) + var resultFile = resultFileForInputFile(file) val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil @@ -179,7 +180,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { "udf", "timestampNTZ", "udaf", - "typeCoercion" + "typeCoercion", ) ++ // Files Set( From 1409d63258a190a3823e26e4a5e2d9d94ae599b7 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:06:03 -0800 Subject: [PATCH 18/63] Ignore "postgres-crosstest" in SQLQueryTestSuite --- .../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 3 ++- .../apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index be59753f97655..ca45276efbb69 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -165,7 +165,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper if (TestUtils.testCommandAvailable("/bin/bash")) Nil else Set("transform.sql") /** List of test cases to ignore, in lower cases. 
*/ protected def ignoreList: Set[String] = Set( - "ignored.sql" // Do NOT remove this one. It is here to test the ignore functionality. + "ignored.sql", // Do NOT remove this one. It is here to test the ignore functionality. + "postgres-crosstest" ) ++ otherIgnoreList // Create all the test cases. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 28080926e34a4..d42609ff54af5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -170,7 +170,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } // Ignore all tests for now due to likely incompatibility. - override def ignoreList: Set[String] = super.ignoreList ++ + override def ignoreList: Set[String] = // Directories Set( "postgreSQL", From a552d86563254225ccacc574ef4b5c92d0c0bf5a Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:07:15 -0800 Subject: [PATCH 19/63] Add comment to clarify why it is ignored --- .../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index ca45276efbb69..5c7b877c24b11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -165,7 +165,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper if (TestUtils.testCommandAvailable("/bin/bash")) Nil else Set("transform.sql") /** List of test cases to ignore, in lower cases. */ protected def ignoreList: Set[String] = Set( - "ignored.sql", // Do NOT remove this one. It is here to test the ignore functionality. + // Do NOT remove this one. It is here to test the ignore functionality. + "ignored.sql", + // CrossDbms tests are captured in [[CrossDbmsQueryTestSuite]], there is no need to repeat them. "postgres-crosstest" ) ++ otherIgnoreList From 7a9cb80cc14f9d2bccbcaadb69c630d645066f27 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:14:50 -0800 Subject: [PATCH 20/63] Fix compilation failures --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index d42609ff54af5..551c290587e66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -91,7 +91,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. 
if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.getOrElse( + runner = runner.orElse( Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( crossDbmsToGenerateGoldenFiles)(connectionUrl))) val sparkDf = spark.sql(sql) @@ -173,14 +173,14 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override def ignoreList: Set[String] = // Directories Set( - "postgreSQL", + // "postgreSQL", "subquery", "ansi", "udtf", "udf", "timestampNTZ", "udaf", - "typeCoercion", + "typeCoercion" ) ++ // Files Set( From 66f41a5fddb74d359eaab12e097ae135e239ee84 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:18:21 -0800 Subject: [PATCH 21/63] Regenerate golden files for sqllogictest-select1.sql.out --- .../sqllogictest-select1.sql.out | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out index 1ce87f0fc54bb..fde78003ffa90 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -36,7 +36,7 @@ CREATE VIEW t1(a, b, c, d, e) AS VALUES -- !query -SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END FROM t1 ORDER BY 1 -- !query output @@ -73,8 +73,8 @@ SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END -- !query -SELECT a+b*2+c*3+d*4+e*5, - CAST((a+b+c+d+e)/5 AS INT) +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) FROM t1 ORDER BY 1,2 -- !query output @@ -111,16 +111,16 @@ SELECT a+b*2+c*3+d*4+e*5, -- !query -SELECT a+b*2+c*3+d*4+e*5, - CASE WHEN ac OR e c OR e < d) AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.bc OR e c OR e < d) ORDER BY 1,5,3,2,4 -- !query output 100 -3 444 1011 104 @@ -183,16 +183,16 @@ SELECT c, -- !query -SELECT a+b*2+c*3+d*4, - CAST((a+b+c+d+e)/5 AS INT), +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), abs(a), e, - CASE a+1 WHEN b THEN 111 WHEN c THEN 222 + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, d FROM t1 - WHERE b>c - AND c>d + WHERE b > c + AND c > d ORDER BY 3,4,5,1,2,6 -- !query output 1118 112 110 113 333 111 @@ -202,13 +202,13 @@ SELECT a+b*2+c*3+d*4, -- !query -SELECT a+b*2+c*3+d*4, - CASE a+1 WHEN b THEN 111 WHEN c THEN 222 +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c>t1.c AND x.d t1.c AND x.d < t1.d), c FROM t1 - WHERE (c<=d-2 OR c>=d+2) + WHERE (c <= d - 2 OR c >= d + 2) ORDER BY 4,2,1,3 -- !query output 1165 222 0 117 From 69614d310eedd25f7dbdc1de5adce178cf833dbb Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:34:09 -0800 Subject: [PATCH 22/63] Tiny changes.. 
--- .../scala/org/apache/spark/sql/SQLQueryTestHelper.scala | 2 +- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 3bda8c75d7806..c08569150e2a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -102,7 +102,7 @@ trait SQLQueryTestHelper extends Logging { * Uses the Spark logical plan to determine whether the plan is semantically sorted. This is * important to make non-sorted queries test cases more deterministic. */ - def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { + protected def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false case _: DescribeCommandBase | _: DescribeColumnCommand diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 551c290587e66..44f45f1174acd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -97,14 +97,14 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted. - val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { output } else { // Sort the answer manually if it isn't sorted. output.sorted } - result } else { + // Use Spark. 
val (_, output) = handleExceptions( getNormalizedQueryExecutionResult(localSparkSession, sql)) output @@ -162,7 +162,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = resultFileForInputFile(file) + val resultFile = resultFileForInputFile(file) val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil From 2bf43dfdbd4b6207206bac12d8f02830e8adbf9c Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 12:11:51 -0800 Subject: [PATCH 23/63] Add postgresql back to ignorelist --- .../apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 44f45f1174acd..7cea9d41d2772 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -173,7 +173,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override def ignoreList: Set[String] = // Directories Set( - // "postgreSQL", + "postgreSQL", "subquery", "ansi", "udtf", From aba83b89c2861cf2461b06c88d09ae1fb7f5b1fc Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 15:30:15 -0800 Subject: [PATCH 24/63] Add exception handling in CrossDbmsQueryTestSuite --- .../apache/spark/sql/SQLQueryTestSuite.scala | 5 +-- .../crossdbms/CrossDbmsQueryTestSuite.scala | 32 ++++++++++++++----- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 5c7b877c24b11..be59753f97655 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -165,10 +165,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper if (TestUtils.testCommandAvailable("/bin/bash")) Nil else Set("transform.sql") /** List of test cases to ignore, in lower cases. */ protected def ignoreList: Set[String] = Set( - // Do NOT remove this one. It is here to test the ignore functionality. - "ignored.sql", - // CrossDbms tests are captured in [[CrossDbmsQueryTestSuite]], there is no need to repeat them. - "postgres-crosstest" + "ignored.sql" // Do NOT remove this one. It is here to test the ignore functionality. ) ++ otherIgnoreList // Create all the test cases. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 7cea9d41d2772..dc5c6c897e598 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.crossdbms import java.io.File import java.util.Locale +import scala.util.control.NonFatal + import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite import org.apache.spark.sql.catalyst.util.stringToFile @@ -94,14 +96,28 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { runner = runner.orElse( Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( crossDbmsToGenerateGoldenFiles)(connectionUrl))) - val sparkDf = spark.sql(sql) - val output = runner.map(_.runQuery(sql)).get - // Use Spark analyzed plan to check if the query result is already semantically sorted. - if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { - output - } else { - // Sort the answer manually if it isn't sorted. - output.sorted + try { + // Either of the below two lines can error. If we go into the catch statement, then it + // is likely one of the following scenarios: + // 1. Error thrown in Spark analysis: + // a. The query is either incompatible between Spark and the other DBMS + // b. There is issue with the schema in Spark. + // c. The error is expected - it errors on both systems, but only the Spark error + // will be printed to the golden file, because it errors first. + // 2. Error thrown in other DBMS execution: + // a. The query is either incompatible between Spark and the other DBMS + // b. Some test table/view is not created on the other DBMS. + val sparkDf = spark.sql(sql) + val output = runner.map(_.runQuery(sql)).get + // Use Spark analyzed plan to check if the query result is already semantically sorted. + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + output + } else { + // Sort the answer manually if it isn't sorted. + output.sorted + } + } catch { + case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) } } else { // Use Spark. From bf4036d7d2786ea11ee72081889012a4da907edc Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 16:51:36 -0800 Subject: [PATCH 25/63] Do refactoring so that we can add an additional input argument for CrossDbmsQueryTestSuite --- .../apache/spark/sql/SQLQueryTestSuite.scala | 109 ++++++---- .../crossdbms/CrossDbmsQueryTestSuite.scala | 201 +++++------------- 2 files changed, 112 insertions(+), 198 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index be59753f97655..b054d837c3d5c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -349,20 +349,16 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } - /** Run a test case. 
*/ - protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { - def splitWithSemicolon(seq: Seq[String]) = { - seq.mkString("\n").split("(?<=[^\\\\]);") - } - - def splitCommentsAndCodes(input: String) = input.split("\n").partition { line => + protected def splitCommentsAndCodes(input: String) = + input.split("\n").partition { line => val newLine = line.trim newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") } - val input = fileToString(new File(testCase.inputFile)) - - val (comments, code) = splitCommentsAndCodes(input) + protected def getQueries(code: Array[String], comments: Array[String]) = { + def splitWithSemicolon(seq: Seq[String]) = { + seq.mkString("\n").split("(?<=[^\\\\]);") + } // If `--IMPORT` found, load code from another test case file, then insert them // into the head in this test. @@ -406,52 +402,74 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } // List of SQL queries to run - val queries = tempQueries.map(_.trim).filter(_ != "").toSeq + tempQueries.map(_.trim).filter(_ != "") // Fix misplacement when comment is at the end of the query. .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") + } + protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = { val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) - val settings = settingLines.flatMap(_.split(",").map { kv => + settingLines.flatMap(_.split(",").map { kv => val (conf, value) = kv.span(_ != '=') conf.trim -> value.substring(1).trim }) + } - if (regenerateGoldenFiles) { - runQueries(queries, testCase, settings.toImmutableArraySeq) - } else { - // A config dimension has multiple config sets, and a config set has multiple configs. - // - config dim: Seq[Seq[(String, String)]] - // - config set: Seq[(String, String)] - // - config: (String, String)) - // We need to do cartesian product for all the config dimensions, to get a list of - // config sets, and run the query once for each config set. - val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) - val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => - lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => - val (conf, value) = kv.span(_ != '=') - conf.trim -> value.substring(1).trim - }.toSeq).toSeq - } + protected def getSparkConfigDimensions(comments: Array[String]): Seq[Seq[(String, String)]] = { + // A config dimension has multiple config sets, and a config set has multiple configs. + // - config dim: Seq[Seq[(String, String)]] + // - config set: Seq[(String, String)] + // - config: (String, String)) + // We need to do cartesian product for all the config dimensions, to get a list of + // config sets, and run the query once for each config set. 
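+    // For example, if a test file declared the (hypothetical) dimensions
+    //   --CONFIG_DIM1 a=1
+    //   --CONFIG_DIM1 a=2
+    //   --CONFIG_DIM2 b=2,c=3
+    // then dimension 1 would contribute two config sets and dimension 2 one, and the cartesian
+    // product would yield two final config sets, (a=1, b=2, c=3) and (a=2, b=2, c=3), so each
+    // query would run once per set.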
+ val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) + val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => + lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }.toSeq).toSeq + } - val configSets = configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => - dim.flatMap { configSet => res.map(_ ++ configSet) } - } + configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => + dim.flatMap { configSet => res.map(_ ++ configSet) } + } + } - configSets.foreach { configSet => - try { - runQueries(queries, testCase, (settings ++ configSet).toImmutableArraySeq) - } catch { - case e: Throwable => - val configs = configSet.map { - case (k, v) => s"$k=$v" - } - logError(s"Error using configs: ${configs.mkString(",")}") - throw e - } + protected def runQueriesWithSparkConfigDimensions( + queries: Seq[String], + testCase: TestCase, + sparkConfigSet: Array[(String, String)], + sparkConfigDims: Seq[Seq[(String, String)]]): Unit = { + sparkConfigDims.foreach { configDim => + try { + runQueries(queries, testCase, (sparkConfigSet ++ configDim).toImmutableArraySeq) + } catch { + case e: Throwable => + val configs = configDim.map { + case (k, v) => s"$k=$v" + } + logError(s"Error using configs: ${configs.mkString(",")}") + throw e } } } + /** Run a test case. */ + protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments) + val settings = getSparkSettings(comments) + + if (regenerateGoldenFiles) { + runQueries(queries, testCase, settings.toImmutableArraySeq) + } else { + val configSets = getSparkConfigDimensions(comments) + runQueriesWithSparkConfigDimensions( + queries, testCase, settings, configSets) + } + } + def hasNoDuplicateColumns(schema: String): Boolean = { val columnAndTypes = schema.replaceFirst("^struct<", "").stripSuffix(">").split(",") columnAndTypes.size == columnAndTypes.distinct.length @@ -498,7 +516,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Seq[(String, String)]): Unit = { + sparkConfigSet: Seq[(String, String)], + otherConfigs: Map[String, String] = Map.empty): Unit = { // Create a local SparkSession to have stronger isolation between different test cases. // This does not isolate catalog changes. 
val localSparkSession = spark.newSession() @@ -529,9 +548,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper localSparkSession.conf.set(SQLConf.ANSI_ENABLED.key, false) } - if (configSet.nonEmpty) { + if (sparkConfigSet.nonEmpty) { // Execute the list of set operation in order to add the desired configs - val setOperations = configSet.map { case (key, value) => s"set $key=$value" } + val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } logInfo(s"Setting configs: ${setOperations.mkString(", ")}") setOperations.foreach(localSparkSession.sql) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index dc5c6c897e598..86a751a90117d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -24,7 +24,8 @@ import scala.util.control.NonFatal import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite -import org.apache.spark.sql.catalyst.util.stringToFile +import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} +import org.apache.spark.util.ArrayImplicits.SparkArrayOps // scalastyle:off line.size.limit /** @@ -34,9 +35,6 @@ import org.apache.spark.sql.catalyst.util.stringToFile * tests will not be compatible with other DBMSes. There will be more work in the future, such as * some kind of conversion, to increase coverage. * - * If your SQL query test is not compatible with other DBMSes, please add it to the `ignoreList` at - * the bottom of this file. - * * You need to have a database server up before running this test. * For example, for postgres: * 1. Install PostgreSQL. @@ -44,7 +42,13 @@ import org.apache.spark.sql.catalyst.util.stringToFile * 2. After installing PostgreSQL, start the database server, then create a role named pg with * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` * - * To run the entire test suite: + * To indicate that the SQL file is eligible for testing with this suite, add the following comment + * into the input file: + * {{{ + * --DBMS_TO_GENERATE_GOLDEN_FILE postgres + * }}} + * + * And then, to run the entire test suite: * {{{ * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" * }}} @@ -54,14 +58,14 @@ import org.apache.spark.sql.catalyst.util.stringToFile * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" * }}} * - * To re-generate golden file for a single test, run: + * To re-generate golden file for a single test, e.g. 
`describe.sql`, run: * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite -- -z describe.sql" * }}} * * To specify a DBMS to use (the default is postgres): * {{{ - * REF_DBMS=mysql SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * REF_DBMS=postgres SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" * }}} */ // scalastyle:on line.size.limit @@ -80,12 +84,41 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { private def customConnectionUrl: String = System.getenv( CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) + override protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments) + val settings = getSparkSettings(comments) + + if (regenerateGoldenFiles) { + // If `--DBMS_TO_GENERATE_GOLDEN_FILE` found, + val dbmsConfig = comments.filter(_.startsWith(s"--${ + CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) + val otherConfigs = dbmsConfig.map( + (CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE, _)).toMap + runQueries(queries, testCase, settings.toImmutableArraySeq, otherConfigs) + } else { + val configSets = getSparkConfigDimensions(comments) + runQueriesWithSparkConfigDimensions( + queries, testCase, settings, configSets) + } + } + override protected def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Seq[(String, String)]): Unit = { + sparkConfigSet: Seq[(String, String)], + otherConfigs: Map[String, String] = Map.empty): Unit = { val localSparkSession = spark.newSession() + if (!otherConfigs.contains(CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE) || + !otherConfigs.get(CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE) + .contains(crossDbmsToGenerateGoldenFiles)) { + log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + + s"testing with $crossDbmsToGenerateGoldenFiles") + return + } + var runner: Option[SQLQueryTestRunner] = None val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = { @@ -121,6 +154,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } } else { // Use Spark. + // Execute the list of set operation in order to add the desired configs + val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } + setOperations.foreach(localSparkSession.sql) val (_, output) = handleExceptions( getNormalizedQueryExecutionResult(localSparkSession, sql)) output @@ -136,8 +172,8 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { if (regenerateGoldenFiles) { val goldenOutput = { - s"-- Automatically generated by ${getClass.getSimpleName}\n" + - outputs.mkString("\n\n\n") + "\n" + s"-- Automatically generated by ${getClass.getSimpleName} with " + + s"$crossDbmsToGenerateGoldenFiles\n" + outputs.mkString("\n\n\n") + "\n" } val resultFile = new File(testCase.resultFile) val parent = resultFile.getParentFile @@ -184,149 +220,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { RegularTestCase(testCaseName, absPath, resultFile) :: Nil }.sortBy(_.name) } - - // Ignore all tests for now due to likely incompatibility. 
- override def ignoreList: Set[String] = - // Directories - Set( - "postgreSQL", - "subquery", - "ansi", - "udtf", - "udf", - "timestampNTZ", - "udaf", - "typeCoercion" - ) ++ - // Files - Set( - "group-by.sql", - "natural-join.sql", - "timestamp-ltz.sql", - "csv-functions.sql", - "datetime-formatting-invalid.sql", - "except.sql", - "string-functions.sql", - "cte-command.sql", - "identifier-clause.sql", - "try_reflect.sql", - "order-by-all.sql", - "timestamp-ntz.sql", - "url-functions.sql", - "math.sql", - "random.sql", - "tablesample-negative.sql", - "date.sql", - "window.sql", - "linear-regression.sql", - "join-empty-relation.sql", - "null-propagation.sql", - "operators.sql", - "change-column.sql", - "count.sql", - "mask-functions.sql", - "decimalArithmeticOperations.sql", - "column-resolution-sort.sql", - "group-analytics.sql", - "inline-table.sql", - "comparator.sql", - "ceil-floor-with-scale-param.sql", - "show-tblproperties.sql", - "timezone.sql", - "ilike.sql", - "parse-schema-string.sql", - "charvarchar.sql", - "ignored.sql", - "cte.sql", - "selectExcept.sql", - "order-by-nulls-ordering.sql", - "query_regex_column.sql", - "show_columns.sql", - "columnresolution-views.sql", - "percentiles.sql", - "hll.sql", - "group-by-all.sql", - "like-any.sql", - "struct.sql", - "show-create-table.sql", - "try_cast.sql", - "unpivot.sql", - "describe-query.sql", - "describe.sql", - "columnresolution.sql", - "union.sql", - "order-by-ordinal.sql", - "explain-cbo.sql", - "intersect-all.sql", - "ilike-all.sql", - "try_aggregates.sql", - "try-string-functions.sql", - "show-views.sql", - "xml-functions.sql", - "datetime-parsing.sql", - "cte-nested.sql", - "columnresolution-negative.sql", - "datetime-special.sql", - "describe-table-after-alter-table.sql", - "column-resolution-aggregate.sql", - "keywords.sql", - "table-aliases.sql", - "bitwise.sql", - "like-all.sql", - "named-function-arguments.sql", - "try_datetime_functions.sql", - "datetime-legacy.sql", - "outer-join.sql", - "explain.sql", - "sql-compatibility-functions.sql", - "join-lateral.sql", - "pred-pushdown.sql", - "cast.sql", - "except-all.sql", - "predicate-functions.sql", - "timestamp.sql", - "group-by-filter.sql", - "pivot.sql", - "try_arithmetic.sql", - "higher-order-functions.sql", - "cte-legacy.sql", - "misc-functions.sql", - "describe-part-after-analyze.sql", - "extract.sql", - "datetime-formatting.sql", - "interval.sql", - "double-quoted-identifiers.sql", - "datetime-formatting-legacy.sql", - "group-by-ordinal.sql", - "having.sql", - "inner-join.sql", - "null-handling.sql", - "ilike-any.sql", - "show-tables.sql", - "sql-session-variables.sql", - "using-join.sql", - "subexp-elimination.sql", - "cte-nonlegacy.sql", - "group-by-all-duckdb.sql", - "transform.sql", - "map.sql", - "table-valued-functions.sql", - "comments.sql", - "regexp-functions.sql", - "datetime-parsing-legacy.sql", - "cross-join.sql", - "array.sql", - "group-by-all-mosha.sql", - "limit.sql", - "non-excludable-rule.sql", - "grouping_set.sql", - "json-functions.sql", - "datetime-parsing-invalid.sql", - "try_element_at.sql", - "explain-aqe.sql", - "current_database_catalog.sql", - "literals.sql" - ) } object CrossDbmsQueryTestSuite { @@ -335,6 +228,8 @@ object CrossDbmsQueryTestSuite { private final val REF_DBMS_ARGUMENT = "REF_DBMS" // System argument to indicate a custom connection URL to the reference DBMS. 
private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" + // Argument in input files to indicate that golden file should be generated with a reference DBMS + private final val DBMS_TO_GENERATE_GOLDEN_FILE = "DBMS_TO_GENERATE_GOLDEN_FILE" private final val POSTGRES = "postgres" private final val SUPPORTED_DBMS = Seq(POSTGRES) From 08e2cfb58cddeed786a8cf0369afb2e4fd125a72 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 16:53:07 -0800 Subject: [PATCH 26/63] Generate with postgres --- .../inputs/postgres-crosstest/sqllogictest-select1.sql | 1 + .../postgres-crosstest/sqllogictest-select1.sql.out | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql index 7fcc5419044d0..87c15a07733b5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql @@ -1,6 +1,7 @@ -- First 6 queries of the sql-logic-tests with some minor changes for compatibility, available here: -- https://www.sqlite.org/sqllogictest/file?name=test/select1.test&ci=tip. +--DBMS_TO_GENERATE_GOLDEN_FILE postgres CREATE VIEW t1(a, b, c, d, e) AS VALUES (103,102,100,101,104), (107,106,108,109,105), diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out index fde78003ffa90..b5eb7eef4a9a3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -1,4 +1,4 @@ --- Automatically generated by CrossDbmsQueryTestSuite +-- Automatically generated by CrossDbmsQueryTestSuite with postgres -- !query CREATE VIEW t1(a, b, c, d, e) AS VALUES (103,102,100,101,104), From 2ec381c52ac7853efa6523382d182fcb3aa99520 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Fri, 1 Dec 2023 10:33:39 -0800 Subject: [PATCH 27/63] Fix compilation error with ThriftServerQueryTestSuite --- .../sql/hive/thriftserver/ThriftServerQueryTestSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 8d90d47e1bf5f..febf763e209d5 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -107,7 +107,8 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ override def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Seq[(String, String)]): Unit = { + configSet: Seq[(String, String)], + otherConfigs: Map[String, String] = Map.empty): Unit = { // We do not test with configSet. 
withJdbcStatement() { statement => From 9c2d2832129c0341c1d14e6bf1563882f1d41a87 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Fri, 1 Dec 2023 13:15:00 -0800 Subject: [PATCH 28/63] Generate golden files with SQLQueryTestSuite --- .../sqllogictest-select1.sql.out | 203 ++++++++++++++ .../sqllogictest-select1.sql.out | 252 ++++++++++++++++++ 2 files changed, 455 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..16d7edfbe0467 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,203 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`t1`, [(a,None), (b,None), (c,None), (d,None), (e,None)], VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245), false, false, PersistedView, true + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END + FROM t1 + ORDER BY 1 +-- !query analysis +Sort [CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x ASC NULLS FIRST], true ++- Project [CASE WHEN (cast(c#x as double) > scalar-subquery#x []) THEN (a#x * 2) ELSE (b#x * 10) END AS CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x] + : +- Aggregate [avg(c#x) AS avg(c)#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as 
int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query analysis +Sort [((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST], true ++- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 + WHEN a < b + 3 THEN 333 ELSE 444 END, + abs(b - c), + CAST((a + b + c + d + e) / 5 AS INT), + a + b * 2 + c * 3 + FROM t1 + WHERE (e > c OR e < d) + AND d>e + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) + ORDER BY 4,2,1,3,5 +-- !query analysis +Sort [CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x ASC NULLS FIRST, ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, abs((b - c))#x ASC NULLS FIRST, ((a + (b * 2)) + (c * 3))#x ASC NULLS FIRST], true ++- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, CASE WHEN (a#x < (b#x - 3)) THEN 111 WHEN (a#x <= b#x) THEN 222 WHEN (a#x < (b#x + 3)) THEN 333 ELSE 444 END AS CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x, abs((b#x - c#x)) AS abs((b - c))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, ((a#x + (b#x * 2)) + (c#x * 3)) AS ((a + (b * 2)) + (c * 3))#x] + +- Filter ((((e#x > c#x) OR (e#x < d#x)) AND (d#x > e#x)) AND exists#x [b#x]) + : +- Project [1 AS 1#x] + : +- Filter (b#x < outer(b#x)) + : +- SubqueryAlias x + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT c, + d-e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + a + b * 2 + c * 3 + d * 4, + e + FROM t1 + WHERE 
d NOT BETWEEN 110 AND 150 + OR c BETWEEN b - 2 AND d + 2 + OR (e > c OR e < d) + ORDER BY 1,5,3,2,4 +-- !query analysis +Sort [c#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (d - e)#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST], true ++- Project [c#x, (d#x - e#x) AS (d - e)#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, (((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, e#x] + +- Filter ((NOT ((d#x >= 110) AND (d#x <= 150)) OR ((c#x >= (b#x - 2)) AND (c#x <= (d#x + 2)))) OR ((e#x > c#x) OR (e#x < d#x))) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), + abs(a), + e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + d + FROM t1 + WHERE b > c + AND c > d + ORDER BY 3,4,5,1,2,6 +-- !query analysis +Sort [abs(a)#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, d#x ASC NULLS FIRST], true ++- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, abs(a#x) AS abs(a)#x, e#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, d#x] + +- Filter ((b#x > c#x) AND (c#x > d#x)) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), + c + FROM t1 + WHERE (c <= d - 2 OR c >= d + 2) + ORDER BY 4,2,1,3 +-- !query analysis +Sort [c#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, scalarsubquery(c, d)#xL ASC NULLS FIRST], true ++- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 
WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, scalar-subquery#x [c#x && d#x] AS scalarsubquery(c, d)#xL, c#x] + : +- Aggregate [count(1) AS count(1)#xL] + : +- Filter ((c#x > outer(c#x)) AND (d#x < outer(d#x))) + : +- SubqueryAlias x + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- Filter ((c#x <= (d#x - 2)) OR (c#x >= (d#x + 2))) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..762a04c22900d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,252 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END + FROM t1 + ORDER BY 1 +-- !query schema +struct scalarsubquery()) THEN (a * 2) ELSE (b * 10) END:int> +-- !query output +354 +362 +374 +380 +398 +400 +416 +428 +436 +446 +452 +468 +474 +484 +492 +1020 +1060 +1140 +1190 +1220 +1280 +1340 +1360 +1410 +1490 +1500 +1570 +1600 +1690 +1700 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query schema +struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int> +-- !query output +1531 102 +1604 107 +1683 112 +1755 117 +1824 122 +1899 127 +1975 132 +2052 137 +2129 142 +2208 147 +2286 152 +2360 157 +2436 162 +2499 167 +2589 172 +2653 177 +2739 182 +2802 187 +2880 192 +2946 197 +3037 202 +3100 207 +3176 212 +3259 217 +3331 222 +3405 227 +3477 232 +3556 237 +3627 242 +3704 247 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + 
e * 5, + CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 + WHEN a < b + 3 THEN 333 ELSE 444 END, + abs(b - c), + CAST((a + b + c + d + e) / 5 AS INT), + a + b * 2 + c * 3 + FROM t1 + WHERE (e > c OR e < d) + AND d>e + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) + ORDER BY 4,2,1,3,5 +-- !query schema +struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END:int,abs((b - c)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,((a + (b * 2)) + (c * 3)):int> +-- !query output +1604 333 2 107 643 +1899 222 1 127 770 +1975 222 3 132 793 +2208 111 3 147 881 +2286 333 3 152 910 +2436 333 3 162 970 +2653 333 3 177 1066 +2802 222 2 187 1121 +2880 111 1 192 1157 +2946 333 1 197 1187 +3176 444 3 212 1273 +3405 222 1 227 1364 +3627 222 4 242 1450 +3704 222 1 247 1483 + + +-- !query +SELECT c, + d-e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + a + b * 2 + c * 3 + d * 4, + e + FROM t1 + WHERE d NOT BETWEEN 110 AND 150 + OR c BETWEEN b - 2 AND d + 2 + OR (e > c OR e < d) + ORDER BY 1,5,3,2,4 +-- !query schema +struct +-- !query output +100 -3 444 1011 104 +108 4 222 1079 105 +112 -2 333 1118 113 +117 -3 222 1165 118 +129 1 111 1274 125 +131 3 333 1325 130 +140 -1 555 1414 143 +146 1 222 1473 147 +153 2 444 1526 152 +159 -2 333 1570 158 +163 2 444 1626 162 +168 -1 222 1669 166 +172 -1 222 1719 174 +179 3 333 1778 175 +182 -1 222 1819 184 +186 4 111 1877 185 +193 1 444 1925 191 +198 1 555 1971 195 +203 -3 333 2017 204 +205 -1 111 2065 207 +213 1 555 2121 211 +216 -2 444 2164 219 +222 -4 444 2211 224 +228 4 111 2280 225 +232 -3 555 2312 233 +239 -3 444 2366 238 +240 2 333 2422 241 +247 4 222 2479 245 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), + abs(a), + e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + d + FROM t1 + WHERE b > c + AND c > d + ORDER BY 3,4,5,1,2,6 +-- !query schema +struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,abs(a):int,e:int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,d:int> +-- !query output +1118 112 110 113 333 111 +1165 117 116 118 222 115 +1669 167 167 166 222 165 +1925 192 190 191 444 192 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), + c + FROM t1 + WHERE (c <= d - 2 OR c >= d + 2) + ORDER BY 4,2,1,3 +-- !query schema +struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,scalarsubquery(c, d):bigint,c:int> +-- !query output +1165 222 0 117 +1219 222 0 124 +1274 111 0 129 +1325 333 0 131 +1367 222 0 139 +1414 555 0 140 +1473 222 0 146 +1570 333 0 159 +1669 222 0 168 +1877 111 0 186 +1971 555 0 198 +2017 333 0 203 +2211 444 0 222 +2312 555 0 232 +2366 444 0 239 +2422 333 0 240 +2479 222 0 247 + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query schema +struct<> +-- !query output + From ab4ab121549877a43d76d1c8f239237f2ed44eca Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 13:20:25 -0800 Subject: [PATCH 29/63] Fixed a bug where tests weren't running against the golden file --- 
.../apache/spark/sql/SQLQueryTestSuite.scala | 3 +-- .../crossdbms/CrossDbmsQueryTestSuite.scala | 24 +++++++------------ 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index b054d837c3d5c..5705feec96a21 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -516,8 +516,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected def runQueries( queries: Seq[String], testCase: TestCase, - sparkConfigSet: Seq[(String, String)], - otherConfigs: Map[String, String] = Map.empty): Unit = { + sparkConfigSet: Seq[(String, String)]): Unit = { // Create a local SparkSession to have stronger isolation between different test cases. // This does not isolate catalog changes. val localSparkSession = spark.newSession() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 86a751a90117d..85654dce0c6ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -91,12 +91,15 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val settings = getSparkSettings(comments) if (regenerateGoldenFiles) { - // If `--DBMS_TO_GENERATE_GOLDEN_FILE` found, val dbmsConfig = comments.filter(_.startsWith(s"--${ CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) - val otherConfigs = dbmsConfig.map( - (CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE, _)).toMap - runQueries(queries, testCase, settings.toImmutableArraySeq, otherConfigs) + // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. 
+ if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + + s"testing with $crossDbmsToGenerateGoldenFiles") + return + } + runQueries(queries, testCase, settings.toImmutableArraySeq) } else { val configSets = getSparkConfigDimensions(comments) runQueriesWithSparkConfigDimensions( @@ -107,19 +110,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override protected def runQueries( queries: Seq[String], testCase: TestCase, - sparkConfigSet: Seq[(String, String)], - otherConfigs: Map[String, String] = Map.empty): Unit = { + sparkConfigSet: Seq[(String, String)]): Unit = { val localSparkSession = spark.newSession() - - if (!otherConfigs.contains(CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE) || - !otherConfigs.get(CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE) - .contains(crossDbmsToGenerateGoldenFiles)) { - log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + - s"testing with $crossDbmsToGenerateGoldenFiles") - return - } - var runner: Option[SQLQueryTestRunner] = None + val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = { // Use the runner when generating golden files, and Spark when running the test against From 0166aede0ca0c70cdbc429a96d05afd7ab52027b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 13:47:53 -0800 Subject: [PATCH 30/63] Use local spark session --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 7 +++++-- .../org/apache/spark/sql/crossdbms/JdbcConnection.scala | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 85654dce0c6ad..66447289e3b2a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -134,7 +134,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // 2. Error thrown in other DBMS execution: // a. The query is either incompatible between Spark and the other DBMS // b. Some test table/view is not created on the other DBMS. - val sparkDf = spark.sql(sql) + val sparkDf = localSparkSession.sql(sql) val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted. 
if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { @@ -162,7 +162,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { schema = None, output = normalizeTestResults(output.mkString("\n"))) } - runner.foreach(_.cleanUp()) + if (runner.isDefined) { + runner.foreach(_.cleanUp()) + runner = None + } if (regenerateGoldenFiles) { val goldenOutput = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 5901a523a6cad..f203f75ac0c8d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -66,7 +66,8 @@ private[sql] trait JdbcConnection { private[sql] case class PostgresConnection(connection_url: Option[String] = None) extends JdbcConnection { - DriverRegistry.register("org.postgresql.Driver") + private final val POSTGRES_DRIVER_CLASS_NAME = "org.postgresql.Driver" + DriverRegistry.register(POSTGRES_DRIVER_CLASS_NAME) private final val DEFAULT_USER = "pg" private final val DEFAULT_CONNECTION_URL = s"jdbc:postgresql://localhost:5432/postgres?user=$DEFAULT_USER" @@ -103,7 +104,7 @@ private[sql] case class PostgresConnection(connection_url: Option[String] = None } def loadData(df: DataFrame, tableName: String): Unit = { - df.write.option("driver", "org.postgresql.Driver").mode("append") + df.write.option("driver", POSTGRES_DRIVER_CLASS_NAME).mode("append") .jdbc(url, tableName, new Properties()) } From 2e45a67ea85bee971aed751bdf98ace39b9dffac Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 13:53:46 -0800 Subject: [PATCH 31/63] Small comment change --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 66447289e3b2a..d086b8dce1092 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -30,10 +30,10 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps // scalastyle:off line.size.limit /** * See SQLQueryTestSuite.scala for more information. This class builds off of that to allow us - * to generate golden files with other DBMS to perform cross-checking for correctness. Note that the - * input directory path is currently limited because most, if not all, of our current SQL query - * tests will not be compatible with other DBMSes. There will be more work in the future, such as - * some kind of conversion, to increase coverage. + * to generate golden files with other DBMS to perform cross-checking for correctness. Note that + * this is not currently run on all SQL input files by default because there is incompatibility + * between Spark SQL and the DBMS SQL. More work will be done to increase coverage, focusing on + * postgres. * * You need to have a database server up before running this test. 
* For example, for postgres: From dd4597e9bee91d508bca5975360150783e4d2f0b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 15:28:00 -0800 Subject: [PATCH 32/63] Revert change in ThriftServerQueryTestSuite --- .../sql/crossdbms/DialectConverter.scala | 66 +++++++++++++++++++ .../ThriftServerQueryTestSuite.scala | 3 +- 2 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala new file mode 100644 index 0000000000000..c8ea1b271b9b2 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.crossdbms + +trait DialectConverter { + + final val NOOP = "" + final val SET_COMMAND_PREFIX = "set" + final val COMMAND_PREFIXES_TO_IGNORE = Seq(SET_COMMAND_PREFIX) + + final val SPARK_SET_COMMAND_PATTERN = """set (\w+) = (true|false);""".r + + protected def checkIgnore(query: String): Boolean = + COMMAND_PREFIXES_TO_IGNORE.count(prefix => query.startsWith(prefix)) > 0 + + /** + * Transform the query string, which is compatible with Spark, such that it is compatible with + * dialect. This transforms a query string itself. Note that this is a best effort conversion and + * may not always produce a compatible query string. + */ + def preprocessQuery(query: String): String + + /** + * Transform the query SET, which is compatible with Spark, such that it is compatible with + * dialect. + * For example: + * {{{ + * 1. create temporary view t1 as values (..); + * 2. set spark.sql... = true; + * 3. select * from ... + * 4. set spark.sql... = false; + * }}} + * We can remove everything between lines 2 and 4 because if setting configs is not applicable to + * other DBMS systems, and the tests between such config setting is not relevant. + */ + def preprocessQuerySet(queries: Seq[String]): Seq[String] +} + +/** + * Wrapper around utility functions that help convert from Spark SQL to PostgreSQL. 
+ */ +object PostgresDialectConverter extends DialectConverter { + + def preprocessQuery(query: String): String = { + query + } + + override def preprocessQuerySet(queries: Seq[String]): Seq[String] = { + queries + } +} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index febf763e209d5..8d90d47e1bf5f 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -107,8 +107,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ override def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Seq[(String, String)], - otherConfigs: Map[String, String] = Map.empty): Unit = { + configSet: Seq[(String, String)]): Unit = { // We do not test with configSet. withJdbcStatement() { statement => From cb719791a292d6f8214574239373e17eab9c3a6e Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 15:29:40 -0800 Subject: [PATCH 33/63] Remove DialectConverter.. --- .../sql/crossdbms/DialectConverter.scala | 66 ------------------- 1 file changed, 66 deletions(-) delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala deleted file mode 100644 index c8ea1b271b9b2..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -trait DialectConverter { - - final val NOOP = "" - final val SET_COMMAND_PREFIX = "set" - final val COMMAND_PREFIXES_TO_IGNORE = Seq(SET_COMMAND_PREFIX) - - final val SPARK_SET_COMMAND_PATTERN = """set (\w+) = (true|false);""".r - - protected def checkIgnore(query: String): Boolean = - COMMAND_PREFIXES_TO_IGNORE.count(prefix => query.startsWith(prefix)) > 0 - - /** - * Transform the query string, which is compatible with Spark, such that it is compatible with - * dialect. This transforms a query string itself. Note that this is a best effort conversion and - * may not always produce a compatible query string. - */ - def preprocessQuery(query: String): String - - /** - * Transform the query SET, which is compatible with Spark, such that it is compatible with - * dialect. - * For example: - * {{{ - * 1. create temporary view t1 as values (..); - * 2. set spark.sql... 
= true; - * 3. select * from ... - * 4. set spark.sql... = false; - * }}} - * We can remove everything between lines 2 and 4 because if setting configs is not applicable to - * other DBMS systems, and the tests between such config setting is not relevant. - */ - def preprocessQuerySet(queries: Seq[String]): Seq[String] -} - -/** - * Wrapper around utility functions that help convert from Spark SQL to PostgreSQL. - */ -object PostgresDialectConverter extends DialectConverter { - - def preprocessQuery(query: String): String = { - query - } - - override def preprocessQuerySet(queries: Seq[String]): Seq[String] = { - queries - } -} From 6cd52f1e97601054ddf2588f8a36bddb12a7f9b2 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 17:51:18 -0800 Subject: [PATCH 34/63] Don't run the tests if the cross dbms is not specified --- .../sql/crossdbms/CrossDbmsQueryTestSuite.scala | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index d086b8dce1092..4932ed96efbd8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -90,15 +90,13 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val queries = getQueries(code, comments) val settings = getSparkSettings(comments) - if (regenerateGoldenFiles) { - val dbmsConfig = comments.filter(_.startsWith(s"--${ - CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) - // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. - if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { - log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + - s"testing with $crossDbmsToGenerateGoldenFiles") - return - } + val dbmsConfig = comments.filter(_.startsWith(s"--${ + CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) + // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. 
+ if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + + s"testing with $crossDbmsToGenerateGoldenFiles") + } else if (regenerateGoldenFiles) { runQueries(queries, testCase, settings.toImmutableArraySeq) } else { val configSets = getSparkConfigDimensions(comments) From 7f3970777360245a0b7b6847dd6d2f66fe5a02df Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 11:39:22 -0800 Subject: [PATCH 35/63] Remove sqllogictest --- .../sqllogictest-select1.sql.out | 203 -------------- .../sqllogictest-select1.sql | 91 ------- .../sqllogictest-select1.sql.out | 252 ------------------ .../sqllogictest-select1.sql.out | 236 ---------------- 4 files changed, 782 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out delete mode 100644 sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index 16d7edfbe0467..0000000000000 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,203 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t1`, [(a,None), (b,None), (c,None), (d,None), (e,None)], VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245), false, false, PersistedView, true - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM 
t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1 --- !query analysis -Sort [CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x ASC NULLS FIRST], true -+- Project [CASE WHEN (cast(c#x as double) > scalar-subquery#x []) THEN (a#x * 2) ELSE (b#x * 10) END AS CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x] - : +- Aggregate [avg(c#x) AS avg(c)#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query analysis -Sort [((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST], true -+- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query analysis -Sort [CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x ASC NULLS FIRST, ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, abs((b - c))#x ASC NULLS FIRST, ((a + (b * 2)) + (c * 3))#x ASC NULLS FIRST], true -+- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, CASE WHEN (a#x < (b#x - 3)) THEN 111 WHEN (a#x <= b#x) THEN 222 WHEN (a#x < (b#x + 3)) THEN 333 ELSE 444 END AS CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x, abs((b#x - c#x)) AS abs((b - c))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, ((a#x + (b#x * 2)) + (c#x * 3)) AS ((a + (b * 2)) + (c * 3))#x] - +- Filter ((((e#x > c#x) OR (e#x < d#x)) AND (d#x > e#x)) AND exists#x [b#x]) - : +- Project [1 AS 1#x] - : +- Filter (b#x < outer(b#x)) - : +- SubqueryAlias x - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, 
cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4 --- !query analysis -Sort [c#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (d - e)#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST], true -+- Project [c#x, (d#x - e#x) AS (d - e)#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, (((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, e#x] - +- Filter ((NOT ((d#x >= 110) AND (d#x <= 150)) OR ((c#x >= (b#x - 2)) AND (c#x <= (d#x + 2)))) OR ((e#x > c#x) OR (e#x < d#x))) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query analysis -Sort [abs(a)#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, d#x ASC NULLS FIRST], true -+- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, abs(a#x) AS abs(a)#x, e#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, d#x] - +- Filter ((b#x > c#x) AND (c#x > d#x)) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b 
THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query analysis -Sort [c#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, scalarsubquery(c, d)#xL ASC NULLS FIRST], true -+- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, scalar-subquery#x [c#x && d#x] AS scalarsubquery(c, d)#xL, c#x] - : +- Aggregate [count(1) AS count(1)#xL] - : +- Filter ((c#x > outer(c#x)) AND (d#x < outer(d#x))) - : +- SubqueryAlias x - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- Filter ((c#x <= (d#x - 2)) OR (c#x >= (d#x + 2))) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -DROP VIEW IF EXISTS t1 --- !query analysis -DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql deleted file mode 100644 index 87c15a07733b5..0000000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql +++ /dev/null @@ -1,91 +0,0 @@ --- First 6 queries of the sql-logic-tests with some minor changes for compatibility, available here: --- https://www.sqlite.org/sqllogictest/file?name=test/select1.test&ci=tip. 
- ---DBMS_TO_GENERATE_GOLDEN_FILE postgres -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245); - -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1; - -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2; - -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5; - -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4; - -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6; - -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3; - -DROP VIEW IF EXISTS t1; diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index 762a04c22900d..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,252 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query schema -struct<> --- !query output - - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 
10 END - FROM t1 - ORDER BY 1 --- !query schema -struct scalarsubquery()) THEN (a * 2) ELSE (b * 10) END:int> --- !query output -354 -362 -374 -380 -398 -400 -416 -428 -436 -446 -452 -468 -474 -484 -492 -1020 -1060 -1140 -1190 -1220 -1280 -1340 -1360 -1410 -1490 -1500 -1570 -1600 -1690 -1700 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query schema -struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int> --- !query output -1531 102 -1604 107 -1683 112 -1755 117 -1824 122 -1899 127 -1975 132 -2052 137 -2129 142 -2208 147 -2286 152 -2360 157 -2436 162 -2499 167 -2589 172 -2653 177 -2739 182 -2802 187 -2880 192 -2946 197 -3037 202 -3100 207 -3176 212 -3259 217 -3331 222 -3405 227 -3477 232 -3556 237 -3627 242 -3704 247 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query schema -struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END:int,abs((b - c)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,((a + (b * 2)) + (c * 3)):int> --- !query output -1604 333 2 107 643 -1899 222 1 127 770 -1975 222 3 132 793 -2208 111 3 147 881 -2286 333 3 152 910 -2436 333 3 162 970 -2653 333 3 177 1066 -2802 222 2 187 1121 -2880 111 1 192 1157 -2946 333 1 197 1187 -3176 444 3 212 1273 -3405 222 1 227 1364 -3627 222 4 242 1450 -3704 222 1 247 1483 - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4 --- !query schema -struct --- !query output -100 -3 444 1011 104 -108 4 222 1079 105 -112 -2 333 1118 113 -117 -3 222 1165 118 -129 1 111 1274 125 -131 3 333 1325 130 -140 -1 555 1414 143 -146 1 222 1473 147 -153 2 444 1526 152 -159 -2 333 1570 158 -163 2 444 1626 162 -168 -1 222 1669 166 -172 -1 222 1719 174 -179 3 333 1778 175 -182 -1 222 1819 184 -186 4 111 1877 185 -193 1 444 1925 191 -198 1 555 1971 195 -203 -3 333 2017 204 -205 -1 111 2065 207 -213 1 555 2121 211 -216 -2 444 2164 219 -222 -4 444 2211 224 -228 4 111 2280 225 -232 -3 555 2312 233 -239 -3 444 2366 238 -240 2 333 2422 241 -247 4 222 2479 245 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query schema -struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,abs(a):int,e:int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,d:int> --- !query output -1118 112 110 113 333 111 -1165 117 116 118 222 115 -1669 167 167 166 222 165 -1925 192 190 191 444 192 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - 
c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query schema -struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,scalarsubquery(c, d):bigint,c:int> --- !query output -1165 222 0 117 -1219 222 0 124 -1274 111 0 129 -1325 333 0 131 -1367 222 0 139 -1414 555 0 140 -1473 222 0 146 -1570 333 0 159 -1669 222 0 168 -1877 111 0 186 -1971 555 0 198 -2017 333 0 203 -2211 444 0 222 -2312 555 0 232 -2366 444 0 239 -2422 333 0 240 -2479 222 0 247 - - --- !query -DROP VIEW IF EXISTS t1 --- !query schema -struct<> --- !query output - diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index b5eb7eef4a9a3..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,236 +0,0 @@ --- Automatically generated by CrossDbmsQueryTestSuite with postgres --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query output - - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1 --- !query output -354 -362 -374 -380 -398 -400 -416 -428 -436 -446 -452 -468 -474 -484 -492 -1020 -1060 -1140 -1190 -1220 -1280 -1340 -1360 -1410 -1490 -1500 -1570 -1600 -1690 -1700 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query output -1531 102 -1604 107 -1683 112 -1755 117 -1824 122 -1899 127 -1975 132 -2052 137 -2129 142 -2208 147 -2286 152 -2360 157 -2436 162 -2499 167 -2589 172 -2653 177 -2739 182 -2802 187 -2880 192 -2946 197 -3037 202 -3100 207 -3176 212 -3259 217 -3331 222 -3405 227 -3477 232 -3556 237 -3627 242 -3704 247 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query output -1604 333 2 107 643 -1899 222 1 127 770 -1975 222 3 132 793 -2208 111 3 147 881 -2286 333 3 152 910 -2436 333 3 162 970 -2653 333 3 177 1066 -2802 222 2 187 1121 -2880 111 1 192 1157 -2946 333 1 197 1187 -3176 444 3 212 1273 -3405 222 1 227 1364 -3627 222 4 242 1450 -3704 222 1 247 1483 - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, 
- e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4 --- !query output -100 -3 444 1011 104 -108 4 222 1079 105 -112 -2 333 1118 113 -117 -3 222 1165 118 -129 1 111 1274 125 -131 3 333 1325 130 -140 -1 555 1414 143 -146 1 222 1473 147 -153 2 444 1526 152 -159 -2 333 1570 158 -163 2 444 1626 162 -168 -1 222 1669 166 -172 -1 222 1719 174 -179 3 333 1778 175 -182 -1 222 1819 184 -186 4 111 1877 185 -193 1 444 1925 191 -198 1 555 1971 195 -203 -3 333 2017 204 -205 -1 111 2065 207 -213 1 555 2121 211 -216 -2 444 2164 219 -222 -4 444 2211 224 -228 4 111 2280 225 -232 -3 555 2312 233 -239 -3 444 2366 238 -240 2 333 2422 241 -247 4 222 2479 245 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query output -1118 112 110 113 333 111 -1165 117 116 118 222 115 -1669 167 167 166 222 165 -1925 192 190 191 444 192 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query output -1165 222 0 117 -1219 222 0 124 -1274 111 0 129 -1325 333 0 131 -1367 222 0 139 -1414 555 0 140 -1473 222 0 146 -1570 333 0 159 -1669 222 0 168 -1877 111 0 186 -1971 555 0 198 -2017 333 0 203 -2211 444 0 222 -2312 555 0 232 -2366 444 0 239 -2422 333 0 240 -2479 222 0 247 - - --- !query -DROP VIEW IF EXISTS t1 --- !query output - From 615f4d98a6e5e9842514cfbc827d70a6ba23f5a3 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 13:52:19 -0800 Subject: [PATCH 36/63] Small niceness changes --- .../sql/crossdbms/CrossDbmsQueryTestSuite.scala | 11 ++++++----- .../spark/sql/crossdbms/JdbcConnection.scala | 16 +++++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 4932ed96efbd8..1b6ef3de8c7ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -90,8 +90,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val queries = getQueries(code, comments) val settings = getSparkSettings(comments) - val dbmsConfig = comments.filter(_.startsWith(s"--${ - CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) + val dbmsConfig = comments.filter(_.startsWith( + CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE)).map(_.substring( + CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE.length)) // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + @@ -125,8 +126,8 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Either of the below two lines can error. If we go into the catch statement, then it // is likely one of the following scenarios: // 1. Error thrown in Spark analysis: - // a. The query is either incompatible between Spark and the other DBMS - // b. 
There is issue with the schema in Spark. + // a. The query is incompatible with either Spark and the other DBMS. + // b. There is an issue with the schema in Spark. // c. The error is expected - it errors on both systems, but only the Spark error // will be printed to the golden file, because it errors first. // 2. Error thrown in other DBMS execution: @@ -224,7 +225,7 @@ object CrossDbmsQueryTestSuite { // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" // Argument in input files to indicate that golden file should be generated with a reference DBMS - private final val DBMS_TO_GENERATE_GOLDEN_FILE = "DBMS_TO_GENERATE_GOLDEN_FILE" + private final val DBMS_TO_GENERATE_GOLDEN_FILE = "--DBMS_TO_GENERATE_GOLDEN_FILE " private final val POSTGRES = "postgres" private final val SUPPORTED_DBMS = Seq(POSTGRES) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index f203f75ac0c8d..1c5842d951c67 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry /** * Represents a connection (session) to a database using JDBC. */ -private[sql] trait JdbcConnection { +private[crossdbms] trait JdbcConnection { /** * Executes the given SQL query and returns the result as a sequence of strings. @@ -63,15 +63,21 @@ private[sql] trait JdbcConnection { /** * Represents a connection (session) to a PostgreSQL database. */ -private[sql] case class PostgresConnection(connection_url: Option[String] = None) +private[crossdbms] case class PostgresConnection(connection_url: Option[String] = None) extends JdbcConnection { private final val POSTGRES_DRIVER_CLASS_NAME = "org.postgresql.Driver" DriverRegistry.register(POSTGRES_DRIVER_CLASS_NAME) - private final val DEFAULT_USER = "pg" - private final val DEFAULT_CONNECTION_URL = - s"jdbc:postgresql://localhost:5432/postgres?user=$DEFAULT_USER" + + private final val DEFAULT_HOST = "localhost" + private final val DEFAULT_PORT = "5432" + private final val DEFAULT_USER = "postgres" + private final val DEFAULT_DB = "postgres" + private final val DEFAULT_PASSWORD = "postgres" + private final val DEFAULT_CONNECTION_URL = s"jdbc:postgresql://$DEFAULT_HOST:$DEFAULT_PORT/" + + s"$DEFAULT_DB?user=$DEFAULT_USER?password=$DEFAULT_PASSWORD" private val url = connection_url.getOrElse(DEFAULT_CONNECTION_URL) + private val conn = DriverManager.getConnection(url) private val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) From cf345bd96aa153d04e917fdb1c66897d7e14fe2b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 14:20:36 -0800 Subject: [PATCH 37/63] Do null -> NULL replacement, and put 2 tests in --- ...not-in-unit-tests-multi-column-literal.sql | 14 +-- .../not-in-unit-tests-multi-column.sql | 16 +-- ...in-unit-tests-multi-column-literal.sql.out | 37 +++++++ .../not-in-unit-tests-multi-column.sql.out | 99 +++++++++++++++++++ 4 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out create mode 100644 
sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql index a061e495f51b8..cfce07cc020c2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql @@ -7,12 +7,14 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false -CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES - (null, null), - (null, 1.0), - (2, 3.0), - (4, 5.0) - AS m(a, b); +--DBMS_TO_GENERATE_GOLDEN_FILE postgres + +CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES + (NULL, NULL), + (NULL, 1.0), + (2, 3.0), + (4, 5.0)) + AS m(a, b); -- Case 1 (not possible to write a literal with no rows, so we ignore it.) -- (subquery is empty -> row is returned) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql index 28ab75121573a..9314878e13917 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql @@ -18,18 +18,20 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false -CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES - (null, null), - (null, 1.0), +--DBMS_TO_GENERATE_GOLDEN_FILE postgres + +CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES + (NULL, NULL), + (NULL, 1.0), (2, 3.0), - (4, 5.0) + (4, 5.0)) AS m(a, b); -CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES - (null, null), +CREATE TEMPORARY VIEW s AS SELECT * FROM (VALUES + (NULL, NULL), (0, 1.0), (2, 3.0), - (4, null) + (4, NULL)) AS s(c, d); -- Case 1 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out new file mode 100644 index 0000000000000..b696ee2d43b4a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out @@ -0,0 +1,37 @@ +-- Automatically generated by CrossDbmsQueryTestSuite with postgres +-- !query +CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES + (NULL, NULL), + (NULL, 1.0), + (2, 3.0), + (4, 5.0)) + AS m(a, b) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 1.0 -- Matches (null, 1.0) + AND (a, b) NOT IN ((2, 3.0)) +-- !query output +NULL 1.0 + + +-- !query +SELECT * +FROM m +WHERE b = 3.0 -- Matches (2, 3.0) + AND (a, b) NOT IN ((2, 3.0)) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 5.0 -- Matches (4, 5.0) + AND (a, b) NOT IN ((2, 3.0)) +-- !query output +4 5.0 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out 
b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out new file mode 100644 index 0000000000000..4f6510ac2f52b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out @@ -0,0 +1,99 @@ +-- Automatically generated by CrossDbmsQueryTestSuite with postgres +-- !query +CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES + (NULL, NULL), + (NULL, 1.0), + (2, 3.0), + (4, 5.0)) + AS m(a, b) +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW s AS SELECT * FROM (VALUES + (NULL, NULL), + (0, 1.0), + (2, 3.0), + (4, NULL)) + AS s(c, d) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE (a, b) NOT IN (SELECT * + FROM s + WHERE d > 5.0) -- Matches no rows +-- !query output +2 3.0 +4 5.0 +NULL 1.0 +NULL NULL + + +-- !query +SELECT * +FROM m +WHERE (a, b) NOT IN (SELECT * + FROM s + WHERE c IS NULL AND d IS NULL) -- Matches only (null, null) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE a IS NULL AND b IS NULL -- Matches only (null, null) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 1.0 -- Matches (null, 1.0) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 1.0 -- Matches (null, 1.0) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c = 2) -- Matches (2, 3.0) +-- !query output +NULL 1.0 + + +-- !query +SELECT * +FROM m +WHERE b = 3.0 -- Matches (2, 3.0) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c = 2) -- Matches (2, 3.0) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 5.0 -- Matches (4, 5.0) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c = 2) -- Matches (2, 3.0) +-- !query output +4 5.0 From cdef387ac34eae8ece1c9457bdbfe754c6cdf338 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 14:49:00 -0800 Subject: [PATCH 38/63] Add docker compose and bash scripts for easy postgres instance --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 17 ++++----- .../spark/sql/crossdbms/JdbcConnection.scala | 7 ++-- .../sql/crossdbms/bin/docker-compose.yml | 31 ++++++++++++++++ .../crossdbms/bin/generate_golden_files.sh | 36 +++++++++++++++++++ 4 files changed, 81 insertions(+), 10 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 1b6ef3de8c7ef..0019fcfc374aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -32,15 +32,16 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * See SQLQueryTestSuite.scala for more information. This class builds off of that to allow us * to generate golden files with other DBMS to perform cross-checking for correctness. Note that * this is not currently run on all SQL input files by default because there is incompatibility - * between Spark SQL and the DBMS SQL. More work will be done to increase coverage, focusing on - * postgres. 
+ * between Spark SQL and the DBMS SQL. * - * You need to have a database server up before running this test. - * For example, for postgres: - * 1. Install PostgreSQL. - * a. On a mac: `brew install postgresql@13` - * 2. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` + * You need to have a database server up before running this test. Two options: + * - Install Docker and use the bash script in ./bin/generate_golden_files.sh to run a DBMS + * container and generate golden files. + * - Otherwise, do manual installation. For example, for postgres: + * 1. Install PostgreSQL. + * a. On a mac: `brew install postgresql@13` + * 2. After installing PostgreSQL, start the database server, then create a role named pg with + * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` * * To indicate that the SQL file is eligible for testing with this suite, add the following comment * into the input file: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 1c5842d951c67..771b304401ae9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -75,7 +75,7 @@ private[crossdbms] case class PostgresConnection(connection_url: Option[String] private final val DEFAULT_DB = "postgres" private final val DEFAULT_PASSWORD = "postgres" private final val DEFAULT_CONNECTION_URL = s"jdbc:postgresql://$DEFAULT_HOST:$DEFAULT_PORT/" + - s"$DEFAULT_DB?user=$DEFAULT_USER?password=$DEFAULT_PASSWORD" + s"$DEFAULT_DB?user=$DEFAULT_USER&password=$DEFAULT_PASSWORD" private val url = connection_url.getOrElse(DEFAULT_CONNECTION_URL) private val conn = DriverManager.getConnection(url) @@ -89,7 +89,10 @@ private[crossdbms] case class PostgresConnection(connection_url: Option[String] val rs = stmt.getResultSet val metadata = rs.getMetaData while (rs.next()) { - val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => rs.getObject(i))) + val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { + val value = rs.getObject(i) + if (value == null) { "NULL" } else { value } + })) rows.append(row) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml new file mode 100644 index 0000000000000..787bdda8ecd31 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +version: '3' + +services: + # Postgres database service + postgres-db: + image: postgres:latest + container_name: postgres-db + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + ports: + - "5432:5432" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh new file mode 100644 index 0000000000000..cd68caab182f5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Get the root of the runtime repo +GIT_ROOT="$(git rev-parse --show-toplevel)" +echo "Root of repo is at $GIT_ROOT" + +# Change if using different container or name +DOCKER_CONTAINER_NAME="postgres-db" + +echo "Starting DB container.." +docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" up --build --detach + +echo "Waiting for DB container to be ready.." +timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" + +echo "Generating golden files.." +SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + +echo "Bringing down DB container.." +docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes \ No newline at end of file From 36be209f2cce4c97f7924c230f94236c980e1d44 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 14:49:51 -0800 Subject: [PATCH 39/63] Trivial changes --- .../apache/spark/sql/crossdbms/bin/generate_golden_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh index cd68caab182f5..25820a885d0a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh @@ -29,8 +29,8 @@ docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apac echo "Waiting for DB container to be ready.." timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" -echo "Generating golden files.." +echo "Generating all golden files.." SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" echo "Bringing down DB container.." 
-docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes \ No newline at end of file +docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes From d0373313fa5aa314527c6fcf73d2067eb0cd63fb Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 14:53:00 -0800 Subject: [PATCH 40/63] Trivial changes --- .../apache/spark/sql/crossdbms/bin/generate_golden_files.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh index 25820a885d0a9..07c740f52ca10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh @@ -20,13 +20,13 @@ GIT_ROOT="$(git rev-parse --show-toplevel)" echo "Root of repo is at $GIT_ROOT" -# Change if using different container or name -DOCKER_CONTAINER_NAME="postgres-db" - echo "Starting DB container.." docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" up --build --detach echo "Waiting for DB container to be ready.." +# TODO: Make this dynamically detect which DBMS is being used. +# Change if using different container or name. +DOCKER_CONTAINER_NAME="postgres-db" timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" echo "Generating all golden files.." From 750a1dea821bc2ef42b22442648991304f178add Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 09:45:37 -0800 Subject: [PATCH 41/63] Fix typo --- .../src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index eb04aed145f28..6c7b4c48864a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -632,7 +632,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper /** * Returns the desired file path for results, given the input file. This is implemented as a - * function because differente Suites extending this class may want their results files with + * function because different Suites extending this class may want their results files with * different names or in different locations. 
*/ protected def resultFileForInputFile(file: File): String = { From 089c2922b146a3edcd23622b6c66dbe64bbb17cf Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 10:32:30 -0800 Subject: [PATCH 42/63] Add header comments, and make CrossDbmsQuerySuite an abstract class --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 107 ++++++++++-------- 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 0019fcfc374aa..55ba9159a4906 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -29,10 +29,14 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps // scalastyle:off line.size.limit /** - * See SQLQueryTestSuite.scala for more information. This class builds off of that to allow us - * to generate golden files with other DBMS to perform cross-checking for correctness. Note that - * this is not currently run on all SQL input files by default because there is incompatibility - * between Spark SQL and the DBMS SQL. + * IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING, READ THIS: + * Your new SQL test is automatically opted into this suite. It is likely failing because it is not + * compatible with the default DBMS (currently postgres). You have two options: + * 1. (Recommended) Modify your queries to be compatible with both systems, and generate golden + * files with the instructions below. This is recommended because it will run your queries + * against postgres, providing higher correctness testing confidence, and you won't have to + * manually verify the golden files generated with your test. + * 2. Add this line to your .sql file: --SPARK_ONLY * * You need to have a database server up before running this test. Two options: * - Install Docker and use the bash script in ./bin/generate_golden_files.sh to run a DBMS @@ -43,45 +47,61 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * 2. After installing PostgreSQL, start the database server, then create a role named pg with * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` * - * To indicate that the SQL file is eligible for testing with this suite, add the following comment - * into the input file: + * To indicate that the SQL file is not eligible for testing with this suite, add the following + * comment into the input file: * {{{ - * --DBMS_TO_GENERATE_GOLDEN_FILE postgres + * --SPARK_ONLY * }}} * - * And then, to run the entire test suite: + * And then, to run the entire test suite, with the default cross DBMS: * {{{ - * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" * }}} * - * To re-generate golden files for entire suite, run: + * To re-generate golden files for entire suite, either run: + * 1. (Recommended) You need Docker on your machine. * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * bash ./bin/generate_golden_files.sh + * }}} + * 2. + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" * }}} * * To re-generate golden file for a single test, e.g. 
`describe.sql`, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite -- -z describe.sql" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" * }}} * * To specify a DBMS to use (the default is postgres): * {{{ - * REF_DBMS=postgres SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" * }}} */ -// scalastyle:on line.size.limit -class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { - - // Note: the below two functions have to be functions instead of variables because the superclass - // runs the test first before the subclass variables can be instantiated. - private def crossDbmsToGenerateGoldenFiles: String = { - val userInputDbms = System.getenv(CrossDbmsQueryTestSuite.REF_DBMS_ARGUMENT) - val selectedDbms = Option(userInputDbms).filter(_.nonEmpty).getOrElse( - CrossDbmsQueryTestSuite.DEFAULT_DBMS) - assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(selectedDbms), - s"$selectedDbms is not currently supported.") - selectedDbms - } + +class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { + + def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES + + // Reduce scope to subquery tests for now. That is where most correctness issues are. + override def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + + override def getConnection: Option[String] => JdbcSQLQueryTestRunner = + (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) +} + +/** + * See SQLQueryTestSuite.scala for more information. This suite builds off of that to allow us + * to generate golden files with other DBMS to perform cross-checking for correctness. It generates + * another set of golden files. Note that this is not currently run on all SQL input files by + * default because there is incompatibility between SQL dialects for Spark and the other DBMS. + */ +abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { + + protected def crossDbmsToGenerateGoldenFiles: String + protected def customInputFilePath: String = inputFilePath + protected def getConnection: Option[String] => JdbcSQLQueryTestRunner + private def customConnectionUrl: String = System.getenv( CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) @@ -91,13 +111,12 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val queries = getQueries(code, comments) val settings = getSparkSettings(comments) - val dbmsConfig = comments.filter(_.startsWith( - CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE)).map(_.substring( - CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE.length)) - // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. + val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) + .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + // If `--ONLY_IF` is found, check if the DBMS being used is allowed. 
if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { - log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + - s"testing with $crossDbmsToGenerateGoldenFiles") + log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + + "SPARK_ONLY") } else if (regenerateGoldenFiles) { runQueries(queries, testCase, settings.toImmutableArraySeq) } else { @@ -120,9 +139,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.orElse( - Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( - crossDbmsToGenerateGoldenFiles)(connectionUrl))) + runner = runner.orElse(Some(getConnection(connectionUrl)) try { // Either of the below two lines can error. If we go into the catch statement, then it // is likely one of the following scenarios: @@ -206,14 +223,14 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val defaultResultsDir = new File(baseResourcePath, "results") val goldenFilePath = new File( defaultResultsDir, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath - file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + file.getAbsolutePath.replace(customInputFilePath, goldenFilePath) + ".out" } override lazy val listTestCases: Seq[TestCase] = { - listFilesRecursively(new File(inputFilePath)).flatMap { file => + listFilesRecursively(new File(customInputFilePath)).flatMap { file => val resultFile = resultFileForInputFile(file) val absPath = file.getAbsolutePath - val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) + val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil }.sortBy(_.name) } @@ -221,18 +238,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { object CrossDbmsQueryTestSuite { - // System argument to indicate which reference DBMS is being used. - private final val REF_DBMS_ARGUMENT = "REF_DBMS" + final val POSTGRES = "postgres" + // System argument to indicate a custom connection URL to the reference DBMS. 
private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" // Argument in input files to indicate that golden file should be generated with a reference DBMS - private final val DBMS_TO_GENERATE_GOLDEN_FILE = "--DBMS_TO_GENERATE_GOLDEN_FILE " - - private final val POSTGRES = "postgres" - private final val SUPPORTED_DBMS = Seq(POSTGRES) - private final val DEFAULT_DBMS = POSTGRES - private final val DBMS_TO_CONNECTION_MAPPING = Map( - POSTGRES -> ((connection_url: Option[String]) => - JdbcSQLQueryTestRunner(PostgresConnection(connection_url))) - ) + private final val ONLY_IF_ARG = "--ONLY_IF " } From 62d1760b99d6d8e136029a28d1494629337fa570 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 10:38:01 -0800 Subject: [PATCH 43/63] Trivial changes --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 55ba9159a4906..923b0e97080b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -36,7 +36,7 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * files with the instructions below. This is recommended because it will run your queries * against postgres, providing higher correctness testing confidence, and you won't have to * manually verify the golden files generated with your test. - * 2. Add this line to your .sql file: --SPARK_ONLY + * 2. Add this line to your .sql file: --ONLY_IF spark * * You need to have a database server up before running this test. Two options: * - Install Docker and use the bash script in ./bin/generate_golden_files.sh to run a DBMS @@ -50,7 +50,7 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * To indicate that the SQL file is not eligible for testing with this suite, add the following * comment into the input file: * {{{ - * --SPARK_ONLY + * --ONLY_IF spark * }}} * * And then, to run the entire test suite, with the default cross DBMS: @@ -81,12 +81,12 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES + protected def crossDbmsToGenerateGoldenFiles: String = "postgres" // Reduce scope to subquery tests for now. That is where most correctness issues are. 
- override def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - override def getConnection: Option[String] => JdbcSQLQueryTestRunner = + override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) } @@ -99,8 +99,8 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { protected def crossDbmsToGenerateGoldenFiles: String - protected def customInputFilePath: String = inputFilePath - protected def getConnection: Option[String] => JdbcSQLQueryTestRunner + protected def customInputFilePath: String + protected def getConnection: Option[String] => SQLQueryTestRunner private def customConnectionUrl: String = System.getenv( CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) @@ -238,10 +238,8 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { object CrossDbmsQueryTestSuite { - final val POSTGRES = "postgres" - // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" - // Argument in input files to indicate that golden file should be generated with a reference DBMS + // Argument in input files to indicate that the sql file is restricted to certain systems. private final val ONLY_IF_ARG = "--ONLY_IF " } From def270bbd21895709f5536732ce758ee30b9a8e1 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 10:57:22 -0800 Subject: [PATCH 44/63] Use prepared statements --- .../apache/spark/sql/crossdbms/JdbcConnection.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 771b304401ae9..ff59af955e474 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.crossdbms -import java.sql.{DriverManager, ResultSet} +import java.sql.{DriverManager, PreparedStatement, ResultSet} import java.util.Properties import scala.collection.mutable.ArrayBuffer @@ -103,13 +103,16 @@ private[crossdbms] case class PostgresConnection(connection_url: Option[String] } def dropTable(tableName: String): Unit = { - val dropTableSql = s"DROP TABLE IF EXISTS $tableName" - stmt.executeUpdate(dropTableSql) + val st = conn.prepareStatement("DROP TABLE IF EXISTS ?") + st.setString(1, tableName) + st.executeUpdate() } def createTable(tableName: String, schemaString: String): Unit = { - val createTableSql = s"CREATE TABLE $tableName ($schemaString)" - stmt.executeUpdate(createTableSql) + val st = conn.prepareStatement("CREATE TABLE ? 
(?)") + st.setString(1, tableName) + st.setString(2, schemaString) + st.executeUpdate() } def loadData(df: DataFrame, tableName: String): Unit = { From fdb90a4c077cecff58a3a0220d8434adebd98265 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 10:58:45 -0800 Subject: [PATCH 45/63] Remove sql file changes for now --- ...not-in-unit-tests-multi-column-literal.sql | 14 ++- .../not-in-unit-tests-multi-column.sql | 16 ++- ...in-unit-tests-multi-column-literal.sql.out | 37 ------- .../not-in-unit-tests-multi-column.sql.out | 99 ------------------- 4 files changed, 13 insertions(+), 153 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql index cfce07cc020c2..a061e495f51b8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql @@ -7,14 +7,12 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false ---DBMS_TO_GENERATE_GOLDEN_FILE postgres - -CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES - (NULL, NULL), - (NULL, 1.0), - (2, 3.0), - (4, 5.0)) - AS m(a, b); +CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES + (null, null), + (null, 1.0), + (2, 3.0), + (4, 5.0) + AS m(a, b); -- Case 1 (not possible to write a literal with no rows, so we ignore it.) 
-- (subquery is empty -> row is returned) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql index 9314878e13917..28ab75121573a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql @@ -18,20 +18,18 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false ---DBMS_TO_GENERATE_GOLDEN_FILE postgres - -CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES - (NULL, NULL), - (NULL, 1.0), +CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES + (null, null), + (null, 1.0), (2, 3.0), - (4, 5.0)) + (4, 5.0) AS m(a, b); -CREATE TEMPORARY VIEW s AS SELECT * FROM (VALUES - (NULL, NULL), +CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES + (null, null), (0, 1.0), (2, 3.0), - (4, NULL)) + (4, null) AS s(c, d); -- Case 1 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out deleted file mode 100644 index b696ee2d43b4a..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out +++ /dev/null @@ -1,37 +0,0 @@ --- Automatically generated by CrossDbmsQueryTestSuite with postgres --- !query -CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES - (NULL, NULL), - (NULL, 1.0), - (2, 3.0), - (4, 5.0)) - AS m(a, b) --- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 1.0 -- Matches (null, 1.0) - AND (a, b) NOT IN ((2, 3.0)) --- !query output -NULL 1.0 - - --- !query -SELECT * -FROM m -WHERE b = 3.0 -- Matches (2, 3.0) - AND (a, b) NOT IN ((2, 3.0)) --- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 5.0 -- Matches (4, 5.0) - AND (a, b) NOT IN ((2, 3.0)) --- !query output -4 5.0 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out deleted file mode 100644 index 4f6510ac2f52b..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out +++ /dev/null @@ -1,99 +0,0 @@ --- Automatically generated by CrossDbmsQueryTestSuite with postgres --- !query -CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES - (NULL, NULL), - (NULL, 1.0), - (2, 3.0), - (4, 5.0)) - AS m(a, b) --- !query output - - - --- !query -CREATE TEMPORARY VIEW s AS SELECT * FROM (VALUES - (NULL, NULL), - (0, 1.0), - (2, 3.0), - (4, NULL)) - AS s(c, d) --- !query output - - - --- !query -SELECT * -FROM m -WHERE (a, b) NOT IN (SELECT * - FROM s - WHERE d > 5.0) -- Matches no rows --- !query output -2 3.0 -4 5.0 -NULL 1.0 -NULL NULL - - --- !query -SELECT * -FROM m -WHERE (a, b) NOT IN (SELECT * - FROM s - WHERE c IS NULL AND d IS NULL) -- Matches only (null, null) --- !query output - - - --- !query -SELECT * -FROM m -WHERE a IS NULL AND b IS NULL -- Matches only (null, null) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) 
--- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 1.0 -- Matches (null, 1.0) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) --- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 1.0 -- Matches (null, 1.0) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c = 2) -- Matches (2, 3.0) --- !query output -NULL 1.0 - - --- !query -SELECT * -FROM m -WHERE b = 3.0 -- Matches (2, 3.0) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c = 2) -- Matches (2, 3.0) --- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 5.0 -- Matches (4, 5.0) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c = 2) -- Matches (2, 3.0) --- !query output -4 5.0 From 8c21ceb1356ea38cac0979525a9a83687805c945 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 11:08:54 -0800 Subject: [PATCH 46/63] Header comment improvements --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 41 +++++++++---------- ...=> generate_golden_files_with_postgres.sh} | 5 +-- 2 files changed, 21 insertions(+), 25 deletions(-) rename sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/{generate_golden_files.sh => generate_golden_files_with_postgres.sh} (91%) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 923b0e97080b7..22acbb96f7307 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -38,14 +38,20 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * manually verify the golden files generated with your test. * 2. Add this line to your .sql file: --ONLY_IF spark * - * You need to have a database server up before running this test. Two options: - * - Install Docker and use the bash script in ./bin/generate_golden_files.sh to run a DBMS - * container and generate golden files. - * - Otherwise, do manual installation. For example, for postgres: - * 1. Install PostgreSQL. - * a. On a mac: `brew install postgresql@13` - * 2. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` + * To re-generate golden files for entire suite, either run: + * 1. (Recommended) You need Docker on your machine. Install Docker and run the following command: + * {{{ + * bash ./bin/generate_golden_files_with_postgres.sh + * }}} + * 2. + * a. You need to have a Postgres server up before running this test. + * i. Install PostgreSQL. On a mac: `brew install postgresql@13` + * ii. After installing PostgreSQL, start the database server, then create a role named pg with + * superuser permissions: `createuser -s postgres` OR `psql> CREATE role postgres superuser` + * b. Run the following command: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" + * }}} * * To indicate that the SQL file is not eligible for testing with this suite, add the following * comment into the input file: @@ -58,25 +64,16 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" * }}} * - * To re-generate golden files for entire suite, either run: - * 1. (Recommended) You need Docker on your machine. 
- * {{{ - * bash ./bin/generate_golden_files.sh - * }}} - * 2. - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * * To re-generate golden file for a single test, e.g. `describe.sql`, run: * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" * }}} * - * To specify a DBMS to use (the default is postgres): - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} + * This file adds a new comment argument, --ONLY_IF. This comment is used to indicate that the + * SQL file is not eligible for testing with this suite. For example, if you have a SQL file + * named `describe.sql`, and you want to exclude it from this suite, add the following comment + * into the input file: + * --ONLY_IF spark */ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh similarity index 91% rename from sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh rename to sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh index 07c740f52ca10..32e1573b51e6e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh @@ -24,13 +24,12 @@ echo "Starting DB container.." docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" up --build --detach echo "Waiting for DB container to be ready.." -# TODO: Make this dynamically detect which DBMS is being used. # Change if using different container or name. DOCKER_CONTAINER_NAME="postgres-db" timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" -echo "Generating all golden files.." -SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" +echo "Generating all golden files for postgres.." +SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" echo "Bringing down DB container.." 
docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes From 50c2dfb0babe5bf6ea3fc01169f89ebaed3f5910 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 11:37:54 -0800 Subject: [PATCH 47/63] Trivial changes --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 18 +++++++++--------- .../spark/sql/crossdbms/JdbcConnection.scala | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 22acbb96f7307..c4aec76641b41 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -68,12 +68,6 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" * }}} - * - * This file adds a new comment argument, --ONLY_IF. This comment is used to indicate that the - * SQL file is not eligible for testing with this suite. For example, if you have a SQL file - * named `describe.sql`, and you want to exclude it from this suite, add the following comment - * into the input file: - * --ONLY_IF spark */ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { @@ -92,6 +86,12 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { * to generate golden files with other DBMS to perform cross-checking for correctness. It generates * another set of golden files. Note that this is not currently run on all SQL input files by * default because there is incompatibility between SQL dialects for Spark and the other DBMS. + * + * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for + * which is eligible for the SQL file . For example, if you have a SQL file named `describe.sql`, + * and you want to indicate that postgres is incompatible, add the following comment into the input + * file: + * --ONLY_IF spark */ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { @@ -111,9 +111,9 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) // If `--ONLY_IF` is found, check if the DBMS being used is allowed. - if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + if (dbmsConfig.nonEmpty && !dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + - "SPARK_ONLY") + s"not eligible with $crossDbmsToGenerateGoldenFiles.") } else if (regenerateGoldenFiles) { runQueries(queries, testCase, settings.toImmutableArraySeq) } else { @@ -136,7 +136,7 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.orElse(Some(getConnection(connectionUrl)) + runner = runner.orElse(Some(getConnection(connectionUrl))) try { // Either of the below two lines can error. 
If we go into the catch statement, then it // is likely one of the following scenarios: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index ff59af955e474..4e83ab8c6da03 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.crossdbms -import java.sql.{DriverManager, PreparedStatement, ResultSet} +import java.sql.{DriverManager, ResultSet} import java.util.Properties import scala.collection.mutable.ArrayBuffer From 939b8f252224236e683f5b3a4d4591419738c3b5 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 14:55:51 -0800 Subject: [PATCH 48/63] Add custom postgres command --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index c4aec76641b41..a955e59a4b131 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -79,6 +79,17 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) + + override protected def preprocessingCommands = Seq( + // Custom function `double` to imitate Spark's function, so that more tests are covered. + """ + |CREATE FUNCTION OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + | AS 'select CAST($1 AS double precision);' + | LANGUAGE SQL + | IMMUTABLE + | RETURNS NULL ON NULL INPUT; + |""".stripMargin + ) } /** @@ -95,10 +106,28 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { */ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { + /** + * A String representing the database system being used. + */ protected def crossDbmsToGenerateGoldenFiles: String - protected def customInputFilePath: String + + /** + * A custom input file path where SQL tests are located, if desired. + */ + protected def customInputFilePath: String = super.inputFilePath + + /** + * A function taking in an optional custom connection URL, and returns a [[SQLQueryTestRunner]] + * specific to the database system. + */ protected def getConnection: Option[String] => SQLQueryTestRunner + /** + * Commands to be run before running queries to generate golden files, such as defining custom + * functions. + */ + protected def preprocessingCommands: Seq[String] + private def customConnectionUrl: String = System.getenv( CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) @@ -137,6 +166,8 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) runner = runner.orElse(Some(getConnection(connectionUrl))) + preprocessingCommands.foreach(command => runner.foreach(_.runQuery(command))) + try { // Either of the below two lines can error. 
If we go into the catch statement, then it // is likely one of the following scenarios: From db01061f7d672dec5ad1607bbbb3707cd6bfc436 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 15:04:49 -0800 Subject: [PATCH 49/63] Modify exists-having to be compatible with psql --- .../exists-subquery/exists-having.sql | 52 +++---- .../exists-subquery/exists-having.sql.out | 134 ++++++++++++++++++ .../crossdbms/CrossDbmsQueryTestSuite.scala | 17 ++- 3 files changed, 171 insertions(+), 32 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql index c30159039ff3e..5605ae104e655 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql @@ -1,35 +1,35 @@ -- Tests HAVING clause in subquery. -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) +CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70)) AS EMP(id, emp_name, hiredate, salary, dept_id); -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM (VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL')) AS DEPT(dept_id, dept_name, state); -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00))) AS BONUS(emp_name, bonus_amt); -- simple having in subquery. 
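The rewrite above keeps the view definitions loadable by both engines: the inline table is wrapped in parentheses, string literals use single quotes, and Spark's `D` double-literal suffix is replaced by the `double(...)` helper. A minimal sketch of the dialect gap, assuming the `double` helper from `preprocessingCommands` has already been created in the PostgreSQL session (on Spark, `double(...)` is the built-in cast function):

-- Spark-only form: double-quoted strings and the D literal suffix are not portable.
--   SELECT * FROM VALUES (100, 'emp 1', 100.00D) AS EMP(id, emp_name, salary);

-- Portable form: parses on Spark and, with the helper function in place, on PostgreSQL.
SELECT * FROM (VALUES
  (100, 'emp 1', double(100.00)),
  (200, 'emp 2', double(200.00)))
  AS EMP(id, emp_name, salary);
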
diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out new file mode 100644 index 0000000000000..e330971afdf08 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out @@ -0,0 +1,134 @@ +-- Automatically generated by PostgreSQLQueryTestSuite with postgres +-- !query +CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70)) +AS EMP(id, emp_name, hiredate, salary, dept_id) +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM (VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL')) +AS DEPT(dept_id, dept_name, state) +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00))) +AS BONUS(emp_name, bonus_amt) +-- !query output + + + +-- !query +SELECT dept_id, count(*) +FROM emp +GROUP BY dept_id +HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < min(emp.salary)) +-- !query output +10 3 +100 2 +20 1 +30 1 +70 1 +NULL 1 + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < Min(emp.salary))) +-- !query output +10 dept 1 CA +20 dept 2 NY +30 dept 3 TX +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ +70 dept 7 FL + + +-- !query +SELECT dept_id, + Max(salary) +FROM emp gp +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp p + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < Min(p.salary))) +GROUP BY gp.dept_id +-- !query output +10 200.0 +100 400.0 +20 300.0 +30 400.0 +70 150.0 +NULL 400.0 + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt > Min(emp.salary))) +-- !query output +10 dept 1 CA +20 dept 2 NY +30 dept 3 TX +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ +70 dept 7 FL + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + count(emp.dept_id) + FROM emp + WHERE dept.dept_id = dept_id + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE ( bonus_amt > min(emp.salary) + AND count(emp.dept_id) > 1 ))) +-- !query output +10 dept 1 CA diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index a955e59a4b131..b55268f66e8c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -72,7 +72,7 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - protected def crossDbmsToGenerateGoldenFiles: String = "postgres" + protected def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES // Reduce scope to subquery tests for now. That is where most correctness issues are. override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath @@ -83,7 +83,7 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { override protected def preprocessingCommands = Seq( // Custom function `double` to imitate Spark's function, so that more tests are covered. """ - |CREATE FUNCTION OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision | AS 'select CAST($1 AS double precision);' | LANGUAGE SQL | IMMUTABLE @@ -107,14 +107,16 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { /** - * A String representing the database system being used. + * A String representing the database system being used. A list of these is defined in the + * companion object. */ protected def crossDbmsToGenerateGoldenFiles: String + assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(crossDbmsToGenerateGoldenFiles)) /** * A custom input file path where SQL tests are located, if desired. */ - protected def customInputFilePath: String = super.inputFilePath + protected def customInputFilePath: String = inputFilePath /** * A function taking in an optional custom connection URL, and returns a [[SQLQueryTestRunner]] @@ -123,8 +125,8 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { protected def getConnection: Option[String] => SQLQueryTestRunner /** - * Commands to be run before running queries to generate golden files, such as defining custom - * functions. + * Commands to be run on the DBMS before running queries to generate golden files, such as + * defining custom functions. */ protected def preprocessingCommands: Seq[String] @@ -266,6 +268,9 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { object CrossDbmsQueryTestSuite { + final val POSTGRES = "postgres" + final val SUPPORTED_DBMS = Seq(POSTGRES) + // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" // Argument in input files to indicate that the sql file is restricted to certain systems. 
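With the suite now abstract, wiring in another reference DBMS is mostly a matter of supplying the members above. A rough sketch only, not part of this patch series: the `mysql` constant, `MySQLConnection`, and the chosen input path are hypothetical, and the new DBMS string would also have to be added to CrossDbmsQueryTestSuite.SUPPORTED_DBMS for the assertion on crossDbmsToGenerateGoldenFiles to pass.

import java.io.File

// Hypothetical subclass; MySQLConnection would be a new JdbcConnection implementation.
class MySQLQueryTestSuite extends CrossDbmsQueryTestSuite {

  override protected def crossDbmsToGenerateGoldenFiles: String = "mysql"

  // Keep the reduced scope used by the Postgres suite.
  override protected def customInputFilePath: String =
    new File(inputFilePath, "subquery").getAbsolutePath

  override protected def getConnection: Option[String] => SQLQueryTestRunner =
    (connectionUrl: Option[String]) => JdbcSQLQueryTestRunner(MySQLConnection(connectionUrl))

  // No session-level helper functions needed up front.
  override protected def preprocessingCommands: Seq[String] = Seq.empty
}
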
From 7200f7d3263e366a831de08a6897cd65d989fdc1 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 15:20:38 -0800 Subject: [PATCH 50/63] Modify query slightly --- .../exists-subquery/exists-having.sql.out | 189 ++++++++---------- .../exists-subquery/exists-having.sql | 12 +- .../exists-subquery/exists-having.sql.out | 52 ++--- 3 files changed, 118 insertions(+), 135 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out index da242e583f45e..54e046eafec29 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out @@ -1,81 +1,78 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) +CREATE TEMPORARY VIEW EMP AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) AS EMP(id, emp_name, hiredate, salary, dept_id) -- !query analysis -CreateViewCommand `EMP`, SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) +CreateViewCommand `EMP`, VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM 
VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") +CREATE TEMPORARY VIEW DEPT AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL') AS DEPT(dept_id, dept_name, state) -- !query analysis -CreateViewCommand `DEPT`, SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") +CreateViewCommand `DEPT`, VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL') AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- SubqueryAlias DEPT + +- LocalRelation [dept_id#x, dept_name#x, state#x] -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) +CREATE TEMPORARY VIEW BONUS AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) AS BONUS(emp_name, bonus_amt) -- !query analysis -CreateViewCommand `BONUS`, SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) +CreateViewCommand `BONUS`, VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true - +- Project [emp_name#x, bonus_amt#x] - +- SubqueryAlias BONUS - +- LocalRelation [emp_name#x, bonus_amt#x] + +- SubqueryAlias BONUS + +- LocalRelation [emp_name#x, bonus_amt#x] -- !query @@ -93,16 +90,14 @@ Project [dept_id#x, count(1)#xL] : +- SubqueryAlias bonus : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : +- Project [emp_name#x, bonus_amt#x] - : +- SubqueryAlias BONUS - : +- LocalRelation [emp_name#x, bonus_amt#x] + : +- SubqueryAlias BONUS + : +- LocalRelation [emp_name#x, bonus_amt#x] +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] +- SubqueryAlias emp +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -- !query @@ -125,22 +120,19 @@ Project 
[dept_id#x, dept_name#x, state#x] : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- SubqueryAlias BONUS + : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- SubqueryAlias EMP + : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- SubqueryAlias DEPT + +- LocalRelation [dept_id#x, dept_name#x, state#x] -- !query @@ -165,24 +157,21 @@ Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x] : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- SubqueryAlias BONUS + : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias p : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- SubqueryAlias EMP + : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias gp +- SubqueryAlias emp +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -- !query @@ -205,22 +194,19 @@ Project [dept_id#x, dept_name#x, state#x] : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- 
SubqueryAlias BONUS + : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- SubqueryAlias EMP + : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- SubqueryAlias DEPT + +- LocalRelation [dept_id#x, dept_name#x, state#x] -- !query @@ -245,20 +231,17 @@ Project [dept_id#x, dept_name#x, state#x] : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- SubqueryAlias BONUS + : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- Aggregate [dept_id#x], [dept_id#x, count(dept_id#x) AS count(dept_id)#xL, min(salary#x) AS min(salary#x)#x] : +- Filter (outer(dept_id#x) = dept_id#x) : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- SubqueryAlias EMP + : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- SubqueryAlias DEPT + +- LocalRelation [dept_id#x, dept_name#x, state#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql index 5605ae104e655..670034ec1eab2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql @@ -1,6 +1,6 @@ -- Tests HAVING clause in subquery. 
-CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW EMP AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -9,19 +9,19 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70)) + (800, 'emp 8', date '2016-01-01', double(150.00), 70) AS EMP(id, emp_name, hiredate, salary, dept_id); -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW DEPT AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL')) + (70, 'dept 7', 'FL') AS DEPT(dept_id, dept_name, state); -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW BONUS AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -29,7 +29,7 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00))) + ('emp 6 - no dept', double(500.00)) AS BONUS(emp_name, bonus_amt); -- simple having in subquery. diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out index ae4cf010ffc98..170c99555ffe3 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -1,15 +1,15 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) +CREATE TEMPORARY VIEW EMP AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) AS EMP(id, emp_name, hiredate, salary, dept_id) -- !query schema struct<> @@ -18,13 +18,13 @@ struct<> -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") +CREATE TEMPORARY VIEW DEPT AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + 
(70, 'dept 7', 'FL') AS DEPT(dept_id, dept_name, state) -- !query schema struct<> @@ -33,15 +33,15 @@ struct<> -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) +CREATE TEMPORARY VIEW BONUS AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) AS BONUS(emp_name, bonus_amt) -- !query schema struct<> From 1d7934540b54a68336544813a4c2ac50b450b0ae Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 20 Dec 2023 11:32:07 -0800 Subject: [PATCH 51/63] Add readme and add ONLY_IF for most sql files in subquery dir --- .../test/resources/sql-tests/inputs/subquery/README.md | 10 ++++++++++ .../subquery/exists-subquery/exists-aggregate.sql | 1 + .../inputs/subquery/exists-subquery/exists-basic.sql | 1 + .../subquery/exists-subquery/exists-count-bug.sql | 1 + .../inputs/subquery/exists-subquery/exists-cte.sql | 1 + .../exists-subquery/exists-in-join-condition.sql | 1 + .../exists-subquery/exists-joins-and-set-ops.sql | 1 + .../subquery/exists-subquery/exists-orderby-limit.sql | 1 + .../subquery/exists-subquery/exists-outside-filter.sql | 1 + .../subquery/exists-subquery/exists-within-and-or.sql | 1 + .../sql-tests/inputs/subquery/in-subquery/in-basic.sql | 1 + .../inputs/subquery/in-subquery/in-count-bug.sql | 1 + .../inputs/subquery/in-subquery/in-group-by.sql | 1 + .../inputs/subquery/in-subquery/in-having.sql | 1 + .../sql-tests/inputs/subquery/in-subquery/in-joins.sql | 1 + .../sql-tests/inputs/subquery/in-subquery/in-limit.sql | 1 + .../subquery/in-subquery/in-multiple-columns.sql | 1 + .../inputs/subquery/in-subquery/in-null-semantics.sql | 1 + .../inputs/subquery/in-subquery/in-nullability.sql | 1 + .../inputs/subquery/in-subquery/in-order-by.sql | 1 + .../inputs/subquery/in-subquery/in-set-operations.sql | 1 + .../in-subquery/in-subquery-in-join-condition.sql | 1 + .../inputs/subquery/in-subquery/in-with-cte.sql | 1 + .../inputs/subquery/in-subquery/nested-not-in.sql | 1 + .../inputs/subquery/in-subquery/not-in-group-by.sql | 1 + .../inputs/subquery/in-subquery/not-in-joins.sql | 1 + .../not-in-unit-tests-multi-column-literal.sql | 1 + .../in-subquery/not-in-unit-tests-multi-column.sql | 1 + .../not-in-unit-tests-single-column-literal.sql | 1 + .../in-subquery/not-in-unit-tests-single-column.sql | 1 + .../inputs/subquery/in-subquery/simple-in.sql | 1 + .../subquery/negative-cases/invalid-correlation.sql | 1 + .../subquery/negative-cases/subq-input-typecheck.sql | 1 + .../nested-scalar-subquery-count-bug.sql | 1 + .../scalar-subquery/scalar-subquery-count-bug.sql | 1 + .../scalar-subquery/scalar-subquery-predicate.sql | 1 + .../scalar-subquery/scalar-subquery-select.sql | 1 + .../scalar-subquery/scalar-subquery-set-op.sql | 1 + .../sql-tests/inputs/subquery/subquery-in-from.sql | 1 + 39 files changed, 48 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/subquery/README.md diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md b/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md new file mode 100644 index 0000000000000..a42cbed4003af --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md @@ -0,0 +1,10 @@ + +## Note on 
PostgreSQLQueryTestSuite + +The SQL files in this directory are automatically opted into `PostgreSQLQueryTestSuite`, which +generates golden files with Postgres and runs Spark against these golden files. If you added a new +SQL test file, please generate the golden files with `PostgreSQLQueryTestSuite`. It is likely to +fail because of incompatibility between the SQL dialects of Spark and Postgres. To have queries that +are compatible for both systems, keep your SQL queries simple by using basic types like INTs and +strings only, if possible, and using functions that are present in both Spark and Postgres. Refer +to the header comment for `PostgreSQLQueryTestSuite` for more information. \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql index 1c4ef982c662f..637f230e739d7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=true --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql index 332e858800f7c..4055f798ad85d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery support. 
Tests basic form -- of EXISTS subquery (both EXISTS and NOT EXISTS) +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql index 1af7a2356836b..e6721f5f1c50e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temporary view t1(c1, c2) as values (0, 1), (1, 2); create temporary view t2(c1, c2) as values (0, 2), (0, 3); create temporary view t3(c1, c2) as values (0, 3), (1, 4), (2, 5); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql index c6784838158e1..ea8c0fc36ccfc 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery used along with -- Common Table Expressions(CTE) +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql index 5cff53d33816e..ad2e7ad563e08 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql @@ -6,6 +6,7 @@ -- 3. Join type: inner / left outer / right outer / full outer / left semi / left anti -- 4. 
AND or OR for the join condition +--ONLY_IF spark CREATE TEMP VIEW x(x1, x2) AS VALUES (2, 1), (1, 1), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql index 28cb2f74748ea..1542fa5149aaf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql @@ -13,6 +13,7 @@ --CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql index d920a413ab8fc..2e3055db60afa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql @@ -5,6 +5,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql index af75103797fe5..585de247b11c5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery support where the subquery is used outside the WHERE clause. +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql index 7743b5241d11d..5920b61dade52 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql @@ -1,6 +1,7 @@ -- Tests EXISTS subquery support. Tests EXISTS -- subquery within a AND or OR expression. 
+--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql index 5669423148f80..fc243422bd7c8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql @@ -1,5 +1,6 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view tab_a as select * from values (1, 1) as tab_a(a1, b1); create temporary view tab_b as select * from values (1, 1) as tab_b(a2, b2); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql index 4840f02511b64..f07ead3e868bd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temporary view t1(c1, c2) as values (0, 1), (1, 2); create temporary view t2(c1, c2) as values (0, 2), (0, 3); create temporary view t3(c1, c2) as values (0, 3), (1, 4), (2, 5); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql index 168faa0aee7c5..5c962dceb067f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark create temporary view t1 as select * from values ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql index 750cc42b8641c..4d11ea7005b3b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql index 08eeb1d106fd5..d12144ae7e492 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql @@ -16,6 +16,7 @@ --CONFIG_DIM3 
spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM3 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql index 5f06f159f0b60..a76da33e501ca 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql index 1a6c06f9dad49..c403c2d66ab1f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql index aad655f05099e..7bf49a3209268 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temp view v (c) as values (1), (null); create temp view v_empty (e) as select 1 where false; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql index 07b4afe36c143..30f3a25f0e85a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql @@ -1,4 +1,5 @@ -- SPARK-43413: Tests for IN subquery nullability +--ONLY_IF spark create temp view t0 as select 1 as a_nonnullable; create temp view t1 as select cast(null as int) as b_nullable; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql index 0b006af4130a9..8bf49a1c2d996 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql @@ -8,6 
+8,7 @@ --CONFIG_DIM2 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM2 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql index e4a931ce2c800..c6b6a338c9b1d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql @@ -1,5 +1,6 @@ -- A test suite for set-operations in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql index d4fa2f6b0e816..d519abdbacc05 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql @@ -1,4 +1,5 @@ -- Test that correlated EXISTS subqueries in join conditions are supported. +--ONLY_IF spark -- Permutations of the test: -- 1. In / Not In diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql index fa4ae87f041cf..8d08cfb4da346 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql @@ -1,5 +1,6 @@ -- A test suite for in with cte in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql index e2d4ad522d446..9472690d99143 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql @@ -1,4 +1,5 @@ -- Tests NOT-IN subqueries nested inside OR expression(s). +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql index 54b74534c1162..97da00ac2cefd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql @@ -1,5 +1,6 @@ -- A test suite for NOT IN GROUP BY in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark -- Test aggregate operator with codegen on and off. 
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql index 2d11c5da20633..639034c18f31b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql @@ -1,5 +1,6 @@ -- A test suite for not-in-joins in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql index a061e495f51b8..e9f3fda9c1b92 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, null), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql index 28ab75121573a..d5a30950b7fe7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql @@ -15,6 +15,7 @@ -- This can be generalized to include more tests for more columns, but it covers the main cases -- when there is more than one column. 
+--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql index 79747022eb1e8..2c3aed95a13ba 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, 1.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql index 8060246bf3a3f..bbf5dce38a962 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql @@ -33,6 +33,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, 1.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql index d8a58afa344db..98f1f3d52cbb0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql @@ -3,6 +3,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql index 1260fb73c6d81..0b83ecf43a301 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql @@ -1,5 +1,6 @@ -- The test file contains negative test cases -- of invalid queries where error messages are expected. +--ONLY_IF spark CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 2, 3) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql index 98ce1354a1355..da3ed9d11a8bd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql @@ -1,5 +1,6 @@ -- The test file contains negative test cases -- of invalid queries where error messages are expected. 
+--ONLY_IF spark CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 2, 3) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql index 86476389a8577..fe9152a27fe8d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark CREATE OR REPLACE VIEW t1(a1, a2) as values (0, 1), (1, 2); CREATE OR REPLACE VIEW t2(b1, b2) as values (0, 2), (0, 3); CREATE OR REPLACE VIEW t3(c1, c2) as values (0, 2), (0, 3); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql index 94b707ebf2002..c428dd81f287a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql @@ -1,6 +1,7 @@ --CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=true --CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=false +--ONLY_IF spark create temp view l (a, b) as values (1, 2.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql index d2d0ef7817178..20ba10176196f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -1,5 +1,6 @@ -- A test suite for scalar subquery in predicate context +--ONLY_IF spark CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv); CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql index e4f7b25a1684d..ef1e612fd744a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql @@ -1,5 +1,6 @@ -- A test suite for scalar subquery in SELECT clause +--ONLY_IF spark create temporary view t1 as select * from values ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql index 39e456611c03b..d282e477678c9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql @@ -1,5 +1,6 @@ -- Set operations in correlation path +--ONLY_IF spark CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 
0); CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3); CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql index 1273b56b6344b..662a065cdc13a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark -- Aliased subqueries in FROM clause SELECT * FROM (SELECT * FROM testData) AS t WHERE key = 1; From 83cd8d4e130f894d72156bad84f819923c76183a Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 20 Dec 2023 11:41:52 -0800 Subject: [PATCH 52/63] Rewrite exists-having --- .../exists-subquery/exists-having.sql.out | 103 +++++++----------- .../exists-subquery/exists-having.sql | 15 +-- .../exists-subquery/exists-having.sql.out | 15 +-- .../exists-subquery/exists-having.sql.out | 9 +- 4 files changed, 55 insertions(+), 87 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out index 54e046eafec29..e8e1acdda34c0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS VALUES +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -10,9 +10,8 @@ CREATE TEMPORARY VIEW EMP AS VALUES (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), (800, 'emp 8', date '2016-01-01', double(150.00), 70) -AS EMP(id, emp_name, hiredate, salary, dept_id) -- !query analysis -CreateViewCommand `EMP`, VALUES +CreateViewCommand `EMP`, [(id,None), (emp_name,None), (hiredate,None), (salary,None), (dept_id,None)], VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -21,36 +20,31 @@ CreateViewCommand `EMP`, VALUES (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + (800, 'emp 8', date '2016-01-01', double(150.00), 70), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query -CREATE TEMPORARY VIEW DEPT AS VALUES +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), (70, 'dept 7', 'FL') -AS DEPT(dept_id, dept_name, state) -- !query analysis -CreateViewCommand `DEPT`, VALUES +CreateViewCommand `DEPT`, [(dept_id,None), 
(dept_name,None), (state,None)], VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL') -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + (70, 'dept 7', 'FL'), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x, col3#x] -- !query -CREATE TEMPORARY VIEW BONUS AS VALUES +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -59,9 +53,8 @@ CREATE TEMPORARY VIEW BONUS AS VALUES ('emp 4', double(100.00)), ('emp 5', double(1000.00)), ('emp 6 - no dept', double(500.00)) -AS BONUS(emp_name, bonus_amt) -- !query analysis -CreateViewCommand `BONUS`, VALUES +CreateViewCommand `BONUS`, [(emp_name,None), (bonus_amt,None)], VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -69,10 +62,8 @@ CreateViewCommand `BONUS`, VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true - +- SubqueryAlias BONUS - +- LocalRelation [emp_name#x, bonus_amt#x] + ('emp 6 - no dept', double(500.00)), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x] -- !query @@ -89,15 +80,13 @@ Project [dept_id#x, count(1)#xL] : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : +- SubqueryAlias bonus : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : +- SubqueryAlias BONUS - : +- LocalRelation [emp_name#x, bonus_amt#x] + : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : +- LocalRelation [col1#x, col2#x] +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] +- SubqueryAlias emp +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query @@ -119,20 +108,17 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS 
dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] -- !query @@ -156,22 +142,19 @@ Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x] : : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias p : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias gp +- SubqueryAlias emp +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query @@ -193,20 +176,17 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter (bonus_amt#x > outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, 
cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] -- !query @@ -230,18 +210,15 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter ((bonus_amt#x > outer(min(salary#x)#x)) AND (outer(count(dept_id)#xL) > cast(1 as bigint))) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(dept_id#x) AS count(dept_id)#xL, min(salary#x) AS min(salary#x)#x] : +- Filter (outer(dept_id#x) = dept_id#x) : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql index 670034ec1eab2..d172220fbc413 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql @@ -1,6 +1,6 @@ -- Tests HAVING clause in subquery. 
-CREATE TEMPORARY VIEW EMP AS VALUES +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -9,19 +9,17 @@ CREATE TEMPORARY VIEW EMP AS VALUES (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70) -AS EMP(id, emp_name, hiredate, salary, dept_id); + (800, 'emp 8', date '2016-01-01', double(150.00), 70); -CREATE TEMPORARY VIEW DEPT AS VALUES +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL') -AS DEPT(dept_id, dept_name, state); + (70, 'dept 7', 'FL'); -CREATE TEMPORARY VIEW BONUS AS VALUES +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -29,8 +27,7 @@ CREATE TEMPORARY VIEW BONUS AS VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)) -AS BONUS(emp_name, bonus_amt); + ('emp 6 - no dept', double(500.00)); -- simple having in subquery. -- TC.01.01 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out index e330971afdf08..95229f39865c0 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by PostgreSQLQueryTestSuite with postgres -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -9,27 +9,25 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70)) -AS EMP(id, emp_name, hiredate, salary, dept_id) + (800, 'emp 8', date '2016-01-01', double(150.00), 70) -- !query output -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL')) -AS DEPT(dept_id, dept_name, state) + (70, 'dept 7', 'FL') -- !query output -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -37,8 +35,7 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00))) -AS BONUS(emp_name, bonus_amt) + ('emp 
6 - no dept', double(500.00)) -- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out index 170c99555ffe3..2a84516e90abe 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS VALUES +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -10,7 +10,6 @@ CREATE TEMPORARY VIEW EMP AS VALUES (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), (800, 'emp 8', date '2016-01-01', double(150.00), 70) -AS EMP(id, emp_name, hiredate, salary, dept_id) -- !query schema struct<> -- !query output @@ -18,14 +17,13 @@ struct<> -- !query -CREATE TEMPORARY VIEW DEPT AS VALUES +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), (70, 'dept 7', 'FL') -AS DEPT(dept_id, dept_name, state) -- !query schema struct<> -- !query output @@ -33,7 +31,7 @@ struct<> -- !query -CREATE TEMPORARY VIEW BONUS AS VALUES +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -42,7 +40,6 @@ CREATE TEMPORARY VIEW BONUS AS VALUES ('emp 4', double(100.00)), ('emp 5', double(1000.00)), ('emp 6 - no dept', double(500.00)) -AS BONUS(emp_name, bonus_amt) -- !query schema struct<> -- !query output From f491d1f4ce379987fc33af3c83c876ed214127d4 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 20 Dec 2023 11:43:41 -0800 Subject: [PATCH 53/63] Update comment --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index b55268f66e8c8..368d6319e0429 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -99,9 +99,9 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { * default because there is incompatibility between SQL dialects for Spark and the other DBMS. * * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for - * which is eligible for the SQL file . For example, if you have a SQL file named `describe.sql`, - * and you want to indicate that postgres is incompatible, add the following comment into the input - * file: + * which is eligible for the SQL file. These strings are defined in the companion object. 
For + * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is + * incompatible, add the following comment into the input file: * --ONLY_IF spark */ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { From ce0e1064acc260a9d7ee42d33a08cc6c8b36bd3c Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 21 Dec 2023 12:30:28 -0800 Subject: [PATCH 54/63] Ignore subquery-offset.sql --- .../test/resources/sql-tests/inputs/subquery/subquery-offset.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql index 9b61d1b26270b..cc9c70df9df87 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark drop table if exists x; drop table if exists y; From 1d7c620ed5a7205d27381f6abf94f08bd02199b7 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 21 Dec 2023 18:00:25 -0800 Subject: [PATCH 55/63] Create new file PostgreSQLQueryTestSuite.scala and fix indent --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 65 -------------- .../spark/sql/crossdbms/JdbcConnection.scala | 2 +- .../crossdbms/PostgreSQLQueryTestSuite.scala | 84 +++++++++++++++++++ 3 files changed, 85 insertions(+), 66 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 368d6319e0429..a4ca037301137 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -27,71 +27,6 @@ import org.apache.spark.sql.SQLQueryTestSuite import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} import org.apache.spark.util.ArrayImplicits.SparkArrayOps -// scalastyle:off line.size.limit -/** - * IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING, READ THIS: - * Your new SQL test is automatically opted into this suite. It is likely failing because it is not - * compatible with the default DBMS (currently postgres). You have two options: - * 1. (Recommended) Modify your queries to be compatible with both systems, and generate golden - * files with the instructions below. This is recommended because it will run your queries - * against postgres, providing higher correctness testing confidence, and you won't have to - * manually verify the golden files generated with your test. - * 2. Add this line to your .sql file: --ONLY_IF spark - * - * To re-generate golden files for entire suite, either run: - * 1. (Recommended) You need Docker on your machine. Install Docker and run the following command: - * {{{ - * bash ./bin/generate_golden_files_with_postgres.sh - * }}} - * 2. - * a. You need to have a Postgres server up before running this test. - * i. Install PostgreSQL. On a mac: `brew install postgresql@13` - * ii. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s postgres` OR `psql> CREATE role postgres superuser` - * b. 
Run the following command: - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * - * To indicate that the SQL file is not eligible for testing with this suite, add the following - * comment into the input file: - * {{{ - * --ONLY_IF spark - * }}} - * - * And then, to run the entire test suite, with the default cross DBMS: - * {{{ - * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * - * To re-generate golden file for a single test, e.g. `describe.sql`, run: - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" - * }}} - */ - -class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - - protected def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES - - // Reduce scope to subquery tests for now. That is where most correctness issues are. - override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - - override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = - (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) - - override protected def preprocessingCommands = Seq( - // Custom function `double` to imitate Spark's function, so that more tests are covered. - """ - |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision - | AS 'select CAST($1 AS double precision);' - | LANGUAGE SQL - | IMMUTABLE - | RETURNS NULL ON NULL INPUT; - |""".stripMargin - ) -} - /** * See SQLQueryTestSuite.scala for more information. This suite builds off of that to allow us * to generate golden files with other DBMS to perform cross-checking for correctness. It generates diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 4e83ab8c6da03..653e00d34d562 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -50,7 +50,7 @@ private[crossdbms] trait JdbcConnection { /** * Loads data from the given Spark DataFrame into the table with the specified name. - * @param df The Spark DataFrame containing the data to be loaded. + * @param df The Spark DataFrame containing the data to be loaded. */ def loadData(df: DataFrame, tableName: String): Unit diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala new file mode 100644 index 0000000000000..bb0aae3d8ab03 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.crossdbms + +import java.io.File + +// scalastyle:off line.size.limit +/** + * IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING, READ THIS: + * Your new SQL test is automatically opted into this suite. It is likely failing because it is not + * compatible with the default DBMS (currently postgres). You have two options: + * 1. (Recommended) Modify your queries to be compatible with both systems, and generate golden + * files with the instructions below. This is recommended because it will run your queries + * against postgres, providing higher correctness testing confidence, and you won't have to + * manually verify the golden files generated with your test. + * 2. Add this line to your .sql file: --ONLY_IF spark + * + * To re-generate golden files for entire suite, either run: + * 1. (Recommended) You need Docker on your machine. Install Docker and run the following command: + * {{{ + * bash ./bin/generate_golden_files_with_postgres.sh + * }}} + * 2. + * a. You need to have a Postgres server up before running this test. + * i. Install PostgreSQL. On a mac: `brew install postgresql@13` + * ii. After installing PostgreSQL, start the database server, then create a role named pg with + * superuser permissions: `createuser -s postgres` OR `psql> CREATE role postgres superuser` + * b. Run the following command: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" + * }}} + * + * To indicate that the SQL file is not eligible for testing with this suite, add the following + * comment into the input file: + * {{{ + * --ONLY_IF spark + * }}} + * + * And then, to run the entire test suite, with the default cross DBMS: + * {{{ + * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" + * }}} + * + * To re-generate golden file for a single test, e.g. `describe.sql`, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" + * }}} + */ +class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { + + protected def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES + + // Reduce scope to subquery tests for now. That is where most correctness issues are. + override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + + override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = + (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) + + override protected def preprocessingCommands = Seq( + // Custom function `double` to imitate Spark's function, so that more tests are covered. 
+ """ + |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + | AS 'select CAST($1 AS double precision);' + | LANGUAGE SQL + | IMMUTABLE + | RETURNS NULL ON NULL INPUT; + |""".stripMargin + ) +} From 0e542cf46bf99b6d62ee679b8336701311bd292e Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 1 Jan 2024 21:17:38 -0800 Subject: [PATCH 56/63] Move from postgres-results -> crossdbms-results --- .../exists-subquery/exists-having.sql.out | 0 .../exists-subquery/exists-having.sql.out | 147 ------------------ .../crossdbms/CrossDbmsQueryTestSuite.scala | 9 +- 3 files changed, 6 insertions(+), 150 deletions(-) rename sql/core/src/test/resources/sql-tests/{results/postgres-results => crossdbms-results}/exists-subquery/exists-having.sql.out (100%) delete mode 100644 sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out similarity index 100% rename from sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out rename to sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out deleted file mode 100644 index 2a84516e90abe..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ /dev/null @@ -1,147 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES - (100, 'emp 1', date '2005-01-01', double(100.00), 10), - (100, 'emp 1', date '2005-01-01', double(100.00), 10), - (200, 'emp 2', date '2003-01-01', double(200.00), 10), - (300, 'emp 3', date '2002-01-01', double(300.00), 20), - (400, 'emp 4', date '2005-01-01', double(400.00), 30), - (500, 'emp 5', date '2001-01-01', double(400.00), NULL), - (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), - (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70) --- !query schema -struct<> --- !query output - - - --- !query -CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES - (10, 'dept 1', 'CA'), - (20, 'dept 2', 'NY'), - (30, 'dept 3', 'TX'), - (40, 'dept 4 - unassigned', 'OR'), - (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL') --- !query schema -struct<> --- !query output - - - --- !query -CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES - ('emp 1', double(10.00)), - ('emp 1', double(20.00)), - ('emp 2', double(300.00)), - ('emp 2', double(100.00)), - ('emp 3', double(300.00)), - ('emp 4', double(100.00)), - ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)) --- !query schema -struct<> --- !query output - - - --- !query -SELECT dept_id, count(*) -FROM emp -GROUP BY dept_id -HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < min(emp.salary)) --- !query schema -struct --- !query output -10 3 -100 2 -20 1 -30 1 -70 1 -NULL 1 - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < Min(emp.salary))) --- !query schema -struct --- !query 
output -10 dept 1 CA -20 dept 2 NY -30 dept 3 TX -40 dept 4 - unassigned OR -50 dept 5 - unassigned NJ -70 dept 7 FL - - --- !query -SELECT dept_id, - Max(salary) -FROM emp gp -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp p - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < Min(p.salary))) -GROUP BY gp.dept_id --- !query schema -struct --- !query output -10 200.0 -100 400.0 -20 300.0 -30 400.0 -70 150.0 -NULL 400.0 - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt > Min(emp.salary))) --- !query schema -struct --- !query output -10 dept 1 CA -20 dept 2 NY -30 dept 3 TX -40 dept 4 - unassigned OR -50 dept 5 - unassigned NJ -70 dept 7 FL - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - count(emp.dept_id) - FROM emp - WHERE dept.dept_id = dept_id - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE ( bonus_amt > min(emp.salary) - AND count(emp.dept_id) > 1 ))) --- !query schema -struct --- !query output -10 dept 1 CA diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index a4ca037301137..e40be9d4193e0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -185,9 +185,8 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } override protected def resultFileForInputFile(file: File): String = { - val defaultResultsDir = new File(baseResourcePath, "results") - val goldenFilePath = new File( - defaultResultsDir, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath + val goldenFilePathsql/core/src/test/resources/sql-tests/crossdbms-results/ = new File(baseResourcePath, + CrossDbmsQueryTestSuite.RESULT_DIR).getAbsolutePath file.getAbsolutePath.replace(customInputFilePath, goldenFilePath) + ".out" } @@ -206,6 +205,10 @@ object CrossDbmsQueryTestSuite { final val POSTGRES = "postgres" final val SUPPORTED_DBMS = Seq(POSTGRES) + // Result directory for golden files produced by CrossDBMS. This means that all DBMS will share + // the same golden files. This is only possible if we make the queries simple enough that they are + // compatible with all DBMS. + private final val RESULT_DIR = "crossdbms-results" // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" // Argument in input files to indicate that the sql file is restricted to certain systems. 
From 5d4c5773e872b173453c6446df91af226104f905 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 2 Jan 2024 18:06:24 -0800 Subject: [PATCH 57/63] Fix silly paste --- .../apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index e40be9d4193e0..c874e2f59db1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -185,7 +185,7 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } override protected def resultFileForInputFile(file: File): String = { - val goldenFilePathsql/core/src/test/resources/sql-tests/crossdbms-results/ = new File(baseResourcePath, + val goldenFilePath = new File(baseResourcePath, CrossDbmsQueryTestSuite.RESULT_DIR).getAbsolutePath file.getAbsolutePath.replace(customInputFilePath, goldenFilePath) + ".out" } From 68ea6f222b9aa2c12f803c39707585832642ec7d Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 2 Jan 2024 23:15:19 -0800 Subject: [PATCH 58/63] Fix unintended removal of results file --- .../exists-subquery/exists-having.sql.out | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out new file mode 100644 index 0000000000000..ae4cf010ffc98 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -0,0 +1,150 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (200, "emp 2", date "2003-01-01", 200.00D, 10), + (300, "emp 3", date "2002-01-01", 300.00D, 20), + (400, "emp 4", date "2005-01-01", 400.00D, 30), + (500, "emp 5", date "2001-01-01", 400.00D, NULL), + (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), + (700, "emp 7", date "2010-01-01", 400.00D, 100), + (800, "emp 8", date "2016-01-01", 150.00D, 70) +AS EMP(id, emp_name, hiredate, salary, dept_id) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES + (10, "dept 1", "CA"), + (20, "dept 2", "NY"), + (30, "dept 3", "TX"), + (40, "dept 4 - unassigned", "OR"), + (50, "dept 5 - unassigned", "NJ"), + (70, "dept 7", "FL") +AS DEPT(dept_id, dept_name, state) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES + ("emp 1", 10.00D), + ("emp 1", 20.00D), + ("emp 2", 300.00D), + ("emp 2", 100.00D), + ("emp 3", 300.00D), + ("emp 4", 100.00D), + ("emp 5", 1000.00D), + ("emp 6 - no dept", 500.00D) +AS BONUS(emp_name, bonus_amt) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT dept_id, count(*) +FROM emp +GROUP BY dept_id +HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < min(emp.salary)) +-- !query schema +struct +-- !query output +10 3 +100 2 +20 1 +30 1 +70 1 +NULL 1 + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + 
Count(*) + FROM emp + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < Min(emp.salary))) +-- !query schema +struct +-- !query output +10 dept 1 CA +20 dept 2 NY +30 dept 3 TX +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ +70 dept 7 FL + + +-- !query +SELECT dept_id, + Max(salary) +FROM emp gp +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp p + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < Min(p.salary))) +GROUP BY gp.dept_id +-- !query schema +struct +-- !query output +10 200.0 +100 400.0 +20 300.0 +30 400.0 +70 150.0 +NULL 400.0 + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt > Min(emp.salary))) +-- !query schema +struct +-- !query output +10 dept 1 CA +20 dept 2 NY +30 dept 3 TX +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ +70 dept 7 FL + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + count(emp.dept_id) + FROM emp + WHERE dept.dept_id = dept_id + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE ( bonus_amt > min(emp.salary) + AND count(emp.dept_id) > 1 ))) +-- !query schema +struct +-- !query output +10 dept 1 CA From 1e1a2838fc88a4a20bfdb9071d47400890a99810 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 11:42:30 -0800 Subject: [PATCH 59/63] In middle of refactoring.. --- .../sql/jdbc/PostgreSQLQueryTestSuite.scala | 129 ++++++++ .../apache/spark/sql/SQLQueryTestHelper.scala | 289 +++++++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 291 ------------------ .../crossdbms/PostgreSQLQueryTestSuite.scala | 84 ----- 4 files changed, 418 insertions(+), 375 deletions(-) create mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala new file mode 100644 index 0000000000000..436639e70bcd1 --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.jdbc + +import java.io.File +import java.sql.Connection + +import scala.util.control.NonFatal + +import org.apache.spark.sql.SQLQueryTestHelper +import org.apache.spark.tags.DockerTest + +@DockerTest +class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper { + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:15.1-alpine") + override val env = Map( + "POSTGRES_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort = 5432 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" + } + + override def dataPreparation(conn: Connection): Unit = {} + + protected val baseResourcePath = { + // We use a path based on Spark home for 2 reasons: + // 1. Maven can't get correct resource directory when resources in other jars. + // 2. We test subclasses in the hive-thriftserver module. + getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile + } + protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath + protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath + + def listTestCases: Seq[TestCase] = { + listFilesRecursively(new File(customInputFilePath)).flatMap { file => + val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + val absPath = file.getAbsolutePath + val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) + RegularTestCase(testCaseName, absPath, resultFile) :: Nil + }.sortBy(_.name) + } + + def createScalaTestCase(testCase: TestCase): Unit = { + testCase match { + case _ if ignoreList.exists(t => + testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT))) => + ignore(s"${testCase.name} is in the ignore list.") { + log.debug(s"${testCase.name} is in the ignore list.") + } + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { + log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") + } + } + } + + protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments) + val settings = getSparkSettings(comments) + + val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) + .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + // If `--ONLY_IF` is found, check if the DBMS being used is allowed. 
+ if (dbmsConfig.nonEmpty && !dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + + s"not eligible with $crossDbmsToGenerateGoldenFiles.") + } else { + runQueries(queries, testCase, settings.toImmutableArraySeq) + } + } + + + protected def runQueries( + queries: Seq[String], + testCase: TestCase, + sparkConfigSet: Seq[(String, String)]): Unit = { + val localSparkSession = spark.newSession() + var runner: Option[SQLQueryTestRunner] = None + + val outputs: Seq[QueryTestOutput] = queries.map { sql => + val output = { + val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } + setOperations.foreach(localSparkSession.sql) + val (_, output) = handleExceptions( + getNormalizedQueryExecutionResult(localSparkSession, sql)) + output + } + ExecutionOutput( + sql = sql, + // Don't care about the schema for this test. Only care about correctness. + schema = None, + output = normalizeTestResults(output.mkString("\n"))) + } + if (runner.isDefined) { + runner.foreach(_.cleanUp()) + runner = None + } + + readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) + } + + listTestCases.foreach(createScalaTestCase) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index c08569150e2a9..d82d285ed17aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -17,19 +17,25 @@ package org.apache.spark.sql +import java.io.File + +import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.ErrorMessageFormat.MINIMAL import org.apache.spark.SparkThrowableHelper.getMessage import org.apache.spark.internal.Logging +import org.apache.spark.sql.IntegratedUDFTestUtils.{TestUDF, TestUDTFSet} import org.apache.spark.sql.catalyst.expressions.{CurrentDate, CurrentTimestampLike, CurrentUser, Literal} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.sql.types.{DateType, StructType, TimestampType} +import org.apache.spark.util.ArrayImplicits.SparkArrayOps trait SQLQueryTestHelper extends Logging { @@ -164,4 +170,287 @@ trait SQLQueryTestHelper extends Logging { (emptySchema, Seq(e.getClass.getName, e.getMessage)) } } + + /** A test case. */ + protected trait TestCase { + val name: String + val inputFile: String + val resultFile: String + def asAnalyzerTest(newName: String, newResultFile: String): TestCase + } + + /** + * traits that indicate UDF or PgSQL to trigger the code path specific to each. For instance, + * PgSQL tests require to register some UDF functions. + */ + protected trait PgSQLTest + + /** Trait that indicates ANSI-related tests with the ANSI mode enabled. */ + protected trait AnsiTest + + /** Trait that indicates an analyzer test that shows the analyzed plan string as output. 
*/ + protected trait AnalyzerTest extends TestCase { + override def asAnalyzerTest(newName: String, newResultFile: String): AnalyzerTest = this + } + + /** Trait that indicates the default timestamp type is TimestampNTZType. */ + protected trait TimestampNTZTest + + /** Trait that indicates CTE test cases need their create view versions */ + protected trait CTETest + + protected trait UDFTest { + val udf: TestUDF + } + + protected trait UDTFSetTest { + val udtfSet: TestUDTFSet + } + + protected case class RegularTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + RegularAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** An ANSI-related test case. */ + protected case class AnsiTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + AnsiAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** An analyzer test that shows the analyzed plan string as output. */ + protected case class AnalyzerTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnalyzerTest + + /** A PostgreSQL test case. */ + protected case class PgSQLTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + PgSQLAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** A UDF test case. */ + protected case class UDFTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDFAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + protected case class UDTFSetTestCase( + name: String, + inputFile: String, + resultFile: String, + udtfSet: TestUDTFSet) extends TestCase with UDTFSetTest { + + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDTFSetAnalyzerTestCase(newName, inputFile, newResultFile, udtfSet) + } + + /** A UDAF test case. */ + protected case class UDAFTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDAFAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + /** A UDF PostgreSQL test case. 
*/ + protected case class UDFPgSQLTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest with PgSQLTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDFPgSQLAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + /** An date time test case with default timestamp as TimestampNTZType */ + protected case class TimestampNTZTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with TimestampNTZTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + TimestampNTZAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** A CTE test case with special handling */ + protected case class CTETestCase(name: String, inputFile: String, resultFile: String) + extends TestCase + with CTETest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + CTEAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** These are versions of the above test cases, but only exercising analysis. */ + protected case class RegularAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest + protected case class AnsiAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with AnsiTest + protected case class PgSQLAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with PgSQLTest + protected case class UDFAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest + protected case class UDTFSetAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udtfSet: TestUDTFSet) + extends AnalyzerTest with UDTFSetTest + protected case class UDAFAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest + protected case class UDFPgSQLAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest with PgSQLTest + protected case class TimestampNTZAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with TimestampNTZTest + protected case class CTEAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with CTETest + + /** A single SQL query's output. */ + trait QueryTestOutput { + def sql: String + def schema: Option[String] + def output: String + def numSegments: Int + } + + /** A single SQL query's execution output. */ + case class ExecutionOutput( + sql: String, + schema: Option[String], + output: String) extends QueryTestOutput { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output. + val schemaString = if (schema.nonEmpty) { + s"-- !query schema\n" + schema.get + "\n" + } else { + "" + } + s"-- !query\n" + + sql + "\n" + + schemaString + + s"-- !query output\n" + + output + } + + override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } + } + + /** A single SQL query's analysis results. */ + case class AnalyzerOutput( + sql: String, + schema: Option[String], + output: String) extends QueryTestOutput { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output. 
+ s"-- !query\n" + + sql + "\n" + + s"-- !query analysis\n" + + output + } + override def numSegments: Int = 2 + } + + /** Returns all the files (not directories) in a directory, recursively. */ + protected def listFilesRecursively(path: File): Seq[File] = { + val (dirs, files) = path.listFiles().partition(_.isDirectory) + // Filter out test files with invalid extensions such as temp files created + // by vi (.swp), Mac (.DS_Store) etc. + val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions)) + (filteredFiles ++ dirs.flatMap(listFilesRecursively)).toImmutableArraySeq + } + protected def splitCommentsAndCodes(input: String) = + input.split("\n").partition { line => + val newLine = line.trim + newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") + } + + protected def getQueries(code: Array[String], comments: Array[String]) = { + def splitWithSemicolon(seq: Seq[String]) = { + seq.mkString("\n").split("(?<=[^\\\\]);") + } + + // If `--IMPORT` found, load code from another test case file, then insert them + // into the head in this test. + val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) + val importedCode = importedTestCaseName.flatMap { testCaseName => + listTestCases.find(_.name == testCaseName).map { testCase => + val input = fileToString(new File(testCase.inputFile)) + val (_, code) = splitCommentsAndCodes(input) + code + } + }.flatten + + val allCode = importedCode ++ code + val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { + // Although the loop is heavy, only used for bracketed comments test. + val queries = new ArrayBuffer[String] + val otherCodes = new ArrayBuffer[String] + var tempStr = "" + var start = false + for (c <- allCode) { + if (c.trim.startsWith("--QUERY-DELIMITER-START")) { + start = true + queries ++= splitWithSemicolon(otherCodes.toSeq) + otherCodes.clear() + } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { + start = false + queries += s"\n${tempStr.stripSuffix(";")}" + tempStr = "" + } else if (start) { + tempStr += s"\n$c" + } else { + otherCodes += c + } + } + if (otherCodes.nonEmpty) { + queries ++= splitWithSemicolon(otherCodes.toSeq) + } + queries.toSeq + } else { + splitWithSemicolon(allCode.toImmutableArraySeq).toSeq + } + + // List of SQL queries to run + tempQueries.map(_.trim).filter(_ != "") + // Fix misplacement when comment is at the end of the query. + .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") + } + + protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = { + val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) + settingLines.flatMap(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }) + } + + protected def getSparkConfigDimensions(comments: Array[String]): Seq[Seq[(String, String)]] = { + // A config dimension has multiple config sets, and a config set has multiple configs. + // - config dim: Seq[Seq[(String, String)]] + // - config set: Seq[(String, String)] + // - config: (String, String)) + // We need to do cartesian product for all the config dimensions, to get a list of + // config sets, and run the query once for each config set. 
+ val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) + val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => + lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }.toSeq).toSeq + } + + configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => + dim.flatMap { configSet => res.map(_ ++ configSet) } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index f8a405094c43a..6eb48b13d4078 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -21,8 +21,6 @@ import java.io.File import java.net.URI import java.util.Locale -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.parser.ParseException @@ -171,151 +169,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper // Create all the test cases. listTestCases.foreach(createScalaTestCase) - /** A test case. */ - protected trait TestCase { - val name: String - val inputFile: String - val resultFile: String - def asAnalyzerTest(newName: String, newResultFile: String): TestCase - } - - /** - * traits that indicate UDF or PgSQL to trigger the code path specific to each. For instance, - * PgSQL tests require to register some UDF functions. - */ - protected trait PgSQLTest - - /** Trait that indicates ANSI-related tests with the ANSI mode enabled. */ - protected trait AnsiTest - - /** Trait that indicates an analyzer test that shows the analyzed plan string as output. */ - protected trait AnalyzerTest extends TestCase { - override def asAnalyzerTest(newName: String, newResultFile: String): AnalyzerTest = this - } - - /** Trait that indicates the default timestamp type is TimestampNTZType. */ - protected trait TimestampNTZTest - - /** Trait that indicates CTE test cases need their create view versions */ - protected trait CTETest - - protected trait UDFTest { - val udf: TestUDF - } - - protected trait UDTFSetTest { - val udtfSet: TestUDTFSet - } - - /** A regular test case. */ - protected case class RegularTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - RegularAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** An ANSI-related test case. */ - protected case class AnsiTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - AnsiAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** An analyzer test that shows the analyzed plan string as output. */ - protected case class AnalyzerTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with AnalyzerTest - - /** A PostgreSQL test case. */ - protected case class PgSQLTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - PgSQLAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** A UDF test case. 
*/ - protected case class UDFTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDFAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - protected case class UDTFSetTestCase( - name: String, - inputFile: String, - resultFile: String, - udtfSet: TestUDTFSet) extends TestCase with UDTFSetTest { - - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDTFSetAnalyzerTestCase(newName, inputFile, newResultFile, udtfSet) - } - - /** A UDAF test case. */ - protected case class UDAFTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDAFAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - /** A UDF PostgreSQL test case. */ - protected case class UDFPgSQLTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest with PgSQLTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDFPgSQLAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - /** An date time test case with default timestamp as TimestampNTZType */ - protected case class TimestampNTZTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with TimestampNTZTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - TimestampNTZAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** A CTE test case with special handling */ - protected case class CTETestCase(name: String, inputFile: String, resultFile: String) - extends TestCase - with CTETest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - CTEAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** These are versions of the above test cases, but only exercising analysis. 
*/ - protected case class RegularAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest - protected case class AnsiAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with AnsiTest - protected case class PgSQLAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with PgSQLTest - protected case class UDFAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest - protected case class UDTFSetAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udtfSet: TestUDTFSet) - extends AnalyzerTest with UDTFSetTest - protected case class UDAFAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest - protected case class UDFPgSQLAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest with PgSQLTest - protected case class TimestampNTZAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with TimestampNTZTest - protected case class CTEAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with CTETest - protected def createScalaTestCase(testCase: TestCase): Unit = { if (ignoreList.exists(t => testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { @@ -349,91 +202,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } - protected def splitCommentsAndCodes(input: String) = - input.split("\n").partition { line => - val newLine = line.trim - newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") - } - - protected def getQueries(code: Array[String], comments: Array[String]) = { - def splitWithSemicolon(seq: Seq[String]) = { - seq.mkString("\n").split("(?<=[^\\\\]);") - } - - // If `--IMPORT` found, load code from another test case file, then insert them - // into the head in this test. - val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) - val importedCode = importedTestCaseName.flatMap { testCaseName => - listTestCases.find(_.name == testCaseName).map { testCase => - val input = fileToString(new File(testCase.inputFile)) - val (_, code) = splitCommentsAndCodes(input) - code - } - }.flatten - - val allCode = importedCode ++ code - val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { - // Although the loop is heavy, only used for bracketed comments test. - val queries = new ArrayBuffer[String] - val otherCodes = new ArrayBuffer[String] - var tempStr = "" - var start = false - for (c <- allCode) { - if (c.trim.startsWith("--QUERY-DELIMITER-START")) { - start = true - queries ++= splitWithSemicolon(otherCodes.toSeq) - otherCodes.clear() - } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { - start = false - queries += s"\n${tempStr.stripSuffix(";")}" - tempStr = "" - } else if (start) { - tempStr += s"\n$c" - } else { - otherCodes += c - } - } - if (otherCodes.nonEmpty) { - queries ++= splitWithSemicolon(otherCodes.toSeq) - } - queries.toSeq - } else { - splitWithSemicolon(allCode.toImmutableArraySeq).toSeq - } - - // List of SQL queries to run - tempQueries.map(_.trim).filter(_ != "") - // Fix misplacement when comment is at the end of the query. 
- .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") - } - - protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = { - val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) - settingLines.flatMap(_.split(",").map { kv => - val (conf, value) = kv.span(_ != '=') - conf.trim -> value.substring(1).trim - }) - } - - protected def getSparkConfigDimensions(comments: Array[String]): Seq[Seq[(String, String)]] = { - // A config dimension has multiple config sets, and a config set has multiple configs. - // - config dim: Seq[Seq[(String, String)]] - // - config set: Seq[(String, String)] - // - config: (String, String)) - // We need to do cartesian product for all the config dimensions, to get a list of - // config sets, and run the query once for each config set. - val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) - val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => - lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => - val (conf, value) = kv.span(_ != '=') - conf.trim -> value.substring(1).trim - }.toSeq).toSeq - } - - configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => - dim.flatMap { configSet => res.map(_ ++ configSet) } - } - } protected def runQueriesWithSparkConfigDimensions( queries: Seq[String], @@ -703,15 +471,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper }.sortBy(_.name) } - /** Returns all the files (not directories) in a directory, recursively. */ - protected def listFilesRecursively(path: File): Seq[File] = { - val (dirs, files) = path.listFiles().partition(_.isDirectory) - // Filter out test files with invalid extensions such as temp files created - // by vi (.swp), Mac (.DS_Store) etc. - val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions)) - (filteredFiles ++ dirs.flatMap(listFilesRecursively)).toImmutableArraySeq - } - /** Load built-in test tables into the SparkSession. */ protected def createTestTables(session: SparkSession): Unit = { import session.implicits._ @@ -921,56 +680,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper strippedPythonErrors.replaceAll("\\s+$", "") } - /** A single SQL query's output. */ - trait QueryTestOutput { - def sql: String - def schema: Option[String] - def output: String - def numSegments: Int - } - - /** A single SQL query's execution output. */ - case class ExecutionOutput( - sql: String, - schema: Option[String], - output: String) extends QueryTestOutput { - override def toString: String = { - // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - val schemaString = if (schema.nonEmpty) { - s"-- !query schema\n" + schema.get + "\n" - } else { - "" - } - s"-- !query\n" + - sql + "\n" + - schemaString + - s"-- !query output\n" + - output - } - - /** - * Some suites extending this one, such as [[CrossDbmsQueryTestSuite]], only test for - * correctness and do not care about the schema. The logic here allows passing the check for - * number of segments in the golden files when testing. - */ - override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } - } - - /** A single SQL query's analysis results. 
*/ - case class AnalyzerOutput( - sql: String, - schema: Option[String], - output: String) extends QueryTestOutput { - override def toString: String = { - // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - s"-- !query\n" + - sql + "\n" + - s"-- !query analysis\n" + - output - } - override def numSegments: Int = 2 - } - test("test splitCommentsAndCodes") { { // Correctly split comments and codes diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala index bb0aae3d8ab03..e69de29bb2d1d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -import java.io.File - -// scalastyle:off line.size.limit -/** - * IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING, READ THIS: - * Your new SQL test is automatically opted into this suite. It is likely failing because it is not - * compatible with the default DBMS (currently postgres). You have two options: - * 1. (Recommended) Modify your queries to be compatible with both systems, and generate golden - * files with the instructions below. This is recommended because it will run your queries - * against postgres, providing higher correctness testing confidence, and you won't have to - * manually verify the golden files generated with your test. - * 2. Add this line to your .sql file: --ONLY_IF spark - * - * To re-generate golden files for entire suite, either run: - * 1. (Recommended) You need Docker on your machine. Install Docker and run the following command: - * {{{ - * bash ./bin/generate_golden_files_with_postgres.sh - * }}} - * 2. - * a. You need to have a Postgres server up before running this test. - * i. Install PostgreSQL. On a mac: `brew install postgresql@13` - * ii. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s postgres` OR `psql> CREATE role postgres superuser` - * b. 
Run the following command: - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * - * To indicate that the SQL file is not eligible for testing with this suite, add the following - * comment into the input file: - * {{{ - * --ONLY_IF spark - * }}} - * - * And then, to run the entire test suite, with the default cross DBMS: - * {{{ - * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * - * To re-generate golden file for a single test, e.g. `describe.sql`, run: - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" - * }}} - */ -class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - - protected def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES - - // Reduce scope to subquery tests for now. That is where most correctness issues are. - override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - - override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = - (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) - - override protected def preprocessingCommands = Seq( - // Custom function `double` to imitate Spark's function, so that more tests are covered. - """ - |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision - | AS 'select CAST($1 AS double precision);' - | LANGUAGE SQL - | IMMUTABLE - | RETURNS NULL ON NULL INPUT; - |""".stripMargin - ) -} From 5756c8e2798b787583d2dd529b75fafdb639d6f6 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 14:25:07 -0800 Subject: [PATCH 60/63] Major refactoring in progress.. 
--- .../querytest/CrossDbmsQueryTestHelper.scala | 26 +++ .../PostgreSQLQueryTestSuite.scala | 124 +++++++--- .../apache/spark/sql/SQLQueryTestHelper.scala | 24 +- .../apache/spark/sql/SQLQueryTestSuite.scala | 18 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 216 ------------------ .../spark/sql/crossdbms/JdbcConnection.scala | 124 ---------- .../crossdbms/PostgreSQLQueryTestSuite.scala | 0 .../sql/crossdbms/SQLQueryTestRunner.scala | 48 ---- .../sql/crossdbms/bin/docker-compose.yml | 31 --- .../generate_golden_files_with_postgres.sh | 35 --- 10 files changed, 141 insertions(+), 505 deletions(-) create mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala rename connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/{ => querytest}/PostgreSQLQueryTestSuite.scala (52%) delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala new file mode 100644 index 0000000000000..cec304093b3f9 --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.jdbc + +trait CrossDbmsQueryTestHelper { + + val DATABASE_NAME: String + + protected final val POSTGRES = "postgres" + // Argument in input files to indicate that the sql file is restricted to certain systems. 
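+ // e.g. a .sql file declaring "--ONLY_IF spark" is skipped by suites whose DATABASE_NAME does not match.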
+ protected final val ONLY_IF_ARG = "--ONLY_IF " +} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala similarity index 52% rename from connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala rename to connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index 436639e70bcd1..808f3e58fb64c 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -17,15 +17,22 @@ package org.apache.spark.sql.jdbc import java.io.File -import java.sql.Connection +import java.sql.{Connection, ResultSet} +import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal +import org.apache.spark.sql.Row import org.apache.spark.sql.SQLQueryTestHelper +import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.tags.DockerTest @DockerTest -class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper { +class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper + with CrossDbmsQueryTestHelper { + + val DATABASE_NAME = POSTGRES + override val db = new DatabaseOnDocker { override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:15.1-alpine") override val env = Map( @@ -38,7 +45,18 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" } - override def dataPreparation(conn: Connection): Unit = {} + override def dataPreparation(conn: Connection): Unit = { + conn.prepareStatement( + // Custom function `double` to imitate Spark's function, so that more tests are covered. + """ + |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + | AS 'select CAST($1 AS double precision);' + | LANGUAGE SQL + | IMMUTABLE + | RETURNS NULL ON NULL INPUT; + |""".stripMargin + ) + } protected val baseResourcePath = { // We use a path based on Spark home for 2 reasons: @@ -61,11 +79,6 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT def createScalaTestCase(testCase: TestCase): Unit = { testCase match { - case _ if ignoreList.exists(t => - testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT))) => - ignore(s"${testCase.name} is in the ignore list.") { - log.debug(s"${testCase.name} is in the ignore list.") - } case _: RegularTestCase => // Create a test case to run this case. 
test(testCase.name) { @@ -81,48 +94,97 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { val input = fileToString(new File(testCase.inputFile)) val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments) - val settings = getSparkSettings(comments) + val queries = getQueries(code, comments, listTestCases) - val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) - .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + val dbmsConfig = comments.filter(_.startsWith(ONLY_IF_ARG)) + .map(_.substring(ONLY_IF_ARG.length)) // If `--ONLY_IF` is found, check if the DBMS being used is allowed. - if (dbmsConfig.nonEmpty && !dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) { log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + - s"not eligible with $crossDbmsToGenerateGoldenFiles.") + s"not eligible with $DATABASE_NAME.") } else { - runQueries(queries, testCase, settings.toImmutableArraySeq) + runQueriesAndCheckAgainstGoldenFile(queries, testCase) } } - - protected def runQueries( - queries: Seq[String], - testCase: TestCase, - sparkConfigSet: Seq[(String, String)]): Unit = { + protected def runQueriesAndCheckAgainstGoldenFile( + queries: Seq[String], testCase: TestCase): Unit = { val localSparkSession = spark.newSession() - var runner: Option[SQLQueryTestRunner] = None + val conn = getConnection() + val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = { - val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } - setOperations.foreach(localSparkSession.sql) - val (_, output) = handleExceptions( - getNormalizedQueryExecutionResult(localSparkSession, sql)) - output + try { + val sparkDf = localSparkSession.sql(sql) + + val isResultSet = stmt.execute(sql) + val rows = ArrayBuffer[Row]() + if (isResultSet) { + val rs = stmt.getResultSet + val metadata = rs.getMetaData + while (rs.next()) { + val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { + val value = rs.getObject(i) + if (value == null) { + "NULL" + } else { + value + } + })) + rows.append(row) + } + } + val output = rows.map(_.mkString("\t")).toSeq + // Use Spark analyzed plan to check if the query result is already semantically sorted. + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + output + } else { + // Sort the answer manually if it isn't sorted. + output.sorted + } + } catch { + case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) + } } + ExecutionOutput( sql = sql, // Don't care about the schema for this test. Only care about correctness. schema = None, - output = normalizeTestResults(output.mkString("\n"))) + output = output.mkString("\n")) + } + conn.close() + + // Read back the golden files. 
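+ // The golden file interleaves "-- !query" markers with the SQL text, a schema segment and the
+ // expected output, so segments(curSegment + 1) holds the SQL and segments(curSegment + 3) the output.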
+ var curSegment = 0 + val expectedOutputs: Seq[QueryTestOutput] = { + val goldenOutput = fileToString(new File(testCase.resultFile)) + val segments = goldenOutput.split("-- !query.*\n") + outputs.map { output => + val result = ExecutionOutput( + segments(curSegment + 1).trim, // SQL + None, // Schema + normalizeTestResults(segments(curSegment + 3))) // Output + curSegment += output.numSegments + result + } } - if (runner.isDefined) { - runner.foreach(_.cleanUp()) - runner = None + + // Compare results. + assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { + outputs.size } - readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) + outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => + assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { + output.sql + } + assertResult(expected.output, s"Result did not match" + + s" for query #$i\n${expected.sql}") { + output.output + } + } } listTestCases.foreach(createScalaTestCase) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index d82d285ed17aa..38e004e0b7209 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -368,13 +368,15 @@ trait SQLQueryTestHelper extends Logging { val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions)) (filteredFiles ++ dirs.flatMap(listFilesRecursively)).toImmutableArraySeq } - protected def splitCommentsAndCodes(input: String) = + + protected def splitCommentsAndCodes(input: String): (Array[String], Array[String]) = input.split("\n").partition { line => val newLine = line.trim newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") } - protected def getQueries(code: Array[String], comments: Array[String]) = { + protected def getQueries(code: Array[String], comments: Array[String], + allTestCases: Seq[TestCase]): Seq[String] = { def splitWithSemicolon(seq: Seq[String]) = { seq.mkString("\n").split("(?<=[^\\\\]);") } @@ -383,7 +385,7 @@ trait SQLQueryTestHelper extends Logging { // into the head in this test. val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) val importedCode = importedTestCaseName.flatMap { testCaseName => - listTestCases.find(_.name == testCaseName).map { testCase => + allTestCases.find(_.name == testCaseName).map { testCase => val input = fileToString(new File(testCase.inputFile)) val (_, code) = splitCommentsAndCodes(input) code @@ -453,4 +455,20 @@ trait SQLQueryTestHelper extends Logging { dim.flatMap { configSet => res.map(_ ++ configSet) } } } + + /** This is a helper function to normalize non-deterministic Python error stacktraces. 
*/ + def normalizeTestResults(output: String): String = { + val strippedPythonErrors: String = { + var traceback = false + output.split("\n").filter { line: String => + if (line == "Traceback (most recent call last):") { + traceback = true + } else if (!line.startsWith(" ")) { + traceback = false + } + !traceback + }.mkString("\n") + } + strippedPythonErrors.replaceAll("\\s+$", "") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 6eb48b13d4078..0edf06e1d7424 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -226,7 +226,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { val input = fileToString(new File(testCase.inputFile)) val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments) + val queries = getQueries(code, comments, listTestCases) val settings = getSparkSettings(comments) if (regenerateGoldenFiles) { @@ -664,22 +664,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } - /** This is a helper function to normalize non-deterministic Python error stacktraces. */ - def normalizeTestResults(output: String): String = { - val strippedPythonErrors: String = { - var traceback = false - output.split("\n").filter { line: String => - if (line == "Traceback (most recent call last):") { - traceback = true - } else if (!line.startsWith(" ")) { - traceback = false - } - !traceback - }.mkString("\n") - } - strippedPythonErrors.replaceAll("\\s+$", "") - } - test("test splitCommentsAndCodes") { { // Correctly split comments and codes diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala deleted file mode 100644 index c874e2f59db1b..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -import java.io.File -import java.util.Locale - -import scala.util.control.NonFatal - -import org.apache.spark.internal.Logging -import org.apache.spark.sql.SQLQueryTestSuite -import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} -import org.apache.spark.util.ArrayImplicits.SparkArrayOps - -/** - * See SQLQueryTestSuite.scala for more information. 
This suite builds off of that to allow us - * to generate golden files with other DBMS to perform cross-checking for correctness. It generates - * another set of golden files. Note that this is not currently run on all SQL input files by - * default because there is incompatibility between SQL dialects for Spark and the other DBMS. - * - * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for - * which is eligible for the SQL file. These strings are defined in the companion object. For - * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is - * incompatible, add the following comment into the input file: - * --ONLY_IF spark - */ -abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { - - /** - * A String representing the database system being used. A list of these is defined in the - * companion object. - */ - protected def crossDbmsToGenerateGoldenFiles: String - assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(crossDbmsToGenerateGoldenFiles)) - - /** - * A custom input file path where SQL tests are located, if desired. - */ - protected def customInputFilePath: String = inputFilePath - - /** - * A function taking in an optional custom connection URL, and returns a [[SQLQueryTestRunner]] - * specific to the database system. - */ - protected def getConnection: Option[String] => SQLQueryTestRunner - - /** - * Commands to be run on the DBMS before running queries to generate golden files, such as - * defining custom functions. - */ - protected def preprocessingCommands: Seq[String] - - private def customConnectionUrl: String = System.getenv( - CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) - - override protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { - val input = fileToString(new File(testCase.inputFile)) - val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments) - val settings = getSparkSettings(comments) - - val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) - .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) - // If `--ONLY_IF` is found, check if the DBMS being used is allowed. - if (dbmsConfig.nonEmpty && !dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { - log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + - s"not eligible with $crossDbmsToGenerateGoldenFiles.") - } else if (regenerateGoldenFiles) { - runQueries(queries, testCase, settings.toImmutableArraySeq) - } else { - val configSets = getSparkConfigDimensions(comments) - runQueriesWithSparkConfigDimensions( - queries, testCase, settings, configSets) - } - } - - override protected def runQueries( - queries: Seq[String], - testCase: TestCase, - sparkConfigSet: Seq[(String, String)]): Unit = { - val localSparkSession = spark.newSession() - var runner: Option[SQLQueryTestRunner] = None - - val outputs: Seq[QueryTestOutput] = queries.map { sql => - val output = { - // Use the runner when generating golden files, and Spark when running the test against - // the already generated golden files. - if (regenerateGoldenFiles) { - val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.orElse(Some(getConnection(connectionUrl))) - preprocessingCommands.foreach(command => runner.foreach(_.runQuery(command))) - - try { - // Either of the below two lines can error. 
If we go into the catch statement, then it - // is likely one of the following scenarios: - // 1. Error thrown in Spark analysis: - // a. The query is incompatible with either Spark and the other DBMS. - // b. There is an issue with the schema in Spark. - // c. The error is expected - it errors on both systems, but only the Spark error - // will be printed to the golden file, because it errors first. - // 2. Error thrown in other DBMS execution: - // a. The query is either incompatible between Spark and the other DBMS - // b. Some test table/view is not created on the other DBMS. - val sparkDf = localSparkSession.sql(sql) - val output = runner.map(_.runQuery(sql)).get - // Use Spark analyzed plan to check if the query result is already semantically sorted. - if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { - output - } else { - // Sort the answer manually if it isn't sorted. - output.sorted - } - } catch { - case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) - } - } else { - // Use Spark. - // Execute the list of set operation in order to add the desired configs - val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } - setOperations.foreach(localSparkSession.sql) - val (_, output) = handleExceptions( - getNormalizedQueryExecutionResult(localSparkSession, sql)) - output - } - } - ExecutionOutput( - sql = sql, - // Don't care about the schema for this test. Only care about correctness. - schema = None, - output = normalizeTestResults(output.mkString("\n"))) - } - if (runner.isDefined) { - runner.foreach(_.cleanUp()) - runner = None - } - - if (regenerateGoldenFiles) { - val goldenOutput = { - s"-- Automatically generated by ${getClass.getSimpleName} with " + - s"$crossDbmsToGenerateGoldenFiles\n" + outputs.mkString("\n\n\n") + "\n" - } - val resultFile = new File(testCase.resultFile) - val parent = resultFile.getParentFile - if (!parent.exists()) { - assert(parent.mkdirs(), "Could not create directory: " + parent) - } - stringToFile(resultFile, goldenOutput) - } - - readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) - } - - override def createScalaTestCase(testCase: TestCase): Unit = { - testCase match { - case _ if ignoreList.exists(t => - testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT))) => - ignore(s"${testCase.name} is in the ignore list.") { - log.debug(s"${testCase.name} is in the ignore list.") - } - case _: RegularTestCase => - // Create a test case to run this case. 
- test(testCase.name) { - runSqlTestCase(testCase, listTestCases) - } - case _ => - ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { - log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") - } - } - } - - override protected def resultFileForInputFile(file: File): String = { - val goldenFilePath = new File(baseResourcePath, - CrossDbmsQueryTestSuite.RESULT_DIR).getAbsolutePath - file.getAbsolutePath.replace(customInputFilePath, goldenFilePath) + ".out" - } - - override lazy val listTestCases: Seq[TestCase] = { - listFilesRecursively(new File(customInputFilePath)).flatMap { file => - val resultFile = resultFileForInputFile(file) - val absPath = file.getAbsolutePath - val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) - RegularTestCase(testCaseName, absPath, resultFile) :: Nil - }.sortBy(_.name) - } -} - -object CrossDbmsQueryTestSuite { - - final val POSTGRES = "postgres" - final val SUPPORTED_DBMS = Seq(POSTGRES) - - // Result directory for golden files produced by CrossDBMS. This means that all DBMS will share - // the same golden files. This is only possible if we make the queries simple enough that they are - // compatible with all DBMS. - private final val RESULT_DIR = "crossdbms-results" - // System argument to indicate a custom connection URL to the reference DBMS. - private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" - // Argument in input files to indicate that the sql file is restricted to certain systems. - private final val ONLY_IF_ARG = "--ONLY_IF " -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala deleted file mode 100644 index 653e00d34d562..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -import java.sql.{DriverManager, ResultSet} -import java.util.Properties - -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry - -/** - * Represents a connection (session) to a database using JDBC. - */ -private[crossdbms] trait JdbcConnection { - - /** - * Executes the given SQL query and returns the result as a sequence of strings. - * @return A Seq[String] representing the output, where each element represents a single row. - */ - def runQuery(query: String): Seq[String] - - /** - * Drops the table with the specified table name. - */ - def dropTable(tableName: String): Unit - - /** - * Creates a table with the specified name and schema. 
- * @param schemaString The schema definition for the table. Note that this may vary depending on - * the database system. - */ - def createTable(tableName: String, schemaString: String): Unit - - /** - * Loads data from the given Spark DataFrame into the table with the specified name. - * @param df The Spark DataFrame containing the data to be loaded. - */ - def loadData(df: DataFrame, tableName: String): Unit - - /** - * Closes the JDBC connection. - */ - def close(): Unit -} - -/** - * Represents a connection (session) to a PostgreSQL database. - */ -private[crossdbms] case class PostgresConnection(connection_url: Option[String] = None) - extends JdbcConnection { - - private final val POSTGRES_DRIVER_CLASS_NAME = "org.postgresql.Driver" - DriverRegistry.register(POSTGRES_DRIVER_CLASS_NAME) - - private final val DEFAULT_HOST = "localhost" - private final val DEFAULT_PORT = "5432" - private final val DEFAULT_USER = "postgres" - private final val DEFAULT_DB = "postgres" - private final val DEFAULT_PASSWORD = "postgres" - private final val DEFAULT_CONNECTION_URL = s"jdbc:postgresql://$DEFAULT_HOST:$DEFAULT_PORT/" + - s"$DEFAULT_DB?user=$DEFAULT_USER&password=$DEFAULT_PASSWORD" - private val url = connection_url.getOrElse(DEFAULT_CONNECTION_URL) - - private val conn = DriverManager.getConnection(url) - private val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) - - def runQuery(query: String): Seq[String] = { - try { - val isResultSet = stmt.execute(query) - val rows = ArrayBuffer[Row]() - if (isResultSet) { - val rs = stmt.getResultSet - val metadata = rs.getMetaData - while (rs.next()) { - val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { - val value = rs.getObject(i) - if (value == null) { "NULL" } else { value } - })) - rows.append(row) - } - } - rows.map(_.mkString("\t")).toSeq - } catch { - case e: Throwable => Seq(e.toString) - } - } - - def dropTable(tableName: String): Unit = { - val st = conn.prepareStatement("DROP TABLE IF EXISTS ?") - st.setString(1, tableName) - st.executeUpdate() - } - - def createTable(tableName: String, schemaString: String): Unit = { - val st = conn.prepareStatement("CREATE TABLE ? (?)") - st.setString(1, tableName) - st.setString(2, schemaString) - st.executeUpdate() - } - - def loadData(df: DataFrame, tableName: String): Unit = { - df.write.option("driver", POSTGRES_DRIVER_CLASS_NAME).mode("append") - .jdbc(url, tableName, new Properties()) - } - - def close(): Unit = if (!conn.isClosed) conn.close() -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala deleted file mode 100644 index d6230cc6b69a1..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -/** - * Trait for classes that can run SQL queries for testing. This is specifically used as a wrapper - * around a connection to a system that can run SQL queries, to run queries from - * [[SQLQueryTestSuite]]. - */ -trait SQLQueryTestRunner { - - /** - * Runs a given query and returns a Seq[String] that represents the query result output. This is a - * Seq where each element represents a single row. - */ - def runQuery(query: String): Seq[String] - - /** - * Perform clean up, such as dropping tables and closing the database connection. - */ - def cleanUp(): Unit -} - -/** - * A runner that takes a JDBC connection and uses it to execute queries. - */ -private[sql] case class JdbcSQLQueryTestRunner(connection: JdbcConnection) - extends SQLQueryTestRunner { - - def runQuery(query: String): Seq[String] = connection.runQuery(query) - - def cleanUp(): Unit = connection.close() -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml deleted file mode 100644 index 787bdda8ecd31..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml +++ /dev/null @@ -1,31 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -version: '3' - -services: - # Postgres database service - postgres-db: - image: postgres:latest - container_name: postgres-db - environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: postgres - ports: - - "5432:5432" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh deleted file mode 100644 index 32e1573b51e6e..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Get the root of the runtime repo -GIT_ROOT="$(git rev-parse --show-toplevel)" -echo "Root of repo is at $GIT_ROOT" - -echo "Starting DB container.." -docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" up --build --detach - -echo "Waiting for DB container to be ready.." -# Change if using different container or name. -DOCKER_CONTAINER_NAME="postgres-db" -timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" - -echo "Generating all golden files for postgres.." -SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - -echo "Bringing down DB container.." -docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes From 81a61f741b64bc88f1bd340d70d2b972dde47118 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 14:49:06 -0800 Subject: [PATCH 61/63] Passing tests --- .../querytest/PostgreSQLQueryTestSuite.scala | 14 +++-- .../exists-subquery/exists-having.sql.out | 55 +++++++++---------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index 808f3e58fb64c..e9ebd62210af5 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -55,7 +55,7 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT | IMMUTABLE | RETURNS NULL ON NULL INPUT; |""".stripMargin - ) + ).executeUpdate() } protected val baseResourcePath = { @@ -162,11 +162,13 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT val goldenOutput = fileToString(new File(testCase.resultFile)) val segments = goldenOutput.split("-- !query.*\n") outputs.map { output => - val result = ExecutionOutput( - segments(curSegment + 1).trim, // SQL - None, // Schema - normalizeTestResults(segments(curSegment + 3))) // Output - curSegment += output.numSegments + val result = + ExecutionOutput( + segments(curSegment + 1).trim, // SQL + None, // Schema + normalizeTestResults(segments(curSegment + 3))) // Output + // Assume that the golden file always has all 3 segments. 
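+ // (SQL text, schema, output), even though only the SQL and the output are compared here.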
+ curSegment += 3 result } } diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out index ae4cf010ffc98..2a84516e90abe 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -1,16 +1,15 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id) +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) -- !query schema struct<> -- !query output @@ -18,14 +17,13 @@ struct<> -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state) +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL') -- !query schema struct<> -- !query output @@ -33,16 +31,15 @@ struct<> -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt) +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) -- !query schema struct<> -- !query output From cf7b0449f43a664b192f5084f28cc5d6624c3b1f Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 14:55:42 -0800 Subject: [PATCH 62/63] Add documnetation and delete previous files --- .../querytest/PostgreSQLQueryTestSuite.scala | 20 +++ .../exists-subquery/exists-having.sql.out | 131 ------------------ .../sql-tests/inputs/subquery/README.md | 10 -- 3 files changed, 20 insertions(+), 141 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out delete mode 100644 
sql/core/src/test/resources/sql-tests/inputs/subquery/README.md diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index e9ebd62210af5..ec622b8deb79f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -27,6 +27,26 @@ import org.apache.spark.sql.SQLQueryTestHelper import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.tags.DockerTest +/** + * READ THIS IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING: + * Your new SQL test is automatically opted into this suite. It is likely failing because it is not + * compatible with the default Postgres. You have two options: + * 1. (Recommended) Modify your queries to be compatible with both systems. This is recommended + * because it will run your queries against postgres, providing higher correctness testing + * confidence, and you won't have to manually verify the golden files generated with your test. + * 2. Add this line to your .sql file: --ONLY_IF spark + * + * This suite builds off of that to allow us to run Postgres against the SQL test golden files (on + * which SQLQueryTestSuite generates and tests against) to perform cross-checking for correctness. + * Note that this is not currently run on all SQL input files by default because there is + * incompatibility between SQL dialects for Spark and the other DBMS. + * + * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for + * which is eligible for the SQL file. These strings are defined in the companion object. 
For + * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is + * incompatible, add the following comment into the input file: + * --ONLY_IF spark + */ @DockerTest class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper with CrossDbmsQueryTestHelper { diff --git a/sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out deleted file mode 100644 index 95229f39865c0..0000000000000 --- a/sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out +++ /dev/null @@ -1,131 +0,0 @@ --- Automatically generated by PostgreSQLQueryTestSuite with postgres --- !query -CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES - (100, 'emp 1', date '2005-01-01', double(100.00), 10), - (100, 'emp 1', date '2005-01-01', double(100.00), 10), - (200, 'emp 2', date '2003-01-01', double(200.00), 10), - (300, 'emp 3', date '2002-01-01', double(300.00), 20), - (400, 'emp 4', date '2005-01-01', double(400.00), 30), - (500, 'emp 5', date '2001-01-01', double(400.00), NULL), - (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), - (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70) --- !query output - - - --- !query -CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES - (10, 'dept 1', 'CA'), - (20, 'dept 2', 'NY'), - (30, 'dept 3', 'TX'), - (40, 'dept 4 - unassigned', 'OR'), - (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL') --- !query output - - - --- !query -CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES - ('emp 1', double(10.00)), - ('emp 1', double(20.00)), - ('emp 2', double(300.00)), - ('emp 2', double(100.00)), - ('emp 3', double(300.00)), - ('emp 4', double(100.00)), - ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)) --- !query output - - - --- !query -SELECT dept_id, count(*) -FROM emp -GROUP BY dept_id -HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < min(emp.salary)) --- !query output -10 3 -100 2 -20 1 -30 1 -70 1 -NULL 1 - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < Min(emp.salary))) --- !query output -10 dept 1 CA -20 dept 2 NY -30 dept 3 TX -40 dept 4 - unassigned OR -50 dept 5 - unassigned NJ -70 dept 7 FL - - --- !query -SELECT dept_id, - Max(salary) -FROM emp gp -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp p - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < Min(p.salary))) -GROUP BY gp.dept_id --- !query output -10 200.0 -100 400.0 -20 300.0 -30 400.0 -70 150.0 -NULL 400.0 - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt > Min(emp.salary))) --- !query output -10 dept 1 CA -20 dept 2 NY -30 dept 3 TX -40 dept 4 - unassigned OR -50 dept 5 - unassigned NJ -70 dept 7 FL - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - count(emp.dept_id) - FROM emp - WHERE dept.dept_id = dept_id - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE ( bonus_amt > min(emp.salary) - AND count(emp.dept_id) > 1 ))) --- !query output -10 dept 1 CA diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md deleted file mode 100644 index a42cbed4003af..0000000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md +++ /dev/null @@ -1,10 +0,0 @@ - -## Note on PostgreSQLQueryTestSuite - -The SQL files in this directory are automatically opted into `PostgreSQLQueryTestSuite`, which -generates golden files with Postgres and runs Spark against these golden files. If you added a new -SQL test file, please generate the golden files with `PostgreSQLQueryTestSuite`. It is likely to -fail because of incompatibility between the SQL dialects of Spark and Postgres. To have queries that -are compatible for both systems, keep your SQL queries simple by using basic types like INTs and -strings only, if possible, and using functions that are present in both Spark and Postgres. Refer -to the header comment for `PostgreSQLQueryTestSuite` for more information. \ No newline at end of file From a32a693fd6b92916fc1886cefba5e7ae15e29666 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 15:13:27 -0800 Subject: [PATCH 63/63] Move functions from PostgreSQLQueryTestSuite -> CrossDbmsQueryTestSuite --- .../querytest/CrossDbmsQueryTestHelper.scala | 26 --- .../querytest/CrossDbmsQueryTestSuite.scala | 182 ++++++++++++++++++ .../querytest/PostgreSQLQueryTestSuite.scala | 162 ++-------------- 3 files changed, 193 insertions(+), 177 deletions(-) delete mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala create mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala deleted file mode 100644 index cec304093b3f9..0000000000000 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.jdbc - -trait CrossDbmsQueryTestHelper { - - val DATABASE_NAME: String - - protected final val POSTGRES = "postgres" - // Argument in input files to indicate that the sql file is restricted to certain systems. 
- protected final val ONLY_IF_ARG = "--ONLY_IF " -} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala new file mode 100644 index 0000000000000..dfaa334e1c6ca --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.jdbc + +import java.io.File +import java.sql.ResultSet + +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.spark.sql.Row +import org.apache.spark.sql.SQLQueryTestHelper +import org.apache.spark.sql.catalyst.util.fileToString + +/** + * This suite builds off of that to allow us to run other DBMS against the SQL test golden files (on + * which SQLQueryTestSuite generates and tests against) to perform cross-checking for correctness. + * Note that this is not currently run on all SQL input files by default because there is + * incompatibility between SQL dialects for Spark and the other DBMS. + * + * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for + * which is eligible for the SQL file. These strings are defined in the companion object. For + * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is + * incompatible, add the following comment into the input file: + * --ONLY_IF spark + */ +trait CrossDbmsQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper { + + val DATABASE_NAME: String + + protected val baseResourcePath = { + // We use a path based on Spark home for 2 reasons: + // 1. Maven can't get correct resource directory when resources in other jars. + // 2. We test subclasses in the hive-thriftserver module. 
+ getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile + } + protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath + protected val customInputFilePath: String + protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath + + protected def listTestCases: Seq[TestCase] = { + listFilesRecursively(new File(customInputFilePath)).flatMap { file => + val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + val absPath = file.getAbsolutePath + val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) + RegularTestCase(testCaseName, absPath, resultFile) :: Nil + }.sortBy(_.name) + } + + def createScalaTestCase(testCase: TestCase): Unit = { + testCase match { + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { + log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") + } + } + } + + protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments, listTestCases) + + val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) + .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + // If `--ONLY_IF` is found, check if the DBMS being used is allowed. + if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) { + log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + + s"not eligible with $DATABASE_NAME.") + } else { + runQueriesAndCheckAgainstGoldenFile(queries, testCase) + } + } + + protected def runQueriesAndCheckAgainstGoldenFile( + queries: Seq[String], testCase: TestCase): Unit = { + val localSparkSession = spark.newSession() + val conn = getConnection() + val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) + + val outputs: Seq[QueryTestOutput] = queries.map { sql => + val output = { + try { + val sparkDf = localSparkSession.sql(sql) + val isResultSet = stmt.execute(sql) + val rows = ArrayBuffer[Row]() + if (isResultSet) { + val rs = stmt.getResultSet + val metadata = rs.getMetaData + while (rs.next()) { + val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { + val value = rs.getObject(i) + if (value == null) { + "NULL" + } else { + value + } + })) + rows.append(row) + } + } + val output = rows.map(_.mkString("\t")).toSeq + // Use Spark analyzed plan to check if the query result is already semantically sorted. + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + output + } else { + // Sort the answer manually if it isn't sorted. + output.sorted + } + } catch { + case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) + } + } + + ExecutionOutput( + sql = sql, + // Don't care about the schema for this test. Only care about correctness. + schema = None, + output = output.mkString("\n")) + } + conn.close() + + // Read back the golden files. 
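+ // Each query in the golden file spans three segments (SQL, schema, output); the schema segment
+ // is skipped because this suite only checks result correctness.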
+ var curSegment = 0 + val expectedOutputs: Seq[QueryTestOutput] = { + val goldenOutput = fileToString(new File(testCase.resultFile)) + val segments = goldenOutput.split("-- !query.*\n") + outputs.map { output => + val result = + ExecutionOutput( + segments(curSegment + 1).trim, // SQL + None, // Schema + normalizeTestResults(segments(curSegment + 3))) // Output + // Assume that the golden file always has all 3 segments. + curSegment += 3 + result + } + } + + // Compare results. + assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { + outputs.size + } + + outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => + assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { + output.sql + } + assertResult(expected.output, s"Result did not match" + + s" for query #$i\n${expected.sql}") { + output.output + } + } + } + +} + +object CrossDbmsQueryTestSuite { + + final val POSTGRES = "postgres" + // Argument in input files to indicate that the sql file is restricted to certain systems. + final val ONLY_IF_ARG = "--ONLY_IF " +} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index ec622b8deb79f..16ca831c56f2f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -17,14 +17,8 @@ package org.apache.spark.sql.jdbc import java.io.File -import java.sql.{Connection, ResultSet} +import java.sql.Connection -import scala.collection.mutable.ArrayBuffer -import scala.util.control.NonFatal - -import org.apache.spark.sql.Row -import org.apache.spark.sql.SQLQueryTestHelper -import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.tags.DockerTest /** @@ -36,22 +30,19 @@ import org.apache.spark.tags.DockerTest * confidence, and you won't have to manually verify the golden files generated with your test. * 2. Add this line to your .sql file: --ONLY_IF spark * - * This suite builds off of that to allow us to run Postgres against the SQL test golden files (on - * which SQLQueryTestSuite generates and tests against) to perform cross-checking for correctness. - * Note that this is not currently run on all SQL input files by default because there is - * incompatibility between SQL dialects for Spark and the other DBMS. - * - * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for - * which is eligible for the SQL file. These strings are defined in the companion object. 
For - * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is - * incompatible, add the following comment into the input file: - * --ONLY_IF spark + * Note: To run this test suite for a specific version (e.g., postgres:15.1): + * {{{ + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:15.1 + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.PostgreSQLQueryTestSuite" + * }}} */ @DockerTest -class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper - with CrossDbmsQueryTestHelper { +class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - val DATABASE_NAME = POSTGRES + val DATABASE_NAME = CrossDbmsQueryTestSuite.POSTGRES + // Scope to only subquery directory for now. + protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath override val db = new DatabaseOnDocker { override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:15.1-alpine") @@ -78,136 +69,5 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT ).executeUpdate() } - protected val baseResourcePath = { - // We use a path based on Spark home for 2 reasons: - // 1. Maven can't get correct resource directory when resources in other jars. - // 2. We test subclasses in the hive-thriftserver module. - getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile - } - protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath - protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath - - def listTestCases: Seq[TestCase] = { - listFilesRecursively(new File(customInputFilePath)).flatMap { file => - val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" - val absPath = file.getAbsolutePath - val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) - RegularTestCase(testCaseName, absPath, resultFile) :: Nil - }.sortBy(_.name) - } - - def createScalaTestCase(testCase: TestCase): Unit = { - testCase match { - case _: RegularTestCase => - // Create a test case to run this case. - test(testCase.name) { - runSqlTestCase(testCase, listTestCases) - } - case _ => - ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { - log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") - } - } - } - - protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { - val input = fileToString(new File(testCase.inputFile)) - val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments, listTestCases) - - val dbmsConfig = comments.filter(_.startsWith(ONLY_IF_ARG)) - .map(_.substring(ONLY_IF_ARG.length)) - // If `--ONLY_IF` is found, check if the DBMS being used is allowed. 
- if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) { - log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + - s"not eligible with $DATABASE_NAME.") - } else { - runQueriesAndCheckAgainstGoldenFile(queries, testCase) - } - } - - protected def runQueriesAndCheckAgainstGoldenFile( - queries: Seq[String], testCase: TestCase): Unit = { - val localSparkSession = spark.newSession() - val conn = getConnection() - val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) - - val outputs: Seq[QueryTestOutput] = queries.map { sql => - val output = { - try { - val sparkDf = localSparkSession.sql(sql) - - val isResultSet = stmt.execute(sql) - val rows = ArrayBuffer[Row]() - if (isResultSet) { - val rs = stmt.getResultSet - val metadata = rs.getMetaData - while (rs.next()) { - val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { - val value = rs.getObject(i) - if (value == null) { - "NULL" - } else { - value - } - })) - rows.append(row) - } - } - val output = rows.map(_.mkString("\t")).toSeq - // Use Spark analyzed plan to check if the query result is already semantically sorted. - if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { - output - } else { - // Sort the answer manually if it isn't sorted. - output.sorted - } - } catch { - case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) - } - } - - ExecutionOutput( - sql = sql, - // Don't care about the schema for this test. Only care about correctness. - schema = None, - output = output.mkString("\n")) - } - conn.close() - - // Read back the golden files. - var curSegment = 0 - val expectedOutputs: Seq[QueryTestOutput] = { - val goldenOutput = fileToString(new File(testCase.resultFile)) - val segments = goldenOutput.split("-- !query.*\n") - outputs.map { output => - val result = - ExecutionOutput( - segments(curSegment + 1).trim, // SQL - None, // Schema - normalizeTestResults(segments(curSegment + 3))) // Output - // Assume that the golden file always has all 3 segments. - curSegment += 3 - result - } - } - - // Compare results. - assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { - outputs.size - } - - outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => - assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { - output.sql - } - assertResult(expected.output, s"Result did not match" + - s" for query #$i\n${expected.sql}") { - output.output - } - } - } - listTestCases.foreach(createScalaTestCase) }
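With the shared harness factored into CrossDbmsQueryTestSuite, wiring up another DBMS should mostly amount to naming the system, scoping the input files, and describing the Docker container. The sketch below is illustrative only and not part of this patch: the MySQL suite name, image, port, and credentials are hypothetical placeholders, and it assumes the DatabaseOnDocker and dataPreparation hooks provided by DockerJDBCIntegrationSuite.

    package org.apache.spark.sql.jdbc

    import java.io.File
    import java.sql.Connection

    import org.apache.spark.tags.DockerTest

    @DockerTest
    class MySQLQueryTestSuite extends CrossDbmsQueryTestSuite {

      // Hypothetical system name; it would also need a constant next to POSTGRES in the
      // CrossDbmsQueryTestSuite companion object so --ONLY_IF comments can reference it.
      val DATABASE_NAME = "mysql"

      // Scope to the subquery inputs, mirroring the Postgres suite.
      protected val customInputFilePath: String =
        new File(inputFilePath, "subquery").getAbsolutePath

      override val db = new DatabaseOnDocker {
        override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:8.0")
        override val env = Map("MYSQL_ROOT_PASSWORD" -> "rootpass")
        override val usesIpc = false
        override val jdbcPort = 3306
        override def getJdbcUrl(ip: String, port: Int): String =
          s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass"
      }

      override def dataPreparation(conn: Connection): Unit = {
        // Create and populate whatever tables the scoped golden files expect.
      }

      listTestCases.foreach(createScalaTestCase)
    }

Scoping customInputFilePath per suite keeps the cross-check opt-in, so only directories whose golden files are known to be reasonably dialect-compatible get exercised against the other system.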