From 612396c6c16e62428338790639d8e5afb2431eb6 Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Wed, 29 Nov 2023 11:02:48 -0800
Subject: [PATCH 01/63] Initial commit

---
 .../spark/sql/crossdbms/JdbcConnection.scala  | 101 ++++++++++++++++++
 .../sql/crossdbms/SQLQueryTestRunner.scala    |  85 +++++++++++++++
 2 files changed, 186 insertions(+)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala
new file mode 100644
index 0000000000000..a9be898306987
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.crossdbms
+
+import java.sql.{DriverManager, ResultSet}
+import java.util.Properties
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.sql.{DataFrame, Row}
+import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry
+
+private[sql] trait JdbcConnection {
+  /**
+   * Runs the given query.
+   * @return A Seq[String] representing the output.
+   */
+  def runQuery(query: String): Seq[String]
+
+  /**
+   * Drop the table with the given table name.
+   */
+  def dropTable(tableName: String): Unit
+
+  /**
+   * Create a table with the given table name and schema.
+   */
+  def createTable(tableName: String, schemaString: String): Unit
+
+  /**
+   * Load data from the given Spark Dataframe into the table with given name.
+   */
+  def loadData(df: DataFrame, tableName: String): Unit
+
+  /**
+   * Close the connection.
+   */
+  def close(): Unit
+}
+
+private[sql] case class PostgresConnection(connection_url: Option[String] = None)
+  extends JdbcConnection {
+
+  DriverRegistry.register("org.postgresql.Driver")
+  private final val DEFAULT_USER = "pg"
+  private final val DEFAULT_CONNECTION_URL =
+    s"jdbc:postgresql://localhost:5432/postgres?user=$DEFAULT_USER"
+  private val url = connection_url.getOrElse(DEFAULT_CONNECTION_URL)
+  private val conn = DriverManager.getConnection(url)
+  private val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)
+
+  def runQuery(query: String): Seq[String] = {
+    try {
+      val isResultSet = stmt.execute(query)
+      val rows = ArrayBuffer[Row]()
+      if (isResultSet) {
+        val rs = stmt.getResultSet
+        val metadata = rs.getMetaData
+        while (rs.next()) {
+          val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => rs.getObject(i)))
+          rows.append(row)
+        }
+      }
+      rows.map(_.mkString("\t")).toSeq
+    } catch {
+      case e: Throwable => Seq(e.toString)
+    }
+  }
+
+  def dropTable(tableName: String): Unit = {
+    val dropTableSql = s"DROP TABLE IF EXISTS $tableName"
+    stmt.executeUpdate(dropTableSql)
+  }
+
+  def createTable(tableName: String, schemaString: String): Unit = {
+    val createTableSql = s"CREATE TABLE $tableName ($schemaString)"
+    stmt.executeUpdate(createTableSql)
+  }
+
+  def loadData(df: DataFrame, tableName: String): Unit = {
+    df.write.option("driver", "org.postgresql.Driver").mode("append")
+      .jdbc(url, tableName, new Properties())
+  }
+
+  def close(): Unit = if (!conn.isClosed) conn.close()
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala
new file mode 100644
index 0000000000000..7751818476ba6
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.crossdbms
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.planning.PhysicalOperation
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase}
+import org.apache.spark.sql.test.SQLTestUtils
+
+trait SQLQueryTestRunner {
+
+  /**
+   * Runs a given query.
+   * @return Returns a tuple with first argument being the schema and the second argument being a
+   *         Sequence of strings representing the output.
+   */
+  def runQuery(query: String): (String, Seq[String])
+
+  /**
+   * Perform clean up, such as dropping tables and closing the database connection.
+   */
+  def cleanUp(): Unit
+}
+
+/**
+ * A runner that takes a JDBC connection and uses it to execute queries. We still need the local
+ * Spark session to do some things, such as generate the schema and load test data into
+ * dataframes, before creating tables in the database.
+ */
+private[sql] case class JdbcSQLQueryTestRunner(
+    connection: JdbcConnection, spark: SparkSession) extends SQLQueryTestRunner with SQLTestUtils {
+  setUp()
+
+  def runQuery(query: String): (String, Seq[String]) = {
+    def isSorted(plan: LogicalPlan): Boolean = plan match {
+      case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
+      case _: DescribeCommandBase
+        | _: DescribeColumnCommand
+        | _: DescribeRelation
+        | _: DescribeColumn => true
+      case PhysicalOperation(_, _, Sort(_, true, _)) => true
+
+      case _ => plan.children.iterator.exists(isSorted)
+    }
+
+    val df = spark.sql(query)
+    val schema = df.schema.catalogString
+    val output = connection.runQuery(query)
+    // Use Spark analyzed plan to check if the query result is already semantically sorted.
+    val result = if (isSorted(df.queryExecution.analyzed)) {
+      output
+    } else {
+      // Sort the answer manually if it isn't sorted.
+      output.sorted
+    }
+    // Use the Spark schema for schema generation.
+    (schema, result)
+  }
+
+  def cleanUp(): Unit = {
+    connection.close()
+  }
+
+  private def setUp(): Unit = {
+    loadTestData()
+  }
+
+  override def loadTestData(): Unit = {} // TODO: Load test data
+}
\ No newline at end of file

From 83e6a518e1c32f0362686a1da77c7b01c038fff3 Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Wed, 29 Nov 2023 11:33:17 -0800
Subject: [PATCH 02/63] Add CrossDbmsQueryTestSuite

---
 .../crossdbms/CrossDbmsQueryTestSuite.scala   | 114 ++++++++++++++++++
 .../spark/sql/crossdbms/JdbcConnection.scala  |  43 +++++++
 .../sql/crossdbms/SQLQueryTestRunner.scala    |  52 +-------
 3 files changed, 162 insertions(+), 47 deletions(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
new file mode 100644
index 0000000000000..930a7293bd6a1
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.crossdbms
+
+import java.io.File
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SQLQueryTestSuite
+import org.apache.spark.sql.catalyst.planning.PhysicalOperation
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DescribeColumn, DescribeRelation, Distinct, Generate, Join, LogicalPlan, Sample, Sort}
+import org.apache.spark.sql.catalyst.util.stringToFile
+import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase}
+
+class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging {
+  private val DBMS_MAPPING = Map(
+    "postgres" ->((connection_url: Option[String]) =>
+      JdbcSQLQueryTestRunner(PostgresConnection(connection_url))))
+
+  private val crossDbmsToGenerateGoldenFiles: String = System.getenv("REF_DBMS")
+  private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL")
+
+  override def ignoreList: Set[String] = super.ignoreList ++ Set(
+    // TODO: include all test files for now.
+  )
+
+  private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match {
+    case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
+    case _: DescribeCommandBase
+      | _: DescribeColumnCommand
+      | _: DescribeRelation
+      | _: DescribeColumn => true
+    case PhysicalOperation(_, _, Sort(_, true, _)) => true
+    case _ => plan.children.iterator.exists(isSemanticallySorted)
+  }
+
+  override protected def runQueries(
+      queries: Seq[String],
+      testCase: TestCase,
+      configSet: Seq[(String, String)]): Unit = {
+    // TODO: check if testcase is an Analyzer test and ignore if so
+    val localSparkSession = spark.newSession()
+
+    var runner: Option[SQLQueryTestRunner] = None
+
+    // Run the SQL queries preparing them for comparison.
+    val outputs: Seq[QueryTestOutput] = queries.map { sql =>
+      val (schema, output) =
+        if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) {
+          if (runner.isEmpty) {
+            val connectionUrl = if (customConnectionUrl.nonEmpty) {
+              Some(customConnectionUrl)
+            } else {
+              None
+            }
+            runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl))
+          }
+          val sparkDf = spark.sql(sql)
+          val schema = sparkDf.schema.catalogString
+          val output = runner.map(_.runQuery(sql)).get
+          // Use Spark analyzed plan to check if the query result is already semantically sorted
+          val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) {
+            output.sorted
+          } else {
+            // Sort the answer manually if it isn't sorted.
+            output
+          }
+          (schema, result)
+        } else {
+          handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql))
+        }
+      // We do some query canonicalization now.
+      val executionOutput = ExecutionOutput(
+        sql = sql,
+        schema = Some(schema),
+        output = normalizeTestResults(output.mkString("\n")))
+      if (testCase.isInstanceOf[CTETest]) {
+        expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput)
+      }
+      executionOutput
+    }
+    runner.foreach(_.cleanUp())
+
+    if (regenerateGoldenFiles) {
+      // Again, we are explicitly not using multi-line string due to stripMargin removing "|".
+ val goldenOutput = { + s"-- Automatically generated by ${getClass.getSimpleName}\n" + + outputs.mkString("\n\n\n") + "\n" + } + val resultFile = new File(testCase.resultFile) + val parent = resultFile.getParentFile + if (!parent.exists()) { + assert(parent.mkdirs(), "Could not create directory: " + parent) + } + stringToFile(resultFile, goldenOutput) + } + + readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index a9be898306987..4c23cd054e743 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -99,3 +99,46 @@ private[sql] case class PostgresConnection(connection_url: Option[String] = None def close(): Unit = if (!conn.isClosed) conn.close() } + +var runner: Option[SQLQueryTestRunner] = None + + // Run the SQL queries preparing them for comparison. + val outputs: Seq[QueryTestOutput] = queries.map { sql => + testCase match { + case _: AnalyzerTest => + val (_, output) = + handleExceptions(getNormalizedQueryAnalysisResult(localSparkSession, sql)) + // We do some query canonicalization now. + AnalyzerOutput( + sql = sql, + schema = None, + output = normalizeTestResults(output.mkString("\n"))) + case _ => + val (schema, output) = + if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { + if (runner.isEmpty) { + val connectionUrl = if (customConnectionUrl.nonEmpty) { + Some(customConnectionUrl) + } else { + None + } + runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl)) + } + val output = handleExceptions(runner.map(_.runQuery(sql)).get) + val schema = spark.sql(sql).schema.catalogString + (schema, output) + } else { + handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql)) + } + // We do some query canonicalization now. + val executionOutput = ExecutionOutput( + sql = sql, + schema = Some(schema), + output = normalizeTestResults(output.mkString("\n"))) + if (testCase.isInstanceOf[CTETest]) { + expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput) + } + executionOutput + } + } + jdbcRunner.foreach(_.cleanUp()) \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index 7751818476ba6..672a943380fe4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -17,20 +17,12 @@ package org.apache.spark.sql.crossdbms -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} -import org.apache.spark.sql.test.SQLTestUtils - trait SQLQueryTestRunner { /** * Runs a given query. - * @return Returns a tuple with first argument being the schema and the second argument being a - * Sequence of strings representing the output. */ - def runQuery(query: String): (String, Seq[String]) + def runQuery(query: String): Seq[String] /** * Perform clean up, such as dropping tables and closing the database connection. 
@@ -40,46 +32,12 @@ trait SQLQueryTestRunner { /** * A runner that takes a JDBC connection and uses it to execute queries. We still need the local - * Spark session to do some things, such as generate the schema and load test data into - * dataframes, before creating tables in the database. + * Spark session to do some things.. */ private[sql] case class JdbcSQLQueryTestRunner( - connection: JdbcConnection, spark: SparkSession) extends SQLQueryTestRunner with SQLTestUtils { - setUp() - - def runQuery(query: String): (String, Seq[String]) = { - def isSorted(plan: LogicalPlan): Boolean = plan match { - case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false - case _: DescribeCommandBase - | _: DescribeColumnCommand - | _: DescribeRelation - | _: DescribeColumn => true - case PhysicalOperation(_, _, Sort(_, true, _)) => true - - case _ => plan.children.iterator.exists(isSorted) - } - - val df = spark.sql(query) - val schema = df.schema.catalogString - val output = connection.runQuery(query) - // Use Spark analyzed plan to check if the query result is already semantically sorted. - val result = if (isSorted(df.queryExecution.analyzed)) { - output - } else { - // Sort the answer manually if it isn't sorted. - output.sorted - } - // Use the Spark schema for schema generation. - (schema, result) - } - - def cleanUp(): Unit = { - connection.close() - } + connection: JdbcConnection) extends SQLQueryTestRunner { - private def setUp(): Unit = { - loadTestData() - } + def runQuery(query: String): Seq[String] = connection.runQuery(query) - override def loadTestData(): Unit = {} // TODO: Load test data + def cleanUp(): Unit = connection.close() } \ No newline at end of file From c691f839b6b35e650919656d2a7d45cf2cacc7a1 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 12:55:45 -0800 Subject: [PATCH 03/63] Small changes for niceness --- .../org/apache/spark/sql/SQLQueryTestSuite.scala | 8 ++++++-- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 11 ++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 36b572c7e8b5d..9111aa472a519 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -928,10 +928,14 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper output: String) extends QueryTestOutput { override def toString: String = { // We are explicitly not using multi-line string due to stripMargin removing "|" in output. 
+ val schemaString = if (schema.nonEmpty) { + s"-- !query schema\n" + schema.get + "\n" + } else { + "" + } s"-- !query\n" + sql + "\n" + - s"-- !query schema\n" + - schema.get + "\n" + + schemaString + s"-- !query output\n" + output } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 930a7293bd6a1..9e5bedbe6c9fb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DescribeColumn, D import org.apache.spark.sql.catalyst.util.stringToFile import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} +// TODO: Add header class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { private val DBMS_MAPPING = Map( "postgres" ->((connection_url: Option[String]) => @@ -59,7 +60,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Run the SQL queries preparing them for comparison. val outputs: Seq[QueryTestOutput] = queries.map { sql => - val (schema, output) = + val output = if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { if (runner.isEmpty) { val connectionUrl = if (customConnectionUrl.nonEmpty) { @@ -70,7 +71,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl)) } val sparkDf = spark.sql(sql) - val schema = sparkDf.schema.catalogString val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { @@ -79,14 +79,15 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Sort the answer manually if it isn't sorted. output } - (schema, result) + result } else { - handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql)) + handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql))._2 } // We do some query canonicalization now. val executionOutput = ExecutionOutput( sql = sql, - schema = Some(schema), + // Don't care about the schema for this test. Only care about correctness. 
+ schema = None, output = normalizeTestResults(output.mkString("\n"))) if (testCase.isInstanceOf[CTETest]) { expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput) From 24a02f0dac1e9f2dd828b0d033acb66943ebc22a Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 13:18:14 -0800 Subject: [PATCH 04/63] Save point --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 58 ++++++++++++++++--- .../spark/sql/crossdbms/JdbcConnection.scala | 43 -------------- .../sql/crossdbms/SQLQueryTestRunner.scala | 12 ++-- 3 files changed, 58 insertions(+), 55 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 9e5bedbe6c9fb..f62c4793abeb7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.crossdbms import java.io.File +import java.util.Locale import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite @@ -25,6 +26,7 @@ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DescribeColumn, DescribeRelation, Distinct, Generate, Join, LogicalPlan, Sample, Sort} import org.apache.spark.sql.catalyst.util.stringToFile import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} +import org.apache.spark.util.Utils // TODO: Add header class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { @@ -35,9 +37,12 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { private val crossDbmsToGenerateGoldenFiles: String = System.getenv("REF_DBMS") private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") - override def ignoreList: Set[String] = super.ignoreList ++ Set( - // TODO: include all test files for now. - ) + override protected val inputFilePath = { + val originalInputs = new File(baseResourcePath, "inputs").getAbsolutePath + new File(originalInputs, crossDbmsToGenerateGoldenFiles).getAbsolutePath + } + override protected val goldenFilePath = new File( + baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false @@ -53,12 +58,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { queries: Seq[String], testCase: TestCase, configSet: Seq[(String, String)]): Unit = { - // TODO: check if testcase is an Analyzer test and ignore if so val localSparkSession = spark.newSession() var runner: Option[SQLQueryTestRunner] = None - - // Run the SQL queries preparing them for comparison. val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { @@ -97,7 +99,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { runner.foreach(_.cleanUp()) if (regenerateGoldenFiles) { - // Again, we are explicitly not using multi-line string due to stripMargin removing "|". 
val goldenOutput = { s"-- Automatically generated by ${getClass.getSimpleName}\n" + outputs.mkString("\n\n\n") + "\n" @@ -112,4 +113,47 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) } + + override def createScalaTestCase(testCase: TestCase): Unit = { + if (ignoreList.exists(t => + testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { + ignore(testCase.name) { + /* Do nothing */ + } + } else { + testCase match { + case _: AnalyzerTestCase => + ignore(s"${testCase.name} is skipped because it is an analyzer test and this " + + s"suite is only testing for correctness.") { + /* Do nothing */ + } + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not RegularTestCases are now") { + /* Do nothing */ + } + } + } + } + + override lazy val listTestCases: Seq[TestCase] = { + listFilesRecursively(new File(inputFilePath)).flatMap { file => + var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + var analyzerResultFile = + file.getAbsolutePath.replace(inputFilePath, analyzerGoldenFilePath) + ".out" + // JDK-4511638 changes 'toString' result of Float/Double + // JDK-8282081 changes DataTimeFormatter 'F' symbol + if (Utils.isJavaVersionAtLeast21) { + if (new File(resultFile + ".java21").exists()) resultFile += ".java21" + if (new File(analyzerResultFile + ".java21").exists()) analyzerResultFile += ".java21" + } + val absPath = file.getAbsolutePath + val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) + RegularTestCase(testCaseName, absPath, resultFile) :: Nil + }.sortBy(_.name) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 4c23cd054e743..a9be898306987 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -99,46 +99,3 @@ private[sql] case class PostgresConnection(connection_url: Option[String] = None def close(): Unit = if (!conn.isClosed) conn.close() } - -var runner: Option[SQLQueryTestRunner] = None - - // Run the SQL queries preparing them for comparison. - val outputs: Seq[QueryTestOutput] = queries.map { sql => - testCase match { - case _: AnalyzerTest => - val (_, output) = - handleExceptions(getNormalizedQueryAnalysisResult(localSparkSession, sql)) - // We do some query canonicalization now. - AnalyzerOutput( - sql = sql, - schema = None, - output = normalizeTestResults(output.mkString("\n"))) - case _ => - val (schema, output) = - if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { - if (runner.isEmpty) { - val connectionUrl = if (customConnectionUrl.nonEmpty) { - Some(customConnectionUrl) - } else { - None - } - runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl)) - } - val output = handleExceptions(runner.map(_.runQuery(sql)).get) - val schema = spark.sql(sql).schema.catalogString - (schema, output) - } else { - handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql)) - } - // We do some query canonicalization now. 
- val executionOutput = ExecutionOutput( - sql = sql, - schema = Some(schema), - output = normalizeTestResults(output.mkString("\n"))) - if (testCase.isInstanceOf[CTETest]) { - expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput) - } - executionOutput - } - } - jdbcRunner.foreach(_.cleanUp()) \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index 672a943380fe4..87df28ccaaa85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -17,10 +17,13 @@ package org.apache.spark.sql.crossdbms +/** + * Trait for classes that can run SQL queries for testing. + */ trait SQLQueryTestRunner { /** - * Runs a given query. + * Runs a given query and returns a Seq[String] that represents the query result output. */ def runQuery(query: String): Seq[String] @@ -31,11 +34,10 @@ trait SQLQueryTestRunner { } /** - * A runner that takes a JDBC connection and uses it to execute queries. We still need the local - * Spark session to do some things.. + * A runner that takes a JDBC connection and uses it to execute queries. */ -private[sql] case class JdbcSQLQueryTestRunner( - connection: JdbcConnection) extends SQLQueryTestRunner { +private[sql] case class JdbcSQLQueryTestRunner(connection: JdbcConnection) + extends SQLQueryTestRunner { def runQuery(query: String): Seq[String] = connection.runQuery(query) From 315847af6ca84190beb50c12f9f529bad61f052c Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 13:23:59 -0800 Subject: [PATCH 05/63] Add sqllogictest-select1.sql in postgres-crosstest input file --- .../sqllogictest-select1.sql | 90 +++++++++++++++++++ .../crossdbms/CrossDbmsQueryTestSuite.scala | 4 +- 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql new file mode 100644 index 0000000000000..27328457ac851 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql @@ -0,0 +1,90 @@ +-- First 6 queries of the sql-logic-tests with some minor changes for compatibility, available here: +-- https://www.sqlite.org/sqllogictest/file?name=test/select1.test&ci=tip. 
+ +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245); + +SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END + FROM t1 + ORDER BY 1; + +SELECT a+b*2+c*3+d*4+e*5, + CAST((a+b+c+d+e)/5 AS INT) + FROM t1 + ORDER BY 1,2; + +SELECT a+b*2+c*3+d*4+e*5, + CASE WHEN ac OR ee + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.bc OR ec + AND c>d + ORDER BY 3,4,5,1,2,6; + +SELECT a+b*2+c*3+d*4, + CASE a+1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c>t1.c AND x.d=d+2) + ORDER BY 4,2,1,3; + +DROP VIEW IF EXISTS t1; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index f62c4793abeb7..3fa9f5c2b4c9b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -39,7 +39,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override protected val inputFilePath = { val originalInputs = new File(baseResourcePath, "inputs").getAbsolutePath - new File(originalInputs, crossDbmsToGenerateGoldenFiles).getAbsolutePath + new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath } override protected val goldenFilePath = new File( baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath @@ -133,7 +133,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { runSqlTestCase(testCase, listTestCases) } case _ => - ignore(s"Ignoring test cases that are not RegularTestCases are now") { + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { /* Do nothing */ } } From 5e087e1425c5c6764c7db033bc0c362ce5ee171e Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 13:47:26 -0800 Subject: [PATCH 06/63] Add header for class suite --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 63 +++++++++++++++---- .../sql/crossdbms/SQLQueryTestRunner.scala | 2 +- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 3fa9f5c2b4c9b..a11e3598f8ab8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -28,18 +28,65 @@ import org.apache.spark.sql.catalyst.util.stringToFile import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.util.Utils -// TODO: Add header +// scalastyle:off 
line.size.limit +/** + * See SQLQueryTestSuite.scala for the more information. This class builds off of that to allow us + * to generate golden files with other DBMS to perform cross-checking for correctness. Note that the + * input directory path is currently limited because most, if not all, of our current SQL query + * tests will not be compatible with other DBMSes. There will be more work in the future, such as + * some kind of conversion, to increase coverage. + * + * You need to have a database server up before running this test. + * For postgres: + * 1. On a mac: `brew install postgresql@13` + * 2. After installing PostgreSQL, start the database server, then create a role named pg with + * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` + * + * To run the entire test suite: + * {{{ + * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * }}} + * + * To re-generate golden files for entire suite, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * }}} + * + * To re-generate golden file for a single test, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" -- -z describe.sql" + * }}} + * + * To specify a DBMS to use (the default is postgres): + * {{{ + * REF_DBMS=mysql SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * }}} + */ +// scalastyle:on line.size.limit class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { + private val DEFAULT = "postgres" private val DBMS_MAPPING = Map( "postgres" ->((connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)))) - private val crossDbmsToGenerateGoldenFiles: String = System.getenv("REF_DBMS") + private val crossDbmsToGenerateGoldenFiles: String = { + val userInputDbms = System.getenv("REF_DBMS") + if (userInputDbms.isEmpty) { + DEFAULT + } else { + userInputDbms + } + } private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") + // Currently using a separate directory for this because the current SQL tests we have are highly + // unlikely to get compatible with the other DBMS. 
override protected val inputFilePath = { val originalInputs = new File(baseResourcePath, "inputs").getAbsolutePath - new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath + val x = new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath + log.info("HERE " + originalInputs) + log.info("HERE " + x) + x } override protected val goldenFilePath = new File( baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath @@ -63,7 +110,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { var runner: Option[SQLQueryTestRunner] = None val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = - if (regenerateGoldenFiles && crossDbmsToGenerateGoldenFiles.nonEmpty) { + if (regenerateGoldenFiles) { if (runner.isEmpty) { val connectionUrl = if (customConnectionUrl.nonEmpty) { Some(customConnectionUrl) @@ -122,11 +169,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } } else { testCase match { - case _: AnalyzerTestCase => - ignore(s"${testCase.name} is skipped because it is an analyzer test and this " + - s"suite is only testing for correctness.") { - /* Do nothing */ - } case _: RegularTestCase => // Create a test case to run this case. test(testCase.name) { @@ -143,13 +185,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" - var analyzerResultFile = - file.getAbsolutePath.replace(inputFilePath, analyzerGoldenFilePath) + ".out" // JDK-4511638 changes 'toString' result of Float/Double // JDK-8282081 changes DataTimeFormatter 'F' symbol if (Utils.isJavaVersionAtLeast21) { if (new File(resultFile + ".java21").exists()) resultFile += ".java21" - if (new File(analyzerResultFile + ".java21").exists()) analyzerResultFile += ".java21" } val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index 87df28ccaaa85..f600276a49e92 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -42,4 +42,4 @@ private[sql] case class JdbcSQLQueryTestRunner(connection: JdbcConnection) def runQuery(query: String): Seq[String] = connection.runQuery(query) def cleanUp(): Unit = connection.close() -} \ No newline at end of file +} From 33b52b2e2611f2cc0b0cfcfcf2c545f80fc6ae3b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 14:09:25 -0800 Subject: [PATCH 07/63] Small changes --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index a11e3598f8ab8..3621ae77277dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -37,10 +37,11 @@ import org.apache.spark.util.Utils * some kind of conversion, to increase coverage. 
* * You need to have a database server up before running this test. - * For postgres: - * 1. On a mac: `brew install postgresql@13` + * For example, for postgres: + * 1. Install PostgreSQL. + * a. On a mac: `brew install postgresql@13` * 2. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` + * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` * * To run the entire test suite: * {{{ @@ -64,17 +65,15 @@ import org.apache.spark.util.Utils */ // scalastyle:on line.size.limit class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { - private val DEFAULT = "postgres" - private val DBMS_MAPPING = Map( - "postgres" ->((connection_url: Option[String]) => - JdbcSQLQueryTestRunner(PostgresConnection(connection_url)))) private val crossDbmsToGenerateGoldenFiles: String = { val userInputDbms = System.getenv("REF_DBMS") - if (userInputDbms.isEmpty) { - DEFAULT - } else { + if (userInputDbms.nonEmpty) { + assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(userInputDbms), + s"$userInputDbms is not currently supported.") userInputDbms + } else { + CrossDbmsQueryTestSuite.DEFAULT_DBMS } } private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") @@ -82,11 +81,8 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Currently using a separate directory for this because the current SQL tests we have are highly // unlikely to get compatible with the other DBMS. override protected val inputFilePath = { - val originalInputs = new File(baseResourcePath, "inputs").getAbsolutePath - val x = new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath - log.info("HERE " + originalInputs) - log.info("HERE " + x) - x + val originalInputs = new File(baseResourcePath, "inputs") + new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath } override protected val goldenFilePath = new File( baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath @@ -117,7 +113,8 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } else { None } - runner = Some(DBMS_MAPPING(crossDbmsToGenerateGoldenFiles)(connectionUrl)) + runner = Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( + crossDbmsToGenerateGoldenFiles)(connectionUrl)) } val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get @@ -196,3 +193,14 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { }.sortBy(_.name) } } + +object CrossDbmsQueryTestSuite { + + private final val POSTGRES = "postgres" + private final val SUPPORTED_DBMS = Seq(POSTGRES) + private final val DEFAULT_DBMS = POSTGRES + private final val DBMS_TO_CONNECTION_MAPPING = Map( + POSTGRES -> ((connection_url: Option[String]) => + JdbcSQLQueryTestRunner(PostgresConnection(connection_url))) + ) +} From 81202c470079653bed7ca030902ef9625a6ca4ef Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 15:30:32 -0800 Subject: [PATCH 08/63] Trying to get the input files set up.. 
--- .../apache/spark/sql/SQLQueryTestSuite.scala | 6 ++- .../crossdbms/CrossDbmsQueryTestSuite.scala | 48 +++++++++++-------- .../ThriftServerQueryTestSuite.scala | 2 +- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 9111aa472a519..464c9dfc22428 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -612,9 +612,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } + protected def resultFileForInputFile(file: File): String = { + file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + } + protected lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + var resultFile = resultFileForInputFile(file) var analyzerResultFile = file.getAbsolutePath.replace(inputFilePath, analyzerGoldenFilePath) + ".out" // JDK-4511638 changes 'toString' result of Float/Double diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 3621ae77277dc..9f1f11eb040b4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -68,7 +68,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { private val crossDbmsToGenerateGoldenFiles: String = { val userInputDbms = System.getenv("REF_DBMS") - if (userInputDbms.nonEmpty) { + if (userInputDbms != null && userInputDbms.nonEmpty) { assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(userInputDbms), s"$userInputDbms is not currently supported.") userInputDbms @@ -78,24 +78,16 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") - // Currently using a separate directory for this because the current SQL tests we have are highly - // unlikely to get compatible with the other DBMS. 
- override protected val inputFilePath = { - val originalInputs = new File(baseResourcePath, "inputs") - new File(originalInputs, s"$crossDbmsToGenerateGoldenFiles-crosstest").getAbsolutePath - } - override protected val goldenFilePath = new File( - baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath - - private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { - case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false - case _: DescribeCommandBase - | _: DescribeColumnCommand - | _: DescribeRelation - | _: DescribeColumn => true - case PhysicalOperation(_, _, Sort(_, true, _)) => true - case _ => plan.children.iterator.exists(isSemanticallySorted) - } + override def ignoreList: Set[String] = super.ignoreList ++ Set( + "postgreSQL", + "subquery", + "ansi", + "udtf", + "udf", + "timestampNTZ", + "udaf", + "typeCoercion" + ) override protected def runQueries( queries: Seq[String], @@ -179,9 +171,15 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } } + override protected def resultFileForInputFile(file: File): String = { + val goldenFilePath = new File( + baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath + file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + } + override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + var resultFile = resultFileForInputFile(file) // JDK-4511638 changes 'toString' result of Float/Double // JDK-8282081 changes DataTimeFormatter 'F' symbol if (Utils.isJavaVersionAtLeast21) { @@ -192,6 +190,16 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { RegularTestCase(testCaseName, absPath, resultFile) :: Nil }.sortBy(_.name) } + + private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { + case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false + case _: DescribeCommandBase + | _: DescribeColumnCommand + | _: DescribeRelation + | _: DescribeColumn => true + case PhysicalOperation(_, _, Sort(_, true, _)) => true + case _ => plan.children.iterator.exists(isSemanticallySorted) + } } object CrossDbmsQueryTestSuite { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index ee336b13b0b2d..8d90d47e1bf5f 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -246,7 +246,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + var resultFile = resultFileForInputFile(file) // JDK-4511638 changes 'toString' result of Float/Double // JDK-8282081 changes DataTimeFormatter 'F' symbol if (Utils.isJavaVersionAtLeast21 && (new File(resultFile + ".java21")).exists()) { From c9fba5b0f3b5fa2039fd29dfdc40abd400c0771b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 15:46:50 -0800 Subject: [PATCH 09/63] Passing test!! 
--- .../sqllogictest-select1.sql.out | 236 ++++++++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 2 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 40 +-- 3 files changed, 259 insertions(+), 19 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..1ce87f0fc54bb --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,236 @@ +-- Automatically generated by CrossDbmsQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query output + + + +-- !query +SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END + FROM t1 + ORDER BY 1 +-- !query output +354 +362 +374 +380 +398 +400 +416 +428 +436 +446 +452 +468 +474 +484 +492 +1020 +1060 +1140 +1190 +1220 +1280 +1340 +1360 +1410 +1490 +1500 +1570 +1600 +1690 +1700 + + +-- !query +SELECT a+b*2+c*3+d*4+e*5, + CAST((a+b+c+d+e)/5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query output +1531 102 +1604 107 +1683 112 +1755 117 +1824 122 +1899 127 +1975 132 +2052 137 +2129 142 +2208 147 +2286 152 +2360 157 +2436 162 +2499 167 +2589 172 +2653 177 +2739 182 +2802 187 +2880 192 +2946 197 +3037 202 +3100 207 +3176 212 +3259 217 +3331 222 +3405 227 +3477 232 +3556 237 +3627 242 +3704 247 + + +-- !query +SELECT a+b*2+c*3+d*4+e*5, + CASE WHEN ac OR ee + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.bc OR ec + AND c>d + ORDER BY 3,4,5,1,2,6 +-- !query output +1118 112 110 113 333 111 +1165 117 116 118 222 115 +1669 167 167 166 222 165 +1925 192 190 191 444 192 + + +-- !query +SELECT a+b*2+c*3+d*4, + CASE a+1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c>t1.c AND x.d=d+2) + ORDER BY 4,2,1,3 +-- !query output +1165 222 0 117 +1219 222 0 124 +1274 111 0 129 +1325 333 0 131 +1367 222 0 139 +1414 555 0 140 +1473 222 0 146 +1570 333 0 159 +1669 222 0 168 +1877 111 0 186 +1971 555 0 198 +2017 333 0 203 +2211 444 0 222 +2312 555 0 232 +2366 444 0 239 +2422 333 0 240 +2479 222 0 247 + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 464c9dfc22428..f5d51d58e1e10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -943,7 
+943,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper s"-- !query output\n" + output } - override def numSegments: Int = 3 + override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } } /** A single SQL query's analysis results. */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 9f1f11eb040b4..2533be0e12d5a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -55,7 +55,7 @@ import org.apache.spark.util.Utils * * To re-generate golden file for a single test, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" -- -z describe.sql" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite -- -z describe.sql" * }}} * * To specify a DBMS to use (the default is postgres): @@ -66,7 +66,9 @@ import org.apache.spark.util.Utils // scalastyle:on line.size.limit class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { - private val crossDbmsToGenerateGoldenFiles: String = { + // Note: the below two functions have to be functions instead of variables because the superclass + // runs the test first before the subclass variables can be instantiated. + private def crossDbmsToGenerateGoldenFiles: String = { val userInputDbms = System.getenv("REF_DBMS") if (userInputDbms != null && userInputDbms.nonEmpty) { assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(userInputDbms), @@ -76,18 +78,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { CrossDbmsQueryTestSuite.DEFAULT_DBMS } } - private val customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") - - override def ignoreList: Set[String] = super.ignoreList ++ Set( - "postgreSQL", - "subquery", - "ansi", - "udtf", - "udf", - "timestampNTZ", - "udaf", - "typeCoercion" - ) + private def customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") override protected def runQueries( queries: Seq[String], @@ -100,7 +91,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val output = if (regenerateGoldenFiles) { if (runner.isEmpty) { - val connectionUrl = if (customConnectionUrl.nonEmpty) { + val connectionUrl = if (customConnectionUrl != null && customConnectionUrl.nonEmpty) { Some(customConnectionUrl) } else { None @@ -112,10 +103,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { - output.sorted + output } else { // Sort the answer manually if it isn't sorted. 
- output + output.sorted } result } else { @@ -172,8 +163,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } override protected def resultFileForInputFile(file: File): String = { + val defaultResultsDir = new File(baseResourcePath, "results") val goldenFilePath = new File( - baseResourcePath, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath + defaultResultsDir, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" } @@ -200,6 +192,18 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { case PhysicalOperation(_, _, Sort(_, true, _)) => true case _ => plan.children.iterator.exists(isSemanticallySorted) } + + // Ignore all earlier tests for now due to incompatibility. + override def ignoreList: Set[String] = super.ignoreList ++ Set( + "postgreSQL", + "subquery", + "ansi", + "udtf", + "udf", + "timestampNTZ", + "udaf", + "typeCoercion" + ) } object CrossDbmsQueryTestSuite { From 8b5ed22ab1fceb4aca95f9d8766676d5e692a6bb Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 15:53:01 -0800 Subject: [PATCH 10/63] Ignore all earlier tests first --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 128 +++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 2533be0e12d5a..07b4410fd30bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -202,7 +202,133 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { "udf", "timestampNTZ", "udaf", - "typeCoercion" + "typeCoercion", + "group-by.sql", + "natural-join.sql", + "timestamp-ltz.sql", + "csv-functions.sql", + "datetime-formatting-invalid.sql", + "except.sql", + "string-functions.sql", + "cte-command.sql", + "identifier-clause.sql", + "try_reflect.sql", + "order-by-all.sql", + "timestamp-ntz.sql", + "url-functions.sql", + "math.sql", + "random.sql", + "tablesample-negative.sql", + "date.sql", + "window.sql", + "linear-regression.sql", + "join-empty-relation.sql", + "null-propagation.sql", + "operators.sql", + "change-column.sql", + "count.sql", + "mask-functions.sql", + "decimalArithmeticOperations.sql", + "column-resolution-sort.sql", + "group-analytics.sql", + "inline-table.sql", + "comparator.sql", + "ceil-floor-with-scale-param.sql", + "show-tblproperties.sql", + "timezone.sql", + "ilike.sql", + "parse-schema-string.sql", + "charvarchar.sql", + "ignored.sql", + "cte.sql", + "selectExcept.sql", + "order-by-nulls-ordering.sql", + "query_regex_column.sql", + "show_columns.sql", + "columnresolution-views.sql", + "percentiles.sql", + "hll.sql", + "group-by-all.sql", + "like-any.sql", + "struct.sql", + "show-create-table.sql", + "try_cast.sql", + "unpivot.sql", + "describe-query.sql", + "describe.sql", + "columnresolution.sql", + "union.sql", + "order-by-ordinal.sql", + "explain-cbo.sql", + "intersect-all.sql", + "ilike-all.sql", + "try_aggregates.sql", + "try-string-functions.sql", + "show-views.sql", + "xml-functions.sql", + "datetime-parsing.sql", + "cte-nested.sql", + "columnresolution-negative.sql", + "datetime-special.sql", + "describe-table-after-alter-table.sql", + "column-resolution-aggregate.sql", + "keywords.sql", + "table-aliases.sql", + 
"bitwise.sql", + "like-all.sql", + "named-function-arguments.sql", + "try_datetime_functions.sql", + "datetime-legacy.sql", + "outer-join.sql", + "explain.sql", + "sql-compatibility-functions.sql", + "join-lateral.sql", + "pred-pushdown.sql", + "cast.sql", + "except-all.sql", + "predicate-functions.sql", + "timestamp.sql", + "group-by-filter.sql", + "pivot.sql", + "try_arithmetic.sql", + "higher-order-functions.sql", + "cte-legacy.sql", + "misc-functions.sql", + "describe-part-after-analyze.sql", + "extract.sql", + "datetime-formatting.sql", + "interval.sql", + "double-quoted-identifiers.sql", + "datetime-formatting-legacy.sql", + "group-by-ordinal.sql", + "having.sql", + "inner-join.sql", + "null-handling.sql", + "ilike-any.sql", + "show-tables.sql", + "sql-session-variables.sql", + "using-join.sql", + "subexp-elimination.sql", + "cte-nonlegacy.sql", + "group-by-all-duckdb.sql", + "transform.sql", + "map.sql", + "table-valued-functions.sql", + "comments.sql", + "regexp-functions.sql", + "datetime-parsing-legacy.sql", + "cross-join.sql", + "array.sql", + "group-by-all-mosha.sql", + "limit.sql", + "non-excludable-rule.sql", + "grouping_set.sql", + "json-functions.sql", + "datetime-parsing-invalid.sql", + "try_element_at.sql", + "explain-aqe.sql", + "current_database_catalog.sql", + "literals.sql" ) } From fea3975dcdc9c235e53799a9daa2d4dcc5769585 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 29 Nov 2023 16:35:49 -0800 Subject: [PATCH 11/63] Small comment changes --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 07b4410fd30bb..dad03f08d3831 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.util.Utils // scalastyle:off line.size.limit /** - * See SQLQueryTestSuite.scala for the more information. This class builds off of that to allow us + * See SQLQueryTestSuite.scala for more information. This class builds off of that to allow us * to generate golden files with other DBMS to perform cross-checking for correctness. Note that the * input directory path is currently limited because most, if not all, of our current SQL query * tests will not be compatible with other DBMSes. There will be more work in the future, such as @@ -88,7 +88,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { var runner: Option[SQLQueryTestRunner] = None val outputs: Seq[QueryTestOutput] = queries.map { sql => - val output = + val output = { + // Use the runner when generating golden files, and Spark when running the test against + // the already generated golden files. if (regenerateGoldenFiles) { if (runner.isEmpty) { val connectionUrl = if (customConnectionUrl != null && customConnectionUrl.nonEmpty) { @@ -112,6 +114,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } else { handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql))._2 } + } // We do some query canonicalization now. 
       val executionOutput = ExecutionOutput(
         sql = sql,

From 678a0efc07662cb4b678505f371cebeffde7487d Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Wed, 29 Nov 2023 16:52:22 -0800
Subject: [PATCH 12/63] More comments

---
 .../apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
index dad03f08d3831..38335a99f8359 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala
@@ -36,6 +36,9 @@ import org.apache.spark.util.Utils
  * tests will not be compatible with other DBMSes. There will be more work in the future, such as
  * some kind of conversion, to increase coverage.
  *
+ * If your SQL query test is not compatible with other DBMSes, please add it to the `ignoreList` at
+ * the bottom of this file.
+ *
  * You need to have a database server up before running this test.
  * For example, for postgres:
  * 1. Install PostgreSQL.
@@ -196,7 +199,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging {
     case _ => plan.children.iterator.exists(isSemanticallySorted)
   }
 
-  // Ignore all earlier tests for now due to incompatibility.
+  // Ignore all tests for now due to likely incompatibility.
   override def ignoreList: Set[String] = super.ignoreList ++ Set(
     "postgreSQL",
     "subquery",

From 309a7e2db06665ffef47a7544f5a07a77aca1110 Mon Sep 17 00:00:00 2001
From: Andy Lam
Date: Thu, 30 Nov 2023 10:27:53 -0800
Subject: [PATCH 13/63] Address simple comments

---
 .../sqllogictest-select1.sql                  | 48 +++++-----
 .../apache/spark/sql/SQLQueryTestHelper.scala | 26 +++---
 .../apache/spark/sql/SQLQueryTestSuite.scala  | 11 +++
 .../crossdbms/CrossDbmsQueryTestSuite.scala   | 92 ++++++++-----------
 .../spark/sql/crossdbms/JdbcConnection.scala  |  6 ++
 .../sql/crossdbms/SQLQueryTestRunner.scala    |  4 +-
 6 files changed, 98 insertions(+), 89 deletions(-)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql
index 27328457ac851..7fcc5419044d0 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql
@@ -33,58 +33,58 @@ CREATE VIEW t1(a, b, c, d, e) AS VALUES
   (242,244,240,243,241),
   (246,248,247,249,245);
 
-SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END
+SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END
   FROM t1
  ORDER BY 1;
 
-SELECT a+b*2+c*3+d*4+e*5,
-       CAST((a+b+c+d+e)/5 AS INT)
+SELECT a + b * 2 + c * 3 + d * 4 + e * 5,
+       CAST((a + b + c + d + e) / 5 AS INT)
   FROM t1
  ORDER BY 1,2;
 
-SELECT a+b*2+c*3+d*4+e*5,
-       CASE WHEN a<b-3 THEN 111 WHEN a<=b THEN 222
-        WHEN a<b+3 THEN 333 ELSE 444 END,
-       abs(b-c),
-       CAST((a+b+c+d+e)/5 AS INT),
-       a+b*2+c*3
+SELECT a + b * 2 + c * 3 + d * 4 + e * 5,
+       CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222
+        WHEN a < b + 3 THEN 333 ELSE 444 END,
+       abs(b - c),
+       CAST((a + b + c + d + e) / 5 AS INT),
+       a + b * 2 + c * 3
   FROM t1
- WHERE (e>c OR e<d)
+ WHERE (e > c OR e < d)
    AND d>e
-   AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b<t1.b)
+   AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b)
  ORDER BY 4,2,1,3,5;
 
 SELECT c,
        d-e,
-       CASE a+1 WHEN b THEN 111 WHEN c THEN 222
+       CASE a + 1 WHEN b THEN 111 WHEN c THEN 222
         WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END,
-       a+b*2+c*3+d*4,
+       a + b * 2 + c * 3 + d * 4,
        e
   FROM t1
  WHERE d NOT BETWEEN 110 AND 150
-    OR c BETWEEN b-2 AND d+2
-    OR (e>c OR e<d)
+    OR c BETWEEN b - 2 AND d + 2
+    OR (e > c OR e < d)
  ORDER BY 1,5,3,2,4;
 
-SELECT a+b*2+c*3+d*4,
-       CAST((a+b+c+d+e)/5 AS INT),
+SELECT a + b * 2 + c * 3 + d * 4,
+       CAST((a + b + c + d + e) / 5 AS INT),
        abs(a),
        e,
-       CASE a+1 WHEN b THEN 111 WHEN c THEN 222
+       CASE a + 1 WHEN b THEN 111 WHEN c THEN 222
         WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END,
        d
   FROM t1
- WHERE b>c
-   AND c>d
+ WHERE b > c
+   AND c > d
 ORDER BY 3,4,5,1,2,6;
 
-SELECT a+b*2+c*3+d*4,
-       CASE a+1 WHEN b THEN 111 WHEN c THEN 222
+SELECT a + b * 2 + c * 3 + d * 4,
+       CASE a + 1 WHEN b THEN 111 WHEN c THEN 222
         WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END,
-       (SELECT count(*) FROM t1 AS x WHERE x.c>t1.c AND x.d<t1.d),
+       (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d),
        c
   FROM t1
-  WHERE (c<=d-2 OR c>=d+2)
+  WHERE (c <= d - 2 OR c >= d + 2)
 ORDER BY 4,2,1,3;
 
 DROP VIEW IF EXISTS t1;
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
index d8956961440df..ca26d31aa161d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala
@@ -98,20 +98,24 @@ trait SQLQueryTestHelper extends Logging {
     }
   }
 
+  /**
+   * Uses the Spark logical plan to determine whether the plan is semantically sorted. This is
+   * important to make non-sorted queries test cases more deterministic.
+   */
+  def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match {
+    case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
+    case _: DescribeCommandBase
+      | _: DescribeColumnCommand
+      | _: DescribeRelation
+      | _: DescribeColumn => true
+    case PhysicalOperation(_, _, Sort(_, true, _)) => true
+    case _ => plan.children.iterator.exists(isSemanticallySorted)
+  }
+
   /** Executes a query and returns the result as (schema of the output, normalized output). */
   protected def getNormalizedQueryExecutionResult(
       session: SparkSession,
       sql: String): (String, Seq[String]) = {
     // Returns true if the plan is supposed to be sorted.
-    def isSorted(plan: LogicalPlan): Boolean = plan match {
-      case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
-      case _: DescribeCommandBase
-        | _: DescribeColumnCommand
-        | _: DescribeRelation
-        | _: DescribeColumn => true
-      case PhysicalOperation(_, _, Sort(_, true, _)) => true
-      case _ => plan.children.iterator.exists(isSorted)
-    }
-
     val df = session.sql(sql)
     val schema = df.schema.catalogString
     // Get answer, but also get rid of the #1234 expression ids that show up in explain plans
@@ -120,7 +124,7 @@ trait SQLQueryTestHelper extends Logging {
     }
 
     // If the output is not pre-sorted, sort it.
-    if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)
+    if (isSemanticallySorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)
   }
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index f5d51d58e1e10..be59753f97655 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -612,6 +612,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
     }
   }
 
+  /**
+   * Returns the desired file path for results, given the input file. This is implemented as a
+   * function because different Suites extending this class may want their results files with
+   * different names or in different locations.
+   */
   protected def resultFileForInputFile(file: File): String = {
     file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out"
   }
@@ -943,6 +948,12 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
       s"-- !query output\n" + output
     }
+
+  /**
+   * Some suites extending this one, such as [[CrossDbmsQueryTestSuite]], only test for
+   * correctness and do not care about the schema.
The logic here allows passing the check for + * number of segments in the golden files when testing. + */ override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 38335a99f8359..8248c61776f96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -22,10 +22,7 @@ import java.util.Locale import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, DescribeColumn, DescribeRelation, Distinct, Generate, Join, LogicalPlan, Sample, Sort} import org.apache.spark.sql.catalyst.util.stringToFile -import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.util.Utils // scalastyle:off line.size.limit @@ -72,21 +69,20 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Note: the below two functions have to be functions instead of variables because the superclass // runs the test first before the subclass variables can be instantiated. private def crossDbmsToGenerateGoldenFiles: String = { - val userInputDbms = System.getenv("REF_DBMS") - if (userInputDbms != null && userInputDbms.nonEmpty) { - assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(userInputDbms), - s"$userInputDbms is not currently supported.") - userInputDbms - } else { - CrossDbmsQueryTestSuite.DEFAULT_DBMS - } + val userInputDbms = System.getenv(CrossDbmsQueryTestSuite.REF_DBMS_ARGUMENT) + val selectedDbms = Option(userInputDbms).filter(_.nonEmpty).getOrElse( + CrossDbmsQueryTestSuite.DEFAULT_DBMS) + assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(selectedDbms), + s"$selectedDbms is not currently supported.") + selectedDbms } - private def customConnectionUrl: String = System.getenv("REF_DBMS_CONNECTION_URL") + private def customConnectionUrl: String = System.getenv( + CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) override protected def runQueries( - queries: Seq[String], - testCase: TestCase, - configSet: Seq[(String, String)]): Unit = { + queries: Seq[String], + testCase: TestCase, + configSet: Seq[(String, String)]): Unit = { val localSparkSession = spark.newSession() var runner: Option[SQLQueryTestRunner] = None @@ -95,18 +91,13 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Use the runner when generating golden files, and Spark when running the test against // the already generated golden files. 
if (regenerateGoldenFiles) { - if (runner.isEmpty) { - val connectionUrl = if (customConnectionUrl != null && customConnectionUrl.nonEmpty) { - Some(customConnectionUrl) - } else { - None - } - runner = Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( - crossDbmsToGenerateGoldenFiles)(connectionUrl)) - } + val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) + runner = runner.getOrElse( + Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( + crossDbmsToGenerateGoldenFiles)(connectionUrl))) val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get - // Use Spark analyzed plan to check if the query result is already semantically sorted + // Use Spark analyzed plan to check if the query result is already semantically sorted. val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { output } else { @@ -115,7 +106,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } result } else { - handleExceptions(getNormalizedQueryExecutionResult(localSparkSession, sql))._2 + val (_, output) = handleExceptions( + getNormalizedQueryExecutionResult(localSparkSession, sql)) + output } } // We do some query canonicalization now. @@ -148,23 +141,21 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } override def createScalaTestCase(testCase: TestCase): Unit = { - if (ignoreList.exists(t => - testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { - ignore(testCase.name) { - /* Do nothing */ - } - } else { - testCase match { - case _: RegularTestCase => - // Create a test case to run this case. - test(testCase.name) { - runSqlTestCase(testCase, listTestCases) - } - case _ => - ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { - /* Do nothing */ - } - } + testCase match { + case _ if ignoreList.exists(t => + testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT))) => + ignore(s"${testCase.name} is in the ignore list.") { + log.debug(s"${testCase.name} is in the ignore list.") + } + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { + log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") + } } } @@ -189,16 +180,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { }.sortBy(_.name) } - private def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { - case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false - case _: DescribeCommandBase - | _: DescribeColumnCommand - | _: DescribeRelation - | _: DescribeColumn => true - case PhysicalOperation(_, _, Sort(_, true, _)) => true - case _ => plan.children.iterator.exists(isSemanticallySorted) - } - // Ignore all tests for now due to likely incompatibility. override def ignoreList: Set[String] = super.ignoreList ++ Set( "postgreSQL", @@ -340,6 +321,11 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { object CrossDbmsQueryTestSuite { + // System argument to indicate which reference DBMS is being used. + private final val REF_DBMS_ARGUMENT = "REF_DBMS" + // System arguemnt to indicate a custom connection URL to the reference DBMS. 
+ private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" + private final val POSTGRES = "postgres" private final val SUPPORTED_DBMS = Seq(POSTGRES) private final val DEFAULT_DBMS = POSTGRES diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index a9be898306987..ee4cd7ec6eed3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -25,6 +25,9 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry +/** + * Represents a connection (session) to a database. + */ private[sql] trait JdbcConnection { /** * Runs the given query. @@ -53,6 +56,9 @@ private[sql] trait JdbcConnection { def close(): Unit } +/** + * Represents a connection (session) to a PostgreSQL database. + */ private[sql] case class PostgresConnection(connection_url: Option[String] = None) extends JdbcConnection { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index f600276a49e92..7c79b27657d1a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.crossdbms /** - * Trait for classes that can run SQL queries for testing. + * Trait for classes that can run SQL queries for testing. This is specifically used as a wrapper + * around a connection to a system that can run SQL queries, to run queries from + * [[SQLQueryTestSuite]]. */ trait SQLQueryTestRunner { From 5c52b9c4e1fc0327ccbf51fa9937530912157765 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 10:39:24 -0800 Subject: [PATCH 14/63] A little more small changes.. --- .../apache/spark/sql/SQLQueryTestHelper.scala | 6 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 277 +++++++++--------- .../spark/sql/crossdbms/JdbcConnection.scala | 3 +- .../sql/crossdbms/SQLQueryTestRunner.scala | 3 +- 4 files changed, 150 insertions(+), 139 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index ca26d31aa161d..3bda8c75d7806 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -124,7 +124,11 @@ trait SQLQueryTestHelper extends Logging { } // If the output is not pre-sorted, sort it. 
- if (isSemanticallySorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted) + if (isSemanticallySorted(df.queryExecution.analyzed)) { + (schema, answer) + } else { + (schema, answer.sorted) + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 8248c61776f96..b30820bc55378 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -181,142 +181,147 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } // Ignore all tests for now due to likely incompatibility. - override def ignoreList: Set[String] = super.ignoreList ++ Set( - "postgreSQL", - "subquery", - "ansi", - "udtf", - "udf", - "timestampNTZ", - "udaf", - "typeCoercion", - "group-by.sql", - "natural-join.sql", - "timestamp-ltz.sql", - "csv-functions.sql", - "datetime-formatting-invalid.sql", - "except.sql", - "string-functions.sql", - "cte-command.sql", - "identifier-clause.sql", - "try_reflect.sql", - "order-by-all.sql", - "timestamp-ntz.sql", - "url-functions.sql", - "math.sql", - "random.sql", - "tablesample-negative.sql", - "date.sql", - "window.sql", - "linear-regression.sql", - "join-empty-relation.sql", - "null-propagation.sql", - "operators.sql", - "change-column.sql", - "count.sql", - "mask-functions.sql", - "decimalArithmeticOperations.sql", - "column-resolution-sort.sql", - "group-analytics.sql", - "inline-table.sql", - "comparator.sql", - "ceil-floor-with-scale-param.sql", - "show-tblproperties.sql", - "timezone.sql", - "ilike.sql", - "parse-schema-string.sql", - "charvarchar.sql", - "ignored.sql", - "cte.sql", - "selectExcept.sql", - "order-by-nulls-ordering.sql", - "query_regex_column.sql", - "show_columns.sql", - "columnresolution-views.sql", - "percentiles.sql", - "hll.sql", - "group-by-all.sql", - "like-any.sql", - "struct.sql", - "show-create-table.sql", - "try_cast.sql", - "unpivot.sql", - "describe-query.sql", - "describe.sql", - "columnresolution.sql", - "union.sql", - "order-by-ordinal.sql", - "explain-cbo.sql", - "intersect-all.sql", - "ilike-all.sql", - "try_aggregates.sql", - "try-string-functions.sql", - "show-views.sql", - "xml-functions.sql", - "datetime-parsing.sql", - "cte-nested.sql", - "columnresolution-negative.sql", - "datetime-special.sql", - "describe-table-after-alter-table.sql", - "column-resolution-aggregate.sql", - "keywords.sql", - "table-aliases.sql", - "bitwise.sql", - "like-all.sql", - "named-function-arguments.sql", - "try_datetime_functions.sql", - "datetime-legacy.sql", - "outer-join.sql", - "explain.sql", - "sql-compatibility-functions.sql", - "join-lateral.sql", - "pred-pushdown.sql", - "cast.sql", - "except-all.sql", - "predicate-functions.sql", - "timestamp.sql", - "group-by-filter.sql", - "pivot.sql", - "try_arithmetic.sql", - "higher-order-functions.sql", - "cte-legacy.sql", - "misc-functions.sql", - "describe-part-after-analyze.sql", - "extract.sql", - "datetime-formatting.sql", - "interval.sql", - "double-quoted-identifiers.sql", - "datetime-formatting-legacy.sql", - "group-by-ordinal.sql", - "having.sql", - "inner-join.sql", - "null-handling.sql", - "ilike-any.sql", - "show-tables.sql", - "sql-session-variables.sql", - "using-join.sql", - "subexp-elimination.sql", - "cte-nonlegacy.sql", - "group-by-all-duckdb.sql", - "transform.sql", - "map.sql", - 
"table-valued-functions.sql", - "comments.sql", - "regexp-functions.sql", - "datetime-parsing-legacy.sql", - "cross-join.sql", - "array.sql", - "group-by-all-mosha.sql", - "limit.sql", - "non-excludable-rule.sql", - "grouping_set.sql", - "json-functions.sql", - "datetime-parsing-invalid.sql", - "try_element_at.sql", - "explain-aqe.sql", - "current_database_catalog.sql", - "literals.sql" - ) + override def ignoreList: Set[String] = super.ignoreList ++ + // Directories + Set( + "postgreSQL", + "subquery", + "ansi", + "udtf", + "udf", + "timestampNTZ", + "udaf", + "typeCoercion", + ) ++ + // Files + Set( + "group-by.sql", + "natural-join.sql", + "timestamp-ltz.sql", + "csv-functions.sql", + "datetime-formatting-invalid.sql", + "except.sql", + "string-functions.sql", + "cte-command.sql", + "identifier-clause.sql", + "try_reflect.sql", + "order-by-all.sql", + "timestamp-ntz.sql", + "url-functions.sql", + "math.sql", + "random.sql", + "tablesample-negative.sql", + "date.sql", + "window.sql", + "linear-regression.sql", + "join-empty-relation.sql", + "null-propagation.sql", + "operators.sql", + "change-column.sql", + "count.sql", + "mask-functions.sql", + "decimalArithmeticOperations.sql", + "column-resolution-sort.sql", + "group-analytics.sql", + "inline-table.sql", + "comparator.sql", + "ceil-floor-with-scale-param.sql", + "show-tblproperties.sql", + "timezone.sql", + "ilike.sql", + "parse-schema-string.sql", + "charvarchar.sql", + "ignored.sql", + "cte.sql", + "selectExcept.sql", + "order-by-nulls-ordering.sql", + "query_regex_column.sql", + "show_columns.sql", + "columnresolution-views.sql", + "percentiles.sql", + "hll.sql", + "group-by-all.sql", + "like-any.sql", + "struct.sql", + "show-create-table.sql", + "try_cast.sql", + "unpivot.sql", + "describe-query.sql", + "describe.sql", + "columnresolution.sql", + "union.sql", + "order-by-ordinal.sql", + "explain-cbo.sql", + "intersect-all.sql", + "ilike-all.sql", + "try_aggregates.sql", + "try-string-functions.sql", + "show-views.sql", + "xml-functions.sql", + "datetime-parsing.sql", + "cte-nested.sql", + "columnresolution-negative.sql", + "datetime-special.sql", + "describe-table-after-alter-table.sql", + "column-resolution-aggregate.sql", + "keywords.sql", + "table-aliases.sql", + "bitwise.sql", + "like-all.sql", + "named-function-arguments.sql", + "try_datetime_functions.sql", + "datetime-legacy.sql", + "outer-join.sql", + "explain.sql", + "sql-compatibility-functions.sql", + "join-lateral.sql", + "pred-pushdown.sql", + "cast.sql", + "except-all.sql", + "predicate-functions.sql", + "timestamp.sql", + "group-by-filter.sql", + "pivot.sql", + "try_arithmetic.sql", + "higher-order-functions.sql", + "cte-legacy.sql", + "misc-functions.sql", + "describe-part-after-analyze.sql", + "extract.sql", + "datetime-formatting.sql", + "interval.sql", + "double-quoted-identifiers.sql", + "datetime-formatting-legacy.sql", + "group-by-ordinal.sql", + "having.sql", + "inner-join.sql", + "null-handling.sql", + "ilike-any.sql", + "show-tables.sql", + "sql-session-variables.sql", + "using-join.sql", + "subexp-elimination.sql", + "cte-nonlegacy.sql", + "group-by-all-duckdb.sql", + "transform.sql", + "map.sql", + "table-valued-functions.sql", + "comments.sql", + "regexp-functions.sql", + "datetime-parsing-legacy.sql", + "cross-join.sql", + "array.sql", + "group-by-all-mosha.sql", + "limit.sql", + "non-excludable-rule.sql", + "grouping_set.sql", + "json-functions.sql", + "datetime-parsing-invalid.sql", + "try_element_at.sql", + "explain-aqe.sql", + 
"current_database_catalog.sql", + "literals.sql" + ) } object CrossDbmsQueryTestSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index ee4cd7ec6eed3..f04677fea4e8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry private[sql] trait JdbcConnection { /** * Runs the given query. - * @return A Seq[String] representing the output. + * @return A Seq[String] representing the output. This is a Seq where each element represents a + * single row. */ def runQuery(query: String): Seq[String] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala index 7c79b27657d1a..d6230cc6b69a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala @@ -25,7 +25,8 @@ package org.apache.spark.sql.crossdbms trait SQLQueryTestRunner { /** - * Runs a given query and returns a Seq[String] that represents the query result output. + * Runs a given query and returns a Seq[String] that represents the query result output. This is a + * Seq where each element represents a single row. */ def runQuery(query: String): Seq[String] From ad867df8d282bd6be1b45c2340ec4bc9667e7a9f Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 10:51:40 -0800 Subject: [PATCH 15/63] More small changes and removal of redundant code --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 15 ++------------- .../spark/sql/crossdbms/JdbcConnection.scala | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index b30820bc55378..28080926e34a4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -23,7 +23,6 @@ import java.util.Locale import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite import org.apache.spark.sql.catalyst.util.stringToFile -import org.apache.spark.util.Utils // scalastyle:off line.size.limit /** @@ -111,16 +110,11 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { output } } - // We do some query canonicalization now. - val executionOutput = ExecutionOutput( + ExecutionOutput( sql = sql, // Don't care about the schema for this test. Only care about correctness. 
schema = None, output = normalizeTestResults(output.mkString("\n"))) - if (testCase.isInstanceOf[CTETest]) { - expandCTEQueryAndCompareResult(localSparkSession, sql, executionOutput) - } - executionOutput } runner.foreach(_.cleanUp()) @@ -169,11 +163,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => var resultFile = resultFileForInputFile(file) - // JDK-4511638 changes 'toString' result of Float/Double - // JDK-8282081 changes DataTimeFormatter 'F' symbol - if (Utils.isJavaVersionAtLeast21) { - if (new File(resultFile + ".java21").exists()) resultFile += ".java21" - } val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil @@ -328,7 +317,7 @@ object CrossDbmsQueryTestSuite { // System argument to indicate which reference DBMS is being used. private final val REF_DBMS_ARGUMENT = "REF_DBMS" - // System arguemnt to indicate a custom connection URL to the reference DBMS. + // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" private final val POSTGRES = "postgres" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index f04677fea4e8b..5901a523a6cad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -26,33 +26,36 @@ import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry /** - * Represents a connection (session) to a database. + * Represents a connection (session) to a database using JDBC. */ private[sql] trait JdbcConnection { + /** - * Runs the given query. - * @return A Seq[String] representing the output. This is a Seq where each element represents a - * single row. + * Executes the given SQL query and returns the result as a sequence of strings. + * @return A Seq[String] representing the output, where each element represents a single row. */ def runQuery(query: String): Seq[String] /** - * Drop the table with the given table name. + * Drops the table with the specified table name. */ def dropTable(tableName: String): Unit /** - * Create a table with the given table name and schema. + * Creates a table with the specified name and schema. + * @param schemaString The schema definition for the table. Note that this may vary depending on + * the database system. */ def createTable(tableName: String, schemaString: String): Unit /** - * Load data from the given Spark Dataframe into the table with given name. + * Loads data from the given Spark DataFrame into the table with the specified name. + * @param df The Spark DataFrame containing the data to be loaded. */ def loadData(df: DataFrame, tableName: String): Unit /** - * Close the connection. + * Closes the JDBC connection. 
*/ def close(): Unit } From bef801184d51936a81d7b2c261cb71bafbdeed3e Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:01:42 -0800 Subject: [PATCH 16/63] Generate golden files for SQLQueryTestSuite --- .../sqllogictest-select1.sql.out | 203 ++++++++++++++ .../sqllogictest-select1.sql.out | 252 ++++++++++++++++++ .../apache/spark/sql/SQLQueryTestHelper.scala | 2 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 9 +- 4 files changed, 460 insertions(+), 6 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..16d7edfbe0467 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,203 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`t1`, [(a,None), (b,None), (c,None), (d,None), (e,None)], VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245), false, false, PersistedView, true + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END + FROM t1 + ORDER BY 1 +-- !query analysis +Sort [CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x ASC NULLS FIRST], true ++- Project [CASE WHEN (cast(c#x as double) > scalar-subquery#x []) THEN (a#x * 2) ELSE (b#x * 10) END AS CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x] + : +- Aggregate [avg(c#x) AS avg(c)#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS 
a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query analysis +Sort [((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST], true ++- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 + WHEN a < b + 3 THEN 333 ELSE 444 END, + abs(b - c), + CAST((a + b + c + d + e) / 5 AS INT), + a + b * 2 + c * 3 + FROM t1 + WHERE (e > c OR e < d) + AND d>e + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) + ORDER BY 4,2,1,3,5 +-- !query analysis +Sort [CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x ASC NULLS FIRST, ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, abs((b - c))#x ASC NULLS FIRST, ((a + (b * 2)) + (c * 3))#x ASC NULLS FIRST], true ++- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, CASE WHEN (a#x < (b#x - 3)) THEN 111 WHEN (a#x <= b#x) THEN 222 WHEN (a#x < (b#x + 3)) THEN 333 ELSE 444 END AS CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x, abs((b#x - c#x)) AS abs((b - c))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, ((a#x + (b#x * 2)) + (c#x * 3)) AS ((a + (b * 2)) + (c * 3))#x] + +- Filter ((((e#x > c#x) OR (e#x < d#x)) AND (d#x > e#x)) AND exists#x [b#x]) + : +- Project [1 AS 1#x] + : +- Filter (b#x < outer(b#x)) + : +- SubqueryAlias x + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT c, + d-e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 
222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + a + b * 2 + c * 3 + d * 4, + e + FROM t1 + WHERE d NOT BETWEEN 110 AND 150 + OR c BETWEEN b - 2 AND d + 2 + OR (e > c OR e < d) + ORDER BY 1,5,3,2,4 +-- !query analysis +Sort [c#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (d - e)#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST], true ++- Project [c#x, (d#x - e#x) AS (d - e)#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, (((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, e#x] + +- Filter ((NOT ((d#x >= 110) AND (d#x <= 150)) OR ((c#x >= (b#x - 2)) AND (c#x <= (d#x + 2)))) OR ((e#x > c#x) OR (e#x < d#x))) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), + abs(a), + e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + d + FROM t1 + WHERE b > c + AND c > d + ORDER BY 3,4,5,1,2,6 +-- !query analysis +Sort [abs(a)#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, d#x ASC NULLS FIRST], true ++- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, abs(a#x) AS abs(a)#x, e#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, d#x] + +- Filter ((b#x > c#x) AND (c#x > d#x)) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), + c + FROM t1 + WHERE (c <= d - 2 OR c >= d + 2) + ORDER BY 4,2,1,3 +-- !query analysis +Sort [c#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, scalarsubquery(c, d)#xL ASC NULLS FIRST], true ++- Project [(((a#x + (b#x * 2)) + 
(c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, scalar-subquery#x [c#x && d#x] AS scalarsubquery(c, d)#xL, c#x] + : +- Aggregate [count(1) AS count(1)#xL] + : +- Filter ((c#x > outer(c#x)) AND (d#x < outer(d#x))) + : +- SubqueryAlias x + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- Filter ((c#x <= (d#x - 2)) OR (c#x >= (d#x + 2))) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..762a04c22900d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,252 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END + FROM t1 + ORDER BY 1 +-- !query schema +struct scalarsubquery()) THEN (a * 2) ELSE (b * 10) END:int> +-- !query output +354 +362 +374 +380 +398 +400 +416 +428 +436 +446 +452 +468 +474 +484 +492 +1020 +1060 +1140 +1190 +1220 +1280 +1340 +1360 +1410 +1490 +1500 +1570 +1600 +1690 +1700 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query schema +struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int> +-- !query output +1531 102 +1604 107 +1683 112 +1755 117 +1824 122 +1899 127 +1975 132 +2052 137 +2129 142 +2208 147 +2286 152 +2360 157 +2436 162 +2499 167 +2589 172 +2653 177 +2739 182 +2802 187 +2880 192 +2946 197 +3037 202 +3100 207 +3176 212 +3259 217 +3331 
222 +3405 227 +3477 232 +3556 237 +3627 242 +3704 247 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 + WHEN a < b + 3 THEN 333 ELSE 444 END, + abs(b - c), + CAST((a + b + c + d + e) / 5 AS INT), + a + b * 2 + c * 3 + FROM t1 + WHERE (e > c OR e < d) + AND d>e + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) + ORDER BY 4,2,1,3,5 +-- !query schema +struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END:int,abs((b - c)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,((a + (b * 2)) + (c * 3)):int> +-- !query output +1604 333 2 107 643 +1899 222 1 127 770 +1975 222 3 132 793 +2208 111 3 147 881 +2286 333 3 152 910 +2436 333 3 162 970 +2653 333 3 177 1066 +2802 222 2 187 1121 +2880 111 1 192 1157 +2946 333 1 197 1187 +3176 444 3 212 1273 +3405 222 1 227 1364 +3627 222 4 242 1450 +3704 222 1 247 1483 + + +-- !query +SELECT c, + d-e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + a + b * 2 + c * 3 + d * 4, + e + FROM t1 + WHERE d NOT BETWEEN 110 AND 150 + OR c BETWEEN b - 2 AND d + 2 + OR (e > c OR e < d) + ORDER BY 1,5,3,2,4 +-- !query schema +struct +-- !query output +100 -3 444 1011 104 +108 4 222 1079 105 +112 -2 333 1118 113 +117 -3 222 1165 118 +129 1 111 1274 125 +131 3 333 1325 130 +140 -1 555 1414 143 +146 1 222 1473 147 +153 2 444 1526 152 +159 -2 333 1570 158 +163 2 444 1626 162 +168 -1 222 1669 166 +172 -1 222 1719 174 +179 3 333 1778 175 +182 -1 222 1819 184 +186 4 111 1877 185 +193 1 444 1925 191 +198 1 555 1971 195 +203 -3 333 2017 204 +205 -1 111 2065 207 +213 1 555 2121 211 +216 -2 444 2164 219 +222 -4 444 2211 224 +228 4 111 2280 225 +232 -3 555 2312 233 +239 -3 444 2366 238 +240 2 333 2422 241 +247 4 222 2479 245 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), + abs(a), + e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + d + FROM t1 + WHERE b > c + AND c > d + ORDER BY 3,4,5,1,2,6 +-- !query schema +struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,abs(a):int,e:int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,d:int> +-- !query output +1118 112 110 113 333 111 +1165 117 116 118 222 115 +1669 167 167 166 222 165 +1925 192 190 191 444 192 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), + c + FROM t1 + WHERE (c <= d - 2 OR c >= d + 2) + ORDER BY 4,2,1,3 +-- !query schema +struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,scalarsubquery(c, d):bigint,c:int> +-- !query output +1165 222 0 117 +1219 222 0 124 +1274 111 0 129 +1325 333 0 131 +1367 222 0 139 +1414 555 0 140 +1473 222 0 146 +1570 333 0 159 +1669 222 0 168 +1877 111 0 186 +1971 555 0 198 +2017 333 0 203 +2211 444 0 222 +2312 555 0 232 +2366 444 0 239 +2422 333 0 240 +2479 222 0 247 + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 3bda8c75d7806..c08569150e2a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -102,7 +102,7 @@ trait SQLQueryTestHelper extends Logging { * Uses the Spark logical plan to determine whether the plan is semantically sorted. This is * important to make non-sorted queries test cases more deterministic. */ - def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { + protected def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false case _: DescribeCommandBase | _: DescribeColumnCommand diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 28080926e34a4..cbf7f9c1a536c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -91,19 +91,18 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.getOrElse( + runner = runner.orElse( Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( crossDbmsToGenerateGoldenFiles)(connectionUrl))) val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted. - val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { output } else { // Sort the answer manually if it isn't sorted. output.sorted } - result } else { val (_, output) = handleExceptions( getNormalizedQueryExecutionResult(localSparkSession, sql)) @@ -162,7 +161,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = resultFileForInputFile(file) + val resultFile = resultFileForInputFile(file) val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil @@ -180,7 +179,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { "udf", "timestampNTZ", "udaf", - "typeCoercion", + "typeCoercion" ) ++ // Files Set( From a6e7e2a57833d2c815cae1b98687ca5eb6fb444d Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:04:33 -0800 Subject: [PATCH 17/63] Revert "Generate golden files for SQLQueryTestSuite" This reverts commit bef801184d51936a81d7b2c261cb71bafbdeed3e. 
--- .../sqllogictest-select1.sql.out | 203 -------------- .../sqllogictest-select1.sql.out | 252 ------------------ .../apache/spark/sql/SQLQueryTestHelper.scala | 2 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 9 +- 4 files changed, 6 insertions(+), 460 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index 16d7edfbe0467..0000000000000 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,203 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t1`, [(a,None), (b,None), (c,None), (d,None), (e,None)], VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245), false, false, PersistedView, true - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1 --- !query analysis -Sort [CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x ASC NULLS FIRST], true -+- Project [CASE WHEN (cast(c#x as double) > scalar-subquery#x []) THEN (a#x * 2) ELSE (b#x * 10) END AS CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x] - : +- Aggregate [avg(c#x) AS avg(c)#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- SubqueryAlias 
spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query analysis -Sort [((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST], true -+- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query analysis -Sort [CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x ASC NULLS FIRST, ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, abs((b - c))#x ASC NULLS FIRST, ((a + (b * 2)) + (c * 3))#x ASC NULLS FIRST], true -+- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, CASE WHEN (a#x < (b#x - 3)) THEN 111 WHEN (a#x <= b#x) THEN 222 WHEN (a#x < (b#x + 3)) THEN 333 ELSE 444 END AS CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x, abs((b#x - c#x)) AS abs((b - c))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, ((a#x + (b#x * 2)) + (c#x * 3)) AS ((a + (b * 2)) + (c * 3))#x] - +- Filter ((((e#x > c#x) OR (e#x < d#x)) AND (d#x > e#x)) AND exists#x [b#x]) - : +- Project [1 AS 1#x] - : +- Filter (b#x < outer(b#x)) - : +- SubqueryAlias x - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 
1,5,3,2,4 --- !query analysis -Sort [c#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (d - e)#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST], true -+- Project [c#x, (d#x - e#x) AS (d - e)#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, (((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, e#x] - +- Filter ((NOT ((d#x >= 110) AND (d#x <= 150)) OR ((c#x >= (b#x - 2)) AND (c#x <= (d#x + 2)))) OR ((e#x > c#x) OR (e#x < d#x))) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query analysis -Sort [abs(a)#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, d#x ASC NULLS FIRST], true -+- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, abs(a#x) AS abs(a)#x, e#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, d#x] - +- Filter ((b#x > c#x) AND (c#x > d#x)) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query analysis -Sort [c#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, scalarsubquery(c, d)#xL ASC NULLS FIRST], true -+- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) 
THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, scalar-subquery#x [c#x && d#x] AS scalarsubquery(c, d)#xL, c#x] - : +- Aggregate [count(1) AS count(1)#xL] - : +- Filter ((c#x > outer(c#x)) AND (d#x < outer(d#x))) - : +- SubqueryAlias x - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- Filter ((c#x <= (d#x - 2)) OR (c#x >= (d#x + 2))) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -DROP VIEW IF EXISTS t1 --- !query analysis -DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index 762a04c22900d..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,252 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query schema -struct<> --- !query output - - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1 --- !query schema -struct scalarsubquery()) THEN (a * 2) ELSE (b * 10) END:int> --- !query output -354 -362 -374 -380 -398 -400 -416 -428 -436 -446 -452 -468 -474 -484 -492 -1020 -1060 -1140 -1190 -1220 -1280 -1340 -1360 -1410 -1490 -1500 -1570 -1600 -1690 -1700 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query schema -struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int> --- !query output -1531 102 -1604 107 -1683 112 -1755 117 -1824 122 -1899 127 -1975 132 -2052 137 -2129 142 -2208 147 -2286 152 -2360 157 -2436 162 -2499 167 -2589 172 -2653 177 -2739 182 -2802 187 -2880 192 -2946 197 -3037 202 -3100 207 -3176 212 -3259 217 -3331 222 -3405 227 -3477 232 -3556 237 -3627 242 -3704 247 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 
333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query schema -struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END:int,abs((b - c)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,((a + (b * 2)) + (c * 3)):int> --- !query output -1604 333 2 107 643 -1899 222 1 127 770 -1975 222 3 132 793 -2208 111 3 147 881 -2286 333 3 152 910 -2436 333 3 162 970 -2653 333 3 177 1066 -2802 222 2 187 1121 -2880 111 1 192 1157 -2946 333 1 197 1187 -3176 444 3 212 1273 -3405 222 1 227 1364 -3627 222 4 242 1450 -3704 222 1 247 1483 - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4 --- !query schema -struct --- !query output -100 -3 444 1011 104 -108 4 222 1079 105 -112 -2 333 1118 113 -117 -3 222 1165 118 -129 1 111 1274 125 -131 3 333 1325 130 -140 -1 555 1414 143 -146 1 222 1473 147 -153 2 444 1526 152 -159 -2 333 1570 158 -163 2 444 1626 162 -168 -1 222 1669 166 -172 -1 222 1719 174 -179 3 333 1778 175 -182 -1 222 1819 184 -186 4 111 1877 185 -193 1 444 1925 191 -198 1 555 1971 195 -203 -3 333 2017 204 -205 -1 111 2065 207 -213 1 555 2121 211 -216 -2 444 2164 219 -222 -4 444 2211 224 -228 4 111 2280 225 -232 -3 555 2312 233 -239 -3 444 2366 238 -240 2 333 2422 241 -247 4 222 2479 245 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query schema -struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,abs(a):int,e:int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,d:int> --- !query output -1118 112 110 113 333 111 -1165 117 116 118 222 115 -1669 167 167 166 222 165 -1925 192 190 191 444 192 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query schema -struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,scalarsubquery(c, d):bigint,c:int> --- !query output -1165 222 0 117 -1219 222 0 124 -1274 111 0 129 -1325 333 0 131 -1367 222 0 139 -1414 555 0 140 -1473 222 0 146 -1570 333 0 159 -1669 222 0 168 -1877 111 0 186 -1971 555 0 198 -2017 333 0 203 -2211 444 0 222 -2312 555 0 232 -2366 444 0 239 -2422 333 0 240 -2479 222 0 247 - - --- !query -DROP VIEW IF EXISTS t1 --- !query schema -struct<> --- !query output - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index c08569150e2a9..3bda8c75d7806 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -102,7 +102,7 @@ trait SQLQueryTestHelper extends Logging { * Uses the Spark logical plan to determine whether the plan is semantically sorted. This is * important to make non-sorted queries test cases more deterministic. */ - protected def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { + def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false case _: DescribeCommandBase | _: DescribeColumnCommand diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index cbf7f9c1a536c..28080926e34a4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -91,18 +91,19 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.orElse( + runner = runner.getOrElse( Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( crossDbmsToGenerateGoldenFiles)(connectionUrl))) val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted. - if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { output } else { // Sort the answer manually if it isn't sorted. output.sorted } + result } else { val (_, output) = handleExceptions( getNormalizedQueryExecutionResult(localSparkSession, sql)) @@ -161,7 +162,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - val resultFile = resultFileForInputFile(file) + var resultFile = resultFileForInputFile(file) val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil @@ -179,7 +180,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { "udf", "timestampNTZ", "udaf", - "typeCoercion" + "typeCoercion", ) ++ // Files Set( From 1409d63258a190a3823e26e4a5e2d9d94ae599b7 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:06:03 -0800 Subject: [PATCH 18/63] Ignore "postgres-crosstest" in SQLQueryTestSuite --- .../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 3 ++- .../apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index be59753f97655..ca45276efbb69 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -165,7 +165,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper if (TestUtils.testCommandAvailable("/bin/bash")) Nil else Set("transform.sql") /** List of test cases to ignore, in lower cases. 
*/ protected def ignoreList: Set[String] = Set( - "ignored.sql" // Do NOT remove this one. It is here to test the ignore functionality. + "ignored.sql", // Do NOT remove this one. It is here to test the ignore functionality. + "postgres-crosstest" ) ++ otherIgnoreList // Create all the test cases. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 28080926e34a4..d42609ff54af5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -170,7 +170,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } // Ignore all tests for now due to likely incompatibility. - override def ignoreList: Set[String] = super.ignoreList ++ + override def ignoreList: Set[String] = // Directories Set( "postgreSQL", From a552d86563254225ccacc574ef4b5c92d0c0bf5a Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:07:15 -0800 Subject: [PATCH 19/63] Add comment to clarify why it is ignored --- .../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index ca45276efbb69..5c7b877c24b11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -165,7 +165,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper if (TestUtils.testCommandAvailable("/bin/bash")) Nil else Set("transform.sql") /** List of test cases to ignore, in lower cases. */ protected def ignoreList: Set[String] = Set( - "ignored.sql", // Do NOT remove this one. It is here to test the ignore functionality. + // Do NOT remove this one. It is here to test the ignore functionality. + "ignored.sql", + // CrossDbms tests are captured in [[CrossDbmsQueryTestSuite]], there is no need to repeat them. "postgres-crosstest" ) ++ otherIgnoreList From 7a9cb80cc14f9d2bccbcaadb69c630d645066f27 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:14:50 -0800 Subject: [PATCH 20/63] Fix compilation failures --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index d42609ff54af5..551c290587e66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -91,7 +91,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. 
if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.getOrElse( + runner = runner.orElse( Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( crossDbmsToGenerateGoldenFiles)(connectionUrl))) val sparkDf = spark.sql(sql) @@ -173,14 +173,14 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override def ignoreList: Set[String] = // Directories Set( - "postgreSQL", + // "postgreSQL", "subquery", "ansi", "udtf", "udf", "timestampNTZ", "udaf", - "typeCoercion", + "typeCoercion" ) ++ // Files Set( From 66f41a5fddb74d359eaab12e097ae135e239ee84 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:18:21 -0800 Subject: [PATCH 21/63] Regenerate golden files for sqllogictest-select1.sql.out --- .../sqllogictest-select1.sql.out | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out index 1ce87f0fc54bb..fde78003ffa90 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -36,7 +36,7 @@ CREATE VIEW t1(a, b, c, d, e) AS VALUES -- !query -SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END FROM t1 ORDER BY 1 -- !query output @@ -73,8 +73,8 @@ SELECT CASE WHEN c>(SELECT avg(c) FROM t1) THEN a*2 ELSE b*10 END -- !query -SELECT a+b*2+c*3+d*4+e*5, - CAST((a+b+c+d+e)/5 AS INT) +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) FROM t1 ORDER BY 1,2 -- !query output @@ -111,16 +111,16 @@ SELECT a+b*2+c*3+d*4+e*5, -- !query -SELECT a+b*2+c*3+d*4+e*5, - CASE WHEN ac OR e c OR e < d) AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.bc OR e c OR e < d) ORDER BY 1,5,3,2,4 -- !query output 100 -3 444 1011 104 @@ -183,16 +183,16 @@ SELECT c, -- !query -SELECT a+b*2+c*3+d*4, - CAST((a+b+c+d+e)/5 AS INT), +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), abs(a), e, - CASE a+1 WHEN b THEN 111 WHEN c THEN 222 + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, d FROM t1 - WHERE b>c - AND c>d + WHERE b > c + AND c > d ORDER BY 3,4,5,1,2,6 -- !query output 1118 112 110 113 333 111 @@ -202,13 +202,13 @@ SELECT a+b*2+c*3+d*4, -- !query -SELECT a+b*2+c*3+d*4, - CASE a+1 WHEN b THEN 111 WHEN c THEN 222 +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c>t1.c AND x.d t1.c AND x.d < t1.d), c FROM t1 - WHERE (c<=d-2 OR c>=d+2) + WHERE (c <= d - 2 OR c >= d + 2) ORDER BY 4,2,1,3 -- !query output 1165 222 0 117 From 69614d310eedd25f7dbdc1de5adce178cf833dbb Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 11:34:09 -0800 Subject: [PATCH 22/63] Tiny changes.. 
--- .../scala/org/apache/spark/sql/SQLQueryTestHelper.scala | 2 +- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 3bda8c75d7806..c08569150e2a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -102,7 +102,7 @@ trait SQLQueryTestHelper extends Logging { * Uses the Spark logical plan to determine whether the plan is semantically sorted. This is * important to make non-sorted queries test cases more deterministic. */ - def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { + protected def isSemanticallySorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false case _: DescribeCommandBase | _: DescribeColumnCommand diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 551c290587e66..44f45f1174acd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -97,14 +97,14 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val sparkDf = spark.sql(sql) val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted. - val result = if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { output } else { // Sort the answer manually if it isn't sorted. output.sorted } - result } else { + // Use Spark. 
val (_, output) = handleExceptions( getNormalizedQueryExecutionResult(localSparkSession, sql)) output @@ -162,7 +162,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => - var resultFile = resultFileForInputFile(file) + val resultFile = resultFileForInputFile(file) val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil From 2bf43dfdbd4b6207206bac12d8f02830e8adbf9c Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 12:11:51 -0800 Subject: [PATCH 23/63] Add postgresql back to ignorelist --- .../apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 44f45f1174acd..7cea9d41d2772 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -173,7 +173,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override def ignoreList: Set[String] = // Directories Set( - // "postgreSQL", + "postgreSQL", "subquery", "ansi", "udtf", From aba83b89c2861cf2461b06c88d09ae1fb7f5b1fc Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 15:30:15 -0800 Subject: [PATCH 24/63] Add exception handling in CrossDbmsQueryTestSuite --- .../apache/spark/sql/SQLQueryTestSuite.scala | 5 +-- .../crossdbms/CrossDbmsQueryTestSuite.scala | 32 ++++++++++++++----- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 5c7b877c24b11..be59753f97655 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -165,10 +165,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper if (TestUtils.testCommandAvailable("/bin/bash")) Nil else Set("transform.sql") /** List of test cases to ignore, in lower cases. */ protected def ignoreList: Set[String] = Set( - // Do NOT remove this one. It is here to test the ignore functionality. - "ignored.sql", - // CrossDbms tests are captured in [[CrossDbmsQueryTestSuite]], there is no need to repeat them. - "postgres-crosstest" + "ignored.sql" // Do NOT remove this one. It is here to test the ignore functionality. ) ++ otherIgnoreList // Create all the test cases. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 7cea9d41d2772..dc5c6c897e598 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.crossdbms import java.io.File import java.util.Locale +import scala.util.control.NonFatal + import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite import org.apache.spark.sql.catalyst.util.stringToFile @@ -94,14 +96,28 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { runner = runner.orElse( Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( crossDbmsToGenerateGoldenFiles)(connectionUrl))) - val sparkDf = spark.sql(sql) - val output = runner.map(_.runQuery(sql)).get - // Use Spark analyzed plan to check if the query result is already semantically sorted. - if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { - output - } else { - // Sort the answer manually if it isn't sorted. - output.sorted + try { + // Either of the below two lines can error. If we go into the catch statement, then it + // is likely one of the following scenarios: + // 1. Error thrown in Spark analysis: + // a. The query is either incompatible between Spark and the other DBMS + // b. There is issue with the schema in Spark. + // c. The error is expected - it errors on both systems, but only the Spark error + // will be printed to the golden file, because it errors first. + // 2. Error thrown in other DBMS execution: + // a. The query is either incompatible between Spark and the other DBMS + // b. Some test table/view is not created on the other DBMS. + val sparkDf = spark.sql(sql) + val output = runner.map(_.runQuery(sql)).get + // Use Spark analyzed plan to check if the query result is already semantically sorted. + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + output + } else { + // Sort the answer manually if it isn't sorted. + output.sorted + } + } catch { + case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) } } else { // Use Spark. From bf4036d7d2786ea11ee72081889012a4da907edc Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 16:51:36 -0800 Subject: [PATCH 25/63] Do refactoring so that we can add an additional input argument for CrossDbmsQueryTestSuite --- .../apache/spark/sql/SQLQueryTestSuite.scala | 109 ++++++---- .../crossdbms/CrossDbmsQueryTestSuite.scala | 201 +++++------------- 2 files changed, 112 insertions(+), 198 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index be59753f97655..b054d837c3d5c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -349,20 +349,16 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } - /** Run a test case. 
*/ - protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { - def splitWithSemicolon(seq: Seq[String]) = { - seq.mkString("\n").split("(?<=[^\\\\]);") - } - - def splitCommentsAndCodes(input: String) = input.split("\n").partition { line => + protected def splitCommentsAndCodes(input: String) = + input.split("\n").partition { line => val newLine = line.trim newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") } - val input = fileToString(new File(testCase.inputFile)) - - val (comments, code) = splitCommentsAndCodes(input) + protected def getQueries(code: Array[String], comments: Array[String]) = { + def splitWithSemicolon(seq: Seq[String]) = { + seq.mkString("\n").split("(?<=[^\\\\]);") + } // If `--IMPORT` found, load code from another test case file, then insert them // into the head in this test. @@ -406,52 +402,74 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } // List of SQL queries to run - val queries = tempQueries.map(_.trim).filter(_ != "").toSeq + tempQueries.map(_.trim).filter(_ != "") // Fix misplacement when comment is at the end of the query. .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") + } + protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = { val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) - val settings = settingLines.flatMap(_.split(",").map { kv => + settingLines.flatMap(_.split(",").map { kv => val (conf, value) = kv.span(_ != '=') conf.trim -> value.substring(1).trim }) + } - if (regenerateGoldenFiles) { - runQueries(queries, testCase, settings.toImmutableArraySeq) - } else { - // A config dimension has multiple config sets, and a config set has multiple configs. - // - config dim: Seq[Seq[(String, String)]] - // - config set: Seq[(String, String)] - // - config: (String, String)) - // We need to do cartesian product for all the config dimensions, to get a list of - // config sets, and run the query once for each config set. - val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) - val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => - lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => - val (conf, value) = kv.span(_ != '=') - conf.trim -> value.substring(1).trim - }.toSeq).toSeq - } + protected def getSparkConfigDimensions(comments: Array[String]): Seq[Seq[(String, String)]] = { + // A config dimension has multiple config sets, and a config set has multiple configs. + // - config dim: Seq[Seq[(String, String)]] + // - config set: Seq[(String, String)] + // - config: (String, String)) + // We need to do cartesian product for all the config dimensions, to get a list of + // config sets, and run the query once for each config set. 
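+    // For example, if a test file declared the (hypothetical) dimensions
+    //   --CONFIG_DIM1 a=1
+    //   --CONFIG_DIM1 a=2
+    //   --CONFIG_DIM2 b=2,c=3
+    // then dimension 1 would contribute two config sets and dimension 2 one, and the cartesian
+    // product would yield two final config sets, (a=1, b=2, c=3) and (a=2, b=2, c=3), so each
+    // query would run once per set.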
+ val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) + val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => + lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }.toSeq).toSeq + } - val configSets = configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => - dim.flatMap { configSet => res.map(_ ++ configSet) } - } + configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => + dim.flatMap { configSet => res.map(_ ++ configSet) } + } + } - configSets.foreach { configSet => - try { - runQueries(queries, testCase, (settings ++ configSet).toImmutableArraySeq) - } catch { - case e: Throwable => - val configs = configSet.map { - case (k, v) => s"$k=$v" - } - logError(s"Error using configs: ${configs.mkString(",")}") - throw e - } + protected def runQueriesWithSparkConfigDimensions( + queries: Seq[String], + testCase: TestCase, + sparkConfigSet: Array[(String, String)], + sparkConfigDims: Seq[Seq[(String, String)]]): Unit = { + sparkConfigDims.foreach { configDim => + try { + runQueries(queries, testCase, (sparkConfigSet ++ configDim).toImmutableArraySeq) + } catch { + case e: Throwable => + val configs = configDim.map { + case (k, v) => s"$k=$v" + } + logError(s"Error using configs: ${configs.mkString(",")}") + throw e } } } + /** Run a test case. */ + protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments) + val settings = getSparkSettings(comments) + + if (regenerateGoldenFiles) { + runQueries(queries, testCase, settings.toImmutableArraySeq) + } else { + val configSets = getSparkConfigDimensions(comments) + runQueriesWithSparkConfigDimensions( + queries, testCase, settings, configSets) + } + } + def hasNoDuplicateColumns(schema: String): Boolean = { val columnAndTypes = schema.replaceFirst("^struct<", "").stripSuffix(">").split(",") columnAndTypes.size == columnAndTypes.distinct.length @@ -498,7 +516,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Seq[(String, String)]): Unit = { + sparkConfigSet: Seq[(String, String)], + otherConfigs: Map[String, String] = Map.empty): Unit = { // Create a local SparkSession to have stronger isolation between different test cases. // This does not isolate catalog changes. 
val localSparkSession = spark.newSession() @@ -529,9 +548,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper localSparkSession.conf.set(SQLConf.ANSI_ENABLED.key, false) } - if (configSet.nonEmpty) { + if (sparkConfigSet.nonEmpty) { // Execute the list of set operation in order to add the desired configs - val setOperations = configSet.map { case (key, value) => s"set $key=$value" } + val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } logInfo(s"Setting configs: ${setOperations.mkString(", ")}") setOperations.foreach(localSparkSession.sql) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index dc5c6c897e598..86a751a90117d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -24,7 +24,8 @@ import scala.util.control.NonFatal import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite -import org.apache.spark.sql.catalyst.util.stringToFile +import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} +import org.apache.spark.util.ArrayImplicits.SparkArrayOps // scalastyle:off line.size.limit /** @@ -34,9 +35,6 @@ import org.apache.spark.sql.catalyst.util.stringToFile * tests will not be compatible with other DBMSes. There will be more work in the future, such as * some kind of conversion, to increase coverage. * - * If your SQL query test is not compatible with other DBMSes, please add it to the `ignoreList` at - * the bottom of this file. - * * You need to have a database server up before running this test. * For example, for postgres: * 1. Install PostgreSQL. @@ -44,7 +42,13 @@ import org.apache.spark.sql.catalyst.util.stringToFile * 2. After installing PostgreSQL, start the database server, then create a role named pg with * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` * - * To run the entire test suite: + * To indicate that the SQL file is eligible for testing with this suite, add the following comment + * into the input file: + * {{{ + * --DBMS_TO_GENERATE_GOLDEN_FILE postgres + * }}} + * + * And then, to run the entire test suite: * {{{ * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" * }}} @@ -54,14 +58,14 @@ import org.apache.spark.sql.catalyst.util.stringToFile * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" * }}} * - * To re-generate golden file for a single test, run: + * To re-generate golden file for a single test, e.g. 
`describe.sql`, run: * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite -- -z describe.sql" * }}} * * To specify a DBMS to use (the default is postgres): * {{{ - * REF_DBMS=mysql SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * REF_DBMS=postgres SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" * }}} */ // scalastyle:on line.size.limit @@ -80,12 +84,41 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { private def customConnectionUrl: String = System.getenv( CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) + override protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments) + val settings = getSparkSettings(comments) + + if (regenerateGoldenFiles) { + // If `--DBMS_TO_GENERATE_GOLDEN_FILE` found, + val dbmsConfig = comments.filter(_.startsWith(s"--${ + CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) + val otherConfigs = dbmsConfig.map( + (CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE, _)).toMap + runQueries(queries, testCase, settings.toImmutableArraySeq, otherConfigs) + } else { + val configSets = getSparkConfigDimensions(comments) + runQueriesWithSparkConfigDimensions( + queries, testCase, settings, configSets) + } + } + override protected def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Seq[(String, String)]): Unit = { + sparkConfigSet: Seq[(String, String)], + otherConfigs: Map[String, String] = Map.empty): Unit = { val localSparkSession = spark.newSession() + if (!otherConfigs.contains(CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE) || + !otherConfigs.get(CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE) + .contains(crossDbmsToGenerateGoldenFiles)) { + log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + + s"testing with $crossDbmsToGenerateGoldenFiles") + return + } + var runner: Option[SQLQueryTestRunner] = None val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = { @@ -121,6 +154,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } } else { // Use Spark. + // Execute the list of set operation in order to add the desired configs + val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } + setOperations.foreach(localSparkSession.sql) val (_, output) = handleExceptions( getNormalizedQueryExecutionResult(localSparkSession, sql)) output @@ -136,8 +172,8 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { if (regenerateGoldenFiles) { val goldenOutput = { - s"-- Automatically generated by ${getClass.getSimpleName}\n" + - outputs.mkString("\n\n\n") + "\n" + s"-- Automatically generated by ${getClass.getSimpleName} with " + + s"$crossDbmsToGenerateGoldenFiles\n" + outputs.mkString("\n\n\n") + "\n" } val resultFile = new File(testCase.resultFile) val parent = resultFile.getParentFile @@ -184,149 +220,6 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { RegularTestCase(testCaseName, absPath, resultFile) :: Nil }.sortBy(_.name) } - - // Ignore all tests for now due to likely incompatibility. 
- override def ignoreList: Set[String] = - // Directories - Set( - "postgreSQL", - "subquery", - "ansi", - "udtf", - "udf", - "timestampNTZ", - "udaf", - "typeCoercion" - ) ++ - // Files - Set( - "group-by.sql", - "natural-join.sql", - "timestamp-ltz.sql", - "csv-functions.sql", - "datetime-formatting-invalid.sql", - "except.sql", - "string-functions.sql", - "cte-command.sql", - "identifier-clause.sql", - "try_reflect.sql", - "order-by-all.sql", - "timestamp-ntz.sql", - "url-functions.sql", - "math.sql", - "random.sql", - "tablesample-negative.sql", - "date.sql", - "window.sql", - "linear-regression.sql", - "join-empty-relation.sql", - "null-propagation.sql", - "operators.sql", - "change-column.sql", - "count.sql", - "mask-functions.sql", - "decimalArithmeticOperations.sql", - "column-resolution-sort.sql", - "group-analytics.sql", - "inline-table.sql", - "comparator.sql", - "ceil-floor-with-scale-param.sql", - "show-tblproperties.sql", - "timezone.sql", - "ilike.sql", - "parse-schema-string.sql", - "charvarchar.sql", - "ignored.sql", - "cte.sql", - "selectExcept.sql", - "order-by-nulls-ordering.sql", - "query_regex_column.sql", - "show_columns.sql", - "columnresolution-views.sql", - "percentiles.sql", - "hll.sql", - "group-by-all.sql", - "like-any.sql", - "struct.sql", - "show-create-table.sql", - "try_cast.sql", - "unpivot.sql", - "describe-query.sql", - "describe.sql", - "columnresolution.sql", - "union.sql", - "order-by-ordinal.sql", - "explain-cbo.sql", - "intersect-all.sql", - "ilike-all.sql", - "try_aggregates.sql", - "try-string-functions.sql", - "show-views.sql", - "xml-functions.sql", - "datetime-parsing.sql", - "cte-nested.sql", - "columnresolution-negative.sql", - "datetime-special.sql", - "describe-table-after-alter-table.sql", - "column-resolution-aggregate.sql", - "keywords.sql", - "table-aliases.sql", - "bitwise.sql", - "like-all.sql", - "named-function-arguments.sql", - "try_datetime_functions.sql", - "datetime-legacy.sql", - "outer-join.sql", - "explain.sql", - "sql-compatibility-functions.sql", - "join-lateral.sql", - "pred-pushdown.sql", - "cast.sql", - "except-all.sql", - "predicate-functions.sql", - "timestamp.sql", - "group-by-filter.sql", - "pivot.sql", - "try_arithmetic.sql", - "higher-order-functions.sql", - "cte-legacy.sql", - "misc-functions.sql", - "describe-part-after-analyze.sql", - "extract.sql", - "datetime-formatting.sql", - "interval.sql", - "double-quoted-identifiers.sql", - "datetime-formatting-legacy.sql", - "group-by-ordinal.sql", - "having.sql", - "inner-join.sql", - "null-handling.sql", - "ilike-any.sql", - "show-tables.sql", - "sql-session-variables.sql", - "using-join.sql", - "subexp-elimination.sql", - "cte-nonlegacy.sql", - "group-by-all-duckdb.sql", - "transform.sql", - "map.sql", - "table-valued-functions.sql", - "comments.sql", - "regexp-functions.sql", - "datetime-parsing-legacy.sql", - "cross-join.sql", - "array.sql", - "group-by-all-mosha.sql", - "limit.sql", - "non-excludable-rule.sql", - "grouping_set.sql", - "json-functions.sql", - "datetime-parsing-invalid.sql", - "try_element_at.sql", - "explain-aqe.sql", - "current_database_catalog.sql", - "literals.sql" - ) } object CrossDbmsQueryTestSuite { @@ -335,6 +228,8 @@ object CrossDbmsQueryTestSuite { private final val REF_DBMS_ARGUMENT = "REF_DBMS" // System argument to indicate a custom connection URL to the reference DBMS. 
private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" + // Argument in input files to indicate that golden file should be generated with a reference DBMS + private final val DBMS_TO_GENERATE_GOLDEN_FILE = "DBMS_TO_GENERATE_GOLDEN_FILE" private final val POSTGRES = "postgres" private final val SUPPORTED_DBMS = Seq(POSTGRES) From 08e2cfb58cddeed786a8cf0369afb2e4fd125a72 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 30 Nov 2023 16:53:07 -0800 Subject: [PATCH 26/63] Generate with postgres --- .../inputs/postgres-crosstest/sqllogictest-select1.sql | 1 + .../postgres-crosstest/sqllogictest-select1.sql.out | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql index 7fcc5419044d0..87c15a07733b5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql @@ -1,6 +1,7 @@ -- First 6 queries of the sql-logic-tests with some minor changes for compatibility, available here: -- https://www.sqlite.org/sqllogictest/file?name=test/select1.test&ci=tip. +--DBMS_TO_GENERATE_GOLDEN_FILE postgres CREATE VIEW t1(a, b, c, d, e) AS VALUES (103,102,100,101,104), (107,106,108,109,105), diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out index fde78003ffa90..b5eb7eef4a9a3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -1,4 +1,4 @@ --- Automatically generated by CrossDbmsQueryTestSuite +-- Automatically generated by CrossDbmsQueryTestSuite with postgres -- !query CREATE VIEW t1(a, b, c, d, e) AS VALUES (103,102,100,101,104), From 2ec381c52ac7853efa6523382d182fcb3aa99520 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Fri, 1 Dec 2023 10:33:39 -0800 Subject: [PATCH 27/63] Fix compilation error with ThriftServerQueryTestSuite --- .../sql/hive/thriftserver/ThriftServerQueryTestSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 8d90d47e1bf5f..febf763e209d5 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -107,7 +107,8 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ override def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Seq[(String, String)]): Unit = { + configSet: Seq[(String, String)], + otherConfigs: Map[String, String] = Map.empty): Unit = { // We do not test with configSet. 
withJdbcStatement() { statement => From 9c2d2832129c0341c1d14e6bf1563882f1d41a87 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Fri, 1 Dec 2023 13:15:00 -0800 Subject: [PATCH 28/63] Generate golden files with SQLQueryTestSuite --- .../sqllogictest-select1.sql.out | 203 ++++++++++++++ .../sqllogictest-select1.sql.out | 252 ++++++++++++++++++ 2 files changed, 455 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..16d7edfbe0467 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,203 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`t1`, [(a,None), (b,None), (c,None), (d,None), (e,None)], VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245), false, false, PersistedView, true + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END + FROM t1 + ORDER BY 1 +-- !query analysis +Sort [CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x ASC NULLS FIRST], true ++- Project [CASE WHEN (cast(c#x as double) > scalar-subquery#x []) THEN (a#x * 2) ELSE (b#x * 10) END AS CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x] + : +- Aggregate [avg(c#x) AS avg(c)#x] + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as 
int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query analysis +Sort [((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST], true ++- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 + WHEN a < b + 3 THEN 333 ELSE 444 END, + abs(b - c), + CAST((a + b + c + d + e) / 5 AS INT), + a + b * 2 + c * 3 + FROM t1 + WHERE (e > c OR e < d) + AND d>e + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) + ORDER BY 4,2,1,3,5 +-- !query analysis +Sort [CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x ASC NULLS FIRST, ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, abs((b - c))#x ASC NULLS FIRST, ((a + (b * 2)) + (c * 3))#x ASC NULLS FIRST], true ++- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, CASE WHEN (a#x < (b#x - 3)) THEN 111 WHEN (a#x <= b#x) THEN 222 WHEN (a#x < (b#x + 3)) THEN 333 ELSE 444 END AS CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x, abs((b#x - c#x)) AS abs((b - c))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, ((a#x + (b#x * 2)) + (c#x * 3)) AS ((a + (b * 2)) + (c * 3))#x] + +- Filter ((((e#x > c#x) OR (e#x < d#x)) AND (d#x > e#x)) AND exists#x [b#x]) + : +- Project [1 AS 1#x] + : +- Filter (b#x < outer(b#x)) + : +- SubqueryAlias x + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT c, + d-e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + a + b * 2 + c * 3 + d * 4, + e + FROM t1 + WHERE 
d NOT BETWEEN 110 AND 150 + OR c BETWEEN b - 2 AND d + 2 + OR (e > c OR e < d) + ORDER BY 1,5,3,2,4 +-- !query analysis +Sort [c#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (d - e)#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST], true ++- Project [c#x, (d#x - e#x) AS (d - e)#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, (((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, e#x] + +- Filter ((NOT ((d#x >= 110) AND (d#x <= 150)) OR ((c#x >= (b#x - 2)) AND (c#x <= (d#x + 2)))) OR ((e#x > c#x) OR (e#x < d#x))) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), + abs(a), + e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + d + FROM t1 + WHERE b > c + AND c > d + ORDER BY 3,4,5,1,2,6 +-- !query analysis +Sort [abs(a)#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, d#x ASC NULLS FIRST], true ++- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, abs(a#x) AS abs(a)#x, e#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, d#x] + +- Filter ((b#x > c#x) AND (c#x > d#x)) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), + c + FROM t1 + WHERE (c <= d - 2 OR c >= d + 2) + ORDER BY 4,2,1,3 +-- !query analysis +Sort [c#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, scalarsubquery(c, d)#xL ASC NULLS FIRST], true ++- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 
WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, scalar-subquery#x [c#x && d#x] AS scalarsubquery(c, d)#xL, c#x] + : +- Aggregate [count(1) AS count(1)#xL] + : +- Filter ((c#x > outer(c#x)) AND (d#x < outer(d#x))) + : +- SubqueryAlias x + : +- SubqueryAlias spark_catalog.default.t1 + : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + +- Filter ((c#x <= (d#x - 2)) OR (c#x >= (d#x + 2))) + +- SubqueryAlias spark_catalog.default.t1 + +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) + +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out new file mode 100644 index 0000000000000..762a04c22900d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out @@ -0,0 +1,252 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE VIEW t1(a, b, c, d, e) AS VALUES + (103,102,100,101,104), + (107,106,108,109,105), + (110,114,112,111,113), + (116,119,117,115,118), + (123,122,124,120,121), + (127,128,129,126,125), + (132,134,131,133,130), + (138,136,139,135,137), + (144,141,140,142,143), + (145,149,146,148,147), + (151,150,153,154,152), + (155,157,159,156,158), + (161,160,163,164,162), + (167,169,168,165,166), + (171,170,172,173,174), + (177,176,179,178,175), + (181,180,182,183,184), + (187,188,186,189,185), + (190,194,193,192,191), + (199,197,198,196,195), + (200,202,203,201,204), + (208,209,205,206,207), + (214,210,213,212,211), + (218,215,216,217,219), + (223,221,222,220,224), + (226,227,228,229,225), + (234,231,232,230,233), + (237,236,239,235,238), + (242,244,240,243,241), + (246,248,247,249,245) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END + FROM t1 + ORDER BY 1 +-- !query schema +struct scalarsubquery()) THEN (a * 2) ELSE (b * 10) END:int> +-- !query output +354 +362 +374 +380 +398 +400 +416 +428 +436 +446 +452 +468 +474 +484 +492 +1020 +1060 +1140 +1190 +1220 +1280 +1340 +1360 +1410 +1490 +1500 +1570 +1600 +1690 +1700 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + e * 5, + CAST((a + b + c + d + e) / 5 AS INT) + FROM t1 + ORDER BY 1,2 +-- !query schema +struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int> +-- !query output +1531 102 +1604 107 +1683 112 +1755 117 +1824 122 +1899 127 +1975 132 +2052 137 +2129 142 +2208 147 +2286 152 +2360 157 +2436 162 +2499 167 +2589 172 +2653 177 +2739 182 +2802 187 +2880 192 +2946 197 +3037 202 +3100 207 +3176 212 +3259 217 +3331 222 +3405 227 +3477 232 +3556 237 +3627 242 +3704 247 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4 + 
e * 5, + CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 + WHEN a < b + 3 THEN 333 ELSE 444 END, + abs(b - c), + CAST((a + b + c + d + e) / 5 AS INT), + a + b * 2 + c * 3 + FROM t1 + WHERE (e > c OR e < d) + AND d>e + AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) + ORDER BY 4,2,1,3,5 +-- !query schema +struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END:int,abs((b - c)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,((a + (b * 2)) + (c * 3)):int> +-- !query output +1604 333 2 107 643 +1899 222 1 127 770 +1975 222 3 132 793 +2208 111 3 147 881 +2286 333 3 152 910 +2436 333 3 162 970 +2653 333 3 177 1066 +2802 222 2 187 1121 +2880 111 1 192 1157 +2946 333 1 197 1187 +3176 444 3 212 1273 +3405 222 1 227 1364 +3627 222 4 242 1450 +3704 222 1 247 1483 + + +-- !query +SELECT c, + d-e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + a + b * 2 + c * 3 + d * 4, + e + FROM t1 + WHERE d NOT BETWEEN 110 AND 150 + OR c BETWEEN b - 2 AND d + 2 + OR (e > c OR e < d) + ORDER BY 1,5,3,2,4 +-- !query schema +struct +-- !query output +100 -3 444 1011 104 +108 4 222 1079 105 +112 -2 333 1118 113 +117 -3 222 1165 118 +129 1 111 1274 125 +131 3 333 1325 130 +140 -1 555 1414 143 +146 1 222 1473 147 +153 2 444 1526 152 +159 -2 333 1570 158 +163 2 444 1626 162 +168 -1 222 1669 166 +172 -1 222 1719 174 +179 3 333 1778 175 +182 -1 222 1819 184 +186 4 111 1877 185 +193 1 444 1925 191 +198 1 555 1971 195 +203 -3 333 2017 204 +205 -1 111 2065 207 +213 1 555 2121 211 +216 -2 444 2164 219 +222 -4 444 2211 224 +228 4 111 2280 225 +232 -3 555 2312 233 +239 -3 444 2366 238 +240 2 333 2422 241 +247 4 222 2479 245 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CAST((a + b + c + d + e) / 5 AS INT), + abs(a), + e, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + d + FROM t1 + WHERE b > c + AND c > d + ORDER BY 3,4,5,1,2,6 +-- !query schema +struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,abs(a):int,e:int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,d:int> +-- !query output +1118 112 110 113 333 111 +1165 117 116 118 222 115 +1669 167 167 166 222 165 +1925 192 190 191 444 192 + + +-- !query +SELECT a + b * 2 + c * 3 + d * 4, + CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 + WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, + (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), + c + FROM t1 + WHERE (c <= d - 2 OR c >= d + 2) + ORDER BY 4,2,1,3 +-- !query schema +struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,scalarsubquery(c, d):bigint,c:int> +-- !query output +1165 222 0 117 +1219 222 0 124 +1274 111 0 129 +1325 333 0 131 +1367 222 0 139 +1414 555 0 140 +1473 222 0 146 +1570 333 0 159 +1669 222 0 168 +1877 111 0 186 +1971 555 0 198 +2017 333 0 203 +2211 444 0 222 +2312 555 0 232 +2366 444 0 239 +2422 333 0 240 +2479 222 0 247 + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query schema +struct<> +-- !query output + From ab4ab121549877a43d76d1c8f239237f2ed44eca Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 13:20:25 -0800 Subject: [PATCH 29/63] Fixed a bug where tests weren't running against the golden file --- 
.../apache/spark/sql/SQLQueryTestSuite.scala | 3 +-- .../crossdbms/CrossDbmsQueryTestSuite.scala | 24 +++++++------------ 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index b054d837c3d5c..5705feec96a21 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -516,8 +516,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected def runQueries( queries: Seq[String], testCase: TestCase, - sparkConfigSet: Seq[(String, String)], - otherConfigs: Map[String, String] = Map.empty): Unit = { + sparkConfigSet: Seq[(String, String)]): Unit = { // Create a local SparkSession to have stronger isolation between different test cases. // This does not isolate catalog changes. val localSparkSession = spark.newSession() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 86a751a90117d..85654dce0c6ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -91,12 +91,15 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val settings = getSparkSettings(comments) if (regenerateGoldenFiles) { - // If `--DBMS_TO_GENERATE_GOLDEN_FILE` found, val dbmsConfig = comments.filter(_.startsWith(s"--${ CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) - val otherConfigs = dbmsConfig.map( - (CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE, _)).toMap - runQueries(queries, testCase, settings.toImmutableArraySeq, otherConfigs) + // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. 
+ if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + + s"testing with $crossDbmsToGenerateGoldenFiles") + return + } + runQueries(queries, testCase, settings.toImmutableArraySeq) } else { val configSets = getSparkConfigDimensions(comments) runQueriesWithSparkConfigDimensions( @@ -107,19 +110,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { override protected def runQueries( queries: Seq[String], testCase: TestCase, - sparkConfigSet: Seq[(String, String)], - otherConfigs: Map[String, String] = Map.empty): Unit = { + sparkConfigSet: Seq[(String, String)]): Unit = { val localSparkSession = spark.newSession() - - if (!otherConfigs.contains(CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE) || - !otherConfigs.get(CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE) - .contains(crossDbmsToGenerateGoldenFiles)) { - log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + - s"testing with $crossDbmsToGenerateGoldenFiles") - return - } - var runner: Option[SQLQueryTestRunner] = None + val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = { // Use the runner when generating golden files, and Spark when running the test against From 0166aede0ca0c70cdbc429a96d05afd7ab52027b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 13:47:53 -0800 Subject: [PATCH 30/63] Use local spark session --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 7 +++++-- .../org/apache/spark/sql/crossdbms/JdbcConnection.scala | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 85654dce0c6ad..66447289e3b2a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -134,7 +134,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // 2. Error thrown in other DBMS execution: // a. The query is either incompatible between Spark and the other DBMS // b. Some test table/view is not created on the other DBMS. - val sparkDf = spark.sql(sql) + val sparkDf = localSparkSession.sql(sql) val output = runner.map(_.runQuery(sql)).get // Use Spark analyzed plan to check if the query result is already semantically sorted. 
if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { @@ -162,7 +162,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { schema = None, output = normalizeTestResults(output.mkString("\n"))) } - runner.foreach(_.cleanUp()) + if (runner.isDefined) { + runner.foreach(_.cleanUp()) + runner = None + } if (regenerateGoldenFiles) { val goldenOutput = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 5901a523a6cad..f203f75ac0c8d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -66,7 +66,8 @@ private[sql] trait JdbcConnection { private[sql] case class PostgresConnection(connection_url: Option[String] = None) extends JdbcConnection { - DriverRegistry.register("org.postgresql.Driver") + private final val POSTGRES_DRIVER_CLASS_NAME = "org.postgresql.Driver" + DriverRegistry.register(POSTGRES_DRIVER_CLASS_NAME) private final val DEFAULT_USER = "pg" private final val DEFAULT_CONNECTION_URL = s"jdbc:postgresql://localhost:5432/postgres?user=$DEFAULT_USER" @@ -103,7 +104,7 @@ private[sql] case class PostgresConnection(connection_url: Option[String] = None } def loadData(df: DataFrame, tableName: String): Unit = { - df.write.option("driver", "org.postgresql.Driver").mode("append") + df.write.option("driver", POSTGRES_DRIVER_CLASS_NAME).mode("append") .jdbc(url, tableName, new Properties()) } From 2e45a67ea85bee971aed751bdf98ace39b9dffac Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 13:53:46 -0800 Subject: [PATCH 31/63] Small comment change --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 66447289e3b2a..d086b8dce1092 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -30,10 +30,10 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps // scalastyle:off line.size.limit /** * See SQLQueryTestSuite.scala for more information. This class builds off of that to allow us - * to generate golden files with other DBMS to perform cross-checking for correctness. Note that the - * input directory path is currently limited because most, if not all, of our current SQL query - * tests will not be compatible with other DBMSes. There will be more work in the future, such as - * some kind of conversion, to increase coverage. + * to generate golden files with other DBMS to perform cross-checking for correctness. Note that + * this is not currently run on all SQL input files by default because there is incompatibility + * between Spark SQL and the DBMS SQL. More work will be done to increase coverage, focusing on + * postgres. * * You need to have a database server up before running this test. 
* For example, for postgres: From dd4597e9bee91d508bca5975360150783e4d2f0b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 15:28:00 -0800 Subject: [PATCH 32/63] Revert change in ThriftServerQueryTestSuite --- .../sql/crossdbms/DialectConverter.scala | 66 +++++++++++++++++++ .../ThriftServerQueryTestSuite.scala | 3 +- 2 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala new file mode 100644 index 0000000000000..c8ea1b271b9b2 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.crossdbms + +trait DialectConverter { + + final val NOOP = "" + final val SET_COMMAND_PREFIX = "set" + final val COMMAND_PREFIXES_TO_IGNORE = Seq(SET_COMMAND_PREFIX) + + final val SPARK_SET_COMMAND_PATTERN = """set (\w+) = (true|false);""".r + + protected def checkIgnore(query: String): Boolean = + COMMAND_PREFIXES_TO_IGNORE.count(prefix => query.startsWith(prefix)) > 0 + + /** + * Transform the query string, which is compatible with Spark, such that it is compatible with + * dialect. This transforms a query string itself. Note that this is a best effort conversion and + * may not always produce a compatible query string. + */ + def preprocessQuery(query: String): String + + /** + * Transform the query SET, which is compatible with Spark, such that it is compatible with + * dialect. + * For example: + * {{{ + * 1. create temporary view t1 as values (..); + * 2. set spark.sql... = true; + * 3. select * from ... + * 4. set spark.sql... = false; + * }}} + * We can remove everything between lines 2 and 4 because if setting configs is not applicable to + * other DBMS systems, and the tests between such config setting is not relevant. + */ + def preprocessQuerySet(queries: Seq[String]): Seq[String] +} + +/** + * Wrapper around utility functions that help convert from Spark SQL to PostgreSQL. 
+ */ +object PostgresDialectConverter extends DialectConverter { + + def preprocessQuery(query: String): String = { + query + } + + override def preprocessQuerySet(queries: Seq[String]): Seq[String] = { + queries + } +} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index febf763e209d5..8d90d47e1bf5f 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -107,8 +107,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ override def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Seq[(String, String)], - otherConfigs: Map[String, String] = Map.empty): Unit = { + configSet: Seq[(String, String)]): Unit = { // We do not test with configSet. withJdbcStatement() { statement => From cb719791a292d6f8214574239373e17eab9c3a6e Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 15:29:40 -0800 Subject: [PATCH 33/63] Remove DialectConverter.. --- .../sql/crossdbms/DialectConverter.scala | 66 ------------------- 1 file changed, 66 deletions(-) delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala deleted file mode 100644 index c8ea1b271b9b2..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/DialectConverter.scala +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -trait DialectConverter { - - final val NOOP = "" - final val SET_COMMAND_PREFIX = "set" - final val COMMAND_PREFIXES_TO_IGNORE = Seq(SET_COMMAND_PREFIX) - - final val SPARK_SET_COMMAND_PATTERN = """set (\w+) = (true|false);""".r - - protected def checkIgnore(query: String): Boolean = - COMMAND_PREFIXES_TO_IGNORE.count(prefix => query.startsWith(prefix)) > 0 - - /** - * Transform the query string, which is compatible with Spark, such that it is compatible with - * dialect. This transforms a query string itself. Note that this is a best effort conversion and - * may not always produce a compatible query string. - */ - def preprocessQuery(query: String): String - - /** - * Transform the query SET, which is compatible with Spark, such that it is compatible with - * dialect. - * For example: - * {{{ - * 1. create temporary view t1 as values (..); - * 2. set spark.sql... 
= true; - * 3. select * from ... - * 4. set spark.sql... = false; - * }}} - * We can remove everything between lines 2 and 4 because if setting configs is not applicable to - * other DBMS systems, and the tests between such config setting is not relevant. - */ - def preprocessQuerySet(queries: Seq[String]): Seq[String] -} - -/** - * Wrapper around utility functions that help convert from Spark SQL to PostgreSQL. - */ -object PostgresDialectConverter extends DialectConverter { - - def preprocessQuery(query: String): String = { - query - } - - override def preprocessQuerySet(queries: Seq[String]): Seq[String] = { - queries - } -} From 6cd52f1e97601054ddf2588f8a36bddb12a7f9b2 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 4 Dec 2023 17:51:18 -0800 Subject: [PATCH 34/63] Don't run the tests if the cross dbms is not specified --- .../sql/crossdbms/CrossDbmsQueryTestSuite.scala | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index d086b8dce1092..4932ed96efbd8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -90,15 +90,13 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val queries = getQueries(code, comments) val settings = getSparkSettings(comments) - if (regenerateGoldenFiles) { - val dbmsConfig = comments.filter(_.startsWith(s"--${ - CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) - // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. - if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { - log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + - s"testing with $crossDbmsToGenerateGoldenFiles") - return - } + val dbmsConfig = comments.filter(_.startsWith(s"--${ + CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) + // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. 
+ if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + + s"testing with $crossDbmsToGenerateGoldenFiles") + } else if (regenerateGoldenFiles) { runQueries(queries, testCase, settings.toImmutableArraySeq) } else { val configSets = getSparkConfigDimensions(comments) From 7f3970777360245a0b7b6847dd6d2f66fe5a02df Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 11:39:22 -0800 Subject: [PATCH 35/63] Remove sqllogictest --- .../sqllogictest-select1.sql.out | 203 -------------- .../sqllogictest-select1.sql | 91 ------- .../sqllogictest-select1.sql.out | 252 ------------------ .../sqllogictest-select1.sql.out | 236 ---------------- 4 files changed, 782 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out delete mode 100644 sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index 16d7edfbe0467..0000000000000 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,203 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t1`, [(a,None), (b,None), (c,None), (d,None), (e,None)], VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245), false, false, PersistedView, true - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM 
t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1 --- !query analysis -Sort [CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x ASC NULLS FIRST], true -+- Project [CASE WHEN (cast(c#x as double) > scalar-subquery#x []) THEN (a#x * 2) ELSE (b#x * 10) END AS CASE WHEN (c > scalarsubquery()) THEN (a * 2) ELSE (b * 10) END#x] - : +- Aggregate [avg(c#x) AS avg(c)#x] - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query analysis -Sort [((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST], true -+- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query analysis -Sort [CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x ASC NULLS FIRST, ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x ASC NULLS FIRST, abs((b - c))#x ASC NULLS FIRST, ((a + (b * 2)) + (c * 3))#x ASC NULLS FIRST], true -+- Project [((((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) + (e#x * 5)) AS ((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5))#x, CASE WHEN (a#x < (b#x - 3)) THEN 111 WHEN (a#x <= b#x) THEN 222 WHEN (a#x < (b#x + 3)) THEN 333 ELSE 444 END AS CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END#x, abs((b#x - c#x)) AS abs((b - c))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, ((a#x + (b#x * 2)) + (c#x * 3)) AS ((a + (b * 2)) + (c * 3))#x] - +- Filter ((((e#x > c#x) OR (e#x < d#x)) AND (d#x > e#x)) AND exists#x [b#x]) - : +- Project [1 AS 1#x] - : +- Filter (b#x < outer(b#x)) - : +- SubqueryAlias x - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, 
cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4 --- !query analysis -Sort [c#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (d - e)#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST], true -+- Project [c#x, (d#x - e#x) AS (d - e)#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, (((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, e#x] - +- Filter ((NOT ((d#x >= 110) AND (d#x <= 150)) OR ((c#x >= (b#x - 2)) AND (c#x <= (d#x + 2)))) OR ((e#x > c#x) OR (e#x < d#x))) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query analysis -Sort [abs(a)#x ASC NULLS FIRST, e#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x ASC NULLS FIRST, d#x ASC NULLS FIRST], true -+- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, cast((cast(((((a#x + b#x) + c#x) + d#x) + e#x) as double) / cast(5 as double)) as int) AS CAST((((((a + b) + c) + d) + e) / 5) AS INT)#x, abs(a#x) AS abs(a)#x, e#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, d#x] - +- Filter ((b#x > c#x) AND (c#x > d#x)) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b 
THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query analysis -Sort [c#x ASC NULLS FIRST, CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x ASC NULLS FIRST, (((a + (b * 2)) + (c * 3)) + (d * 4))#x ASC NULLS FIRST, scalarsubquery(c, d)#xL ASC NULLS FIRST], true -+- Project [(((a#x + (b#x * 2)) + (c#x * 3)) + (d#x * 4)) AS (((a + (b * 2)) + (c * 3)) + (d * 4))#x, CASE WHEN ((a#x + 1) = b#x) THEN 111 WHEN ((a#x + 1) = c#x) THEN 222 WHEN ((a#x + 1) = d#x) THEN 333 WHEN ((a#x + 1) = e#x) THEN 444 ELSE 555 END AS CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END#x, scalar-subquery#x [c#x && d#x] AS scalarsubquery(c, d)#xL, c#x] - : +- Aggregate [count(1) AS count(1)#xL] - : +- Filter ((c#x > outer(c#x)) AND (d#x < outer(d#x))) - : +- SubqueryAlias x - : +- SubqueryAlias spark_catalog.default.t1 - : +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - : +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - +- Filter ((c#x <= (d#x - 2)) OR (c#x >= (d#x + 2))) - +- SubqueryAlias spark_catalog.default.t1 - +- View (`spark_catalog`.`default`.`t1`, [a#x,b#x,c#x,d#x,e#x]) - +- Project [cast(col1#x as int) AS a#x, cast(col2#x as int) AS b#x, cast(col3#x as int) AS c#x, cast(col4#x as int) AS d#x, cast(col5#x as int) AS e#x] - +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] - - --- !query -DROP VIEW IF EXISTS t1 --- !query analysis -DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql deleted file mode 100644 index 87c15a07733b5..0000000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/postgres-crosstest/sqllogictest-select1.sql +++ /dev/null @@ -1,91 +0,0 @@ --- First 6 queries of the sql-logic-tests with some minor changes for compatibility, available here: --- https://www.sqlite.org/sqllogictest/file?name=test/select1.test&ci=tip. 
- ---DBMS_TO_GENERATE_GOLDEN_FILE postgres -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245); - -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1; - -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2; - -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5; - -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4; - -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6; - -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3; - -DROP VIEW IF EXISTS t1; diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index 762a04c22900d..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,252 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query schema -struct<> --- !query output - - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 
10 END - FROM t1 - ORDER BY 1 --- !query schema -struct scalarsubquery()) THEN (a * 2) ELSE (b * 10) END:int> --- !query output -354 -362 -374 -380 -398 -400 -416 -428 -436 -446 -452 -468 -474 -484 -492 -1020 -1060 -1140 -1190 -1220 -1280 -1340 -1360 -1410 -1490 -1500 -1570 -1600 -1690 -1700 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query schema -struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int> --- !query output -1531 102 -1604 107 -1683 112 -1755 117 -1824 122 -1899 127 -1975 132 -2052 137 -2129 142 -2208 147 -2286 152 -2360 157 -2436 162 -2499 167 -2589 172 -2653 177 -2739 182 -2802 187 -2880 192 -2946 197 -3037 202 -3100 207 -3176 212 -3259 217 -3331 222 -3405 227 -3477 232 -3556 237 -3627 242 -3704 247 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query schema -struct<((((a + (b * 2)) + (c * 3)) + (d * 4)) + (e * 5)):int,CASE WHEN (a < (b - 3)) THEN 111 WHEN (a <= b) THEN 222 WHEN (a < (b + 3)) THEN 333 ELSE 444 END:int,abs((b - c)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,((a + (b * 2)) + (c * 3)):int> --- !query output -1604 333 2 107 643 -1899 222 1 127 770 -1975 222 3 132 793 -2208 111 3 147 881 -2286 333 3 152 910 -2436 333 3 162 970 -2653 333 3 177 1066 -2802 222 2 187 1121 -2880 111 1 192 1157 -2946 333 1 197 1187 -3176 444 3 212 1273 -3405 222 1 227 1364 -3627 222 4 242 1450 -3704 222 1 247 1483 - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, - e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4 --- !query schema -struct --- !query output -100 -3 444 1011 104 -108 4 222 1079 105 -112 -2 333 1118 113 -117 -3 222 1165 118 -129 1 111 1274 125 -131 3 333 1325 130 -140 -1 555 1414 143 -146 1 222 1473 147 -153 2 444 1526 152 -159 -2 333 1570 158 -163 2 444 1626 162 -168 -1 222 1669 166 -172 -1 222 1719 174 -179 3 333 1778 175 -182 -1 222 1819 184 -186 4 111 1877 185 -193 1 444 1925 191 -198 1 555 1971 195 -203 -3 333 2017 204 -205 -1 111 2065 207 -213 1 555 2121 211 -216 -2 444 2164 219 -222 -4 444 2211 224 -228 4 111 2280 225 -232 -3 555 2312 233 -239 -3 444 2366 238 -240 2 333 2422 241 -247 4 222 2479 245 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query schema -struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CAST((((((a + b) + c) + d) + e) / 5) AS INT):int,abs(a):int,e:int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,d:int> --- !query output -1118 112 110 113 333 111 -1165 117 116 118 222 115 -1669 167 167 166 222 165 -1925 192 190 191 444 192 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - 
c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query schema -struct<(((a + (b * 2)) + (c * 3)) + (d * 4)):int,CASE WHEN ((a + 1) = b) THEN 111 WHEN ((a + 1) = c) THEN 222 WHEN ((a + 1) = d) THEN 333 WHEN ((a + 1) = e) THEN 444 ELSE 555 END:int,scalarsubquery(c, d):bigint,c:int> --- !query output -1165 222 0 117 -1219 222 0 124 -1274 111 0 129 -1325 333 0 131 -1367 222 0 139 -1414 555 0 140 -1473 222 0 146 -1570 333 0 159 -1669 222 0 168 -1877 111 0 186 -1971 555 0 198 -2017 333 0 203 -2211 444 0 222 -2312 555 0 232 -2366 444 0 239 -2422 333 0 240 -2479 222 0 247 - - --- !query -DROP VIEW IF EXISTS t1 --- !query schema -struct<> --- !query output - diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out deleted file mode 100644 index b5eb7eef4a9a3..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/postgres-crosstest/sqllogictest-select1.sql.out +++ /dev/null @@ -1,236 +0,0 @@ --- Automatically generated by CrossDbmsQueryTestSuite with postgres --- !query -CREATE VIEW t1(a, b, c, d, e) AS VALUES - (103,102,100,101,104), - (107,106,108,109,105), - (110,114,112,111,113), - (116,119,117,115,118), - (123,122,124,120,121), - (127,128,129,126,125), - (132,134,131,133,130), - (138,136,139,135,137), - (144,141,140,142,143), - (145,149,146,148,147), - (151,150,153,154,152), - (155,157,159,156,158), - (161,160,163,164,162), - (167,169,168,165,166), - (171,170,172,173,174), - (177,176,179,178,175), - (181,180,182,183,184), - (187,188,186,189,185), - (190,194,193,192,191), - (199,197,198,196,195), - (200,202,203,201,204), - (208,209,205,206,207), - (214,210,213,212,211), - (218,215,216,217,219), - (223,221,222,220,224), - (226,227,228,229,225), - (234,231,232,230,233), - (237,236,239,235,238), - (242,244,240,243,241), - (246,248,247,249,245) --- !query output - - - --- !query -SELECT CASE WHEN c > (SELECT avg(c) FROM t1) THEN a * 2 ELSE b * 10 END - FROM t1 - ORDER BY 1 --- !query output -354 -362 -374 -380 -398 -400 -416 -428 -436 -446 -452 -468 -474 -484 -492 -1020 -1060 -1140 -1190 -1220 -1280 -1340 -1360 -1410 -1490 -1500 -1570 -1600 -1690 -1700 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CAST((a + b + c + d + e) / 5 AS INT) - FROM t1 - ORDER BY 1,2 --- !query output -1531 102 -1604 107 -1683 112 -1755 117 -1824 122 -1899 127 -1975 132 -2052 137 -2129 142 -2208 147 -2286 152 -2360 157 -2436 162 -2499 167 -2589 172 -2653 177 -2739 182 -2802 187 -2880 192 -2946 197 -3037 202 -3100 207 -3176 212 -3259 217 -3331 222 -3405 227 -3477 232 -3556 237 -3627 242 -3704 247 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4 + e * 5, - CASE WHEN a < b - 3 THEN 111 WHEN a <= b THEN 222 - WHEN a < b + 3 THEN 333 ELSE 444 END, - abs(b - c), - CAST((a + b + c + d + e) / 5 AS INT), - a + b * 2 + c * 3 - FROM t1 - WHERE (e > c OR e < d) - AND d>e - AND EXISTS(SELECT 1 FROM t1 AS x WHERE x.b < t1.b) - ORDER BY 4,2,1,3,5 --- !query output -1604 333 2 107 643 -1899 222 1 127 770 -1975 222 3 132 793 -2208 111 3 147 881 -2286 333 3 152 910 -2436 333 3 162 970 -2653 333 3 177 1066 -2802 222 2 187 1121 -2880 111 1 192 1157 -2946 333 1 197 1187 -3176 444 3 212 1273 -3405 222 1 227 1364 -3627 222 4 242 1450 -3704 222 1 247 1483 - - --- !query -SELECT c, - d-e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - a + b * 2 + c * 3 + d * 4, 
- e - FROM t1 - WHERE d NOT BETWEEN 110 AND 150 - OR c BETWEEN b - 2 AND d + 2 - OR (e > c OR e < d) - ORDER BY 1,5,3,2,4 --- !query output -100 -3 444 1011 104 -108 4 222 1079 105 -112 -2 333 1118 113 -117 -3 222 1165 118 -129 1 111 1274 125 -131 3 333 1325 130 -140 -1 555 1414 143 -146 1 222 1473 147 -153 2 444 1526 152 -159 -2 333 1570 158 -163 2 444 1626 162 -168 -1 222 1669 166 -172 -1 222 1719 174 -179 3 333 1778 175 -182 -1 222 1819 184 -186 4 111 1877 185 -193 1 444 1925 191 -198 1 555 1971 195 -203 -3 333 2017 204 -205 -1 111 2065 207 -213 1 555 2121 211 -216 -2 444 2164 219 -222 -4 444 2211 224 -228 4 111 2280 225 -232 -3 555 2312 233 -239 -3 444 2366 238 -240 2 333 2422 241 -247 4 222 2479 245 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CAST((a + b + c + d + e) / 5 AS INT), - abs(a), - e, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - d - FROM t1 - WHERE b > c - AND c > d - ORDER BY 3,4,5,1,2,6 --- !query output -1118 112 110 113 333 111 -1165 117 116 118 222 115 -1669 167 167 166 222 165 -1925 192 190 191 444 192 - - --- !query -SELECT a + b * 2 + c * 3 + d * 4, - CASE a + 1 WHEN b THEN 111 WHEN c THEN 222 - WHEN d THEN 333 WHEN e THEN 444 ELSE 555 END, - (SELECT count(*) FROM t1 AS x WHERE x.c > t1.c AND x.d < t1.d), - c - FROM t1 - WHERE (c <= d - 2 OR c >= d + 2) - ORDER BY 4,2,1,3 --- !query output -1165 222 0 117 -1219 222 0 124 -1274 111 0 129 -1325 333 0 131 -1367 222 0 139 -1414 555 0 140 -1473 222 0 146 -1570 333 0 159 -1669 222 0 168 -1877 111 0 186 -1971 555 0 198 -2017 333 0 203 -2211 444 0 222 -2312 555 0 232 -2366 444 0 239 -2422 333 0 240 -2479 222 0 247 - - --- !query -DROP VIEW IF EXISTS t1 --- !query output - From 615f4d98a6e5e9842514cfbc827d70a6ba23f5a3 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 13:52:19 -0800 Subject: [PATCH 36/63] Small niceness changes --- .../sql/crossdbms/CrossDbmsQueryTestSuite.scala | 11 ++++++----- .../spark/sql/crossdbms/JdbcConnection.scala | 16 +++++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 4932ed96efbd8..1b6ef3de8c7ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -90,8 +90,9 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val queries = getQueries(code, comments) val settings = getSparkSettings(comments) - val dbmsConfig = comments.filter(_.startsWith(s"--${ - CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE} ")).map(_.substring(31)) + val dbmsConfig = comments.filter(_.startsWith( + CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE)).map(_.substring( + CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE.length)) // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + @@ -125,8 +126,8 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // Either of the below two lines can error. If we go into the catch statement, then it // is likely one of the following scenarios: // 1. Error thrown in Spark analysis: - // a. The query is either incompatible between Spark and the other DBMS - // b. 
There is issue with the schema in Spark. + // a. The query is incompatible with either Spark and the other DBMS. + // b. There is an issue with the schema in Spark. // c. The error is expected - it errors on both systems, but only the Spark error // will be printed to the golden file, because it errors first. // 2. Error thrown in other DBMS execution: @@ -224,7 +225,7 @@ object CrossDbmsQueryTestSuite { // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" // Argument in input files to indicate that golden file should be generated with a reference DBMS - private final val DBMS_TO_GENERATE_GOLDEN_FILE = "DBMS_TO_GENERATE_GOLDEN_FILE" + private final val DBMS_TO_GENERATE_GOLDEN_FILE = "--DBMS_TO_GENERATE_GOLDEN_FILE " private final val POSTGRES = "postgres" private final val SUPPORTED_DBMS = Seq(POSTGRES) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index f203f75ac0c8d..1c5842d951c67 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry /** * Represents a connection (session) to a database using JDBC. */ -private[sql] trait JdbcConnection { +private[crossdbms] trait JdbcConnection { /** * Executes the given SQL query and returns the result as a sequence of strings. @@ -63,15 +63,21 @@ private[sql] trait JdbcConnection { /** * Represents a connection (session) to a PostgreSQL database. */ -private[sql] case class PostgresConnection(connection_url: Option[String] = None) +private[crossdbms] case class PostgresConnection(connection_url: Option[String] = None) extends JdbcConnection { private final val POSTGRES_DRIVER_CLASS_NAME = "org.postgresql.Driver" DriverRegistry.register(POSTGRES_DRIVER_CLASS_NAME) - private final val DEFAULT_USER = "pg" - private final val DEFAULT_CONNECTION_URL = - s"jdbc:postgresql://localhost:5432/postgres?user=$DEFAULT_USER" + + private final val DEFAULT_HOST = "localhost" + private final val DEFAULT_PORT = "5432" + private final val DEFAULT_USER = "postgres" + private final val DEFAULT_DB = "postgres" + private final val DEFAULT_PASSWORD = "postgres" + private final val DEFAULT_CONNECTION_URL = s"jdbc:postgresql://$DEFAULT_HOST:$DEFAULT_PORT/" + + s"$DEFAULT_DB?user=$DEFAULT_USER?password=$DEFAULT_PASSWORD" private val url = connection_url.getOrElse(DEFAULT_CONNECTION_URL) + private val conn = DriverManager.getConnection(url) private val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) From cf345bd96aa153d04e917fdb1c66897d7e14fe2b Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 14:20:36 -0800 Subject: [PATCH 37/63] Do null -> NULL replacement, and put 2 tests in --- ...not-in-unit-tests-multi-column-literal.sql | 14 +-- .../not-in-unit-tests-multi-column.sql | 16 +-- ...in-unit-tests-multi-column-literal.sql.out | 37 +++++++ .../not-in-unit-tests-multi-column.sql.out | 99 +++++++++++++++++++ 4 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out create mode 100644 
sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql index a061e495f51b8..cfce07cc020c2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql @@ -7,12 +7,14 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false -CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES - (null, null), - (null, 1.0), - (2, 3.0), - (4, 5.0) - AS m(a, b); +--DBMS_TO_GENERATE_GOLDEN_FILE postgres + +CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES + (NULL, NULL), + (NULL, 1.0), + (2, 3.0), + (4, 5.0)) + AS m(a, b); -- Case 1 (not possible to write a literal with no rows, so we ignore it.) -- (subquery is empty -> row is returned) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql index 28ab75121573a..9314878e13917 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql @@ -18,18 +18,20 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false -CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES - (null, null), - (null, 1.0), +--DBMS_TO_GENERATE_GOLDEN_FILE postgres + +CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES + (NULL, NULL), + (NULL, 1.0), (2, 3.0), - (4, 5.0) + (4, 5.0)) AS m(a, b); -CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES - (null, null), +CREATE TEMPORARY VIEW s AS SELECT * FROM (VALUES + (NULL, NULL), (0, 1.0), (2, 3.0), - (4, null) + (4, NULL)) AS s(c, d); -- Case 1 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out new file mode 100644 index 0000000000000..b696ee2d43b4a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out @@ -0,0 +1,37 @@ +-- Automatically generated by CrossDbmsQueryTestSuite with postgres +-- !query +CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES + (NULL, NULL), + (NULL, 1.0), + (2, 3.0), + (4, 5.0)) + AS m(a, b) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 1.0 -- Matches (null, 1.0) + AND (a, b) NOT IN ((2, 3.0)) +-- !query output +NULL 1.0 + + +-- !query +SELECT * +FROM m +WHERE b = 3.0 -- Matches (2, 3.0) + AND (a, b) NOT IN ((2, 3.0)) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 5.0 -- Matches (4, 5.0) + AND (a, b) NOT IN ((2, 3.0)) +-- !query output +4 5.0 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out 
b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out new file mode 100644 index 0000000000000..4f6510ac2f52b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out @@ -0,0 +1,99 @@ +-- Automatically generated by CrossDbmsQueryTestSuite with postgres +-- !query +CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES + (NULL, NULL), + (NULL, 1.0), + (2, 3.0), + (4, 5.0)) + AS m(a, b) +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW s AS SELECT * FROM (VALUES + (NULL, NULL), + (0, 1.0), + (2, 3.0), + (4, NULL)) + AS s(c, d) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE (a, b) NOT IN (SELECT * + FROM s + WHERE d > 5.0) -- Matches no rows +-- !query output +2 3.0 +4 5.0 +NULL 1.0 +NULL NULL + + +-- !query +SELECT * +FROM m +WHERE (a, b) NOT IN (SELECT * + FROM s + WHERE c IS NULL AND d IS NULL) -- Matches only (null, null) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE a IS NULL AND b IS NULL -- Matches only (null, null) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 1.0 -- Matches (null, 1.0) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 1.0 -- Matches (null, 1.0) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c = 2) -- Matches (2, 3.0) +-- !query output +NULL 1.0 + + +-- !query +SELECT * +FROM m +WHERE b = 3.0 -- Matches (2, 3.0) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c = 2) -- Matches (2, 3.0) +-- !query output + + + +-- !query +SELECT * +FROM m +WHERE b = 5.0 -- Matches (4, 5.0) + AND (a, b) NOT IN (SELECT * + FROM s + WHERE c = 2) -- Matches (2, 3.0) +-- !query output +4 5.0 From cdef387ac34eae8ece1c9457bdbfe754c6cdf338 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 14:49:00 -0800 Subject: [PATCH 38/63] Add docker compose and bash scripts for easy postgres instance --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 17 ++++----- .../spark/sql/crossdbms/JdbcConnection.scala | 7 ++-- .../sql/crossdbms/bin/docker-compose.yml | 31 ++++++++++++++++ .../crossdbms/bin/generate_golden_files.sh | 36 +++++++++++++++++++ 4 files changed, 81 insertions(+), 10 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 1b6ef3de8c7ef..0019fcfc374aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -32,15 +32,16 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * See SQLQueryTestSuite.scala for more information. This class builds off of that to allow us * to generate golden files with other DBMS to perform cross-checking for correctness. Note that * this is not currently run on all SQL input files by default because there is incompatibility - * between Spark SQL and the DBMS SQL. More work will be done to increase coverage, focusing on - * postgres. 
+ * between Spark SQL and the DBMS SQL. * - * You need to have a database server up before running this test. - * For example, for postgres: - * 1. Install PostgreSQL. - * a. On a mac: `brew install postgresql@13` - * 2. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` + * You need to have a database server up before running this test. Two options: + * - Install Docker and use the bash script in ./bin/generate_golden_files.sh to run a DBMS + * container and generate golden files. + * - Otherwise, do manual installation. For example, for postgres: + * 1. Install PostgreSQL. + * a. On a mac: `brew install postgresql@13` + * 2. After installing PostgreSQL, start the database server, then create a role named pg with + * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` * * To indicate that the SQL file is eligible for testing with this suite, add the following comment * into the input file: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 1c5842d951c67..771b304401ae9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -75,7 +75,7 @@ private[crossdbms] case class PostgresConnection(connection_url: Option[String] private final val DEFAULT_DB = "postgres" private final val DEFAULT_PASSWORD = "postgres" private final val DEFAULT_CONNECTION_URL = s"jdbc:postgresql://$DEFAULT_HOST:$DEFAULT_PORT/" + - s"$DEFAULT_DB?user=$DEFAULT_USER?password=$DEFAULT_PASSWORD" + s"$DEFAULT_DB?user=$DEFAULT_USER&password=$DEFAULT_PASSWORD" private val url = connection_url.getOrElse(DEFAULT_CONNECTION_URL) private val conn = DriverManager.getConnection(url) @@ -89,7 +89,10 @@ private[crossdbms] case class PostgresConnection(connection_url: Option[String] val rs = stmt.getResultSet val metadata = rs.getMetaData while (rs.next()) { - val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => rs.getObject(i))) + val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { + val value = rs.getObject(i) + if (value == null) { "NULL" } else { value } + })) rows.append(row) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml new file mode 100644 index 0000000000000..787bdda8ecd31 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +version: '3' + +services: + # Postgres database service + postgres-db: + image: postgres:latest + container_name: postgres-db + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + ports: + - "5432:5432" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh new file mode 100644 index 0000000000000..cd68caab182f5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Get the root of the runtime repo +GIT_ROOT="$(git rev-parse --show-toplevel)" +echo "Root of repo is at $GIT_ROOT" + +# Change if using different container or name +DOCKER_CONTAINER_NAME="postgres-db" + +echo "Starting DB container.." +docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" up --build --detach + +echo "Waiting for DB container to be ready.." +timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" + +echo "Generating golden files.." +SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + +echo "Bringing down DB container.." +docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes \ No newline at end of file From 36be209f2cce4c97f7924c230f94236c980e1d44 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 14:49:51 -0800 Subject: [PATCH 39/63] Trivial changes --- .../apache/spark/sql/crossdbms/bin/generate_golden_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh index cd68caab182f5..25820a885d0a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh @@ -29,8 +29,8 @@ docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apac echo "Waiting for DB container to be ready.." timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" -echo "Generating golden files.." +echo "Generating all golden files.." SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" echo "Bringing down DB container.." 
-docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes \ No newline at end of file +docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes From d0373313fa5aa314527c6fcf73d2067eb0cd63fb Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 18 Dec 2023 14:53:00 -0800 Subject: [PATCH 40/63] Trivial changes --- .../apache/spark/sql/crossdbms/bin/generate_golden_files.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh index 25820a885d0a9..07c740f52ca10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh @@ -20,13 +20,13 @@ GIT_ROOT="$(git rev-parse --show-toplevel)" echo "Root of repo is at $GIT_ROOT" -# Change if using different container or name -DOCKER_CONTAINER_NAME="postgres-db" - echo "Starting DB container.." docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" up --build --detach echo "Waiting for DB container to be ready.." +# TODO: Make this dynamically detect which DBMS is being used. +# Change if using different container or name. +DOCKER_CONTAINER_NAME="postgres-db" timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" echo "Generating all golden files.." From 750a1dea821bc2ef42b22442648991304f178add Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 09:45:37 -0800 Subject: [PATCH 41/63] Fix typo --- .../src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index eb04aed145f28..6c7b4c48864a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -632,7 +632,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper /** * Returns the desired file path for results, given the input file. This is implemented as a - * function because differente Suites extending this class may want their results files with + * function because different Suites extending this class may want their results files with * different names or in different locations. 
*/ protected def resultFileForInputFile(file: File): String = { From 089c2922b146a3edcd23622b6c66dbe64bbb17cf Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 10:32:30 -0800 Subject: [PATCH 42/63] Add header comments, and make CrossDbmsQuerySuite an abstract class --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 107 ++++++++++-------- 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 0019fcfc374aa..55ba9159a4906 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -29,10 +29,14 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps // scalastyle:off line.size.limit /** - * See SQLQueryTestSuite.scala for more information. This class builds off of that to allow us - * to generate golden files with other DBMS to perform cross-checking for correctness. Note that - * this is not currently run on all SQL input files by default because there is incompatibility - * between Spark SQL and the DBMS SQL. + * IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING, READ THIS: + * Your new SQL test is automatically opted into this suite. It is likely failing because it is not + * compatible with the default DBMS (currently postgres). You have two options: + * 1. (Recommended) Modify your queries to be compatible with both systems, and generate golden + * files with the instructions below. This is recommended because it will run your queries + * against postgres, providing higher correctness testing confidence, and you won't have to + * manually verify the golden files generated with your test. + * 2. Add this line to your .sql file: --SPARK_ONLY * * You need to have a database server up before running this test. Two options: * - Install Docker and use the bash script in ./bin/generate_golden_files.sh to run a DBMS @@ -43,45 +47,61 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * 2. After installing PostgreSQL, start the database server, then create a role named pg with * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` * - * To indicate that the SQL file is eligible for testing with this suite, add the following comment - * into the input file: + * To indicate that the SQL file is not eligible for testing with this suite, add the following + * comment into the input file: * {{{ - * --DBMS_TO_GENERATE_GOLDEN_FILE postgres + * --SPARK_ONLY * }}} * - * And then, to run the entire test suite: + * And then, to run the entire test suite, with the default cross DBMS: * {{{ - * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" * }}} * - * To re-generate golden files for entire suite, run: + * To re-generate golden files for entire suite, either run: + * 1. (Recommended) You need Docker on your machine. * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * bash ./bin/generate_golden_files.sh + * }}} + * 2. + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" * }}} * * To re-generate golden file for a single test, e.g. 
`describe.sql`, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite -- -z describe.sql" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" * }}} * * To specify a DBMS to use (the default is postgres): * {{{ - * REF_DBMS=postgres SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" * }}} */ -// scalastyle:on line.size.limit -class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { - - // Note: the below two functions have to be functions instead of variables because the superclass - // runs the test first before the subclass variables can be instantiated. - private def crossDbmsToGenerateGoldenFiles: String = { - val userInputDbms = System.getenv(CrossDbmsQueryTestSuite.REF_DBMS_ARGUMENT) - val selectedDbms = Option(userInputDbms).filter(_.nonEmpty).getOrElse( - CrossDbmsQueryTestSuite.DEFAULT_DBMS) - assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(selectedDbms), - s"$selectedDbms is not currently supported.") - selectedDbms - } + +class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { + + def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES + + // Reduce scope to subquery tests for now. That is where most correctness issues are. + override def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + + override def getConnection: Option[String] => JdbcSQLQueryTestRunner = + (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) +} + +/** + * See SQLQueryTestSuite.scala for more information. This suite builds off of that to allow us + * to generate golden files with other DBMS to perform cross-checking for correctness. It generates + * another set of golden files. Note that this is not currently run on all SQL input files by + * default because there is incompatibility between SQL dialects for Spark and the other DBMS. + */ +abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { + + protected def crossDbmsToGenerateGoldenFiles: String + protected def customInputFilePath: String = inputFilePath + protected def getConnection: Option[String] => JdbcSQLQueryTestRunner + private def customConnectionUrl: String = System.getenv( CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) @@ -91,13 +111,12 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val queries = getQueries(code, comments) val settings = getSparkSettings(comments) - val dbmsConfig = comments.filter(_.startsWith( - CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE)).map(_.substring( - CrossDbmsQueryTestSuite.DBMS_TO_GENERATE_GOLDEN_FILE.length)) - // If `--DBMS_TO_GENERATE_GOLDEN_FILE` is not found, skip the test. + val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) + .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + // If `--ONLY_IF` is found, check if the DBMS being used is allowed. 
if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { - log.info(s"This test case (${testCase.name}) is ignored because it does not indicate " + - s"testing with $crossDbmsToGenerateGoldenFiles") + log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + + "SPARK_ONLY") } else if (regenerateGoldenFiles) { runQueries(queries, testCase, settings.toImmutableArraySeq) } else { @@ -120,9 +139,7 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.orElse( - Some(CrossDbmsQueryTestSuite.DBMS_TO_CONNECTION_MAPPING( - crossDbmsToGenerateGoldenFiles)(connectionUrl))) + runner = runner.orElse(Some(getConnection(connectionUrl)) try { // Either of the below two lines can error. If we go into the catch statement, then it // is likely one of the following scenarios: @@ -206,14 +223,14 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val defaultResultsDir = new File(baseResourcePath, "results") val goldenFilePath = new File( defaultResultsDir, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath - file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + file.getAbsolutePath.replace(customInputFilePath, goldenFilePath) + ".out" } override lazy val listTestCases: Seq[TestCase] = { - listFilesRecursively(new File(inputFilePath)).flatMap { file => + listFilesRecursively(new File(customInputFilePath)).flatMap { file => val resultFile = resultFileForInputFile(file) val absPath = file.getAbsolutePath - val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) + val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) RegularTestCase(testCaseName, absPath, resultFile) :: Nil }.sortBy(_.name) } @@ -221,18 +238,10 @@ class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { object CrossDbmsQueryTestSuite { - // System argument to indicate which reference DBMS is being used. - private final val REF_DBMS_ARGUMENT = "REF_DBMS" + final val POSTGRES = "postgres" + // System argument to indicate a custom connection URL to the reference DBMS. 
private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" // Argument in input files to indicate that golden file should be generated with a reference DBMS - private final val DBMS_TO_GENERATE_GOLDEN_FILE = "--DBMS_TO_GENERATE_GOLDEN_FILE " - - private final val POSTGRES = "postgres" - private final val SUPPORTED_DBMS = Seq(POSTGRES) - private final val DEFAULT_DBMS = POSTGRES - private final val DBMS_TO_CONNECTION_MAPPING = Map( - POSTGRES -> ((connection_url: Option[String]) => - JdbcSQLQueryTestRunner(PostgresConnection(connection_url))) - ) + private final val ONLY_IF_ARG = "--ONLY_IF " } From 62d1760b99d6d8e136029a28d1494629337fa570 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 10:38:01 -0800 Subject: [PATCH 43/63] Trivial changes --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 55ba9159a4906..923b0e97080b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -36,7 +36,7 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * files with the instructions below. This is recommended because it will run your queries * against postgres, providing higher correctness testing confidence, and you won't have to * manually verify the golden files generated with your test. - * 2. Add this line to your .sql file: --SPARK_ONLY + * 2. Add this line to your .sql file: --ONLY_IF spark * * You need to have a database server up before running this test. Two options: * - Install Docker and use the bash script in ./bin/generate_golden_files.sh to run a DBMS @@ -50,7 +50,7 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * To indicate that the SQL file is not eligible for testing with this suite, add the following * comment into the input file: * {{{ - * --SPARK_ONLY + * --ONLY_IF spark * }}} * * And then, to run the entire test suite, with the default cross DBMS: @@ -81,12 +81,12 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES + protected def crossDbmsToGenerateGoldenFiles: String = "postgres" // Reduce scope to subquery tests for now. That is where most correctness issues are. 
- override def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - override def getConnection: Option[String] => JdbcSQLQueryTestRunner = + override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) } @@ -99,8 +99,8 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { protected def crossDbmsToGenerateGoldenFiles: String - protected def customInputFilePath: String = inputFilePath - protected def getConnection: Option[String] => JdbcSQLQueryTestRunner + protected def customInputFilePath: String + protected def getConnection: Option[String] => SQLQueryTestRunner private def customConnectionUrl: String = System.getenv( CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) @@ -238,10 +238,8 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { object CrossDbmsQueryTestSuite { - final val POSTGRES = "postgres" - // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" - // Argument in input files to indicate that golden file should be generated with a reference DBMS + // Argument in input files to indicate that the sql file is restricted to certain systems. private final val ONLY_IF_ARG = "--ONLY_IF " } From def270bbd21895709f5536732ce758ee30b9a8e1 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 10:57:22 -0800 Subject: [PATCH 44/63] Use prepared statements --- .../apache/spark/sql/crossdbms/JdbcConnection.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 771b304401ae9..ff59af955e474 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.crossdbms -import java.sql.{DriverManager, ResultSet} +import java.sql.{DriverManager, PreparedStatement, ResultSet} import java.util.Properties import scala.collection.mutable.ArrayBuffer @@ -103,13 +103,16 @@ private[crossdbms] case class PostgresConnection(connection_url: Option[String] } def dropTable(tableName: String): Unit = { - val dropTableSql = s"DROP TABLE IF EXISTS $tableName" - stmt.executeUpdate(dropTableSql) + val st = conn.prepareStatement("DROP TABLE IF EXISTS ?") + st.setString(1, tableName) + st.executeUpdate() } def createTable(tableName: String, schemaString: String): Unit = { - val createTableSql = s"CREATE TABLE $tableName ($schemaString)" - stmt.executeUpdate(createTableSql) + val st = conn.prepareStatement("CREATE TABLE ? 
(?)") + st.setString(1, tableName) + st.setString(2, schemaString) + st.executeUpdate() } def loadData(df: DataFrame, tableName: String): Unit = { From fdb90a4c077cecff58a3a0220d8434adebd98265 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 10:58:45 -0800 Subject: [PATCH 45/63] Remove sql file changes for now --- ...not-in-unit-tests-multi-column-literal.sql | 14 ++- .../not-in-unit-tests-multi-column.sql | 16 ++- ...in-unit-tests-multi-column-literal.sql.out | 37 ------- .../not-in-unit-tests-multi-column.sql.out | 99 ------------------- 4 files changed, 13 insertions(+), 153 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out delete mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql index cfce07cc020c2..a061e495f51b8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql @@ -7,14 +7,12 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false ---DBMS_TO_GENERATE_GOLDEN_FILE postgres - -CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES - (NULL, NULL), - (NULL, 1.0), - (2, 3.0), - (4, 5.0)) - AS m(a, b); +CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES + (null, null), + (null, 1.0), + (2, 3.0), + (4, 5.0) + AS m(a, b); -- Case 1 (not possible to write a literal with no rows, so we ignore it.) 
-- (subquery is empty -> row is returned) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql index 9314878e13917..28ab75121573a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql @@ -18,20 +18,18 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false ---DBMS_TO_GENERATE_GOLDEN_FILE postgres - -CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES - (NULL, NULL), - (NULL, 1.0), +CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES + (null, null), + (null, 1.0), (2, 3.0), - (4, 5.0)) + (4, 5.0) AS m(a, b); -CREATE TEMPORARY VIEW s AS SELECT * FROM (VALUES - (NULL, NULL), +CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES + (null, null), (0, 1.0), (2, 3.0), - (4, NULL)) + (4, null) AS s(c, d); -- Case 1 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out deleted file mode 100644 index b696ee2d43b4a..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out +++ /dev/null @@ -1,37 +0,0 @@ --- Automatically generated by CrossDbmsQueryTestSuite with postgres --- !query -CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES - (NULL, NULL), - (NULL, 1.0), - (2, 3.0), - (4, 5.0)) - AS m(a, b) --- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 1.0 -- Matches (null, 1.0) - AND (a, b) NOT IN ((2, 3.0)) --- !query output -NULL 1.0 - - --- !query -SELECT * -FROM m -WHERE b = 3.0 -- Matches (2, 3.0) - AND (a, b) NOT IN ((2, 3.0)) --- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 5.0 -- Matches (4, 5.0) - AND (a, b) NOT IN ((2, 3.0)) --- !query output -4 5.0 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out deleted file mode 100644 index 4f6510ac2f52b..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out +++ /dev/null @@ -1,99 +0,0 @@ --- Automatically generated by CrossDbmsQueryTestSuite with postgres --- !query -CREATE TEMPORARY VIEW m AS SELECT * FROM (VALUES - (NULL, NULL), - (NULL, 1.0), - (2, 3.0), - (4, 5.0)) - AS m(a, b) --- !query output - - - --- !query -CREATE TEMPORARY VIEW s AS SELECT * FROM (VALUES - (NULL, NULL), - (0, 1.0), - (2, 3.0), - (4, NULL)) - AS s(c, d) --- !query output - - - --- !query -SELECT * -FROM m -WHERE (a, b) NOT IN (SELECT * - FROM s - WHERE d > 5.0) -- Matches no rows --- !query output -2 3.0 -4 5.0 -NULL 1.0 -NULL NULL - - --- !query -SELECT * -FROM m -WHERE (a, b) NOT IN (SELECT * - FROM s - WHERE c IS NULL AND d IS NULL) -- Matches only (null, null) --- !query output - - - --- !query -SELECT * -FROM m -WHERE a IS NULL AND b IS NULL -- Matches only (null, null) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) 
--- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 1.0 -- Matches (null, 1.0) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) --- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 1.0 -- Matches (null, 1.0) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c = 2) -- Matches (2, 3.0) --- !query output -NULL 1.0 - - --- !query -SELECT * -FROM m -WHERE b = 3.0 -- Matches (2, 3.0) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c = 2) -- Matches (2, 3.0) --- !query output - - - --- !query -SELECT * -FROM m -WHERE b = 5.0 -- Matches (4, 5.0) - AND (a, b) NOT IN (SELECT * - FROM s - WHERE c = 2) -- Matches (2, 3.0) --- !query output -4 5.0 From 8c21ceb1356ea38cac0979525a9a83687805c945 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 11:08:54 -0800 Subject: [PATCH 46/63] Header comment improvements --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 41 +++++++++---------- ...=> generate_golden_files_with_postgres.sh} | 5 +-- 2 files changed, 21 insertions(+), 25 deletions(-) rename sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/{generate_golden_files.sh => generate_golden_files_with_postgres.sh} (91%) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 923b0e97080b7..22acbb96f7307 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -38,14 +38,20 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * manually verify the golden files generated with your test. * 2. Add this line to your .sql file: --ONLY_IF spark * - * You need to have a database server up before running this test. Two options: - * - Install Docker and use the bash script in ./bin/generate_golden_files.sh to run a DBMS - * container and generate golden files. - * - Otherwise, do manual installation. For example, for postgres: - * 1. Install PostgreSQL. - * a. On a mac: `brew install postgresql@13` - * 2. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s pg`` OR `psql> CREATE role pg superuser`` + * To re-generate golden files for entire suite, either run: + * 1. (Recommended) You need Docker on your machine. Install Docker and run the following command: + * {{{ + * bash ./bin/generate_golden_files_with_postgres.sh + * }}} + * 2. + * a. You need to have a Postgres server up before running this test. + * i. Install PostgreSQL. On a mac: `brew install postgresql@13` + * ii. After installing PostgreSQL, start the database server, then create a role named pg with + * superuser permissions: `createuser -s postgres` OR `psql> CREATE role postgres superuser` + * b. Run the following command: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" + * }}} * * To indicate that the SQL file is not eligible for testing with this suite, add the following * comment into the input file: @@ -58,25 +64,16 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" * }}} * - * To re-generate golden files for entire suite, either run: - * 1. (Recommended) You need Docker on your machine. 
- * {{{ - * bash ./bin/generate_golden_files.sh - * }}} - * 2. - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * * To re-generate golden file for a single test, e.g. `describe.sql`, run: * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" * }}} * - * To specify a DBMS to use (the default is postgres): - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} + * This file adds a new comment argument, --ONLY_IF. This comment is used to indicate that the + * SQL file is not eligible for testing with this suite. For example, if you have a SQL file + * named `describe.sql`, and you want to exclude it from this suite, add the following comment + * into the input file: + * --ONLY_IF spark */ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh similarity index 91% rename from sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh rename to sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh index 07c740f52ca10..32e1573b51e6e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files.sh +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh @@ -24,13 +24,12 @@ echo "Starting DB container.." docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" up --build --detach echo "Waiting for DB container to be ready.." -# TODO: Make this dynamically detect which DBMS is being used. # Change if using different container or name. DOCKER_CONTAINER_NAME="postgres-db" timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" -echo "Generating all golden files.." -SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.CrossDbmsQueryTestSuite" +echo "Generating all golden files for postgres.." +SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" echo "Bringing down DB container.." 
docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes From 50c2dfb0babe5bf6ea3fc01169f89ebaed3f5910 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 11:37:54 -0800 Subject: [PATCH 47/63] Trivial changes --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 18 +++++++++--------- .../spark/sql/crossdbms/JdbcConnection.scala | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 22acbb96f7307..c4aec76641b41 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -68,12 +68,6 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" * }}} - * - * This file adds a new comment argument, --ONLY_IF. This comment is used to indicate that the - * SQL file is not eligible for testing with this suite. For example, if you have a SQL file - * named `describe.sql`, and you want to exclude it from this suite, add the following comment - * into the input file: - * --ONLY_IF spark */ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { @@ -92,6 +86,12 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { * to generate golden files with other DBMS to perform cross-checking for correctness. It generates * another set of golden files. Note that this is not currently run on all SQL input files by * default because there is incompatibility between SQL dialects for Spark and the other DBMS. + * + * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for + * which is eligible for the SQL file . For example, if you have a SQL file named `describe.sql`, + * and you want to indicate that postgres is incompatible, add the following comment into the input + * file: + * --ONLY_IF spark */ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { @@ -111,9 +111,9 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) // If `--ONLY_IF` is found, check if the DBMS being used is allowed. - if (!dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + if (dbmsConfig.nonEmpty && !dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + - "SPARK_ONLY") + s"not eligible with $crossDbmsToGenerateGoldenFiles.") } else if (regenerateGoldenFiles) { runQueries(queries, testCase, settings.toImmutableArraySeq) } else { @@ -136,7 +136,7 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { // the already generated golden files. if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.orElse(Some(getConnection(connectionUrl)) + runner = runner.orElse(Some(getConnection(connectionUrl))) try { // Either of the below two lines can error. 
If we go into the catch statement, then it // is likely one of the following scenarios: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index ff59af955e474..4e83ab8c6da03 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.crossdbms -import java.sql.{DriverManager, PreparedStatement, ResultSet} +import java.sql.{DriverManager, ResultSet} import java.util.Properties import scala.collection.mutable.ArrayBuffer From 939b8f252224236e683f5b3a4d4591419738c3b5 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 14:55:51 -0800 Subject: [PATCH 48/63] Add custom postgres command --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index c4aec76641b41..a955e59a4b131 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -79,6 +79,17 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) + + override protected def preprocessingCommands = Seq( + // Custom function `double` to imitate Spark's function, so that more tests are covered. + """ + |CREATE FUNCTION OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + | AS 'select CAST($1 AS double precision);' + | LANGUAGE SQL + | IMMUTABLE + | RETURNS NULL ON NULL INPUT; + |""".stripMargin + ) } /** @@ -95,10 +106,28 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { */ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { + /** + * A String representing the database system being used. + */ protected def crossDbmsToGenerateGoldenFiles: String - protected def customInputFilePath: String + + /** + * A custom input file path where SQL tests are located, if desired. + */ + protected def customInputFilePath: String = super.inputFilePath + + /** + * A function taking in an optional custom connection URL, and returns a [[SQLQueryTestRunner]] + * specific to the database system. + */ protected def getConnection: Option[String] => SQLQueryTestRunner + /** + * Commands to be run before running queries to generate golden files, such as defining custom + * functions. + */ + protected def preprocessingCommands: Seq[String] + private def customConnectionUrl: String = System.getenv( CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) @@ -137,6 +166,8 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { if (regenerateGoldenFiles) { val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) runner = runner.orElse(Some(getConnection(connectionUrl))) + preprocessingCommands.foreach(command => runner.foreach(_.runQuery(command))) + try { // Either of the below two lines can error. 
If we go into the catch statement, then it // is likely one of the following scenarios: From db01061f7d672dec5ad1607bbbb3707cd6bfc436 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 15:04:49 -0800 Subject: [PATCH 49/63] Modify exists-having to be compatible with psql --- .../exists-subquery/exists-having.sql | 52 +++---- .../exists-subquery/exists-having.sql.out | 134 ++++++++++++++++++ .../crossdbms/CrossDbmsQueryTestSuite.scala | 17 ++- 3 files changed, 171 insertions(+), 32 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql index c30159039ff3e..5605ae104e655 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql @@ -1,35 +1,35 @@ -- Tests HAVING clause in subquery. -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) +CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70)) AS EMP(id, emp_name, hiredate, salary, dept_id); -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM (VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL')) AS DEPT(dept_id, dept_name, state); -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00))) AS BONUS(emp_name, bonus_amt); -- simple having in subquery. 
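The rewrite above keeps the view definitions loadable by both engines: the inline table is wrapped in parentheses, string literals use single quotes, and Spark's `D` double-literal suffix is replaced by the `double(...)` helper. A minimal sketch of the dialect gap, assuming the `double` helper from `preprocessingCommands` has already been created in the PostgreSQL session (on Spark, `double(...)` is the built-in cast function):

-- Spark-only form: double-quoted strings and the D literal suffix are not portable.
--   SELECT * FROM VALUES (100, 'emp 1', 100.00D) AS EMP(id, emp_name, salary);

-- Portable form: parses on Spark and, with the helper function in place, on PostgreSQL.
SELECT * FROM (VALUES
  (100, 'emp 1', double(100.00)),
  (200, 'emp 2', double(200.00)))
  AS EMP(id, emp_name, salary);
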
diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out new file mode 100644 index 0000000000000..e330971afdf08 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out @@ -0,0 +1,134 @@ +-- Automatically generated by PostgreSQLQueryTestSuite with postgres +-- !query +CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70)) +AS EMP(id, emp_name, hiredate, salary, dept_id) +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM (VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL')) +AS DEPT(dept_id, dept_name, state) +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00))) +AS BONUS(emp_name, bonus_amt) +-- !query output + + + +-- !query +SELECT dept_id, count(*) +FROM emp +GROUP BY dept_id +HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < min(emp.salary)) +-- !query output +10 3 +100 2 +20 1 +30 1 +70 1 +NULL 1 + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < Min(emp.salary))) +-- !query output +10 dept 1 CA +20 dept 2 NY +30 dept 3 TX +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ +70 dept 7 FL + + +-- !query +SELECT dept_id, + Max(salary) +FROM emp gp +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp p + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < Min(p.salary))) +GROUP BY gp.dept_id +-- !query output +10 200.0 +100 400.0 +20 300.0 +30 400.0 +70 150.0 +NULL 400.0 + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt > Min(emp.salary))) +-- !query output +10 dept 1 CA +20 dept 2 NY +30 dept 3 TX +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ +70 dept 7 FL + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + count(emp.dept_id) + FROM emp + WHERE dept.dept_id = dept_id + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE ( bonus_amt > min(emp.salary) + AND count(emp.dept_id) > 1 ))) +-- !query output +10 dept 1 CA diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index a955e59a4b131..b55268f66e8c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -72,7 +72,7 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - protected def crossDbmsToGenerateGoldenFiles: String = "postgres" + protected def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES // Reduce scope to subquery tests for now. That is where most correctness issues are. override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath @@ -83,7 +83,7 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { override protected def preprocessingCommands = Seq( // Custom function `double` to imitate Spark's function, so that more tests are covered. """ - |CREATE FUNCTION OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision | AS 'select CAST($1 AS double precision);' | LANGUAGE SQL | IMMUTABLE @@ -107,14 +107,16 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { /** - * A String representing the database system being used. + * A String representing the database system being used. A list of these is defined in the + * companion object. */ protected def crossDbmsToGenerateGoldenFiles: String + assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(crossDbmsToGenerateGoldenFiles)) /** * A custom input file path where SQL tests are located, if desired. */ - protected def customInputFilePath: String = super.inputFilePath + protected def customInputFilePath: String = inputFilePath /** * A function taking in an optional custom connection URL, and returns a [[SQLQueryTestRunner]] @@ -123,8 +125,8 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { protected def getConnection: Option[String] => SQLQueryTestRunner /** - * Commands to be run before running queries to generate golden files, such as defining custom - * functions. + * Commands to be run on the DBMS before running queries to generate golden files, such as + * defining custom functions. */ protected def preprocessingCommands: Seq[String] @@ -266,6 +268,9 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { object CrossDbmsQueryTestSuite { + final val POSTGRES = "postgres" + final val SUPPORTED_DBMS = Seq(POSTGRES) + // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" // Argument in input files to indicate that the sql file is restricted to certain systems. 
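With the suite now abstract, wiring in another reference DBMS is mostly a matter of supplying the members above. A rough sketch only, not part of this patch series: the `mysql` constant, `MySQLConnection`, and the chosen input path are hypothetical, and the new DBMS string would also have to be added to CrossDbmsQueryTestSuite.SUPPORTED_DBMS for the assertion on crossDbmsToGenerateGoldenFiles to pass.

import java.io.File

// Hypothetical subclass; MySQLConnection would be a new JdbcConnection implementation.
class MySQLQueryTestSuite extends CrossDbmsQueryTestSuite {

  override protected def crossDbmsToGenerateGoldenFiles: String = "mysql"

  // Keep the reduced scope used by the Postgres suite.
  override protected def customInputFilePath: String =
    new File(inputFilePath, "subquery").getAbsolutePath

  override protected def getConnection: Option[String] => SQLQueryTestRunner =
    (connectionUrl: Option[String]) => JdbcSQLQueryTestRunner(MySQLConnection(connectionUrl))

  // No session-level helper functions needed up front.
  override protected def preprocessingCommands: Seq[String] = Seq.empty
}
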
From 7200f7d3263e366a831de08a6897cd65d989fdc1 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 19 Dec 2023 15:20:38 -0800 Subject: [PATCH 50/63] Modify query slightly --- .../exists-subquery/exists-having.sql.out | 189 ++++++++---------- .../exists-subquery/exists-having.sql | 12 +- .../exists-subquery/exists-having.sql.out | 52 ++--- 3 files changed, 118 insertions(+), 135 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out index da242e583f45e..54e046eafec29 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out @@ -1,81 +1,78 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) +CREATE TEMPORARY VIEW EMP AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) AS EMP(id, emp_name, hiredate, salary, dept_id) -- !query analysis -CreateViewCommand `EMP`, SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) +CreateViewCommand `EMP`, VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM 
VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") +CREATE TEMPORARY VIEW DEPT AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL') AS DEPT(dept_id, dept_name, state) -- !query analysis -CreateViewCommand `DEPT`, SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") +CreateViewCommand `DEPT`, VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL') AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- SubqueryAlias DEPT + +- LocalRelation [dept_id#x, dept_name#x, state#x] -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) +CREATE TEMPORARY VIEW BONUS AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) AS BONUS(emp_name, bonus_amt) -- !query analysis -CreateViewCommand `BONUS`, SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) +CreateViewCommand `BONUS`, VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true - +- Project [emp_name#x, bonus_amt#x] - +- SubqueryAlias BONUS - +- LocalRelation [emp_name#x, bonus_amt#x] + +- SubqueryAlias BONUS + +- LocalRelation [emp_name#x, bonus_amt#x] -- !query @@ -93,16 +90,14 @@ Project [dept_id#x, count(1)#xL] : +- SubqueryAlias bonus : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : +- Project [emp_name#x, bonus_amt#x] - : +- SubqueryAlias BONUS - : +- LocalRelation [emp_name#x, bonus_amt#x] + : +- SubqueryAlias BONUS + : +- LocalRelation [emp_name#x, bonus_amt#x] +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] +- SubqueryAlias emp +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -- !query @@ -125,22 +120,19 @@ Project 
[dept_id#x, dept_name#x, state#x] : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- SubqueryAlias BONUS + : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- SubqueryAlias EMP + : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- SubqueryAlias DEPT + +- LocalRelation [dept_id#x, dept_name#x, state#x] -- !query @@ -165,24 +157,21 @@ Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x] : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- SubqueryAlias BONUS + : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias p : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- SubqueryAlias EMP + : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias gp +- SubqueryAlias emp +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -- !query @@ -205,22 +194,19 @@ Project [dept_id#x, dept_name#x, state#x] : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- 
SubqueryAlias BONUS + : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- SubqueryAlias EMP + : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- SubqueryAlias DEPT + +- LocalRelation [dept_id#x, dept_name#x, state#x] -- !query @@ -245,20 +231,17 @@ Project [dept_id#x, dept_name#x, state#x] : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- Project [emp_name#x, bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- SubqueryAlias BONUS + : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- Aggregate [dept_id#x], [dept_id#x, count(dept_id#x) AS count(dept_id)#xL, min(salary#x) AS min(salary#x)#x] : +- Filter (outer(dept_id#x) = dept_id#x) : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- SubqueryAlias EMP + : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- Project [dept_id#x, dept_name#x, state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- SubqueryAlias DEPT + +- LocalRelation [dept_id#x, dept_name#x, state#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql index 5605ae104e655..670034ec1eab2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql @@ -1,6 +1,6 @@ -- Tests HAVING clause in subquery. 
-CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW EMP AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -9,19 +9,19 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70)) + (800, 'emp 8', date '2016-01-01', double(150.00), 70) AS EMP(id, emp_name, hiredate, salary, dept_id); -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW DEPT AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL')) + (70, 'dept 7', 'FL') AS DEPT(dept_id, dept_name, state); -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW BONUS AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -29,7 +29,7 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00))) + ('emp 6 - no dept', double(500.00)) AS BONUS(emp_name, bonus_amt); -- simple having in subquery. diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out index ae4cf010ffc98..170c99555ffe3 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -1,15 +1,15 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) +CREATE TEMPORARY VIEW EMP AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) AS EMP(id, emp_name, hiredate, salary, dept_id) -- !query schema struct<> @@ -18,13 +18,13 @@ struct<> -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") +CREATE TEMPORARY VIEW DEPT AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + 
(70, 'dept 7', 'FL') AS DEPT(dept_id, dept_name, state) -- !query schema struct<> @@ -33,15 +33,15 @@ struct<> -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) +CREATE TEMPORARY VIEW BONUS AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) AS BONUS(emp_name, bonus_amt) -- !query schema struct<> From 1d7934540b54a68336544813a4c2ac50b450b0ae Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 20 Dec 2023 11:32:07 -0800 Subject: [PATCH 51/63] Add readme and add ONLY_IF for most sql files in subquery dir --- .../test/resources/sql-tests/inputs/subquery/README.md | 10 ++++++++++ .../subquery/exists-subquery/exists-aggregate.sql | 1 + .../inputs/subquery/exists-subquery/exists-basic.sql | 1 + .../subquery/exists-subquery/exists-count-bug.sql | 1 + .../inputs/subquery/exists-subquery/exists-cte.sql | 1 + .../exists-subquery/exists-in-join-condition.sql | 1 + .../exists-subquery/exists-joins-and-set-ops.sql | 1 + .../subquery/exists-subquery/exists-orderby-limit.sql | 1 + .../subquery/exists-subquery/exists-outside-filter.sql | 1 + .../subquery/exists-subquery/exists-within-and-or.sql | 1 + .../sql-tests/inputs/subquery/in-subquery/in-basic.sql | 1 + .../inputs/subquery/in-subquery/in-count-bug.sql | 1 + .../inputs/subquery/in-subquery/in-group-by.sql | 1 + .../inputs/subquery/in-subquery/in-having.sql | 1 + .../sql-tests/inputs/subquery/in-subquery/in-joins.sql | 1 + .../sql-tests/inputs/subquery/in-subquery/in-limit.sql | 1 + .../subquery/in-subquery/in-multiple-columns.sql | 1 + .../inputs/subquery/in-subquery/in-null-semantics.sql | 1 + .../inputs/subquery/in-subquery/in-nullability.sql | 1 + .../inputs/subquery/in-subquery/in-order-by.sql | 1 + .../inputs/subquery/in-subquery/in-set-operations.sql | 1 + .../in-subquery/in-subquery-in-join-condition.sql | 1 + .../inputs/subquery/in-subquery/in-with-cte.sql | 1 + .../inputs/subquery/in-subquery/nested-not-in.sql | 1 + .../inputs/subquery/in-subquery/not-in-group-by.sql | 1 + .../inputs/subquery/in-subquery/not-in-joins.sql | 1 + .../not-in-unit-tests-multi-column-literal.sql | 1 + .../in-subquery/not-in-unit-tests-multi-column.sql | 1 + .../not-in-unit-tests-single-column-literal.sql | 1 + .../in-subquery/not-in-unit-tests-single-column.sql | 1 + .../inputs/subquery/in-subquery/simple-in.sql | 1 + .../subquery/negative-cases/invalid-correlation.sql | 1 + .../subquery/negative-cases/subq-input-typecheck.sql | 1 + .../nested-scalar-subquery-count-bug.sql | 1 + .../scalar-subquery/scalar-subquery-count-bug.sql | 1 + .../scalar-subquery/scalar-subquery-predicate.sql | 1 + .../scalar-subquery/scalar-subquery-select.sql | 1 + .../scalar-subquery/scalar-subquery-set-op.sql | 1 + .../sql-tests/inputs/subquery/subquery-in-from.sql | 1 + 39 files changed, 48 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/subquery/README.md diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md b/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md new file mode 100644 index 0000000000000..a42cbed4003af --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md @@ -0,0 +1,10 @@ + +## Note on 
PostgreSQLQueryTestSuite + +The SQL files in this directory are automatically opted into `PostgreSQLQueryTestSuite`, which +generates golden files with Postgres and runs Spark against these golden files. If you added a new +SQL test file, please generate the golden files with `PostgreSQLQueryTestSuite`. It is likely to +fail because of incompatibility between the SQL dialects of Spark and Postgres. To have queries that +are compatible for both systems, keep your SQL queries simple by using basic types like INTs and +strings only, if possible, and using functions that are present in both Spark and Postgres. Refer +to the header comment for `PostgreSQLQueryTestSuite` for more information. \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql index 1c4ef982c662f..637f230e739d7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=true --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql index 332e858800f7c..4055f798ad85d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery support. 
Tests basic form -- of EXISTS subquery (both EXISTS and NOT EXISTS) +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql index 1af7a2356836b..e6721f5f1c50e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temporary view t1(c1, c2) as values (0, 1), (1, 2); create temporary view t2(c1, c2) as values (0, 2), (0, 3); create temporary view t3(c1, c2) as values (0, 3), (1, 4), (2, 5); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql index c6784838158e1..ea8c0fc36ccfc 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-cte.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery used along with -- Common Table Expressions(CTE) +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql index 5cff53d33816e..ad2e7ad563e08 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql @@ -6,6 +6,7 @@ -- 3. Join type: inner / left outer / right outer / full outer / left semi / left anti -- 4. 
AND or OR for the join condition +--ONLY_IF spark CREATE TEMP VIEW x(x1, x2) AS VALUES (2, 1), (1, 1), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql index 28cb2f74748ea..1542fa5149aaf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql @@ -13,6 +13,7 @@ --CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql index d920a413ab8fc..2e3055db60afa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql @@ -5,6 +5,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql index af75103797fe5..585de247b11c5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-outside-filter.sql @@ -1,5 +1,6 @@ -- Tests EXISTS subquery support where the subquery is used outside the WHERE clause. +--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql index 7743b5241d11d..5920b61dade52 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql @@ -1,6 +1,7 @@ -- Tests EXISTS subquery support. Tests EXISTS -- subquery within a AND or OR expression. 
+--ONLY_IF spark CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql index 5669423148f80..fc243422bd7c8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-basic.sql @@ -1,5 +1,6 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view tab_a as select * from values (1, 1) as tab_a(a1, b1); create temporary view tab_b as select * from values (1, 1) as tab_b(a2, b2); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql index 4840f02511b64..f07ead3e868bd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temporary view t1(c1, c2) as values (0, 1), (1, 2); create temporary view t2(c1, c2) as values (0, 2), (0, 3); create temporary view t3(c1, c2) as values (0, 3), (1, 4), (2, 5); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql index 168faa0aee7c5..5c962dceb067f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY --CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN +--ONLY_IF spark create temporary view t1 as select * from values ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql index 750cc42b8641c..4d11ea7005b3b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-having.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql index 08eeb1d106fd5..d12144ae7e492 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql @@ -16,6 +16,7 @@ --CONFIG_DIM3 
spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM3 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql index 5f06f159f0b60..a76da33e501ca 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql index 1a6c06f9dad49..c403c2d66ab1f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-multiple-columns.sql @@ -4,6 +4,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql index aad655f05099e..7bf49a3209268 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-null-semantics.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark create temp view v (c) as values (1), (null); create temp view v_empty (e) as select 1 where false; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql index 07b4afe36c143..30f3a25f0e85a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-nullability.sql @@ -1,4 +1,5 @@ -- SPARK-43413: Tests for IN subquery nullability +--ONLY_IF spark create temp view t0 as select 1 as a_nonnullable; create temp view t1 as select cast(null as int) as b_nullable; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql index 0b006af4130a9..8bf49a1c2d996 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql @@ -8,6 
+8,7 @@ --CONFIG_DIM2 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM2 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql index e4a931ce2c800..c6b6a338c9b1d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql @@ -1,5 +1,6 @@ -- A test suite for set-operations in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql index d4fa2f6b0e816..d519abdbacc05 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql @@ -1,4 +1,5 @@ -- Test that correlated EXISTS subqueries in join conditions are supported. +--ONLY_IF spark -- Permutations of the test: -- 1. In / Not In diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql index fa4ae87f041cf..8d08cfb4da346 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-with-cte.sql @@ -1,5 +1,6 @@ -- A test suite for in with cte in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql index e2d4ad522d446..9472690d99143 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/nested-not-in.sql @@ -1,4 +1,5 @@ -- Tests NOT-IN subqueries nested inside OR expression(s). +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql index 54b74534c1162..97da00ac2cefd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql @@ -1,5 +1,6 @@ -- A test suite for NOT IN GROUP BY in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark -- Test aggregate operator with codegen on and off. 
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql index 2d11c5da20633..639034c18f31b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql @@ -1,5 +1,6 @@ -- A test suite for not-in-joins in parent side, subquery, and both predicate subquery -- It includes correlated cases. +--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql index a061e495f51b8..e9f3fda9c1b92 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, null), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql index 28ab75121573a..d5a30950b7fe7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql @@ -15,6 +15,7 @@ -- This can be generalized to include more tests for more columns, but it covers the main cases -- when there is more than one column. 
+--ONLY_IF spark --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql index 79747022eb1e8..2c3aed95a13ba 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql @@ -6,6 +6,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, 1.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql index 8060246bf3a3f..bbf5dce38a962 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql @@ -33,6 +33,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, 1.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql index d8a58afa344db..98f1f3d52cbb0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql @@ -3,6 +3,7 @@ --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false +--ONLY_IF spark create temporary view t1 as select * from values ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql index 1260fb73c6d81..0b83ecf43a301 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/invalid-correlation.sql @@ -1,5 +1,6 @@ -- The test file contains negative test cases -- of invalid queries where error messages are expected. +--ONLY_IF spark CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 2, 3) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql index 98ce1354a1355..da3ed9d11a8bd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql @@ -1,5 +1,6 @@ -- The test file contains negative test cases -- of invalid queries where error messages are expected. 
+--ONLY_IF spark CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 2, 3) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql index 86476389a8577..fe9152a27fe8d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark CREATE OR REPLACE VIEW t1(a1, a2) as values (0, 1), (1, 2); CREATE OR REPLACE VIEW t2(b1, b2) as values (0, 2), (0, 3); CREATE OR REPLACE VIEW t3(c1, c2) as values (0, 2), (0, 3); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql index 94b707ebf2002..c428dd81f287a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-count-bug.sql @@ -1,6 +1,7 @@ --CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=true --CONFIG_DIM1 spark.sql.optimizer.decorrelateInnerQuery.enabled=false +--ONLY_IF spark create temp view l (a, b) as values (1, 2.0), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql index d2d0ef7817178..20ba10176196f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -1,5 +1,6 @@ -- A test suite for scalar subquery in predicate context +--ONLY_IF spark CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv); CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql index e4f7b25a1684d..ef1e612fd744a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql @@ -1,5 +1,6 @@ -- A test suite for scalar subquery in SELECT clause +--ONLY_IF spark create temporary view t1 as select * from values ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql index 39e456611c03b..d282e477678c9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-set-op.sql @@ -1,5 +1,6 @@ -- Set operations in correlation path +--ONLY_IF spark CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 
0); CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3); CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql index 1273b56b6344b..662a065cdc13a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-in-from.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark -- Aliased subqueries in FROM clause SELECT * FROM (SELECT * FROM testData) AS t WHERE key = 1; From 83cd8d4e130f894d72156bad84f819923c76183a Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 20 Dec 2023 11:41:52 -0800 Subject: [PATCH 52/63] Rewrite exists-having --- .../exists-subquery/exists-having.sql.out | 103 +++++++----------- .../exists-subquery/exists-having.sql | 15 +-- .../exists-subquery/exists-having.sql.out | 15 +-- .../exists-subquery/exists-having.sql.out | 9 +- 4 files changed, 55 insertions(+), 87 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out index 54e046eafec29..e8e1acdda34c0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS VALUES +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -10,9 +10,8 @@ CREATE TEMPORARY VIEW EMP AS VALUES (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), (800, 'emp 8', date '2016-01-01', double(150.00), 70) -AS EMP(id, emp_name, hiredate, salary, dept_id) -- !query analysis -CreateViewCommand `EMP`, VALUES +CreateViewCommand `EMP`, [(id,None), (emp_name,None), (hiredate,None), (salary,None), (dept_id,None)], VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -21,36 +20,31 @@ CreateViewCommand `EMP`, VALUES (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + (800, 'emp 8', date '2016-01-01', double(150.00), 70), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query -CREATE TEMPORARY VIEW DEPT AS VALUES +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), (70, 'dept 7', 'FL') -AS DEPT(dept_id, dept_name, state) -- !query analysis -CreateViewCommand `DEPT`, VALUES +CreateViewCommand `DEPT`, [(dept_id,None), 
(dept_name,None), (state,None)], VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL') -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + (70, 'dept 7', 'FL'), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x, col3#x] -- !query -CREATE TEMPORARY VIEW BONUS AS VALUES +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -59,9 +53,8 @@ CREATE TEMPORARY VIEW BONUS AS VALUES ('emp 4', double(100.00)), ('emp 5', double(1000.00)), ('emp 6 - no dept', double(500.00)) -AS BONUS(emp_name, bonus_amt) -- !query analysis -CreateViewCommand `BONUS`, VALUES +CreateViewCommand `BONUS`, [(emp_name,None), (bonus_amt,None)], VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -69,10 +62,8 @@ CreateViewCommand `BONUS`, VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true - +- SubqueryAlias BONUS - +- LocalRelation [emp_name#x, bonus_amt#x] + ('emp 6 - no dept', double(500.00)), false, false, LocalTempView, true + +- LocalRelation [col1#x, col2#x] -- !query @@ -89,15 +80,13 @@ Project [dept_id#x, count(1)#xL] : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : +- SubqueryAlias bonus : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : +- SubqueryAlias BONUS - : +- LocalRelation [emp_name#x, bonus_amt#x] + : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : +- LocalRelation [col1#x, col2#x] +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] +- SubqueryAlias emp +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query @@ -119,20 +108,17 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS 
dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] -- !query @@ -156,22 +142,19 @@ Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x] : : +- Filter (bonus_amt#x < outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias p : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias gp +- SubqueryAlias emp +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] -- !query @@ -193,20 +176,17 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter (bonus_amt#x > outer(min(salary#x)#x)) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(1) AS count(1)#xL, min(salary#x) AS min(salary#x)#x] : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, 
cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] -- !query @@ -230,18 +210,15 @@ Project [dept_id#x, dept_name#x, state#x] : : +- Filter ((bonus_amt#x > outer(min(salary#x)#x)) AND (outer(count(dept_id)#xL) > cast(1 as bigint))) : : +- SubqueryAlias bonus : : +- View (`BONUS`, [emp_name#x,bonus_amt#x]) - : : +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] - : : +- SubqueryAlias BONUS - : : +- LocalRelation [emp_name#x, bonus_amt#x] + : : +- Project [cast(col1#x as string) AS emp_name#x, cast(col2#x as double) AS bonus_amt#x] + : : +- LocalRelation [col1#x, col2#x] : +- Aggregate [dept_id#x], [dept_id#x, count(dept_id#x) AS count(dept_id)#xL, min(salary#x) AS min(salary#x)#x] : +- Filter (outer(dept_id#x) = dept_id#x) : +- SubqueryAlias emp : +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) - : +- Project [cast(id#x as int) AS id#x, cast(emp_name#x as string) AS emp_name#x, cast(hiredate#x as date) AS hiredate#x, cast(salary#x as double) AS salary#x, cast(dept_id#x as int) AS dept_id#x] - : +- SubqueryAlias EMP - : +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- Project [cast(col1#x as int) AS id#x, cast(col2#x as string) AS emp_name#x, cast(col3#x as date) AS hiredate#x, cast(col4#x as double) AS salary#x, cast(col5#x as int) AS dept_id#x] + : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) - +- Project [cast(dept_id#x as int) AS dept_id#x, cast(dept_name#x as string) AS dept_name#x, cast(state#x as string) AS state#x] - +- SubqueryAlias DEPT - +- LocalRelation [dept_id#x, dept_name#x, state#x] + +- Project [cast(col1#x as int) AS dept_id#x, cast(col2#x as string) AS dept_name#x, cast(col3#x as string) AS state#x] + +- LocalRelation [col1#x, col2#x, col3#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql index 670034ec1eab2..d172220fbc413 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-having.sql @@ -1,6 +1,6 @@ -- Tests HAVING clause in subquery. 
-CREATE TEMPORARY VIEW EMP AS VALUES +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -9,19 +9,17 @@ CREATE TEMPORARY VIEW EMP AS VALUES (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70) -AS EMP(id, emp_name, hiredate, salary, dept_id); + (800, 'emp 8', date '2016-01-01', double(150.00), 70); -CREATE TEMPORARY VIEW DEPT AS VALUES +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL') -AS DEPT(dept_id, dept_name, state); + (70, 'dept 7', 'FL'); -CREATE TEMPORARY VIEW BONUS AS VALUES +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -29,8 +27,7 @@ CREATE TEMPORARY VIEW BONUS AS VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)) -AS BONUS(emp_name, bonus_amt); + ('emp 6 - no dept', double(500.00)); -- simple having in subquery. -- TC.01.01 diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out index e330971afdf08..95229f39865c0 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by PostgreSQLQueryTestSuite with postgres -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -9,27 +9,25 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM (VALUES (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70)) -AS EMP(id, emp_name, hiredate, salary, dept_id) + (800, 'emp 8', date '2016-01-01', double(150.00), 70) -- !query output -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL')) -AS DEPT(dept_id, dept_name, state) + (70, 'dept 7', 'FL') -- !query output -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -37,8 +35,7 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM (VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00))) -AS BONUS(emp_name, bonus_amt) + ('emp 
6 - no dept', double(500.00)) -- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out index 170c99555ffe3..2a84516e90abe 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS VALUES +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES (100, 'emp 1', date '2005-01-01', double(100.00), 10), (100, 'emp 1', date '2005-01-01', double(100.00), 10), (200, 'emp 2', date '2003-01-01', double(200.00), 10), @@ -10,7 +10,6 @@ CREATE TEMPORARY VIEW EMP AS VALUES (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), (800, 'emp 8', date '2016-01-01', double(150.00), 70) -AS EMP(id, emp_name, hiredate, salary, dept_id) -- !query schema struct<> -- !query output @@ -18,14 +17,13 @@ struct<> -- !query -CREATE TEMPORARY VIEW DEPT AS VALUES +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES (10, 'dept 1', 'CA'), (20, 'dept 2', 'NY'), (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), (70, 'dept 7', 'FL') -AS DEPT(dept_id, dept_name, state) -- !query schema struct<> -- !query output @@ -33,7 +31,7 @@ struct<> -- !query -CREATE TEMPORARY VIEW BONUS AS VALUES +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES ('emp 1', double(10.00)), ('emp 1', double(20.00)), ('emp 2', double(300.00)), @@ -42,7 +40,6 @@ CREATE TEMPORARY VIEW BONUS AS VALUES ('emp 4', double(100.00)), ('emp 5', double(1000.00)), ('emp 6 - no dept', double(500.00)) -AS BONUS(emp_name, bonus_amt) -- !query schema struct<> -- !query output From f491d1f4ce379987fc33af3c83c876ed214127d4 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 20 Dec 2023 11:43:41 -0800 Subject: [PATCH 53/63] Update comment --- .../spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index b55268f66e8c8..368d6319e0429 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -99,9 +99,9 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { * default because there is incompatibility between SQL dialects for Spark and the other DBMS. * * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for - * which is eligible for the SQL file . For example, if you have a SQL file named `describe.sql`, - * and you want to indicate that postgres is incompatible, add the following comment into the input - * file: + * which is eligible for the SQL file. These strings are defined in the companion object. 
For + * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is + * incompatible, add the following comment into the input file: * --ONLY_IF spark */ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { From ce0e1064acc260a9d7ee42d33a08cc6c8b36bd3c Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 21 Dec 2023 12:30:28 -0800 Subject: [PATCH 54/63] Ignore subquery-offset.sql --- .../test/resources/sql-tests/inputs/subquery/subquery-offset.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql index 9b61d1b26270b..cc9c70df9df87 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/subquery-offset.sql @@ -1,3 +1,4 @@ +--ONLY_IF spark drop table if exists x; drop table if exists y; From 1d7c620ed5a7205d27381f6abf94f08bd02199b7 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Thu, 21 Dec 2023 18:00:25 -0800 Subject: [PATCH 55/63] Create new file PostgreSQLQueryTestSuite.scala and fix indent --- .../crossdbms/CrossDbmsQueryTestSuite.scala | 65 -------------- .../spark/sql/crossdbms/JdbcConnection.scala | 2 +- .../crossdbms/PostgreSQLQueryTestSuite.scala | 84 +++++++++++++++++++ 3 files changed, 85 insertions(+), 66 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index 368d6319e0429..a4ca037301137 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -27,71 +27,6 @@ import org.apache.spark.sql.SQLQueryTestSuite import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} import org.apache.spark.util.ArrayImplicits.SparkArrayOps -// scalastyle:off line.size.limit -/** - * IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING, READ THIS: - * Your new SQL test is automatically opted into this suite. It is likely failing because it is not - * compatible with the default DBMS (currently postgres). You have two options: - * 1. (Recommended) Modify your queries to be compatible with both systems, and generate golden - * files with the instructions below. This is recommended because it will run your queries - * against postgres, providing higher correctness testing confidence, and you won't have to - * manually verify the golden files generated with your test. - * 2. Add this line to your .sql file: --ONLY_IF spark - * - * To re-generate golden files for entire suite, either run: - * 1. (Recommended) You need Docker on your machine. Install Docker and run the following command: - * {{{ - * bash ./bin/generate_golden_files_with_postgres.sh - * }}} - * 2. - * a. You need to have a Postgres server up before running this test. - * i. Install PostgreSQL. On a mac: `brew install postgresql@13` - * ii. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s postgres` OR `psql> CREATE role postgres superuser` - * b. 
Run the following command: - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * - * To indicate that the SQL file is not eligible for testing with this suite, add the following - * comment into the input file: - * {{{ - * --ONLY_IF spark - * }}} - * - * And then, to run the entire test suite, with the default cross DBMS: - * {{{ - * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * - * To re-generate golden file for a single test, e.g. `describe.sql`, run: - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" - * }}} - */ - -class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - - protected def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES - - // Reduce scope to subquery tests for now. That is where most correctness issues are. - override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - - override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = - (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) - - override protected def preprocessingCommands = Seq( - // Custom function `double` to imitate Spark's function, so that more tests are covered. - """ - |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision - | AS 'select CAST($1 AS double precision);' - | LANGUAGE SQL - | IMMUTABLE - | RETURNS NULL ON NULL INPUT; - |""".stripMargin - ) -} - /** * See SQLQueryTestSuite.scala for more information. This suite builds off of that to allow us * to generate golden files with other DBMS to perform cross-checking for correctness. It generates diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala index 4e83ab8c6da03..653e00d34d562 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala @@ -50,7 +50,7 @@ private[crossdbms] trait JdbcConnection { /** * Loads data from the given Spark DataFrame into the table with the specified name. - * @param df The Spark DataFrame containing the data to be loaded. + * @param df The Spark DataFrame containing the data to be loaded. */ def loadData(df: DataFrame, tableName: String): Unit diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala new file mode 100644 index 0000000000000..bb0aae3d8ab03 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.crossdbms + +import java.io.File + +// scalastyle:off line.size.limit +/** + * IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING, READ THIS: + * Your new SQL test is automatically opted into this suite. It is likely failing because it is not + * compatible with the default DBMS (currently postgres). You have two options: + * 1. (Recommended) Modify your queries to be compatible with both systems, and generate golden + * files with the instructions below. This is recommended because it will run your queries + * against postgres, providing higher correctness testing confidence, and you won't have to + * manually verify the golden files generated with your test. + * 2. Add this line to your .sql file: --ONLY_IF spark + * + * To re-generate golden files for entire suite, either run: + * 1. (Recommended) You need Docker on your machine. Install Docker and run the following command: + * {{{ + * bash ./bin/generate_golden_files_with_postgres.sh + * }}} + * 2. + * a. You need to have a Postgres server up before running this test. + * i. Install PostgreSQL. On a mac: `brew install postgresql@13` + * ii. After installing PostgreSQL, start the database server, then create a role named pg with + * superuser permissions: `createuser -s postgres` OR `psql> CREATE role postgres superuser` + * b. Run the following command: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" + * }}} + * + * To indicate that the SQL file is not eligible for testing with this suite, add the following + * comment into the input file: + * {{{ + * --ONLY_IF spark + * }}} + * + * And then, to run the entire test suite, with the default cross DBMS: + * {{{ + * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" + * }}} + * + * To re-generate golden file for a single test, e.g. `describe.sql`, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" + * }}} + */ +class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { + + protected def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES + + // Reduce scope to subquery tests for now. That is where most correctness issues are. + override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + + override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = + (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) + + override protected def preprocessingCommands = Seq( + // Custom function `double` to imitate Spark's function, so that more tests are covered. 
+ """ + |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + | AS 'select CAST($1 AS double precision);' + | LANGUAGE SQL + | IMMUTABLE + | RETURNS NULL ON NULL INPUT; + |""".stripMargin + ) +} From 0e542cf46bf99b6d62ee679b8336701311bd292e Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Mon, 1 Jan 2024 21:17:38 -0800 Subject: [PATCH 56/63] Move from postgres-results -> crossdbms-results --- .../exists-subquery/exists-having.sql.out | 0 .../exists-subquery/exists-having.sql.out | 147 ------------------ .../crossdbms/CrossDbmsQueryTestSuite.scala | 9 +- 3 files changed, 6 insertions(+), 150 deletions(-) rename sql/core/src/test/resources/sql-tests/{results/postgres-results => crossdbms-results}/exists-subquery/exists-having.sql.out (100%) delete mode 100644 sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out similarity index 100% rename from sql/core/src/test/resources/sql-tests/results/postgres-results/exists-subquery/exists-having.sql.out rename to sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out deleted file mode 100644 index 2a84516e90abe..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ /dev/null @@ -1,147 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES - (100, 'emp 1', date '2005-01-01', double(100.00), 10), - (100, 'emp 1', date '2005-01-01', double(100.00), 10), - (200, 'emp 2', date '2003-01-01', double(200.00), 10), - (300, 'emp 3', date '2002-01-01', double(300.00), 20), - (400, 'emp 4', date '2005-01-01', double(400.00), 30), - (500, 'emp 5', date '2001-01-01', double(400.00), NULL), - (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), - (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70) --- !query schema -struct<> --- !query output - - - --- !query -CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES - (10, 'dept 1', 'CA'), - (20, 'dept 2', 'NY'), - (30, 'dept 3', 'TX'), - (40, 'dept 4 - unassigned', 'OR'), - (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL') --- !query schema -struct<> --- !query output - - - --- !query -CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES - ('emp 1', double(10.00)), - ('emp 1', double(20.00)), - ('emp 2', double(300.00)), - ('emp 2', double(100.00)), - ('emp 3', double(300.00)), - ('emp 4', double(100.00)), - ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)) --- !query schema -struct<> --- !query output - - - --- !query -SELECT dept_id, count(*) -FROM emp -GROUP BY dept_id -HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < min(emp.salary)) --- !query schema -struct --- !query output -10 3 -100 2 -20 1 -30 1 -70 1 -NULL 1 - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < Min(emp.salary))) --- !query schema -struct --- !query 
output -10 dept 1 CA -20 dept 2 NY -30 dept 3 TX -40 dept 4 - unassigned OR -50 dept 5 - unassigned NJ -70 dept 7 FL - - --- !query -SELECT dept_id, - Max(salary) -FROM emp gp -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp p - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < Min(p.salary))) -GROUP BY gp.dept_id --- !query schema -struct --- !query output -10 200.0 -100 400.0 -20 300.0 -30 400.0 -70 150.0 -NULL 400.0 - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt > Min(emp.salary))) --- !query schema -struct --- !query output -10 dept 1 CA -20 dept 2 NY -30 dept 3 TX -40 dept 4 - unassigned OR -50 dept 5 - unassigned NJ -70 dept 7 FL - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - count(emp.dept_id) - FROM emp - WHERE dept.dept_id = dept_id - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE ( bonus_amt > min(emp.salary) - AND count(emp.dept_id) > 1 ))) --- !query schema -struct --- !query output -10 dept 1 CA diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index a4ca037301137..e40be9d4193e0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -185,9 +185,8 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } override protected def resultFileForInputFile(file: File): String = { - val defaultResultsDir = new File(baseResourcePath, "results") - val goldenFilePath = new File( - defaultResultsDir, s"$crossDbmsToGenerateGoldenFiles-results").getAbsolutePath + val goldenFilePathsql/core/src/test/resources/sql-tests/crossdbms-results/ = new File(baseResourcePath, + CrossDbmsQueryTestSuite.RESULT_DIR).getAbsolutePath file.getAbsolutePath.replace(customInputFilePath, goldenFilePath) + ".out" } @@ -206,6 +205,10 @@ object CrossDbmsQueryTestSuite { final val POSTGRES = "postgres" final val SUPPORTED_DBMS = Seq(POSTGRES) + // Result directory for golden files produced by CrossDBMS. This means that all DBMS will share + // the same golden files. This is only possible if we make the queries simple enough that they are + // compatible with all DBMS. + private final val RESULT_DIR = "crossdbms-results" // System argument to indicate a custom connection URL to the reference DBMS. private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" // Argument in input files to indicate that the sql file is restricted to certain systems. 
From 5d4c5773e872b173453c6446df91af226104f905 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 2 Jan 2024 18:06:24 -0800 Subject: [PATCH 57/63] Fix silly paste --- .../apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala index e40be9d4193e0..c874e2f59db1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala @@ -185,7 +185,7 @@ abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { } override protected def resultFileForInputFile(file: File): String = { - val goldenFilePathsql/core/src/test/resources/sql-tests/crossdbms-results/ = new File(baseResourcePath, + val goldenFilePath = new File(baseResourcePath, CrossDbmsQueryTestSuite.RESULT_DIR).getAbsolutePath file.getAbsolutePath.replace(customInputFilePath, goldenFilePath) + ".out" } From 68ea6f222b9aa2c12f803c39707585832642ec7d Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Tue, 2 Jan 2024 23:15:19 -0800 Subject: [PATCH 58/63] Fix unintended removal of results file --- .../exists-subquery/exists-having.sql.out | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out new file mode 100644 index 0000000000000..ae4cf010ffc98 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -0,0 +1,150 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (200, "emp 2", date "2003-01-01", 200.00D, 10), + (300, "emp 3", date "2002-01-01", 300.00D, 20), + (400, "emp 4", date "2005-01-01", 400.00D, 30), + (500, "emp 5", date "2001-01-01", 400.00D, NULL), + (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), + (700, "emp 7", date "2010-01-01", 400.00D, 100), + (800, "emp 8", date "2016-01-01", 150.00D, 70) +AS EMP(id, emp_name, hiredate, salary, dept_id) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES + (10, "dept 1", "CA"), + (20, "dept 2", "NY"), + (30, "dept 3", "TX"), + (40, "dept 4 - unassigned", "OR"), + (50, "dept 5 - unassigned", "NJ"), + (70, "dept 7", "FL") +AS DEPT(dept_id, dept_name, state) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES + ("emp 1", 10.00D), + ("emp 1", 20.00D), + ("emp 2", 300.00D), + ("emp 2", 100.00D), + ("emp 3", 300.00D), + ("emp 4", 100.00D), + ("emp 5", 1000.00D), + ("emp 6 - no dept", 500.00D) +AS BONUS(emp_name, bonus_amt) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT dept_id, count(*) +FROM emp +GROUP BY dept_id +HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < min(emp.salary)) +-- !query schema +struct +-- !query output +10 3 +100 2 +20 1 +30 1 +70 1 +NULL 1 + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + 
Count(*) + FROM emp + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < Min(emp.salary))) +-- !query schema +struct +-- !query output +10 dept 1 CA +20 dept 2 NY +30 dept 3 TX +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ +70 dept 7 FL + + +-- !query +SELECT dept_id, + Max(salary) +FROM emp gp +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp p + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt < Min(p.salary))) +GROUP BY gp.dept_id +-- !query schema +struct +-- !query output +10 200.0 +100 400.0 +20 300.0 +30 400.0 +70 150.0 +NULL 400.0 + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + Count(*) + FROM emp + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE bonus_amt > Min(emp.salary))) +-- !query schema +struct +-- !query output +10 dept 1 CA +20 dept 2 NY +30 dept 3 TX +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ +70 dept 7 FL + + +-- !query +SELECT * +FROM dept +WHERE EXISTS (SELECT dept_id, + count(emp.dept_id) + FROM emp + WHERE dept.dept_id = dept_id + GROUP BY dept_id + HAVING EXISTS (SELECT 1 + FROM bonus + WHERE ( bonus_amt > min(emp.salary) + AND count(emp.dept_id) > 1 ))) +-- !query schema +struct +-- !query output +10 dept 1 CA From 1e1a2838fc88a4a20bfdb9071d47400890a99810 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 11:42:30 -0800 Subject: [PATCH 59/63] In middle of refactoring.. --- .../sql/jdbc/PostgreSQLQueryTestSuite.scala | 129 ++++++++ .../apache/spark/sql/SQLQueryTestHelper.scala | 289 +++++++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 291 ------------------ .../crossdbms/PostgreSQLQueryTestSuite.scala | 84 ----- 4 files changed, 418 insertions(+), 375 deletions(-) create mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala new file mode 100644 index 0000000000000..436639e70bcd1 --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.jdbc + +import java.io.File +import java.sql.Connection + +import scala.util.control.NonFatal + +import org.apache.spark.sql.SQLQueryTestHelper +import org.apache.spark.tags.DockerTest + +@DockerTest +class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper { + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:15.1-alpine") + override val env = Map( + "POSTGRES_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort = 5432 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" + } + + override def dataPreparation(conn: Connection): Unit = {} + + protected val baseResourcePath = { + // We use a path based on Spark home for 2 reasons: + // 1. Maven can't get correct resource directory when resources in other jars. + // 2. We test subclasses in the hive-thriftserver module. + getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile + } + protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath + protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath + protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath + + def listTestCases: Seq[TestCase] = { + listFilesRecursively(new File(customInputFilePath)).flatMap { file => + val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + val absPath = file.getAbsolutePath + val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) + RegularTestCase(testCaseName, absPath, resultFile) :: Nil + }.sortBy(_.name) + } + + def createScalaTestCase(testCase: TestCase): Unit = { + testCase match { + case _ if ignoreList.exists(t => + testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT))) => + ignore(s"${testCase.name} is in the ignore list.") { + log.debug(s"${testCase.name} is in the ignore list.") + } + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { + log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") + } + } + } + + protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments) + val settings = getSparkSettings(comments) + + val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) + .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + // If `--ONLY_IF` is found, check if the DBMS being used is allowed. 
+ if (dbmsConfig.nonEmpty && !dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + + s"not eligible with $crossDbmsToGenerateGoldenFiles.") + } else { + runQueries(queries, testCase, settings.toImmutableArraySeq) + } + } + + + protected def runQueries( + queries: Seq[String], + testCase: TestCase, + sparkConfigSet: Seq[(String, String)]): Unit = { + val localSparkSession = spark.newSession() + var runner: Option[SQLQueryTestRunner] = None + + val outputs: Seq[QueryTestOutput] = queries.map { sql => + val output = { + val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } + setOperations.foreach(localSparkSession.sql) + val (_, output) = handleExceptions( + getNormalizedQueryExecutionResult(localSparkSession, sql)) + output + } + ExecutionOutput( + sql = sql, + // Don't care about the schema for this test. Only care about correctness. + schema = None, + output = normalizeTestResults(output.mkString("\n"))) + } + if (runner.isDefined) { + runner.foreach(_.cleanUp()) + runner = None + } + + readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) + } + + listTestCases.foreach(createScalaTestCase) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index c08569150e2a9..d82d285ed17aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -17,19 +17,25 @@ package org.apache.spark.sql +import java.io.File + +import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.ErrorMessageFormat.MINIMAL import org.apache.spark.SparkThrowableHelper.getMessage import org.apache.spark.internal.Logging +import org.apache.spark.sql.IntegratedUDFTestUtils.{TestUDF, TestUDTFSet} import org.apache.spark.sql.catalyst.expressions.{CurrentDate, CurrentTimestampLike, CurrentUser, Literal} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.sql.types.{DateType, StructType, TimestampType} +import org.apache.spark.util.ArrayImplicits.SparkArrayOps trait SQLQueryTestHelper extends Logging { @@ -164,4 +170,287 @@ trait SQLQueryTestHelper extends Logging { (emptySchema, Seq(e.getClass.getName, e.getMessage)) } } + + /** A test case. */ + protected trait TestCase { + val name: String + val inputFile: String + val resultFile: String + def asAnalyzerTest(newName: String, newResultFile: String): TestCase + } + + /** + * traits that indicate UDF or PgSQL to trigger the code path specific to each. For instance, + * PgSQL tests require to register some UDF functions. + */ + protected trait PgSQLTest + + /** Trait that indicates ANSI-related tests with the ANSI mode enabled. */ + protected trait AnsiTest + + /** Trait that indicates an analyzer test that shows the analyzed plan string as output. 
*/ + protected trait AnalyzerTest extends TestCase { + override def asAnalyzerTest(newName: String, newResultFile: String): AnalyzerTest = this + } + + /** Trait that indicates the default timestamp type is TimestampNTZType. */ + protected trait TimestampNTZTest + + /** Trait that indicates CTE test cases need their create view versions */ + protected trait CTETest + + protected trait UDFTest { + val udf: TestUDF + } + + protected trait UDTFSetTest { + val udtfSet: TestUDTFSet + } + + protected case class RegularTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + RegularAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** An ANSI-related test case. */ + protected case class AnsiTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + AnsiAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** An analyzer test that shows the analyzed plan string as output. */ + protected case class AnalyzerTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnalyzerTest + + /** A PostgreSQL test case. */ + protected case class PgSQLTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + PgSQLAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** A UDF test case. */ + protected case class UDFTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDFAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + protected case class UDTFSetTestCase( + name: String, + inputFile: String, + resultFile: String, + udtfSet: TestUDTFSet) extends TestCase with UDTFSetTest { + + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDTFSetAnalyzerTestCase(newName, inputFile, newResultFile, udtfSet) + } + + /** A UDAF test case. */ + protected case class UDAFTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDAFAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + /** A UDF PostgreSQL test case. 
*/ + protected case class UDFPgSQLTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest with PgSQLTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + UDFPgSQLAnalyzerTestCase(newName, inputFile, newResultFile, udf) + } + + /** An date time test case with default timestamp as TimestampNTZType */ + protected case class TimestampNTZTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with TimestampNTZTest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + TimestampNTZAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** A CTE test case with special handling */ + protected case class CTETestCase(name: String, inputFile: String, resultFile: String) + extends TestCase + with CTETest { + override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = + CTEAnalyzerTestCase(newName, inputFile, newResultFile) + } + + /** These are versions of the above test cases, but only exercising analysis. */ + protected case class RegularAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest + protected case class AnsiAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with AnsiTest + protected case class PgSQLAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with PgSQLTest + protected case class UDFAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest + protected case class UDTFSetAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udtfSet: TestUDTFSet) + extends AnalyzerTest with UDTFSetTest + protected case class UDAFAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest + protected case class UDFPgSQLAnalyzerTestCase( + name: String, inputFile: String, resultFile: String, udf: TestUDF) + extends AnalyzerTest with UDFTest with PgSQLTest + protected case class TimestampNTZAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with TimestampNTZTest + protected case class CTEAnalyzerTestCase( + name: String, inputFile: String, resultFile: String) + extends AnalyzerTest with CTETest + + /** A single SQL query's output. */ + trait QueryTestOutput { + def sql: String + def schema: Option[String] + def output: String + def numSegments: Int + } + + /** A single SQL query's execution output. */ + case class ExecutionOutput( + sql: String, + schema: Option[String], + output: String) extends QueryTestOutput { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output. + val schemaString = if (schema.nonEmpty) { + s"-- !query schema\n" + schema.get + "\n" + } else { + "" + } + s"-- !query\n" + + sql + "\n" + + schemaString + + s"-- !query output\n" + + output + } + + override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } + } + + /** A single SQL query's analysis results. */ + case class AnalyzerOutput( + sql: String, + schema: Option[String], + output: String) extends QueryTestOutput { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output. 
+ s"-- !query\n" + + sql + "\n" + + s"-- !query analysis\n" + + output + } + override def numSegments: Int = 2 + } + + /** Returns all the files (not directories) in a directory, recursively. */ + protected def listFilesRecursively(path: File): Seq[File] = { + val (dirs, files) = path.listFiles().partition(_.isDirectory) + // Filter out test files with invalid extensions such as temp files created + // by vi (.swp), Mac (.DS_Store) etc. + val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions)) + (filteredFiles ++ dirs.flatMap(listFilesRecursively)).toImmutableArraySeq + } + protected def splitCommentsAndCodes(input: String) = + input.split("\n").partition { line => + val newLine = line.trim + newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") + } + + protected def getQueries(code: Array[String], comments: Array[String]) = { + def splitWithSemicolon(seq: Seq[String]) = { + seq.mkString("\n").split("(?<=[^\\\\]);") + } + + // If `--IMPORT` found, load code from another test case file, then insert them + // into the head in this test. + val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) + val importedCode = importedTestCaseName.flatMap { testCaseName => + listTestCases.find(_.name == testCaseName).map { testCase => + val input = fileToString(new File(testCase.inputFile)) + val (_, code) = splitCommentsAndCodes(input) + code + } + }.flatten + + val allCode = importedCode ++ code + val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { + // Although the loop is heavy, only used for bracketed comments test. + val queries = new ArrayBuffer[String] + val otherCodes = new ArrayBuffer[String] + var tempStr = "" + var start = false + for (c <- allCode) { + if (c.trim.startsWith("--QUERY-DELIMITER-START")) { + start = true + queries ++= splitWithSemicolon(otherCodes.toSeq) + otherCodes.clear() + } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { + start = false + queries += s"\n${tempStr.stripSuffix(";")}" + tempStr = "" + } else if (start) { + tempStr += s"\n$c" + } else { + otherCodes += c + } + } + if (otherCodes.nonEmpty) { + queries ++= splitWithSemicolon(otherCodes.toSeq) + } + queries.toSeq + } else { + splitWithSemicolon(allCode.toImmutableArraySeq).toSeq + } + + // List of SQL queries to run + tempQueries.map(_.trim).filter(_ != "") + // Fix misplacement when comment is at the end of the query. + .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") + } + + protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = { + val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) + settingLines.flatMap(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }) + } + + protected def getSparkConfigDimensions(comments: Array[String]): Seq[Seq[(String, String)]] = { + // A config dimension has multiple config sets, and a config set has multiple configs. + // - config dim: Seq[Seq[(String, String)]] + // - config set: Seq[(String, String)] + // - config: (String, String)) + // We need to do cartesian product for all the config dimensions, to get a list of + // config sets, and run the query once for each config set. 
+ val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) + val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => + lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }.toSeq).toSeq + } + + configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => + dim.flatMap { configSet => res.map(_ ++ configSet) } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index f8a405094c43a..6eb48b13d4078 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -21,8 +21,6 @@ import java.io.File import java.net.URI import java.util.Locale -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.parser.ParseException @@ -171,151 +169,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper // Create all the test cases. listTestCases.foreach(createScalaTestCase) - /** A test case. */ - protected trait TestCase { - val name: String - val inputFile: String - val resultFile: String - def asAnalyzerTest(newName: String, newResultFile: String): TestCase - } - - /** - * traits that indicate UDF or PgSQL to trigger the code path specific to each. For instance, - * PgSQL tests require to register some UDF functions. - */ - protected trait PgSQLTest - - /** Trait that indicates ANSI-related tests with the ANSI mode enabled. */ - protected trait AnsiTest - - /** Trait that indicates an analyzer test that shows the analyzed plan string as output. */ - protected trait AnalyzerTest extends TestCase { - override def asAnalyzerTest(newName: String, newResultFile: String): AnalyzerTest = this - } - - /** Trait that indicates the default timestamp type is TimestampNTZType. */ - protected trait TimestampNTZTest - - /** Trait that indicates CTE test cases need their create view versions */ - protected trait CTETest - - protected trait UDFTest { - val udf: TestUDF - } - - protected trait UDTFSetTest { - val udtfSet: TestUDTFSet - } - - /** A regular test case. */ - protected case class RegularTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - RegularAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** An ANSI-related test case. */ - protected case class AnsiTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - AnsiAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** An analyzer test that shows the analyzed plan string as output. */ - protected case class AnalyzerTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with AnalyzerTest - - /** A PostgreSQL test case. */ - protected case class PgSQLTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - PgSQLAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** A UDF test case. 
*/ - protected case class UDFTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDFAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - protected case class UDTFSetTestCase( - name: String, - inputFile: String, - resultFile: String, - udtfSet: TestUDTFSet) extends TestCase with UDTFSetTest { - - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDTFSetAnalyzerTestCase(newName, inputFile, newResultFile, udtfSet) - } - - /** A UDAF test case. */ - protected case class UDAFTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDAFAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - /** A UDF PostgreSQL test case. */ - protected case class UDFPgSQLTestCase( - name: String, - inputFile: String, - resultFile: String, - udf: TestUDF) extends TestCase with UDFTest with PgSQLTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - UDFPgSQLAnalyzerTestCase(newName, inputFile, newResultFile, udf) - } - - /** An date time test case with default timestamp as TimestampNTZType */ - protected case class TimestampNTZTestCase( - name: String, inputFile: String, resultFile: String) extends TestCase with TimestampNTZTest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - TimestampNTZAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** A CTE test case with special handling */ - protected case class CTETestCase(name: String, inputFile: String, resultFile: String) - extends TestCase - with CTETest { - override def asAnalyzerTest(newName: String, newResultFile: String): TestCase = - CTEAnalyzerTestCase(newName, inputFile, newResultFile) - } - - /** These are versions of the above test cases, but only exercising analysis. 
*/ - protected case class RegularAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest - protected case class AnsiAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with AnsiTest - protected case class PgSQLAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with PgSQLTest - protected case class UDFAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest - protected case class UDTFSetAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udtfSet: TestUDTFSet) - extends AnalyzerTest with UDTFSetTest - protected case class UDAFAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest - protected case class UDFPgSQLAnalyzerTestCase( - name: String, inputFile: String, resultFile: String, udf: TestUDF) - extends AnalyzerTest with UDFTest with PgSQLTest - protected case class TimestampNTZAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with TimestampNTZTest - protected case class CTEAnalyzerTestCase( - name: String, inputFile: String, resultFile: String) - extends AnalyzerTest with CTETest - protected def createScalaTestCase(testCase: TestCase): Unit = { if (ignoreList.exists(t => testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { @@ -349,91 +202,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } - protected def splitCommentsAndCodes(input: String) = - input.split("\n").partition { line => - val newLine = line.trim - newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") - } - - protected def getQueries(code: Array[String], comments: Array[String]) = { - def splitWithSemicolon(seq: Seq[String]) = { - seq.mkString("\n").split("(?<=[^\\\\]);") - } - - // If `--IMPORT` found, load code from another test case file, then insert them - // into the head in this test. - val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) - val importedCode = importedTestCaseName.flatMap { testCaseName => - listTestCases.find(_.name == testCaseName).map { testCase => - val input = fileToString(new File(testCase.inputFile)) - val (_, code) = splitCommentsAndCodes(input) - code - } - }.flatten - - val allCode = importedCode ++ code - val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { - // Although the loop is heavy, only used for bracketed comments test. - val queries = new ArrayBuffer[String] - val otherCodes = new ArrayBuffer[String] - var tempStr = "" - var start = false - for (c <- allCode) { - if (c.trim.startsWith("--QUERY-DELIMITER-START")) { - start = true - queries ++= splitWithSemicolon(otherCodes.toSeq) - otherCodes.clear() - } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { - start = false - queries += s"\n${tempStr.stripSuffix(";")}" - tempStr = "" - } else if (start) { - tempStr += s"\n$c" - } else { - otherCodes += c - } - } - if (otherCodes.nonEmpty) { - queries ++= splitWithSemicolon(otherCodes.toSeq) - } - queries.toSeq - } else { - splitWithSemicolon(allCode.toImmutableArraySeq).toSeq - } - - // List of SQL queries to run - tempQueries.map(_.trim).filter(_ != "") - // Fix misplacement when comment is at the end of the query. 
- .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") - } - - protected def getSparkSettings(comments: Array[String]): Array[(String, String)] = { - val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) - settingLines.flatMap(_.split(",").map { kv => - val (conf, value) = kv.span(_ != '=') - conf.trim -> value.substring(1).trim - }) - } - - protected def getSparkConfigDimensions(comments: Array[String]): Seq[Seq[(String, String)]] = { - // A config dimension has multiple config sets, and a config set has multiple configs. - // - config dim: Seq[Seq[(String, String)]] - // - config set: Seq[(String, String)] - // - config: (String, String)) - // We need to do cartesian product for all the config dimensions, to get a list of - // config sets, and run the query once for each config set. - val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) - val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).view.mapValues { lines => - lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => - val (conf, value) = kv.span(_ != '=') - conf.trim -> value.substring(1).trim - }.toSeq).toSeq - } - - configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => - dim.flatMap { configSet => res.map(_ ++ configSet) } - } - } protected def runQueriesWithSparkConfigDimensions( queries: Seq[String], @@ -703,15 +471,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper }.sortBy(_.name) } - /** Returns all the files (not directories) in a directory, recursively. */ - protected def listFilesRecursively(path: File): Seq[File] = { - val (dirs, files) = path.listFiles().partition(_.isDirectory) - // Filter out test files with invalid extensions such as temp files created - // by vi (.swp), Mac (.DS_Store) etc. - val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions)) - (filteredFiles ++ dirs.flatMap(listFilesRecursively)).toImmutableArraySeq - } - /** Load built-in test tables into the SparkSession. */ protected def createTestTables(session: SparkSession): Unit = { import session.implicits._ @@ -921,56 +680,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper strippedPythonErrors.replaceAll("\\s+$", "") } - /** A single SQL query's output. */ - trait QueryTestOutput { - def sql: String - def schema: Option[String] - def output: String - def numSegments: Int - } - - /** A single SQL query's execution output. */ - case class ExecutionOutput( - sql: String, - schema: Option[String], - output: String) extends QueryTestOutput { - override def toString: String = { - // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - val schemaString = if (schema.nonEmpty) { - s"-- !query schema\n" + schema.get + "\n" - } else { - "" - } - s"-- !query\n" + - sql + "\n" + - schemaString + - s"-- !query output\n" + - output - } - - /** - * Some suites extending this one, such as [[CrossDbmsQueryTestSuite]], only test for - * correctness and do not care about the schema. The logic here allows passing the check for - * number of segments in the golden files when testing. - */ - override def numSegments: Int = if (schema.isDefined) { 3 } else { 2 } - } - - /** A single SQL query's analysis results. 
*/ - case class AnalyzerOutput( - sql: String, - schema: Option[String], - output: String) extends QueryTestOutput { - override def toString: String = { - // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - s"-- !query\n" + - sql + "\n" + - s"-- !query analysis\n" + - output - } - override def numSegments: Int = 2 - } - test("test splitCommentsAndCodes") { { // Correctly split comments and codes diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala index bb0aae3d8ab03..e69de29bb2d1d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -import java.io.File - -// scalastyle:off line.size.limit -/** - * IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING, READ THIS: - * Your new SQL test is automatically opted into this suite. It is likely failing because it is not - * compatible with the default DBMS (currently postgres). You have two options: - * 1. (Recommended) Modify your queries to be compatible with both systems, and generate golden - * files with the instructions below. This is recommended because it will run your queries - * against postgres, providing higher correctness testing confidence, and you won't have to - * manually verify the golden files generated with your test. - * 2. Add this line to your .sql file: --ONLY_IF spark - * - * To re-generate golden files for entire suite, either run: - * 1. (Recommended) You need Docker on your machine. Install Docker and run the following command: - * {{{ - * bash ./bin/generate_golden_files_with_postgres.sh - * }}} - * 2. - * a. You need to have a Postgres server up before running this test. - * i. Install PostgreSQL. On a mac: `brew install postgresql@13` - * ii. After installing PostgreSQL, start the database server, then create a role named pg with - * superuser permissions: `createuser -s postgres` OR `psql> CREATE role postgres superuser` - * b. 
Run the following command: - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * - * To indicate that the SQL file is not eligible for testing with this suite, add the following - * comment into the input file: - * {{{ - * --ONLY_IF spark - * }}} - * - * And then, to run the entire test suite, with the default cross DBMS: - * {{{ - * build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - * }}} - * - * To re-generate golden file for a single test, e.g. `describe.sql`, run: - * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite -- -z describe.sql" - * }}} - */ -class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - - protected def crossDbmsToGenerateGoldenFiles: String = CrossDbmsQueryTestSuite.POSTGRES - - // Reduce scope to subquery tests for now. That is where most correctness issues are. - override protected def customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - - override protected def getConnection: Option[String] => JdbcSQLQueryTestRunner = - (connection_url: Option[String]) => JdbcSQLQueryTestRunner(PostgresConnection(connection_url)) - - override protected def preprocessingCommands = Seq( - // Custom function `double` to imitate Spark's function, so that more tests are covered. - """ - |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision - | AS 'select CAST($1 AS double precision);' - | LANGUAGE SQL - | IMMUTABLE - | RETURNS NULL ON NULL INPUT; - |""".stripMargin - ) -} From 5756c8e2798b787583d2dd529b75fafdb639d6f6 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 14:25:07 -0800 Subject: [PATCH 60/63] Major refactoring in progress.. 
--- .../querytest/CrossDbmsQueryTestHelper.scala | 26 +++ .../PostgreSQLQueryTestSuite.scala | 124 +++++++--- .../apache/spark/sql/SQLQueryTestHelper.scala | 24 +- .../apache/spark/sql/SQLQueryTestSuite.scala | 18 +- .../crossdbms/CrossDbmsQueryTestSuite.scala | 216 ------------------ .../spark/sql/crossdbms/JdbcConnection.scala | 124 ---------- .../crossdbms/PostgreSQLQueryTestSuite.scala | 0 .../sql/crossdbms/SQLQueryTestRunner.scala | 48 ---- .../sql/crossdbms/bin/docker-compose.yml | 31 --- .../generate_golden_files_with_postgres.sh | 35 --- 10 files changed, 141 insertions(+), 505 deletions(-) create mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala rename connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/{ => querytest}/PostgreSQLQueryTestSuite.scala (52%) delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala new file mode 100644 index 0000000000000..cec304093b3f9 --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.jdbc + +trait CrossDbmsQueryTestHelper { + + val DATABASE_NAME: String + + protected final val POSTGRES = "postgres" + // Argument in input files to indicate that the sql file is restricted to certain systems. 
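+ // e.g. a .sql file declaring "--ONLY_IF spark" is skipped by suites whose DATABASE_NAME does not match.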
+ protected final val ONLY_IF_ARG = "--ONLY_IF " +} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala similarity index 52% rename from connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala rename to connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index 436639e70bcd1..808f3e58fb64c 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -17,15 +17,22 @@ package org.apache.spark.sql.jdbc import java.io.File -import java.sql.Connection +import java.sql.{Connection, ResultSet} +import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal +import org.apache.spark.sql.Row import org.apache.spark.sql.SQLQueryTestHelper +import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.tags.DockerTest @DockerTest -class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper { +class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper + with CrossDbmsQueryTestHelper { + + val DATABASE_NAME = POSTGRES + override val db = new DatabaseOnDocker { override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:15.1-alpine") override val env = Map( @@ -38,7 +45,18 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" } - override def dataPreparation(conn: Connection): Unit = {} + override def dataPreparation(conn: Connection): Unit = { + conn.prepareStatement( + // Custom function `double` to imitate Spark's function, so that more tests are covered. + """ + |CREATE OR REPLACE FUNCTION double(numeric_value numeric) RETURNS double precision + | AS 'select CAST($1 AS double precision);' + | LANGUAGE SQL + | IMMUTABLE + | RETURNS NULL ON NULL INPUT; + |""".stripMargin + ) + } protected val baseResourcePath = { // We use a path based on Spark home for 2 reasons: @@ -61,11 +79,6 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT def createScalaTestCase(testCase: TestCase): Unit = { testCase match { - case _ if ignoreList.exists(t => - testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT))) => - ignore(s"${testCase.name} is in the ignore list.") { - log.debug(s"${testCase.name} is in the ignore list.") - } case _: RegularTestCase => // Create a test case to run this case. 
test(testCase.name) { @@ -81,48 +94,97 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { val input = fileToString(new File(testCase.inputFile)) val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments) - val settings = getSparkSettings(comments) + val queries = getQueries(code, comments, listTestCases) - val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) - .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + val dbmsConfig = comments.filter(_.startsWith(ONLY_IF_ARG)) + .map(_.substring(ONLY_IF_ARG.length)) // If `--ONLY_IF` is found, check if the DBMS being used is allowed. - if (dbmsConfig.nonEmpty && !dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { + if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) { log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + - s"not eligible with $crossDbmsToGenerateGoldenFiles.") + s"not eligible with $DATABASE_NAME.") } else { - runQueries(queries, testCase, settings.toImmutableArraySeq) + runQueriesAndCheckAgainstGoldenFile(queries, testCase) } } - - protected def runQueries( - queries: Seq[String], - testCase: TestCase, - sparkConfigSet: Seq[(String, String)]): Unit = { + protected def runQueriesAndCheckAgainstGoldenFile( + queries: Seq[String], testCase: TestCase): Unit = { val localSparkSession = spark.newSession() - var runner: Option[SQLQueryTestRunner] = None + val conn = getConnection() + val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) val outputs: Seq[QueryTestOutput] = queries.map { sql => val output = { - val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } - setOperations.foreach(localSparkSession.sql) - val (_, output) = handleExceptions( - getNormalizedQueryExecutionResult(localSparkSession, sql)) - output + try { + val sparkDf = localSparkSession.sql(sql) + + val isResultSet = stmt.execute(sql) + val rows = ArrayBuffer[Row]() + if (isResultSet) { + val rs = stmt.getResultSet + val metadata = rs.getMetaData + while (rs.next()) { + val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { + val value = rs.getObject(i) + if (value == null) { + "NULL" + } else { + value + } + })) + rows.append(row) + } + } + val output = rows.map(_.mkString("\t")).toSeq + // Use Spark analyzed plan to check if the query result is already semantically sorted. + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + output + } else { + // Sort the answer manually if it isn't sorted. + output.sorted + } + } catch { + case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) + } } + ExecutionOutput( sql = sql, // Don't care about the schema for this test. Only care about correctness. schema = None, - output = normalizeTestResults(output.mkString("\n"))) + output = output.mkString("\n")) + } + conn.close() + + // Read back the golden files. 
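+ // The golden file interleaves "-- !query" markers with the SQL text, a schema segment and the
+ // expected output, so segments(curSegment + 1) holds the SQL and segments(curSegment + 3) the output.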
+ var curSegment = 0 + val expectedOutputs: Seq[QueryTestOutput] = { + val goldenOutput = fileToString(new File(testCase.resultFile)) + val segments = goldenOutput.split("-- !query.*\n") + outputs.map { output => + val result = ExecutionOutput( + segments(curSegment + 1).trim, // SQL + None, // Schema + normalizeTestResults(segments(curSegment + 3))) // Output + curSegment += output.numSegments + result + } } - if (runner.isDefined) { - runner.foreach(_.cleanUp()) - runner = None + + // Compare results. + assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { + outputs.size } - readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) + outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => + assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { + output.sql + } + assertResult(expected.output, s"Result did not match" + + s" for query #$i\n${expected.sql}") { + output.output + } + } } listTestCases.foreach(createScalaTestCase) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index d82d285ed17aa..38e004e0b7209 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -368,13 +368,15 @@ trait SQLQueryTestHelper extends Logging { val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions)) (filteredFiles ++ dirs.flatMap(listFilesRecursively)).toImmutableArraySeq } - protected def splitCommentsAndCodes(input: String) = + + protected def splitCommentsAndCodes(input: String): (Array[String], Array[String]) = input.split("\n").partition { line => val newLine = line.trim newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") } - protected def getQueries(code: Array[String], comments: Array[String]) = { + protected def getQueries(code: Array[String], comments: Array[String], + allTestCases: Seq[TestCase]): Seq[String] = { def splitWithSemicolon(seq: Seq[String]) = { seq.mkString("\n").split("(?<=[^\\\\]);") } @@ -383,7 +385,7 @@ trait SQLQueryTestHelper extends Logging { // into the head in this test. val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) val importedCode = importedTestCaseName.flatMap { testCaseName => - listTestCases.find(_.name == testCaseName).map { testCase => + allTestCases.find(_.name == testCaseName).map { testCase => val input = fileToString(new File(testCase.inputFile)) val (_, code) = splitCommentsAndCodes(input) code @@ -453,4 +455,20 @@ trait SQLQueryTestHelper extends Logging { dim.flatMap { configSet => res.map(_ ++ configSet) } } } + + /** This is a helper function to normalize non-deterministic Python error stacktraces. 
*/ + def normalizeTestResults(output: String): String = { + val strippedPythonErrors: String = { + var traceback = false + output.split("\n").filter { line: String => + if (line == "Traceback (most recent call last):") { + traceback = true + } else if (!line.startsWith(" ")) { + traceback = false + } + !traceback + }.mkString("\n") + } + strippedPythonErrors.replaceAll("\\s+$", "") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 6eb48b13d4078..0edf06e1d7424 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -226,7 +226,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { val input = fileToString(new File(testCase.inputFile)) val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments) + val queries = getQueries(code, comments, listTestCases) val settings = getSparkSettings(comments) if (regenerateGoldenFiles) { @@ -664,22 +664,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } - /** This is a helper function to normalize non-deterministic Python error stacktraces. */ - def normalizeTestResults(output: String): String = { - val strippedPythonErrors: String = { - var traceback = false - output.split("\n").filter { line: String => - if (line == "Traceback (most recent call last):") { - traceback = true - } else if (!line.startsWith(" ")) { - traceback = false - } - !traceback - }.mkString("\n") - } - strippedPythonErrors.replaceAll("\\s+$", "") - } - test("test splitCommentsAndCodes") { { // Correctly split comments and codes diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala deleted file mode 100644 index c874e2f59db1b..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/CrossDbmsQueryTestSuite.scala +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -import java.io.File -import java.util.Locale - -import scala.util.control.NonFatal - -import org.apache.spark.internal.Logging -import org.apache.spark.sql.SQLQueryTestSuite -import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} -import org.apache.spark.util.ArrayImplicits.SparkArrayOps - -/** - * See SQLQueryTestSuite.scala for more information. 
This suite builds off of that to allow us - * to generate golden files with other DBMS to perform cross-checking for correctness. It generates - * another set of golden files. Note that this is not currently run on all SQL input files by - * default because there is incompatibility between SQL dialects for Spark and the other DBMS. - * - * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for - * which is eligible for the SQL file. These strings are defined in the companion object. For - * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is - * incompatible, add the following comment into the input file: - * --ONLY_IF spark - */ -abstract class CrossDbmsQueryTestSuite extends SQLQueryTestSuite with Logging { - - /** - * A String representing the database system being used. A list of these is defined in the - * companion object. - */ - protected def crossDbmsToGenerateGoldenFiles: String - assert(CrossDbmsQueryTestSuite.SUPPORTED_DBMS.contains(crossDbmsToGenerateGoldenFiles)) - - /** - * A custom input file path where SQL tests are located, if desired. - */ - protected def customInputFilePath: String = inputFilePath - - /** - * A function taking in an optional custom connection URL, and returns a [[SQLQueryTestRunner]] - * specific to the database system. - */ - protected def getConnection: Option[String] => SQLQueryTestRunner - - /** - * Commands to be run on the DBMS before running queries to generate golden files, such as - * defining custom functions. - */ - protected def preprocessingCommands: Seq[String] - - private def customConnectionUrl: String = System.getenv( - CrossDbmsQueryTestSuite.REF_DBMS_CONNECTION_URL) - - override protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { - val input = fileToString(new File(testCase.inputFile)) - val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments) - val settings = getSparkSettings(comments) - - val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) - .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) - // If `--ONLY_IF` is found, check if the DBMS being used is allowed. - if (dbmsConfig.nonEmpty && !dbmsConfig.contains(crossDbmsToGenerateGoldenFiles)) { - log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + - s"not eligible with $crossDbmsToGenerateGoldenFiles.") - } else if (regenerateGoldenFiles) { - runQueries(queries, testCase, settings.toImmutableArraySeq) - } else { - val configSets = getSparkConfigDimensions(comments) - runQueriesWithSparkConfigDimensions( - queries, testCase, settings, configSets) - } - } - - override protected def runQueries( - queries: Seq[String], - testCase: TestCase, - sparkConfigSet: Seq[(String, String)]): Unit = { - val localSparkSession = spark.newSession() - var runner: Option[SQLQueryTestRunner] = None - - val outputs: Seq[QueryTestOutput] = queries.map { sql => - val output = { - // Use the runner when generating golden files, and Spark when running the test against - // the already generated golden files. - if (regenerateGoldenFiles) { - val connectionUrl = Option(customConnectionUrl).filter(_.nonEmpty) - runner = runner.orElse(Some(getConnection(connectionUrl))) - preprocessingCommands.foreach(command => runner.foreach(_.runQuery(command))) - - try { - // Either of the below two lines can error. 
If we go into the catch statement, then it - // is likely one of the following scenarios: - // 1. Error thrown in Spark analysis: - // a. The query is incompatible with either Spark and the other DBMS. - // b. There is an issue with the schema in Spark. - // c. The error is expected - it errors on both systems, but only the Spark error - // will be printed to the golden file, because it errors first. - // 2. Error thrown in other DBMS execution: - // a. The query is either incompatible between Spark and the other DBMS - // b. Some test table/view is not created on the other DBMS. - val sparkDf = localSparkSession.sql(sql) - val output = runner.map(_.runQuery(sql)).get - // Use Spark analyzed plan to check if the query result is already semantically sorted. - if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { - output - } else { - // Sort the answer manually if it isn't sorted. - output.sorted - } - } catch { - case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) - } - } else { - // Use Spark. - // Execute the list of set operation in order to add the desired configs - val setOperations = sparkConfigSet.map { case (key, value) => s"set $key=$value" } - setOperations.foreach(localSparkSession.sql) - val (_, output) = handleExceptions( - getNormalizedQueryExecutionResult(localSparkSession, sql)) - output - } - } - ExecutionOutput( - sql = sql, - // Don't care about the schema for this test. Only care about correctness. - schema = None, - output = normalizeTestResults(output.mkString("\n"))) - } - if (runner.isDefined) { - runner.foreach(_.cleanUp()) - runner = None - } - - if (regenerateGoldenFiles) { - val goldenOutput = { - s"-- Automatically generated by ${getClass.getSimpleName} with " + - s"$crossDbmsToGenerateGoldenFiles\n" + outputs.mkString("\n\n\n") + "\n" - } - val resultFile = new File(testCase.resultFile) - val parent = resultFile.getParentFile - if (!parent.exists()) { - assert(parent.mkdirs(), "Could not create directory: " + parent) - } - stringToFile(resultFile, goldenOutput) - } - - readGoldenFileAndCompareResults(testCase.resultFile, outputs, ExecutionOutput) - } - - override def createScalaTestCase(testCase: TestCase): Unit = { - testCase match { - case _ if ignoreList.exists(t => - testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT))) => - ignore(s"${testCase.name} is in the ignore list.") { - log.debug(s"${testCase.name} is in the ignore list.") - } - case _: RegularTestCase => - // Create a test case to run this case. 
- test(testCase.name) { - runSqlTestCase(testCase, listTestCases) - } - case _ => - ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { - log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") - } - } - } - - override protected def resultFileForInputFile(file: File): String = { - val goldenFilePath = new File(baseResourcePath, - CrossDbmsQueryTestSuite.RESULT_DIR).getAbsolutePath - file.getAbsolutePath.replace(customInputFilePath, goldenFilePath) + ".out" - } - - override lazy val listTestCases: Seq[TestCase] = { - listFilesRecursively(new File(customInputFilePath)).flatMap { file => - val resultFile = resultFileForInputFile(file) - val absPath = file.getAbsolutePath - val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) - RegularTestCase(testCaseName, absPath, resultFile) :: Nil - }.sortBy(_.name) - } -} - -object CrossDbmsQueryTestSuite { - - final val POSTGRES = "postgres" - final val SUPPORTED_DBMS = Seq(POSTGRES) - - // Result directory for golden files produced by CrossDBMS. This means that all DBMS will share - // the same golden files. This is only possible if we make the queries simple enough that they are - // compatible with all DBMS. - private final val RESULT_DIR = "crossdbms-results" - // System argument to indicate a custom connection URL to the reference DBMS. - private final val REF_DBMS_CONNECTION_URL = "REF_DBMS_CONNECTION_URL" - // Argument in input files to indicate that the sql file is restricted to certain systems. - private final val ONLY_IF_ARG = "--ONLY_IF " -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala deleted file mode 100644 index 653e00d34d562..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/JdbcConnection.scala +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -import java.sql.{DriverManager, ResultSet} -import java.util.Properties - -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry - -/** - * Represents a connection (session) to a database using JDBC. - */ -private[crossdbms] trait JdbcConnection { - - /** - * Executes the given SQL query and returns the result as a sequence of strings. - * @return A Seq[String] representing the output, where each element represents a single row. - */ - def runQuery(query: String): Seq[String] - - /** - * Drops the table with the specified table name. - */ - def dropTable(tableName: String): Unit - - /** - * Creates a table with the specified name and schema. 
- * @param schemaString The schema definition for the table. Note that this may vary depending on - * the database system. - */ - def createTable(tableName: String, schemaString: String): Unit - - /** - * Loads data from the given Spark DataFrame into the table with the specified name. - * @param df The Spark DataFrame containing the data to be loaded. - */ - def loadData(df: DataFrame, tableName: String): Unit - - /** - * Closes the JDBC connection. - */ - def close(): Unit -} - -/** - * Represents a connection (session) to a PostgreSQL database. - */ -private[crossdbms] case class PostgresConnection(connection_url: Option[String] = None) - extends JdbcConnection { - - private final val POSTGRES_DRIVER_CLASS_NAME = "org.postgresql.Driver" - DriverRegistry.register(POSTGRES_DRIVER_CLASS_NAME) - - private final val DEFAULT_HOST = "localhost" - private final val DEFAULT_PORT = "5432" - private final val DEFAULT_USER = "postgres" - private final val DEFAULT_DB = "postgres" - private final val DEFAULT_PASSWORD = "postgres" - private final val DEFAULT_CONNECTION_URL = s"jdbc:postgresql://$DEFAULT_HOST:$DEFAULT_PORT/" + - s"$DEFAULT_DB?user=$DEFAULT_USER&password=$DEFAULT_PASSWORD" - private val url = connection_url.getOrElse(DEFAULT_CONNECTION_URL) - - private val conn = DriverManager.getConnection(url) - private val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) - - def runQuery(query: String): Seq[String] = { - try { - val isResultSet = stmt.execute(query) - val rows = ArrayBuffer[Row]() - if (isResultSet) { - val rs = stmt.getResultSet - val metadata = rs.getMetaData - while (rs.next()) { - val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { - val value = rs.getObject(i) - if (value == null) { "NULL" } else { value } - })) - rows.append(row) - } - } - rows.map(_.mkString("\t")).toSeq - } catch { - case e: Throwable => Seq(e.toString) - } - } - - def dropTable(tableName: String): Unit = { - val st = conn.prepareStatement("DROP TABLE IF EXISTS ?") - st.setString(1, tableName) - st.executeUpdate() - } - - def createTable(tableName: String, schemaString: String): Unit = { - val st = conn.prepareStatement("CREATE TABLE ? (?)") - st.setString(1, tableName) - st.setString(2, schemaString) - st.executeUpdate() - } - - def loadData(df: DataFrame, tableName: String): Unit = { - df.write.option("driver", POSTGRES_DRIVER_CLASS_NAME).mode("append") - .jdbc(url, tableName, new Properties()) - } - - def close(): Unit = if (!conn.isClosed) conn.close() -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/PostgreSQLQueryTestSuite.scala deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala deleted file mode 100644 index d6230cc6b69a1..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/SQLQueryTestRunner.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.crossdbms - -/** - * Trait for classes that can run SQL queries for testing. This is specifically used as a wrapper - * around a connection to a system that can run SQL queries, to run queries from - * [[SQLQueryTestSuite]]. - */ -trait SQLQueryTestRunner { - - /** - * Runs a given query and returns a Seq[String] that represents the query result output. This is a - * Seq where each element represents a single row. - */ - def runQuery(query: String): Seq[String] - - /** - * Perform clean up, such as dropping tables and closing the database connection. - */ - def cleanUp(): Unit -} - -/** - * A runner that takes a JDBC connection and uses it to execute queries. - */ -private[sql] case class JdbcSQLQueryTestRunner(connection: JdbcConnection) - extends SQLQueryTestRunner { - - def runQuery(query: String): Seq[String] = connection.runQuery(query) - - def cleanUp(): Unit = connection.close() -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml deleted file mode 100644 index 787bdda8ecd31..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/docker-compose.yml +++ /dev/null @@ -1,31 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -version: '3' - -services: - # Postgres database service - postgres-db: - image: postgres:latest - container_name: postgres-db - environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: postgres - ports: - - "5432:5432" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh b/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh deleted file mode 100644 index 32e1573b51e6e..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin/generate_golden_files_with_postgres.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Get the root of the runtime repo -GIT_ROOT="$(git rev-parse --show-toplevel)" -echo "Root of repo is at $GIT_ROOT" - -echo "Starting DB container.." -docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" up --build --detach - -echo "Waiting for DB container to be ready.." -# Change if using different container or name. -DOCKER_CONTAINER_NAME="postgres-db" -timeout 10s bash -c "until docker exec $DOCKER_CONTAINER_NAME pg_isready ; do sleep 1 ; done" - -echo "Generating all golden files for postgres.." -SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.crossdbms.PostgreSQLQueryTestSuite" - -echo "Bringing down DB container.." -docker-compose --project-directory "${GIT_ROOT}/sql/core/src/test/scala/org/apache/spark/sql/crossdbms/bin" down --volumes From 81a61f741b64bc88f1bd340d70d2b972dde47118 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 14:49:06 -0800 Subject: [PATCH 61/63] Passing tests --- .../querytest/PostgreSQLQueryTestSuite.scala | 14 +++-- .../exists-subquery/exists-having.sql.out | 55 +++++++++---------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index 808f3e58fb64c..e9ebd62210af5 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -55,7 +55,7 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT | IMMUTABLE | RETURNS NULL ON NULL INPUT; |""".stripMargin - ) + ).executeUpdate() } protected val baseResourcePath = { @@ -162,11 +162,13 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT val goldenOutput = fileToString(new File(testCase.resultFile)) val segments = goldenOutput.split("-- !query.*\n") outputs.map { output => - val result = ExecutionOutput( - segments(curSegment + 1).trim, // SQL - None, // Schema - normalizeTestResults(segments(curSegment + 3))) // Output - curSegment += output.numSegments + val result = + ExecutionOutput( + segments(curSegment + 1).trim, // SQL + None, // Schema + normalizeTestResults(segments(curSegment + 3))) // Output + // Assume that the golden file always has all 3 segments. 
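+ // (SQL text, schema, output), even though only the SQL and the output are compared here.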
+ curSegment += 3 result } } diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out index ae4cf010ffc98..2a84516e90abe 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -1,16 +1,15 @@ -- Automatically generated by SQLQueryTestSuite -- !query -CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (100, "emp 1", date "2005-01-01", 100.00D, 10), - (200, "emp 2", date "2003-01-01", 200.00D, 10), - (300, "emp 3", date "2002-01-01", 300.00D, 20), - (400, "emp 4", date "2005-01-01", 400.00D, 30), - (500, "emp 5", date "2001-01-01", 400.00D, NULL), - (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), - (700, "emp 7", date "2010-01-01", 400.00D, 100), - (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id) +CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (100, 'emp 1', date '2005-01-01', double(100.00), 10), + (200, 'emp 2', date '2003-01-01', double(200.00), 10), + (300, 'emp 3', date '2002-01-01', double(300.00), 20), + (400, 'emp 4', date '2005-01-01', double(400.00), 30), + (500, 'emp 5', date '2001-01-01', double(400.00), NULL), + (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), + (700, 'emp 7', date '2010-01-01', double(400.00), 100), + (800, 'emp 8', date '2016-01-01', double(150.00), 70) -- !query schema struct<> -- !query output @@ -18,14 +17,13 @@ struct<> -- !query -CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES - (10, "dept 1", "CA"), - (20, "dept 2", "NY"), - (30, "dept 3", "TX"), - (40, "dept 4 - unassigned", "OR"), - (50, "dept 5 - unassigned", "NJ"), - (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state) +CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES + (10, 'dept 1', 'CA'), + (20, 'dept 2', 'NY'), + (30, 'dept 3', 'TX'), + (40, 'dept 4 - unassigned', 'OR'), + (50, 'dept 5 - unassigned', 'NJ'), + (70, 'dept 7', 'FL') -- !query schema struct<> -- !query output @@ -33,16 +31,15 @@ struct<> -- !query -CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES - ("emp 1", 10.00D), - ("emp 1", 20.00D), - ("emp 2", 300.00D), - ("emp 2", 100.00D), - ("emp 3", 300.00D), - ("emp 4", 100.00D), - ("emp 5", 1000.00D), - ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt) +CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES + ('emp 1', double(10.00)), + ('emp 1', double(20.00)), + ('emp 2', double(300.00)), + ('emp 2', double(100.00)), + ('emp 3', double(300.00)), + ('emp 4', double(100.00)), + ('emp 5', double(1000.00)), + ('emp 6 - no dept', double(500.00)) -- !query schema struct<> -- !query output From cf7b0449f43a664b192f5084f28cc5d6624c3b1f Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 14:55:42 -0800 Subject: [PATCH 62/63] Add documnetation and delete previous files --- .../querytest/PostgreSQLQueryTestSuite.scala | 20 +++ .../exists-subquery/exists-having.sql.out | 131 ------------------ .../sql-tests/inputs/subquery/README.md | 10 -- 3 files changed, 20 insertions(+), 141 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out delete mode 100644 
sql/core/src/test/resources/sql-tests/inputs/subquery/README.md diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index e9ebd62210af5..ec622b8deb79f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -27,6 +27,26 @@ import org.apache.spark.sql.SQLQueryTestHelper import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.tags.DockerTest +/** + * READ THIS IF YOU ADDED A NEW SQL TEST AND THIS SUITE IS FAILING: + * Your new SQL test is automatically opted into this suite. It is likely failing because it is not + * compatible with the default Postgres. You have two options: + * 1. (Recommended) Modify your queries to be compatible with both systems. This is recommended + * because it will run your queries against postgres, providing higher correctness testing + * confidence, and you won't have to manually verify the golden files generated with your test. + * 2. Add this line to your .sql file: --ONLY_IF spark + * + * This suite builds off of that to allow us to run Postgres against the SQL test golden files (on + * which SQLQueryTestSuite generates and tests against) to perform cross-checking for correctness. + * Note that this is not currently run on all SQL input files by default because there is + * incompatibility between SQL dialects for Spark and the other DBMS. + * + * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for + * which is eligible for the SQL file. These strings are defined in the companion object. 
For + * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is + * incompatible, add the following comment into the input file: + * --ONLY_IF spark + */ @DockerTest class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper with CrossDbmsQueryTestHelper { diff --git a/sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out deleted file mode 100644 index 95229f39865c0..0000000000000 --- a/sql/core/src/test/resources/sql-tests/crossdbms-results/exists-subquery/exists-having.sql.out +++ /dev/null @@ -1,131 +0,0 @@ --- Automatically generated by PostgreSQLQueryTestSuite with postgres --- !query -CREATE TEMPORARY VIEW EMP(id, emp_name, hiredate, salary, dept_id) AS VALUES - (100, 'emp 1', date '2005-01-01', double(100.00), 10), - (100, 'emp 1', date '2005-01-01', double(100.00), 10), - (200, 'emp 2', date '2003-01-01', double(200.00), 10), - (300, 'emp 3', date '2002-01-01', double(300.00), 20), - (400, 'emp 4', date '2005-01-01', double(400.00), 30), - (500, 'emp 5', date '2001-01-01', double(400.00), NULL), - (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), - (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70) --- !query output - - - --- !query -CREATE TEMPORARY VIEW DEPT(dept_id, dept_name, state) AS VALUES - (10, 'dept 1', 'CA'), - (20, 'dept 2', 'NY'), - (30, 'dept 3', 'TX'), - (40, 'dept 4 - unassigned', 'OR'), - (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL') --- !query output - - - --- !query -CREATE TEMPORARY VIEW BONUS(emp_name, bonus_amt) AS VALUES - ('emp 1', double(10.00)), - ('emp 1', double(20.00)), - ('emp 2', double(300.00)), - ('emp 2', double(100.00)), - ('emp 3', double(300.00)), - ('emp 4', double(100.00)), - ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)) --- !query output - - - --- !query -SELECT dept_id, count(*) -FROM emp -GROUP BY dept_id -HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < min(emp.salary)) --- !query output -10 3 -100 2 -20 1 -30 1 -70 1 -NULL 1 - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < Min(emp.salary))) --- !query output -10 dept 1 CA -20 dept 2 NY -30 dept 3 TX -40 dept 4 - unassigned OR -50 dept 5 - unassigned NJ -70 dept 7 FL - - --- !query -SELECT dept_id, - Max(salary) -FROM emp gp -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp p - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt < Min(p.salary))) -GROUP BY gp.dept_id --- !query output -10 200.0 -100 400.0 -20 300.0 -30 400.0 -70 150.0 -NULL 400.0 - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - Count(*) - FROM emp - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE bonus_amt > Min(emp.salary))) --- !query output -10 dept 1 CA -20 dept 2 NY -30 dept 3 TX -40 dept 4 - unassigned OR -50 dept 5 - unassigned NJ -70 dept 7 FL - - --- !query -SELECT * -FROM dept -WHERE EXISTS (SELECT dept_id, - count(emp.dept_id) - FROM emp - WHERE dept.dept_id = dept_id - GROUP BY dept_id - HAVING EXISTS (SELECT 1 - FROM bonus - WHERE ( bonus_amt > min(emp.salary) - AND count(emp.dept_id) > 1 ))) --- !query output -10 dept 1 CA diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md deleted file mode 100644 index a42cbed4003af..0000000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/README.md +++ /dev/null @@ -1,10 +0,0 @@ - -## Note on PostgreSQLQueryTestSuite - -The SQL files in this directory are automatically opted into `PostgreSQLQueryTestSuite`, which -generates golden files with Postgres and runs Spark against these golden files. If you added a new -SQL test file, please generate the golden files with `PostgreSQLQueryTestSuite`. It is likely to -fail because of incompatibility between the SQL dialects of Spark and Postgres. To have queries that -are compatible for both systems, keep your SQL queries simple by using basic types like INTs and -strings only, if possible, and using functions that are present in both Spark and Postgres. Refer -to the header comment for `PostgreSQLQueryTestSuite` for more information. \ No newline at end of file From a32a693fd6b92916fc1886cefba5e7ae15e29666 Mon Sep 17 00:00:00 2001 From: Andy Lam Date: Wed, 3 Jan 2024 15:13:27 -0800 Subject: [PATCH 63/63] Move functions from PostgreSQLQueryTestSuite -> CrossDbmsQueryTestSuite --- .../querytest/CrossDbmsQueryTestHelper.scala | 26 --- .../querytest/CrossDbmsQueryTestSuite.scala | 182 ++++++++++++++++++ .../querytest/PostgreSQLQueryTestSuite.scala | 162 ++-------------- 3 files changed, 193 insertions(+), 177 deletions(-) delete mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala create mode 100644 connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala deleted file mode 100644 index cec304093b3f9..0000000000000 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestHelper.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.jdbc - -trait CrossDbmsQueryTestHelper { - - val DATABASE_NAME: String - - protected final val POSTGRES = "postgres" - // Argument in input files to indicate that the sql file is restricted to certain systems. 
- protected final val ONLY_IF_ARG = "--ONLY_IF " -} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala new file mode 100644 index 0000000000000..dfaa334e1c6ca --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/CrossDbmsQueryTestSuite.scala @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.jdbc + +import java.io.File +import java.sql.ResultSet + +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.spark.sql.Row +import org.apache.spark.sql.SQLQueryTestHelper +import org.apache.spark.sql.catalyst.util.fileToString + +/** + * This suite builds off of that to allow us to run other DBMS against the SQL test golden files (on + * which SQLQueryTestSuite generates and tests against) to perform cross-checking for correctness. + * Note that this is not currently run on all SQL input files by default because there is + * incompatibility between SQL dialects for Spark and the other DBMS. + * + * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for + * which is eligible for the SQL file. These strings are defined in the companion object. For + * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is + * incompatible, add the following comment into the input file: + * --ONLY_IF spark + */ +trait CrossDbmsQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper { + + val DATABASE_NAME: String + + protected val baseResourcePath = { + // We use a path based on Spark home for 2 reasons: + // 1. Maven can't get correct resource directory when resources in other jars. + // 2. We test subclasses in the hive-thriftserver module. 
+ getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile + } + protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath + protected val customInputFilePath: String + protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath + + protected def listTestCases: Seq[TestCase] = { + listFilesRecursively(new File(customInputFilePath)).flatMap { file => + val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" + val absPath = file.getAbsolutePath + val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) + RegularTestCase(testCaseName, absPath, resultFile) :: Nil + }.sortBy(_.name) + } + + def createScalaTestCase(testCase: TestCase): Unit = { + testCase match { + case _: RegularTestCase => + // Create a test case to run this case. + test(testCase.name) { + runSqlTestCase(testCase, listTestCases) + } + case _ => + ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { + log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") + } + } + } + + protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { + val input = fileToString(new File(testCase.inputFile)) + val (comments, code) = splitCommentsAndCodes(input) + val queries = getQueries(code, comments, listTestCases) + + val dbmsConfig = comments.filter(_.startsWith(CrossDbmsQueryTestSuite.ONLY_IF_ARG)) + .map(_.substring(CrossDbmsQueryTestSuite.ONLY_IF_ARG.length)) + // If `--ONLY_IF` is found, check if the DBMS being used is allowed. + if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) { + log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + + s"not eligible with $DATABASE_NAME.") + } else { + runQueriesAndCheckAgainstGoldenFile(queries, testCase) + } + } + + protected def runQueriesAndCheckAgainstGoldenFile( + queries: Seq[String], testCase: TestCase): Unit = { + val localSparkSession = spark.newSession() + val conn = getConnection() + val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) + + val outputs: Seq[QueryTestOutput] = queries.map { sql => + val output = { + try { + val sparkDf = localSparkSession.sql(sql) + val isResultSet = stmt.execute(sql) + val rows = ArrayBuffer[Row]() + if (isResultSet) { + val rs = stmt.getResultSet + val metadata = rs.getMetaData + while (rs.next()) { + val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { + val value = rs.getObject(i) + if (value == null) { + "NULL" + } else { + value + } + })) + rows.append(row) + } + } + val output = rows.map(_.mkString("\t")).toSeq + // Use Spark analyzed plan to check if the query result is already semantically sorted. + if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { + output + } else { + // Sort the answer manually if it isn't sorted. + output.sorted + } + } catch { + case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) + } + } + + ExecutionOutput( + sql = sql, + // Don't care about the schema for this test. Only care about correctness. + schema = None, + output = output.mkString("\n")) + } + conn.close() + + // Read back the golden files. 
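+ // Each query in the golden file spans three segments (SQL, schema, output); the schema segment
+ // is skipped because this suite only checks result correctness.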
+ var curSegment = 0 + val expectedOutputs: Seq[QueryTestOutput] = { + val goldenOutput = fileToString(new File(testCase.resultFile)) + val segments = goldenOutput.split("-- !query.*\n") + outputs.map { output => + val result = + ExecutionOutput( + segments(curSegment + 1).trim, // SQL + None, // Schema + normalizeTestResults(segments(curSegment + 3))) // Output + // Assume that the golden file always has all 3 segments. + curSegment += 3 + result + } + } + + // Compare results. + assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { + outputs.size + } + + outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => + assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { + output.sql + } + assertResult(expected.output, s"Result did not match" + + s" for query #$i\n${expected.sql}") { + output.output + } + } + } + +} + +object CrossDbmsQueryTestSuite { + + final val POSTGRES = "postgres" + // Argument in input files to indicate that the sql file is restricted to certain systems. + final val ONLY_IF_ARG = "--ONLY_IF " +} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index ec622b8deb79f..16ca831c56f2f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -17,14 +17,8 @@ package org.apache.spark.sql.jdbc import java.io.File -import java.sql.{Connection, ResultSet} +import java.sql.Connection -import scala.collection.mutable.ArrayBuffer -import scala.util.control.NonFatal - -import org.apache.spark.sql.Row -import org.apache.spark.sql.SQLQueryTestHelper -import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.tags.DockerTest /** @@ -36,22 +30,19 @@ import org.apache.spark.tags.DockerTest * confidence, and you won't have to manually verify the golden files generated with your test. * 2. Add this line to your .sql file: --ONLY_IF spark * - * This suite builds off of that to allow us to run Postgres against the SQL test golden files (on - * which SQLQueryTestSuite generates and tests against) to perform cross-checking for correctness. - * Note that this is not currently run on all SQL input files by default because there is - * incompatibility between SQL dialects for Spark and the other DBMS. - * - * This suite adds a new comment argument, --ONLY_IF. This comment is used to indicate the DBMS for - * which is eligible for the SQL file. These strings are defined in the companion object. 
For - * example, if you have a SQL file named `describe.sql`, and you want to indicate that Postgres is - * incompatible, add the following comment into the input file: - * --ONLY_IF spark + * Note: To run this test suite for a specific version (e.g., postgres:15.1): + * {{{ + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:15.1 + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.PostgreSQLQueryTestSuite" + * }}} */ @DockerTest -class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryTestHelper - with CrossDbmsQueryTestHelper { +class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { - val DATABASE_NAME = POSTGRES + val DATABASE_NAME = CrossDbmsQueryTestSuite.POSTGRES + // Scope to only subquery directory for now. + protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath override val db = new DatabaseOnDocker { override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:15.1-alpine") @@ -78,136 +69,5 @@ class PostgreSQLQueryTestSuite extends DockerJDBCIntegrationSuite with SQLQueryT ).executeUpdate() } - protected val baseResourcePath = { - // We use a path based on Spark home for 2 reasons: - // 1. Maven can't get correct resource directory when resources in other jars. - // 2. We test subclasses in the hive-thriftserver module. - getWorkspaceFilePath("sql", "core", "src", "test", "resources", "sql-tests").toFile - } - protected val inputFilePath = new File(baseResourcePath, "inputs").getAbsolutePath - protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - protected val goldenFilePath = new File(baseResourcePath, "results").getAbsolutePath - - def listTestCases: Seq[TestCase] = { - listFilesRecursively(new File(customInputFilePath)).flatMap { file => - val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" - val absPath = file.getAbsolutePath - val testCaseName = absPath.stripPrefix(customInputFilePath).stripPrefix(File.separator) - RegularTestCase(testCaseName, absPath, resultFile) :: Nil - }.sortBy(_.name) - } - - def createScalaTestCase(testCase: TestCase): Unit = { - testCase match { - case _: RegularTestCase => - // Create a test case to run this case. - test(testCase.name) { - runSqlTestCase(testCase, listTestCases) - } - case _ => - ignore(s"Ignoring test cases that are not [[RegularTestCase]] for now") { - log.debug(s"${testCase.name} is not a RegularTestCase and is ignored.") - } - } - } - - protected def runSqlTestCase(testCase: TestCase, listTestCases: Seq[TestCase]): Unit = { - val input = fileToString(new File(testCase.inputFile)) - val (comments, code) = splitCommentsAndCodes(input) - val queries = getQueries(code, comments, listTestCases) - - val dbmsConfig = comments.filter(_.startsWith(ONLY_IF_ARG)) - .map(_.substring(ONLY_IF_ARG.length)) - // If `--ONLY_IF` is found, check if the DBMS being used is allowed. 
- if (dbmsConfig.nonEmpty && !dbmsConfig.contains(DATABASE_NAME)) { - log.info(s"This test case (${testCase.name}) is ignored because it indicates that it is " + - s"not eligible with $DATABASE_NAME.") - } else { - runQueriesAndCheckAgainstGoldenFile(queries, testCase) - } - } - - protected def runQueriesAndCheckAgainstGoldenFile( - queries: Seq[String], testCase: TestCase): Unit = { - val localSparkSession = spark.newSession() - val conn = getConnection() - val stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) - - val outputs: Seq[QueryTestOutput] = queries.map { sql => - val output = { - try { - val sparkDf = localSparkSession.sql(sql) - - val isResultSet = stmt.execute(sql) - val rows = ArrayBuffer[Row]() - if (isResultSet) { - val rs = stmt.getResultSet - val metadata = rs.getMetaData - while (rs.next()) { - val row = Row.fromSeq((1 to metadata.getColumnCount).map(i => { - val value = rs.getObject(i) - if (value == null) { - "NULL" - } else { - value - } - })) - rows.append(row) - } - } - val output = rows.map(_.mkString("\t")).toSeq - // Use Spark analyzed plan to check if the query result is already semantically sorted. - if (isSemanticallySorted(sparkDf.queryExecution.analyzed)) { - output - } else { - // Sort the answer manually if it isn't sorted. - output.sorted - } - } catch { - case NonFatal(e) => Seq(e.getClass.getName, e.getMessage) - } - } - - ExecutionOutput( - sql = sql, - // Don't care about the schema for this test. Only care about correctness. - schema = None, - output = output.mkString("\n")) - } - conn.close() - - // Read back the golden files. - var curSegment = 0 - val expectedOutputs: Seq[QueryTestOutput] = { - val goldenOutput = fileToString(new File(testCase.resultFile)) - val segments = goldenOutput.split("-- !query.*\n") - outputs.map { output => - val result = - ExecutionOutput( - segments(curSegment + 1).trim, // SQL - None, // Schema - normalizeTestResults(segments(curSegment + 3))) // Output - // Assume that the golden file always has all 3 segments. - curSegment += 3 - result - } - } - - // Compare results. - assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { - outputs.size - } - - outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => - assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { - output.sql - } - assertResult(expected.output, s"Result did not match" + - s" for query #$i\n${expected.sql}") { - output.output - } - } - } - listTestCases.foreach(createScalaTestCase) }
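With the shared harness factored into CrossDbmsQueryTestSuite, wiring up another DBMS should mostly amount to naming the system, scoping the input files, and describing the Docker container. The sketch below is illustrative only and not part of this patch: the MySQL suite name, image, port, and credentials are hypothetical placeholders, and it assumes the DatabaseOnDocker and dataPreparation hooks provided by DockerJDBCIntegrationSuite.

    package org.apache.spark.sql.jdbc

    import java.io.File
    import java.sql.Connection

    import org.apache.spark.tags.DockerTest

    @DockerTest
    class MySQLQueryTestSuite extends CrossDbmsQueryTestSuite {

      // Hypothetical system name; it would also need a constant next to POSTGRES in the
      // CrossDbmsQueryTestSuite companion object so --ONLY_IF comments can reference it.
      val DATABASE_NAME = "mysql"

      // Scope to the subquery inputs, mirroring the Postgres suite.
      protected val customInputFilePath: String =
        new File(inputFilePath, "subquery").getAbsolutePath

      override val db = new DatabaseOnDocker {
        override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:8.0")
        override val env = Map("MYSQL_ROOT_PASSWORD" -> "rootpass")
        override val usesIpc = false
        override val jdbcPort = 3306
        override def getJdbcUrl(ip: String, port: Int): String =
          s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass"
      }

      override def dataPreparation(conn: Connection): Unit = {
        // Create and populate whatever tables the scoped golden files expect.
      }

      listTestCases.foreach(createScalaTestCase)
    }

Scoping customInputFilePath per suite keeps the cross-check opt-in, so only directories whose golden files are known to be reasonably dialect-compatible get exercised against the other system.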