diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index e17b74873395e..4c26e93606083 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -527,7 +527,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
   // string, the origin implementation from Hive will not drop the trailing semicolon as expected,
   // hence we refined this function a little bit.
   // Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql.
-  private def splitSemiColon(line: String): JList[String] = {
+  private[hive] def splitSemiColon(line: String): JList[String] = {
     var insideSingleQuote = false
     var insideDoubleQuote = false
     var insideSimpleComment = false
@@ -613,7 +613,17 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
       isStatement = statementInProgress(index)
     }
 
-    if (beginIndex < line.length()) {
+    // Check whether the last char is the end of a nested bracketed comment.
+    val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1
+    // Spark SQL supports simple comments and nested bracketed comments in a query body,
+    // but it throws a parser exception when it receives a comment alone. Since the
+    // Spark SQL CLI uses `;` to split the query, a completed comment at the end of the
+    // whole query would be passed to the backend engine, which would then throw an
+    // exception; the CLI should ignore such a comment. If an uncompleted statement or
+    // an uncompleted bracketed comment remains at the end, the CLI should still pass
+    // that part to the backend engine, which may throw an exception with a clear
+    // error message.
+    if (!endOfBracketedComment && (isStatement || insideBracketedComment)) {
       ret.add(line.substring(beginIndex))
     }
     ret
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 234fb89b01a83..4af051746b96e 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -22,17 +22,23 @@ import java.nio.charset.StandardCharsets
 import java.sql.Timestamp
 import java.util.Date
 
+import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 import scala.concurrent.Promise
 import scala.concurrent.duration._
 
+import org.apache.hadoop.hive.cli.CliSessionState
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
+import org.apache.hadoop.hive.ql.session.SessionState
 import org.scalatest.BeforeAndAfterAll
 
+import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
 import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.hive.HiveUtils._
+import org.apache.spark.sql.hive.client.HiveClientImpl
 import org.apache.spark.sql.hive.test.HiveTestJars
 import org.apache.spark.sql.internal.StaticSQLConf
 import org.apache.spark.util.{ThreadUtils, Utils}
@@ -638,4 +644,40 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
     runCliWithin(2.minute, errorResponses = Seq("ParseException"))(
       "delete jar dummy.jar;" -> "missing 'FROM' at 'jar'(line 1, pos 7)")
   }
+
+  test("SPARK-37906: Spark SQL CLI should not pass final comment") {
+    val sparkConf = new SparkConf(loadDefaults = true)
+      .setMaster("local-cluster[1,1,1024]")
+      .setAppName("SPARK-37906")
+    val sparkContext = new SparkContext(sparkConf)
+    SparkSQLEnv.sparkContext = sparkContext
+    val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)
+    val extraConfigs = HiveUtils.formatTimeVarsForHiveClient(hadoopConf)
+    val cliConf = HiveClientImpl.newHiveConf(sparkConf, hadoopConf, extraConfigs)
+    val sessionState = new CliSessionState(cliConf)
+    SessionState.setCurrentSessionState(sessionState)
+    val cli = new SparkSQLCLIDriver
+    Seq("SELECT 1; --comment" -> Seq("SELECT 1"),
+      "SELECT 1; /* comment */" -> Seq("SELECT 1"),
+      "SELECT 1; /* comment" -> Seq("SELECT 1", " /* comment"),
+      "SELECT 1; /* comment select 1;" -> Seq("SELECT 1", " /* comment select 1;"),
+      "/* This is a comment without end symbol SELECT 1;" ->
+        Seq("/* This is a comment without end symbol SELECT 1;"),
+      "SELECT 1; --comment\n" -> Seq("SELECT 1"),
+      "SELECT 1; /* comment */\n" -> Seq("SELECT 1"),
+      "SELECT 1; /* comment\n" -> Seq("SELECT 1", " /* comment\n"),
+      "SELECT 1; /* comment select 1;\n" -> Seq("SELECT 1", " /* comment select 1;\n"),
+      "/* This is a comment without end symbol SELECT 1;\n" ->
+        Seq("/* This is a comment without end symbol SELECT 1;\n"),
+      "/* comment */ SELECT 1;" -> Seq("/* comment */ SELECT 1"),
+      "SELECT /* comment */ 1;" -> Seq("SELECT /* comment */ 1"),
+      "-- comment " -> Seq(),
+      "-- comment \nSELECT 1" -> Seq("-- comment \nSELECT 1"),
+      "/* comment */ " -> Seq()
+    ).foreach { case (query, ret) =>
+      assert(cli.splitSemiColon(query).asScala === ret)
+    }
+    sessionState.close()
+    SparkSQLEnv.stop()
+  }
 }
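
Reviewer note: the rule the patch implements is "keep the tail of the input only if it still contains statement text or an unterminated bracketed comment; silently drop a completed trailing comment." Below is a minimal standalone sketch of that rule, assuming only unnested /* */ and -- comments and no quoted strings. It is NOT the real splitSemiColon, which also tracks single/double quotes and comment nesting (hence its extra endOfBracketedComment check); the SplitSketch/split names are invented for illustration.

object SplitSketch {
  def split(line: String): Seq[String] = {
    val ret = scala.collection.mutable.ArrayBuffer.empty[String]
    var insideBracketedComment = false  // inside /* ... */
    var insideSimpleComment = false     // inside -- ... up to end of line
    var isStatement = false             // current piece contains statement text
    var begin = 0
    var i = 0
    while (i < line.length) {
      val c = line(i)
      if (insideSimpleComment) {
        if (c == '\n') insideSimpleComment = false
      } else if (insideBracketedComment) {
        if (c == '*' && i + 1 < line.length && line(i + 1) == '/') {
          insideBracketedComment = false
          i += 1
        }
      } else if (c == ';') {
        ret += line.substring(begin, i)  // emit the completed statement
        begin = i + 1
        isStatement = false
      } else if (c == '-' && i + 1 < line.length && line(i + 1) == '-') {
        insideSimpleComment = true
        i += 1
      } else if (c == '/' && i + 1 < line.length && line(i + 1) == '*') {
        insideBracketedComment = true
        i += 1
      } else if (!c.isWhitespace) {
        isStatement = true  // saw a non-comment, non-whitespace token
      }
      i += 1
    }
    // The fix: keep the tail only if it still holds statement text or an
    // unterminated bracketed comment; a completed trailing comment is dropped.
    if (isStatement || insideBracketedComment) ret += line.substring(begin)
    ret.toSeq
  }
}

For example, SplitSketch.split("SELECT 1; /* comment */") yields Seq("SELECT 1") because the completed trailing comment is dropped, while SplitSketch.split("SELECT 1; /* comment") yields Seq("SELECT 1", " /* comment") so the backend engine can report the unterminated comment with a clear error message, mirroring the first four cases in the test above.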