Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
// string, the original implementation from Hive will not drop the trailing semicolon as expected,
// hence we refined this function a little bit.
// Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql.
private def splitSemiColon(line: String): JList[String] = {
private[hive] def splitSemiColon(line: String): JList[String] = {
var insideSingleQuote = false
var insideDoubleQuote = false
var insideSimpleComment = false
Expand Down Expand Up @@ -613,7 +613,17 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {

isStatement = statementInProgress(index)
}
if (beginIndex < line.length()) {
// Check whether the last char is the end of a nested bracketed comment.
val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1
// Spark SQL supports simple comments and nested bracketed comments in the query body,
// but if Spark SQL receives a comment alone, it throws a parser exception.
// In the Spark SQL CLI, if there is a completed comment at the end of the whole query,
// then because the CLI uses `;` to split the query, it would pass the comment
// to the backend engine and an exception would be thrown. The CLI should ignore this comment.
// If there is an uncompleted statement or an uncompleted bracketed comment at the end,
// the CLI should still pass this part to the backend engine, which may throw an exception
// with a clear error message.
if (!endOfBracketedComment && (isStatement || insideBracketedComment)) {
ret.add(line.substring(beginIndex))
}
ret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,23 @@ import java.nio.charset.StandardCharsets
import java.sql.Timestamp
import java.util.Date

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.Promise
import scala.concurrent.duration._

import org.apache.hadoop.hive.cli.CliSessionState
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hadoop.hive.ql.session.SessionState
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer
import org.apache.spark.SparkFunSuite
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.HiveUtils._
import org.apache.spark.sql.hive.client.HiveClientImpl
import org.apache.spark.sql.hive.test.HiveTestJars
import org.apache.spark.sql.internal.StaticSQLConf
import org.apache.spark.util.{ThreadUtils, Utils}
Expand Down Expand Up @@ -638,4 +644,40 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
runCliWithin(2.minute, errorResponses = Seq("ParseException"))(
"delete jar dummy.jar;" -> "missing 'FROM' at 'jar'(line 1, pos 7)")
}

// Verifies that `splitSemiColon` drops a completed trailing comment (simple or bracketed)
// while still returning unterminated statements and unterminated bracketed comments.
test("SPARK-37906: Spark SQL CLI should not pass final comment") {
  val sparkConf = new SparkConf(loadDefaults = true)
    .setMaster("local-cluster[1,1,1024]")
    .setAppName("SPARK-37906")
  val sparkContext = new SparkContext(sparkConf)
  SparkSQLEnv.sparkContext = sparkContext
  // Everything after the context is created runs under try/finally so that a failing
  // assertion (or a failing setup step) does not leak the SparkContext into later tests.
  try {
    val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)
    val extraConfigs = HiveUtils.formatTimeVarsForHiveClient(hadoopConf)
    val cliConf = HiveClientImpl.newHiveConf(sparkConf, hadoopConf, extraConfigs)
    val sessionState = new CliSessionState(cliConf)
    try {
      SessionState.setCurrentSessionState(sessionState)
      val cli = new SparkSQLCLIDriver
      // Each pair is (raw CLI input, expected list of statements after splitting on `;`).
      Seq("SELECT 1; --comment" -> Seq("SELECT 1"),
        "SELECT 1; /* comment */" -> Seq("SELECT 1"),
        "SELECT 1; /* comment" -> Seq("SELECT 1", " /* comment"),
        "SELECT 1; /* comment select 1;" -> Seq("SELECT 1", " /* comment select 1;"),
        "/* This is a comment without end symbol SELECT 1;" ->
          Seq("/* This is a comment without end symbol SELECT 1;"),
        "SELECT 1; --comment\n" -> Seq("SELECT 1"),
        "SELECT 1; /* comment */\n" -> Seq("SELECT 1"),
        "SELECT 1; /* comment\n" -> Seq("SELECT 1", " /* comment\n"),
        "SELECT 1; /* comment select 1;\n" -> Seq("SELECT 1", " /* comment select 1;\n"),
        "/* This is a comment without end symbol SELECT 1;\n" ->
          Seq("/* This is a comment without end symbol SELECT 1;\n"),
        "/* comment */ SELECT 1;" -> Seq("/* comment */ SELECT 1"),
        "SELECT /* comment */ 1;" -> Seq("SELECT /* comment */ 1"),
        "-- comment " -> Seq(),
        "-- comment \nSELECT 1" -> Seq("-- comment \nSELECT 1"),
        "/* comment */ " -> Seq()
      ).foreach { case (query, ret) =>
        // The clue identifies which input failed, since all cases share one assertion.
        assert(cli.splitSemiColon(query).asScala === ret, s"for query: [$query]")
      }
    } finally {
      sessionState.close()
    }
  } finally {
    SparkSQLEnv.stop()
  }
}
}