From 562841bdbeef8f7e3713d97f33aca53b3920436b Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 28 Apr 2020 19:02:11 +0800 Subject: [PATCH 1/4] SPARK-31595 Spark sql cli should allow unescaped quote mark in quoted string --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 5 +++-- .../apache/spark/sql/hive/thriftserver/CliSuite.scala | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 5ed0cb0407484..f477dd64296bf 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -507,6 +507,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon. + // Allow "'" and '"' as closing legal string private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false @@ -519,13 +520,13 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { for (index <- 0 until line.length) { if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped - if (!escape) { + if (!escape && !insideDoubleQuote) { // flip the boolean variable insideSingleQuote = !insideSingleQuote } } else if (line.charAt(index) == '\"' && !insideComment) { // take a look to see if it is escaped - if (!escape) { + if (!escape && !insideSingleQuote) { // flip the boolean variable insideDoubleQuote = !insideDoubleQuote } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index abefb46b32140..3287448937f26 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -500,4 +500,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE | ;""".stripMargin -> "testcomment" ) } + + test("Should allow unescaped quote mark in quoted string") { + runCliWithin(1.minute)( + """SELECT '"legal string a';select 1 + 234;""".stripMargin -> "235" + ) + runCliWithin(1.minute)( + """SELECT "legal 'string b";select 22222 + 1;""".stripMargin -> "22223" + ) + } } From 1947cbe5b471ca252611ddd25dcc19268b9ecd59 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Fri, 1 May 2020 17:44:25 +0800 Subject: [PATCH 2/4] refine comments --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index f477dd64296bf..bffa24c469601 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -507,7 +507,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon. - // Allow "'" and '"' as closing legal string + // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted + // string, the origin implementation from Hive will not drop the trailing semicolon as expected, + // hence we refined this function a little bit. private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false @@ -520,12 +522,14 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { for (index <- 0 until line.length) { if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped + // See the comment above about SPARK-31595 if (!escape && !insideDoubleQuote) { // flip the boolean variable insideSingleQuote = !insideSingleQuote } } else if (line.charAt(index) == '\"' && !insideComment) { // take a look to see if it is escaped + // See the comment above about SPARK-31595 if (!escape && !insideSingleQuote) { // flip the boolean variable insideDoubleQuote = !insideDoubleQuote From c20543df53beb534e1349bcc0439d2a657ac4c54 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Sat, 2 May 2020 15:12:55 +0800 Subject: [PATCH 3/4] add jira title in test suite --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 3287448937f26..d1ab9c2eaf936 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -501,7 +501,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE ) } - test("Should allow unescaped quote mark in quoted string") { + test("SPARK-31595 Should allow unescaped quote mark in quoted string") { runCliWithin(1.minute)( """SELECT '"legal string a';select 1 + 234;""".stripMargin -> "235" ) From d185264ae4acc585855b70b2ec3caa0e63c83043 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Wed, 6 May 2020 10:23:54 +0800 Subject: [PATCH 4/4] remove multiline string style --- .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index d1ab9c2eaf936..265e7772a691c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -503,10 +503,10 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE test("SPARK-31595 Should allow unescaped quote mark in quoted string") { runCliWithin(1.minute)( - """SELECT '"legal string a';select 1 + 234;""".stripMargin -> "235" + "SELECT '\"legal string a';select 1 + 234;".stripMargin -> "235" ) runCliWithin(1.minute)( - """SELECT "legal 'string b";select 22222 + 1;""".stripMargin -> "22223" + "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223" ) } }