From d0b2c2278ec7d10cc1ab998be489e6553a8dc193 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 19 Apr 2017 01:49:47 +0000 Subject: [PATCH 01/11] Add a config to fallback string literal parsing consistent with old sql parser behavior. --- .../sql/catalyst/catalog/SessionCatalog.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 11 +++- .../sql/catalyst/parser/ParseDriver.scala | 8 ++- .../sql/catalyst/parser/ParserUtils.scala | 5 ++ .../apache/spark/sql/internal/SQLConf.scala | 10 +++ .../parser/ExpressionParserSuite.scala | 61 +++++++++++++++++-- .../spark/sql/execution/SparkSqlParser.scala | 2 +- .../org/apache/spark/sql/DatasetSuite.scala | 13 ++++ 8 files changed, 103 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 3fbf83f3a38a..41b150bdf1ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -73,7 +73,7 @@ class SessionCatalog( functionRegistry, conf, new Configuration(), - CatalystSqlParser, + new CatalystSqlParser(conf), DummyFunctionResourceLoader) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index e1db1ef5b869..ffce5dda144a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.internal.SQLConf 
import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.random.RandomSampler @@ -44,9 +45,11 @@ import org.apache.spark.util.random.RandomSampler * The AstBuilder converts an ANTLR4 ParseTree into a catalyst Expression, LogicalPlan or * TableIdentifier. */ -class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { +class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging { import ParserUtils._ + def this() = this(new SQLConf()) + protected def typedVisit[T](ctx: ParseTree): T = { ctx.accept(this).asInstanceOf[T] } @@ -1406,7 +1409,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * Special characters can be escaped by using Hive/C-style escaping. */ private def createString(ctx: StringLiteralContext): String = { - ctx.STRING().asScala.map(string).mkString + if (conf.noUnescapedStringLiteral) { + ctx.STRING().asScala.map(stringWithoutUnescape).mkString + } else { + ctx.STRING().asScala.map(string).mkString + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 80ab75cc17fa..c249022476a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.Origin +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} /** @@ -120,8 +121,13 @@ abstract class AbstractSqlParser extends ParserInterface with Logging { /** * Concrete SQL parser for Catalyst-only SQL statements. 
*/ +class CatalystSqlParser(conf: SQLConf) extends AbstractSqlParser { + val astBuilder = new AstBuilder(conf) +} + +/** For test-only. */ object CatalystSqlParser extends AbstractSqlParser { - val astBuilder = new AstBuilder + val astBuilder = new AstBuilder(new SQLConf()) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 6fbc33fad735..8a36e65cfc44 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -68,6 +68,11 @@ object ParserUtils { /** Convert a string node into a string. */ def string(node: TerminalNode): String = unescapeSQLString(node.getText) + /** Convert a string node into a string without unescaping. */ + def stringWithoutUnescape(node: TerminalNode): String = { + node.getText.slice(1, node.getText.size - 1) + } + /** Get the origin (line and position) of the token. */ def position(token: Token): Origin = { val opt = Option(token) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2e1798e22b9f..61950decb64a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -196,6 +196,14 @@ object SQLConf { .booleanConf .createWithDefault(true) + val NO_UNESCAPED_SQL_STRING = buildConf("spark.sql.noUnescapedStringLiteral") + .internal() + .doc("Since Spark 2.0, we use unescaped SQL string for string literals including regex. " + + "It is different than 1.6 behavior. 
Enabling this config can use no unescaped SQL string " + + "literals and mitigate migration problem.") + .booleanConf + .createWithDefault(false) + val PARQUET_SCHEMA_MERGING_ENABLED = buildConf("spark.sql.parquet.mergeSchema") .doc("When true, the Parquet data source merges schemas collected from all data files, " + "otherwise the schema is picked from the summary file or a random data file " + @@ -911,6 +919,8 @@ class SQLConf extends Serializable with Logging { def constraintPropagationEnabled: Boolean = getConf(CONSTRAINT_PROPAGATION_ENABLED) + def noUnescapedStringLiteral: Boolean = getConf(NO_UNESCAPED_SQL_STRING) + /** * Returns the [[Resolver]] for the current configuration, which can be used to determine if two * identifiers are equal. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index e7f3b64a7113..63e0a40d0449 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval @@ -39,12 +40,17 @@ class ExpressionParserSuite extends PlanTest { import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ - def assertEqual(sqlCommand: String, e: Expression): Unit = { - compareExpressions(parseExpression(sqlCommand), e) + val defaultParser = CatalystSqlParser + + def assertEqual( + sqlCommand: String, + e: Expression, + parser: 
ParserInterface = defaultParser): Unit = { + compareExpressions(parser.parseExpression(sqlCommand), e) } def intercept(sqlCommand: String, messages: String*): Unit = { - val e = intercept[ParseException](parseExpression(sqlCommand)) + val e = intercept[ParseException](defaultParser.parseExpression(sqlCommand)) messages.foreach { message => assert(e.message.contains(message)) } @@ -101,7 +107,7 @@ class ExpressionParserSuite extends PlanTest { test("long binary logical expressions") { def testVeryBinaryExpression(op: String, clazz: Class[_]): Unit = { val sql = (1 to 1000).map(x => s"$x == $x").mkString(op) - val e = parseExpression(sql) + val e = defaultParser.parseExpression(sql) assert(e.collect { case _: EqualTo => true }.size === 1000) assert(e.collect { case x if clazz.isInstance(x) => true }.size === 999) } @@ -160,6 +166,15 @@ class ExpressionParserSuite extends PlanTest { assertEqual("a not regexp 'pattern%'", !('a rlike "pattern%")) } + test("like expressions with NO_UNESCAPED_SQL_STRING") { + val conf = new SQLConf() + conf.setConfString("spark.sql.noUnescapedStringLiteral", "true") + val parser = new CatalystSqlParser(conf) + assertEqual("a rlike '^\\x20[\\x20-\\x23]+$'", 'a rlike "^\\x20[\\x20-\\x23]+$", parser) + assertEqual("a rlike 'pattern\\\\'", 'a rlike "pattern\\\\", parser) + assertEqual("a rlike 'pattern\\t\\n'", 'a rlike "pattern\\t\\n", parser) + } + test("is null expressions") { assertEqual("a is null", 'a.isNull) assertEqual("a is not null", 'a.isNotNull) @@ -447,6 +462,44 @@ class ExpressionParserSuite extends PlanTest { assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", "World :)") } + test("strings with NO_UNESCAPED_SQL_STRING") { + val conf = new SQLConf() + conf.setConfString("spark.sql.noUnescapedStringLiteral", "true") + val parser = new CatalystSqlParser(conf) + + // Single Strings. + assertEqual("\"hello\"", "hello", parser) + assertEqual("'hello'", "hello", parser) + + // Multi-Strings. 
+ assertEqual("\"hello\" 'world'", "helloworld", parser) + assertEqual("'hello' \" \" 'world'", "hello world", parser) + + assertEqual("'pattern%'", "pattern%", parser) + assertEqual("'no-pattern\\%'", "no-pattern\\%", parser) + assertEqual("'pattern\\\\%'", "pattern\\\\%", parser) + assertEqual("'pattern\\\\\\%'", "pattern\\\\\\%", parser) + + // Escaped characters. + assertEqual("'\0'", "\u0000", parser) // ASCII NUL (X'00') + + // Note: Single quote follows 1.6 parsing behavior when NO_UNESCAPED_SQL_STRING is enabled. + val e = intercept[ParseException](parser.parseExpression("'\''")) + assert(e.message.contains("extraneous input '''")) + + assertEqual("'\"'", "\"", parser) // Double quote + assertEqual("'\b'", "\b", parser) // Backspace + assertEqual("'\n'", "\n", parser) // Newline + assertEqual("'\r'", "\r", parser) // Carriage return + assertEqual("'\t'", "\t", parser) // Tab character + + // Octals + assertEqual("'\110\145\154\154\157\041'", "Hello!", parser) + + // Unicode + assertEqual("'\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'", "World :)", parser) + } + test("intervals") { def intervalLiteral(u: String, s: String): Literal = { Literal(CalendarInterval.fromSingleUnitString(u, s)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 20dacf88504f..c2c52894860b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -52,7 +52,7 @@ class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser { /** * Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier. 
*/ -class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { +class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { import org.apache.spark.sql.catalyst.parser.ParserUtils._ /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 5b5cd28ad0c9..26979f516e31 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SortExec} import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchange} import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ @@ -1168,6 +1169,18 @@ class DatasetSuite extends QueryTest with SharedSQLContext { val ds = Seq(WithMapInOption(Some(Map(1 -> 1)))).toDS() checkDataset(ds, WithMapInOption(Some(Map(1 -> 1)))) } + + test("do not unescaped regex pattern string") { + withSQLConf(SQLConf.NO_UNESCAPED_SQL_STRING.key -> "true") { + val data = Seq("\u0020\u0021\u0023", "abc") + val df = data.toDF() + val rlike1 = df.filter("value rlike '^\\x20[\\x20-\\x23]+$'") + val rlike2 = df.filter($"value".rlike("^\\x20[\\x20-\\x23]+$")) + val rlike3 = df.filter("value rlike '^\\\\x20[\\\\x20-\\\\x23]+$'") + checkAnswer(rlike1, rlike2) + assert(rlike3.count() == 0) + } + } } case class WithImmutableMap(id: String, map_test: scala.collection.immutable.Map[Long, String]) From 8ae074784d145dc2298fa81c0c9097aad48fc349 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 8 May 2017 02:53:33 +0000 Subject: [PATCH 02/11] Address comments. 
--- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../apache/spark/sql/catalyst/parser/ParserUtils.scala | 1 + .../scala/org/apache/spark/sql/internal/SQLConf.scala | 10 +++++----- .../sql/catalyst/parser/ExpressionParserSuite.scala | 8 ++++---- .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 2 +- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ffce5dda144a..13d287d60a07 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1409,7 +1409,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Special characters can be escaped by using Hive/C-style escaping. */ private def createString(ctx: StringLiteralContext): String = { - if (conf.noUnescapedStringLiteral) { + if (conf.escapedStringLiterals) { ctx.STRING().asScala.map(stringWithoutUnescape).mkString } else { ctx.STRING().asScala.map(string).mkString diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 8a36e65cfc44..77fdaa8255aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -70,6 +70,7 @@ object ParserUtils { /** Convert a string node into a string without unescaping. */ def stringWithoutUnescape(node: TerminalNode): String = { + // The STRING rule guarantees that the text always starts and ends with a quote.
node.getText.slice(1, node.getText.size - 1) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 61950decb64a..63a638cee05c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -196,11 +196,11 @@ object SQLConf { .booleanConf .createWithDefault(true) - val NO_UNESCAPED_SQL_STRING = buildConf("spark.sql.noUnescapedStringLiteral") + val ESCAPED_STRING_LITERALS = buildConf("spark.sql.parser.escapedStringLiterals") .internal() - .doc("Since Spark 2.0, we use unescaped SQL string for string literals including regex. " + - "It is different than 1.6 behavior. Enabling this config can use no unescaped SQL string " + - "literals and mitigate migration problem.") + .doc("When true, string literals (including regex patterns) remains escaped in our SQL " + + "parser. The default is false since Spark 2.0. 
Setting it to true can restore the behavior " + + "prior to Spark 2.0.") .booleanConf .createWithDefault(false) @@ -919,7 +919,7 @@ class SQLConf extends Serializable with Logging { def constraintPropagationEnabled: Boolean = getConf(CONSTRAINT_PROPAGATION_ENABLED) - def noUnescapedStringLiteral: Boolean = getConf(NO_UNESCAPED_SQL_STRING) + def escapedStringLiterals: Boolean = getConf(ESCAPED_STRING_LITERALS) /** * Returns the [[Resolver]] for the current configuration, which can be used to determine if two diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 63e0a40d0449..3cd558575c85 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -166,9 +166,9 @@ class ExpressionParserSuite extends PlanTest { assertEqual("a not regexp 'pattern%'", !('a rlike "pattern%")) } - test("like expressions with NO_UNESCAPED_SQL_STRING") { + test("like expressions with ESCAPED_STRING_LITERALS = true") { val conf = new SQLConf() - conf.setConfString("spark.sql.noUnescapedStringLiteral", "true") + conf.setConfString("spark.sql.parser.escapedStringLiterals", "true") val parser = new CatalystSqlParser(conf) assertEqual("a rlike '^\\x20[\\x20-\\x23]+$'", 'a rlike "^\\x20[\\x20-\\x23]+$", parser) assertEqual("a rlike 'pattern\\\\'", 'a rlike "pattern\\\\", parser) @@ -462,9 +462,9 @@ class ExpressionParserSuite extends PlanTest { assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", "World :)") } - test("strings with NO_UNESCAPED_SQL_STRING") { + test("strings with ESCAPED_STRING_LITERALS = true") { val conf = new SQLConf() - conf.setConfString("spark.sql.noUnescapedStringLiteral", "true") + conf.setConfString("spark.sql.parser.escapedStringLiterals", "true") val 
parser = new CatalystSqlParser(conf) // Single Strings. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 26979f516e31..bec6e6bb629f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1171,7 +1171,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext { } test("do not unescaped regex pattern string") { - withSQLConf(SQLConf.NO_UNESCAPED_SQL_STRING.key -> "true") { + withSQLConf(SQLConf.ESCAPED_STRING_LITERALS.key -> "true") { val data = Seq("\u0020\u0021\u0023", "abc") val df = data.toDF() val rlike1 = df.filter("value rlike '^\\x20[\\x20-\\x23]+$'") From ab77de7d057ed284160fc051bfbfd6afb0a3a923 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 8 May 2017 05:05:10 +0000 Subject: [PATCH 03/11] Fix code comment. --- .../spark/sql/catalyst/parser/ExpressionParserSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 3cd558575c85..6864b531ab82 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -483,7 +483,7 @@ class ExpressionParserSuite extends PlanTest { // Escaped characters. assertEqual("'\0'", "\u0000", parser) // ASCII NUL (X'00') - // Note: Single quote follows 1.6 parsing behavior when NO_UNESCAPED_SQL_STRING is enabled. + // Note: Single quote follows 1.6 parsing behavior when ESCAPED_STRING_LITERALS is enabled. 
val e = intercept[ParseException](parser.parseExpression("'\''")) assert(e.message.contains("extraneous input '''")) From 04a9fd34c7489079da2b02a8f3a5ca84d87b0017 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 9 May 2017 03:15:10 +0000 Subject: [PATCH 04/11] Merge tests. --- .../parser/ExpressionParserSuite.scala | 166 ++++++++++-------- 1 file changed, 96 insertions(+), 70 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 6864b531ab82..d7b0e00068db 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -168,7 +168,7 @@ class ExpressionParserSuite extends PlanTest { test("like expressions with ESCAPED_STRING_LITERALS = true") { val conf = new SQLConf() - conf.setConfString("spark.sql.parser.escapedStringLiterals", "true") + conf.setConfString(SQLConf.ESCAPED_STRING_LITERALS.key, "true") val parser = new CatalystSqlParser(conf) assertEqual("a rlike '^\\x20[\\x20-\\x23]+$'", 'a rlike "^\\x20[\\x20-\\x23]+$", parser) assertEqual("a rlike 'pattern\\\\'", 'a rlike "pattern\\\\", parser) @@ -428,76 +428,102 @@ class ExpressionParserSuite extends PlanTest { } test("strings") { - // Single Strings. - assertEqual("\"hello\"", "hello") - assertEqual("'hello'", "hello") - - // Multi-Strings. - assertEqual("\"hello\" 'world'", "helloworld") - assertEqual("'hello' \" \" 'world'", "hello world") - - // 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a - // regular '%'; to get the correct result you need to add another escaped '\'. - // TODO figure out if we shouldn't change the ParseUtils.unescapeSQLString method? 
- assertEqual("'pattern%'", "pattern%") - assertEqual("'no-pattern\\%'", "no-pattern\\%") - assertEqual("'pattern\\\\%'", "pattern\\%") - assertEqual("'pattern\\\\\\%'", "pattern\\\\%") - - // Escaped characters. - // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html - assertEqual("'\\0'", "\u0000") // ASCII NUL (X'00') - assertEqual("'\\''", "\'") // Single quote - assertEqual("'\\\"'", "\"") // Double quote - assertEqual("'\\b'", "\b") // Backspace - assertEqual("'\\n'", "\n") // Newline - assertEqual("'\\r'", "\r") // Carriage return - assertEqual("'\\t'", "\t") // Tab character - assertEqual("'\\Z'", "\u001A") // ASCII 26 - CTRL + Z (EOF on windows) - - // Octals - assertEqual("'\\110\\145\\154\\154\\157\\041'", "Hello!") - - // Unicode - assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", "World :)") - } - - test("strings with ESCAPED_STRING_LITERALS = true") { - val conf = new SQLConf() - conf.setConfString("spark.sql.parser.escapedStringLiterals", "true") - val parser = new CatalystSqlParser(conf) - - // Single Strings. - assertEqual("\"hello\"", "hello", parser) - assertEqual("'hello'", "hello", parser) - - // Multi-Strings. - assertEqual("\"hello\" 'world'", "helloworld", parser) - assertEqual("'hello' \" \" 'world'", "hello world", parser) - - assertEqual("'pattern%'", "pattern%", parser) - assertEqual("'no-pattern\\%'", "no-pattern\\%", parser) - assertEqual("'pattern\\\\%'", "pattern\\\\%", parser) - assertEqual("'pattern\\\\\\%'", "pattern\\\\\\%", parser) - - // Escaped characters. - assertEqual("'\0'", "\u0000", parser) // ASCII NUL (X'00') - - // Note: Single quote follows 1.6 parsing behavior when ESCAPED_STRING_LITERALS is enabled. 
- val e = intercept[ParseException](parser.parseExpression("'\''")) - assert(e.message.contains("extraneous input '''")) - - assertEqual("'\"'", "\"", parser) // Double quote - assertEqual("'\b'", "\b", parser) // Backspace - assertEqual("'\n'", "\n", parser) // Newline - assertEqual("'\r'", "\r", parser) // Carriage return - assertEqual("'\t'", "\t", parser) // Tab character - - // Octals - assertEqual("'\110\145\154\154\157\041'", "Hello!", parser) + // The SQL commands when ESCAPED_STRING_LITERALS = false (default behavior) + val sqlCommands = Seq( + // Single Strings. + "\"hello\"", + "'hello'", + // Multi-Strings. + "\"hello\" 'world'", + "'hello' \" \" 'world'", + // 'LIKE' string literals. + "'pattern%'", + "'no-pattern\\%'", + "'pattern\\\\%'", + "'pattern\\\\\\%'", + // Escaped characters. + "'\\0'", + "'\\\"'", + "'\\b'", + "'\\n'", + "'\\r'", + "'\\t'", + // Octals + "'\\110\\145\\154\\154\\157\\041'", + // Unicode + "'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'") + + // The SQL commands when ESCAPED_STRING_LITERALS = true + val fallbackSqlCommands = Seq( + // Single Strings. + "\"hello\"", + "'hello'", + // Multi-Strings. + "\"hello\" 'world'", + "'hello' \" \" 'world'", + // 'LIKE' string literals. + "'pattern%'", + "'no-pattern\\%'", + "'pattern\\%'", + "'pattern\\\\%'", + // Escaped characters. + "'\0'", + "'\"'", + "'\b'", + "'\n'", + "'\r'", + "'\t'", + // Octals + "'\110\145\154\154\157\041'", + // Unicode + "'\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'") + + val expectedResults = Seq( + // Single Strings. + "hello", + "hello", + // Multi-Strings. + "helloworld", + "hello world", + // 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a + // regular '%'; to get the correct result you need to add another escaped '\'. + // TODO figure out if we shouldn't change the ParseUtils.unescapeSQLString method? + "pattern%", + "no-pattern\\%", + "pattern\\%", + "pattern\\\\%", + // Escaped characters. 
+ // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html + "\u0000", // ASCII NUL (X'00') + "\"", // Double quote + "\b", // Backspace + "\n", // Newline + "\r", // Carriage return + "\t", // Tab character + // Octals + "Hello!", + // Unicode + "World :)") + + val tests = Seq(("false", sqlCommands), ("true", fallbackSqlCommands)) + + tests.map { case (escapedStringLiterals, commands) => + val conf = new SQLConf() + conf.setConfString(SQLConf.ESCAPED_STRING_LITERALS.key, escapedStringLiterals) + val parser = new CatalystSqlParser(conf) + commands.zip(expectedResults).foreach { case (sqlCommand, expected) => + assertEqual(sqlCommand, expected, parser) + } + if (escapedStringLiterals == "false") { + assertEqual("'\\''", "\'", parser) // Single quote + assertEqual("'\\Z'", "\u001A", parser) // ASCII 26 - CTRL + Z (EOF on windows) + } else { + // Note: Single quote follows 1.6 parsing behavior when ESCAPED_STRING_LITERALS is enabled. + val e = intercept[ParseException](parser.parseExpression("'\''")) + assert(e.message.contains("extraneous input '''")) + } - // Unicode - assertEqual("'\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'", "World :)", parser) + } } test("intervals") { From 9ce7eb0450249fdc25e19adf6bcfe35b274dd086 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 10 May 2017 03:54:51 +0000 Subject: [PATCH 05/11] Address comments. 
--- .../parser/ExpressionParserSuite.scala | 147 ++++++++---------- .../org/apache/spark/sql/DatasetSuite.scala | 2 +- 2 files changed, 63 insertions(+), 86 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index d7b0e00068db..ab818978e42a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -428,99 +428,76 @@ class ExpressionParserSuite extends PlanTest { } test("strings") { - // The SQL commands when ESCAPED_STRING_LITERALS = false (default behavior) - val sqlCommands = Seq( - // Single Strings. - "\"hello\"", - "'hello'", - // Multi-Strings. - "\"hello\" 'world'", - "'hello' \" \" 'world'", - // 'LIKE' string literals. - "'pattern%'", - "'no-pattern\\%'", - "'pattern\\\\%'", - "'pattern\\\\\\%'", - // Escaped characters. - "'\\0'", - "'\\\"'", - "'\\b'", - "'\\n'", - "'\\r'", - "'\\t'", - // Octals - "'\\110\\145\\154\\154\\157\\041'", - // Unicode - "'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'") - - // The SQL commands when ESCAPED_STRING_LITERALS = true - val fallbackSqlCommands = Seq( - // Single Strings. - "\"hello\"", - "'hello'", - // Multi-Strings. - "\"hello\" 'world'", - "'hello' \" \" 'world'", - // 'LIKE' string literals. - "'pattern%'", - "'no-pattern\\%'", - "'pattern\\%'", - "'pattern\\\\%'", - // Escaped characters. 
- "'\0'", - "'\"'", - "'\b'", - "'\n'", - "'\r'", - "'\t'", - // Octals - "'\110\145\154\154\157\041'", - // Unicode - "'\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'") - - val expectedResults = Seq( + Seq(true, false).foreach { escape => + val conf = new SQLConf() + conf.setConfString(SQLConf.ESCAPED_STRING_LITERALS.key, escape.toString) + val parser = new CatalystSqlParser(conf) + + // tests that have same result whatever the conf is // Single Strings. - "hello", - "hello", + assertEqual("\"hello\"", "hello", parser) + assertEqual("'hello'", "hello", parser) + // Multi-Strings. - "helloworld", - "hello world", + assertEqual("\"hello\" 'world'", "helloworld", parser) + assertEqual("'hello' \" \" 'world'", "hello world", parser) + // 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a // regular '%'; to get the correct result you need to add another escaped '\'. // TODO figure out if we shouldn't change the ParseUtils.unescapeSQLString method? - "pattern%", - "no-pattern\\%", - "pattern\\%", - "pattern\\\\%", - // Escaped characters. 
- // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html - "\u0000", // ASCII NUL (X'00') - "\"", // Double quote - "\b", // Backspace - "\n", // Newline - "\r", // Carriage return - "\t", // Tab character - // Octals - "Hello!", - // Unicode - "World :)") - - val tests = Seq(("false", sqlCommands), ("true", fallbackSqlCommands)) - - tests.map { case (escapedStringLiterals, commands) => - val conf = new SQLConf() - conf.setConfString(SQLConf.ESCAPED_STRING_LITERALS.key, escapedStringLiterals) - val parser = new CatalystSqlParser(conf) - commands.zip(expectedResults).foreach { case (sqlCommand, expected) => - assertEqual(sqlCommand, expected, parser) - } - if (escapedStringLiterals == "false") { - assertEqual("'\\''", "\'", parser) // Single quote - assertEqual("'\\Z'", "\u001A", parser) // ASCII 26 - CTRL + Z (EOF on windows) - } else { + assertEqual("'pattern%'", "pattern%", parser) + assertEqual("'no-pattern\\%'", "no-pattern\\%", parser) + + // Tests whose results depend on the conf. + if (escape) { + // When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing falls back to + // Spark 1.6 behavior. + + // 'LIKE' string literals. + assertEqual("'pattern\\\\%'", "pattern\\\\%", parser) + assertEqual("'pattern\\\\\\%'", "pattern\\\\\\%", parser) + + // Escaped characters. + assertEqual("'\0'", "\u0000", parser) // ASCII NUL (X'00') + // Note: Single quote follows 1.6 parsing behavior when ESCAPED_STRING_LITERALS is enabled.
val e = intercept[ParseException](parser.parseExpression("'\''")) assert(e.message.contains("extraneous input '''")) + + assertEqual("'\"'", "\"", parser) // Double quote + assertEqual("'\b'", "\b", parser) // Backspace + assertEqual("'\n'", "\n", parser) // Newline + assertEqual("'\r'", "\r", parser) // Carriage return + assertEqual("'\t'", "\t", parser) // Tab character + + // Octals + assertEqual("'\110\145\154\154\157\041'", "Hello!", parser) + // Unicode + assertEqual("'\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'", "World :)", parser) + } else { + // Default behavior + + // 'LIKE' string literals. + assertEqual("'pattern\\\\%'", "pattern\\%", parser) + assertEqual("'pattern\\\\\\%'", "pattern\\\\%", parser) + + // Escaped characters. + // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html + assertEqual("'\\0'", "\u0000", parser) // ASCII NUL (X'00') + assertEqual("'\\''", "\'", parser) // Single quote + assertEqual("'\\\"'", "\"", parser) // Double quote + assertEqual("'\\b'", "\b", parser) // Backspace + assertEqual("'\\n'", "\n", parser) // Newline + assertEqual("'\\r'", "\r", parser) // Carriage return + assertEqual("'\\t'", "\t", parser) // Tab character + assertEqual("'\\Z'", "\u001A", parser) // ASCII 26 - CTRL + Z (EOF on windows) + + // Octals + assertEqual("'\\110\\145\\154\\154\\157\\041'", "Hello!", parser) + + // Unicode + assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", "World :)", + parser) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index bec6e6bb629f..8eb381b91f46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1170,7 +1170,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext { checkDataset(ds, WithMapInOption(Some(Map(1 -> 1)))) } - test("do not unescaped regex pattern string") { + 
test("SPARK-20399: do not unescaped regex pattern when ESCAPED_STRING_LITERALS is enabled") { withSQLConf(SQLConf.ESCAPED_STRING_LITERALS.key -> "true") { val data = Seq("\u0020\u0021\u0023", "abc") val df = data.toDF() From 3241b88c37478652c78b1d8d4809385b47410c51 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 10 May 2017 06:23:26 +0000 Subject: [PATCH 06/11] Fix config doc. --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 63a638cee05c..18ce61a4488e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -198,7 +198,7 @@ object SQLConf { val ESCAPED_STRING_LITERALS = buildConf("spark.sql.parser.escapedStringLiterals") .internal() - .doc("When true, string literals (including regex patterns) remains escaped in our SQL " + + .doc("When true, string literals (including regex patterns) remain escaped in our SQL " + "parser. The default is false since Spark 2.0. Setting it to true can restore the behavior " + "prior to Spark 2.0.") .booleanConf From c81f030902d3a65298141ec6296d5d564fd68a66 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 10 May 2017 07:12:14 +0000 Subject: [PATCH 07/11] Update RLike function description. 
--- .../expressions/regexpExpressions.scala | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 3fa84589e3c6..4571ef0f3f73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -144,7 +144,31 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi } @ExpressionDescription( - usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.") + usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.", + extended = """ + Arguments: + str - a string expression + regexp - a string expression. The pattern string should be a Java regular expression. + + Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. + For example, if the `str` parameter is "abc\td", the `regexp` can match it is: + "^abc\\\\td$". + + Examples: + > SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*' + true + + There is a SQL config 'spark.sql.parser.escapedStringLiterals' can be used to fallback + to Spark 1.6 behavior regarding string literal parsing. For example, if the config is + enabled, the `regexp` can match "abc\td" is "^abc\\t$". + + Examples (spark.sql.parser.escapedStringLiterals is enabled): + > SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\Users.*' + true + + See also: + Use LIKE to match with simple string pattern. 
+""") case class RLike(left: Expression, right: Expression) extends StringRegexExpression { override def escape(v: String): String = v From e854b104e61681724b4e90fc2e480ccebfd75ae4 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 10 May 2017 12:53:12 +0000 Subject: [PATCH 08/11] Also update doc for Like expression. --- .../spark/sql/catalyst/expressions/regexpExpressions.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 4571ef0f3f73..75bce0c500d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -86,6 +86,13 @@ abstract class StringRegexExpression extends BinaryExpression escape character, the following character is matched literally. It is invalid to escape any other character. + Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order + to match a Tab character "\t", the pattern should be "\\t". + + When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it fallbacks + to Spark 1.6 behavior regarding string literal parsing. For example, if the config is + enabled, the pattern to match a Tab character should be "\t". + Examples: > SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%' true From d8cd670aaf0050c8ebe96302709eadef671c615b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 11 May 2017 05:44:39 +0000 Subject: [PATCH 09/11] Fix doc. 
--- .../catalyst/expressions/regexpExpressions.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 75bce0c500d5..ffab073268c2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -158,18 +158,18 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi regexp - a string expression. The pattern string should be a Java regular expression. Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. - For example, if the `str` parameter is "abc\td", the `regexp` can match it is: - "^abc\\\\td$". + For example, if to match "abc\td", a regular expression for `regexp` can be "^abc\\\\td$". + + There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback + to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is + enabled, the `regexp` that can match "abc\td" is "^abc\\t$". Examples: + When spark.sql.parser.escapedStringLiterals is disabled (default). > SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*' true - There is a SQL config 'spark.sql.parser.escapedStringLiterals' can be used to fallback - to Spark 1.6 behavior regarding string literal parsing. For example, if the config is - enabled, the `regexp` can match "abc\td" is "^abc\\t$". - - Examples (spark.sql.parser.escapedStringLiterals is enabled): + When spark.sql.parser.escapedStringLiterals is enabled. 
> SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\Users.*' true From 8ecb2eacaa2bec7837dd68e8dd0563a15204c3d4 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 11 May 2017 14:47:17 +0000 Subject: [PATCH 10/11] Change java string literal to SQL shell string. --- .../sql/catalyst/expressions/regexpExpressions.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index ffab073268c2..c40e3c6e386c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -87,11 +87,11 @@ abstract class StringRegexExpression extends BinaryExpression any other character. Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order - to match a Tab character "\t", the pattern should be "\\t". + to match "\abc", the pattern should be "\\abc". When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it fallbacks to Spark 1.6 behavior regarding string literal parsing. For example, if the config is - enabled, the pattern to match a Tab character should be "\t". + enabled, the pattern to match "\abc" should be "\abc". Examples: > SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%' @@ -158,11 +158,11 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi regexp - a string expression. The pattern string should be a Java regular expression. Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. - For example, if to match "abc\td", a regular expression for `regexp` can be "^abc\\\\td$". + For example, if to match "\abc", a regular expression for `regexp` can be "^\\abc$". 
There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is - enabled, the `regexp` that can match "abc\td" is "^abc\\t$". + enabled, the `regexp` that can match "\abc" is "^\abc$". Examples: When spark.sql.parser.escapedStringLiterals is disabled (default). From 375eb9cd747cc75d2f51da1dabe824dbbce52790 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 11 May 2017 14:54:00 +0000 Subject: [PATCH 11/11] Fix doc. --- .../spark/sql/catalyst/expressions/regexpExpressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index c40e3c6e386c..aa5a1b5448c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -158,7 +158,7 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi regexp - a string expression. The pattern string should be a Java regular expression. Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. - For example, if to match "\abc", a regular expression for `regexp` can be "^\\abc$". + For example, to match "\abc", a regular expression for `regexp` can be "^\\abc$". There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is