From 512c114ec6198e67cfb168e35906a53f6b0b425c Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 19 Sep 2016 14:15:49 -0700 Subject: [PATCH 1/7] Add regression test for SPARK-17160 --- .../sql/catalyst/expressions/CodeGenerationSuite.scala | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 0532cf51136d..59cc93a7e813 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow +import org.apache.spark.sql.catalyst.expressions.objects.{CreateExternalRow, GetExternalRowField, ValidateExternalType} import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -265,4 +265,11 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { Literal.create("\\\\u001/Compilation error occurs", StringType) :: Nil) } + + test("SPARK-17160: field names are properly escaped by GetExternalRowField") { + val inputObject = BoundReference(0, ObjectType(classOf[Row]), nullable = true) + GenerateUnsafeProjection.generate( + ValidateExternalType( + GetExternalRowField(inputObject, index = 0, fieldName = "\"quote"), IntegerType) :: Nil) + } } From 39eeed4a33e935b30662bdd6f8b366b88f270db9 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 19 Sep 2016 14:16:08 -0700 Subject: [PATCH 2/7] Fix SPARK-17160 via additional escaping. --- .../spark/sql/catalyst/expressions/objects/objects.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 4da74a0a272d..7a0a170dee1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -22,6 +22,8 @@ import java.lang.reflect.Modifier import scala.language.existentials import scala.reflect.ClassTag +import org.apache.commons.lang3.StringEscapeUtils + import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.serializer._ import org.apache.spark.sql.Row @@ -948,7 +950,8 @@ case class GetExternalRowField( } if (${row.value}.isNullAt($index)) { - throw new RuntimeException("The ${index}th field '$fieldName' of input row " + + throw new RuntimeException( + "The ${index}th field '${StringEscapeUtils.escapeJava(fieldName)}' of input row " + "cannot be null."); } From 1430068f7b1f361a09fc0f18fd7e996f797f7c47 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 19 Sep 2016 15:05:20 -0700 Subject: [PATCH 3/7] Add regression test for similar bug in AssertTrue --- .../spark/sql/catalyst/expressions/CodeGenerationSuite.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 59cc93a7e813..45dcfcaf2313 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -272,4 +272,8 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { ValidateExternalType( GetExternalRowField(inputObject, index = 0, fieldName = "\"quote"), IntegerType) :: Nil) } + + test("SPARK-17160: field names are properly escaped by AssertTrue") { + GenerateUnsafeProjection.generate(AssertTrue(Cast(Literal("\""), BooleanType)) :: Nil) + } } From 39e2e026de5b913f2d50869f68ff3d8fe80a8367 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 19 Sep 2016 15:14:14 -0700 Subject: [PATCH 4/7] Fix using codegen context references. --- .../org/apache/spark/sql/catalyst/expressions/misc.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 369207587d86..b544b4492f8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -501,10 +501,12 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa override def prettyName: String = "assert_true" + private val errMsg = s"'${child.simpleString}' is not true!" + override def eval(input: InternalRow) : Any = { val v = child.eval(input) if (v == null || java.lang.Boolean.FALSE.equals(v)) { - throw new RuntimeException(s"'${child.simpleString}' is not true!") + throw new RuntimeException(errMsg) } else { null } @@ -512,9 +514,10 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) + val errMsgField = ctx.addReferenceObj("errMsg", errMsg) ExprCode(code = s"""${eval.code} |if (${eval.isNull} || !${eval.value}) { - | throw new RuntimeException("'${child.simpleString}' is not true."); + | throw new RuntimeException($errMsgField); |}""".stripMargin, isNull = "true", value = "null") } From 453470527d065ad6895cd728bfbc0eec6928bf36 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 19 Sep 2016 15:18:47 -0700 Subject: [PATCH 5/7] Similar fix in PrintToStderr --- .../org/apache/spark/sql/catalyst/expressions/misc.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index b544b4492f8c..92f8fb85fc0e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -477,10 +477,13 @@ case class PrintToStderr(child: Expression) extends UnaryExpression { protected override def nullSafeEval(input: Any): Any = input + private val outputPrefix = s"Result of ${child.simpleString} is " + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val outputPrefixField = ctx.addReferenceObj("outputPrefix", outputPrefix) nullSafeCodeGen(ctx, ev, c => s""" - | System.err.println("Result of ${child.simpleString} is " + $c); + | System.err.println($outputPrefixField + $c); | ${ev.value} = $c; """.stripMargin) } From 17a9fced0409d18df371256b6dabe0493e501d85 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 19 Sep 2016 15:22:01 -0700 Subject: [PATCH 6/7] Use same fix for GetExternalRowField --- .../spark/sql/catalyst/expressions/objects/objects.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 7a0a170dee1c..37d24dfca5a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -22,8 +22,6 @@ import java.lang.reflect.Modifier import scala.language.existentials import scala.reflect.ClassTag -import org.apache.commons.lang3.StringEscapeUtils - import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.serializer._ import org.apache.spark.sql.Row @@ -940,7 +938,10 @@ case class GetExternalRowField( override def eval(input: InternalRow): Any = throw new UnsupportedOperationException("Only code-generated evaluation is supported") + private val errMsg = s"The ${index}th field '$fieldName' of input row cannot be null." + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val errMsgField = ctx.addReferenceObj("errMsg", errMsg) val row = child.genCode(ctx) val code = s""" ${row.code} @@ -950,9 +951,7 @@ case class GetExternalRowField( } if (${row.value}.isNullAt($index)) { - throw new RuntimeException( - "The ${index}th field '${StringEscapeUtils.escapeJava(fieldName)}' of input row " + - "cannot be null."); + throw new RuntimeException($errMsgField); } final Object ${ev.value} = ${row.value}.get($index); From 35b62d590e8995aa08b8c494d104ba6ff5f6e801 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 19 Sep 2016 15:26:24 -0700 Subject: [PATCH 7/7] Similar change in ValidateExternalType --- .../spark/sql/catalyst/expressions/objects/objects.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 37d24dfca5a4..faf8fecd79f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -976,7 +976,10 @@ case class ValidateExternalType(child: Expression, expected: DataType) override def eval(input: InternalRow): Any = throw new UnsupportedOperationException("Only code-generated evaluation is supported") + private val errMsg = s" is not a valid external type for schema of ${expected.simpleString}" + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val errMsgField = ctx.addReferenceObj("errMsg", errMsg) val input = child.genCode(ctx) val obj = input.value @@ -997,8 +1000,7 @@ case class ValidateExternalType(child: Expression, expected: DataType) if ($typeCheck) { ${ev.value} = (${ctx.boxedType(dataType)}) $obj; } else { - throw new RuntimeException($obj.getClass().getName() + " is not a valid " + - "external type for schema of ${expected.simpleString}"); + throw new RuntimeException($obj.getClass().getName() + $errMsgField); } }