From a53803d2f4c32173579050f1f7eea25d6b5a5ec0 Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Mon, 18 Jun 2018 21:33:43 +0900
Subject: [PATCH 1/4] Fix

---
 .../catalyst/expressions/codegen/BufferHolder.java   |  2 +-
 .../apache/spark/sql/catalyst/expressions/Cast.scala |  7 ++++++-
 .../spark/sql/catalyst/expressions/SortOrder.scala   |  6 +++---
 .../catalyst/expressions/codegen/CodeGenerator.scala |  4 ++--
 .../catalyst/expressions/collectionOperations.scala  |  4 ++--
 .../catalyst/expressions/datetimeExpressions.scala   |  2 +-
 .../sql/catalyst/expressions/objects/objects.scala   | 11 +++++++----
 .../spark/sql/execution/basicPhysicalOperators.scala |  3 ++-
 .../scala/org/apache/spark/sql/execution/limit.scala |  2 +-
 9 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java
index 6a52a5b0e066..a1385ae2eb1c 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java
@@ -31,7 +31,7 @@
  * for each incoming record, we should call `reset` of BufferHolder instance before write the record
  * and reuse the data buffer.
  */
-final class BufferHolder {
+public final class BufferHolder {
 
   private static final int ARRAY_MAX = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH;
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 100b9cfd70f5..a119a3d7a964 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -897,7 +897,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
     case StringType =>
       val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]])
       (c, evPrim, evNull) =>
         code"""
-          scala.Option $intOpt = 
+          scala.Option $intOpt =
             org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c);
           if ($intOpt.isDefined()) {
             $evPrim = ((Integer) $intOpt.get()).intValue();
@@ -990,8 +990,13 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
       val tz = JavaCode.global(ctx.addReferenceObj("timeZone", timeZone), timeZone.getClass)
       val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]])
       (c, evPrim, evNull) =>
+<<<<<<< HEAD
         code"""
           scala.Option $longOpt =
+=======
+        s"""
+          scala.Option $longOpt = 
+>>>>>>> Fix
             org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $tz);
           if ($longOpt.isDefined()) {
             $evPrim = ((Long) $longOpt.get()).longValue();
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 536276b5cb29..e52ce130b154 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -188,9 +188,9 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val childCode = child.child.genCode(ctx)
     val input = childCode.value
-    val BinaryPrefixCmp = classOf[BinaryPrefixComparator].getName
-    val DoublePrefixCmp = classOf[DoublePrefixComparator].getName
-    val StringPrefixCmp = classOf[StringPrefixComparator].getName
+    val BinaryPrefixCmp = classOf[BinaryPrefixComparator].getCanonicalName
+    val DoublePrefixCmp = classOf[DoublePrefixComparator].getCanonicalName
+    val StringPrefixCmp = classOf[StringPrefixComparator].getCanonicalName
 
     val prefixCode = child.child.dataType match {
       case BooleanType => s"$input ? 1L : 0L"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 2c56456cd4da..ce59a92112f7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -133,7 +133,7 @@ class CodegenContext {
   def addReferenceObj(objName: String, obj: Any, className: String = null): String = {
     val idx = references.length
     references += obj
-    val clsName = Option(className).getOrElse(obj.getClass.getName)
+    val clsName = Option(className).getOrElse(obj.getClass.getCanonicalName)
     s"(($clsName) references[$idx] /* $objName */)"
   }
 
@@ -1624,7 +1624,7 @@ object CodeGenerator extends Logging {
     case _: MapType => "MapData"
     case udt: UserDefinedType[_] => javaType(udt.sqlType)
     case ObjectType(cls) if cls.isArray => s"${javaType(ObjectType(cls.getComponentType))}[]"
-    case ObjectType(cls) => cls.getName
+    case ObjectType(cls) => Option(cls.getCanonicalName).getOrElse(cls.getName)
     case _ => "Object"
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index cf9796ef1948..a47246c319ba 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -1619,7 +1619,7 @@ case class ArraysOverlap(left: Expression, right: Expression)
     val getFromSmaller = CodeGenerator.getValue(smaller, elementType, i)
     val getFromBigger = CodeGenerator.getValue(bigger, elementType, i)
     val javaElementClass = CodeGenerator.boxedType(elementType)
-    val javaSet = classOf[java.util.HashSet[_]].getName
+    val javaSet = classOf[java.util.HashSet[_]].getCanonicalName
     val set = ctx.freshName("set")
     val addToSetFromSmallerCode = nullSafeElementCodegen(
       smaller, i, s"$set.add($getFromSmaller);", s"${ev.isNull} = true;")
@@ -2826,7 +2826,7 @@ case class Sequence(
       val arr = ctx.freshName("arr")
       val arrElemType = CodeGenerator.javaType(dataType.elementType)
       s"""
-        |final $arrElemType[] $arr = null;
+        |$arrElemType[] $arr = null;
         |${impl.genCode(ctx, startGen.value, stopGen.value, stepGen.value, arr, arrElemType)}
         |${ev.value} = UnsafeArrayData.fromPrimitiveArray($arr);
       """.stripMargin
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index f95798d64db1..1a45e0b5fbe2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1048,7 +1048,7 @@ case class StringToTimestampWithoutTimezone(child: Expression, timeZoneId: Optio
       |${CodeGenerator.JAVA_BOOLEAN} ${ev.isNull} = true;
       |${CodeGenerator.JAVA_LONG} ${ev.value} = ${CodeGenerator.defaultValue(TimestampType)};
       |if (!${eval.isNull}) {
-      |  scala.Option $longOpt = $dtu.stringToTimestamp(${eval.value}, $tz, true); 
+      |  scala.Option $longOpt = $dtu.stringToTimestamp(${eval.value}, $tz, true);
       |  if ($longOpt.isDefined()) {
       |    ${ev.value} = ((Long) $longOpt.get()).longValue();
       |    ${ev.isNull} = false;
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 3189e6841a52..ee9061894b9a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -219,8 +219,8 @@ case class StaticInvoke(
     propagateNull: Boolean = true,
     returnNullable: Boolean = true) extends InvokeLike {
 
-  val objectName = staticObject.getName.stripSuffix("$")
-  val cls = if (staticObject.getName == objectName) {
+  val objectName = staticObject.getCanonicalName.stripSuffix("$")
+  val cls = if (staticObject.getCanonicalName == objectName) {
     staticObject
   } else {
     Utils.classForName(objectName)
@@ -434,7 +434,7 @@ case class NewInstance(
     propagateNull: Boolean,
     dataType: DataType,
     outerPointer: Option[() => AnyRef]) extends InvokeLike {
-  private val className = cls.getName
+  private val className = cls.getCanonicalName
 
   override def nullable: Boolean = needNullCheck
 
@@ -1335,7 +1335,7 @@ case class ExternalMapToCatalyst private(
     val (defineEntries, defineKeyValue) = child.dataType match {
      case ObjectType(cls) if classOf[java.util.Map[_, _]].isAssignableFrom(cls) =>
         val javaIteratorCls = classOf[java.util.Iterator[_]].getName
-        val javaMapEntryCls = classOf[java.util.Map.Entry[_, _]].getName
+        val javaMapEntryCls = classOf[java.util.Map.Entry[_, _]].getCanonicalName
 
         val defineEntries =
           s"final $javaIteratorCls $entries = ${inputMap.value}.entrySet().iterator();"
@@ -1585,6 +1585,9 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp
   }
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    // Resolves setters before compilation
+    require(resolvedSetters.nonEmpty)
+
     val instanceGen = beanInstance.genCode(ctx)
     val javaBeanInstance = ctx.freshName("javaBean")
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index 9434ceb7cd16..f7c5f67dbf74 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -318,7 +318,8 @@ case class SampleExec(
       v => s"""
         | $v = new $samplerClass($lowerBound, $upperBound, false);
         | $v.setSeed(${seed}L + partitionIndex);
-       """.stripMargin.trim)
+       """.stripMargin.trim,
+      forceInline = true)
 
     s"""
       | if ($sampler.sample() != 0) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
index 392ca13724bc..eeb731fbc38b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
@@ -79,7 +79,7 @@ case class LocalLimitExec(limit: Int, child: SparkPlan) extends UnaryExecNode wi
     ctx.addNewFunction("stopEarly", s"""
       @Override
-      protected boolean stopEarly() {
+      public boolean stopEarly() {
        return $stopEarly;
      }
    """, inlineToOuterClass = true)
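The getName-to-getCanonicalName changes in the patch above are the heart of this series: the two methods disagree on nested classes, and generated Java source must use the form a JDK compiler can resolve. A minimal sketch of the difference (java.util.Map.Entry stands in for any nested class; the demo object name is made up):

    object NestedClassNames {
      def main(args: Array[String]): Unit = {
        // `getName` returns the binary name, with `$` between outer and inner
        // class; Janino accepts it in casts and imports, but javac does not.
        println(classOf[java.util.Map.Entry[_, _]].getName)          // java.util.Map$Entry
        // `getCanonicalName` returns the source-level dotted form, which both
        // Janino and JDK compilers can resolve.
        println(classOf[java.util.Map.Entry[_, _]].getCanonicalName) // java.util.Map.Entry
      }
    }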
From dad96ebe071a30f960994aeea2f6cddb6ed41523 Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Mon, 16 Jul 2018 00:56:35 +0900
Subject: [PATCH 2/4] Fix

---
 .../expressions/codegen/CodeGenerator.scala        | 13 +++++++++++--
 .../sql/catalyst/expressions/objects/objects.scala |  3 +--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index ce59a92112f7..ae72d0b5d1c8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -133,7 +133,7 @@ class CodegenContext {
   def addReferenceObj(objName: String, obj: Any, className: String = null): String = {
     val idx = references.length
     references += obj
-    val clsName = Option(className).getOrElse(obj.getClass.getCanonicalName)
+    val clsName = Option(className).getOrElse(getClassName(obj.getClass))
     s"(($clsName) references[$idx] /* $objName */)"
   }
 
@@ -1604,6 +1604,15 @@ object CodeGenerator extends Logging {
 
   def primitiveTypeName(dt: DataType): String = primitiveTypeName(javaType(dt))
 
+  def getClassName(cls: Class[_]): String = {
+    try {
+      return Option(cls.getCanonicalName).getOrElse(cls.getName)
+    } catch {
+      case err: InternalError =>
+        return cls.getName
+    }
+  }
+
   /**
    * Returns the Java type for a DataType.
    */
@@ -1624,7 +1633,7 @@ object CodeGenerator extends Logging {
     case _: MapType => "MapData"
     case udt: UserDefinedType[_] => javaType(udt.sqlType)
     case ObjectType(cls) if cls.isArray => s"${javaType(ObjectType(cls.getComponentType))}[]"
-    case ObjectType(cls) => Option(cls.getCanonicalName).getOrElse(cls.getName)
+    case ObjectType(cls) => getClassName(cls)
     case _ => "Object"
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index ee9061894b9a..62d523338263 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -36,7 +36,6 @@
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData}
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
 import org.apache.spark.util.Utils
 
 /**
@@ -434,7 +433,7 @@ case class NewInstance(
     propagateNull: Boolean,
     dataType: DataType,
     outerPointer: Option[() => AnyRef]) extends InvokeLike {
-  private val className = cls.getCanonicalName
+  private val className = CodeGenerator.getClassName(cls)
 
   override def nullable: Boolean = needNullCheck
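Patch 2 funnels every lookup through a single getClassName helper because getCanonicalName is not total: it returns null for anonymous and local classes, and on some Scala/JDK combinations it can throw java.lang.InternalError("Malformed class name") (the SPARK-24216 case). A sketch of the null case that the getOrElse fallback covers (output assumed for Scala 2.12; the object name is made up):

    object CanonicalNameFallback {
      def main(args: Array[String]): Unit = {
        // An anonymous class has no canonical name, so `getCanonicalName`
        // yields null and only `getName` is usable.
        val anon = new Runnable { override def run(): Unit = () }
        println(Option(anon.getClass.getCanonicalName)) // typically None
        println(anon.getClass.getName)                  // e.g. CanonicalNameFallback$$anon$1
      }
    }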
From 074cbf9bac60e6f8d6aec3e977f109aa87de57fc Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Mon, 16 Jul 2018 11:07:37 +0900
Subject: [PATCH 3/4] Fix

---
 .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index ae72d0b5d1c8..945d6e5fc025 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1606,10 +1606,9 @@ object CodeGenerator extends Logging {
 
   def getClassName(cls: Class[_]): String = {
     try {
-      return Option(cls.getCanonicalName).getOrElse(cls.getName)
+      Option(cls.getCanonicalName).getOrElse(cls.getName)
     } catch {
-      case err: InternalError =>
-        return cls.getName
+      case err: InternalError => cls.getName
     }
   }
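Patch 3 above is purely stylistic: in Scala, try/catch is an expression whose branches yield values, so the explicit `return`s are redundant. For reference, the helper as it stands after this patch, reproduced as a standalone method:

    def getClassName(cls: Class[_]): String =
      try {
        Option(cls.getCanonicalName).getOrElse(cls.getName)
      } catch {
        case _: InternalError => cls.getName
      }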
From 5a70a7cb33c6fbdf114b39fc8f0196b8d01f8582 Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Wed, 18 Jul 2018 20:22:08 +0900
Subject: [PATCH 4/4] Fix

---
 .../spark/sql/catalyst/expressions/Cast.scala |  5 ----
 .../expressions/codegen/CodeGenerator.scala   | 26 ++++++++++++++++++++++----
 .../expressions/collectionOperations.scala    |  2 +-
 .../expressions/objects/objects.scala         |  2 +-
 4 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index a119a3d7a964..b16da75f1d49 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -990,13 +990,8 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
       val tz = JavaCode.global(ctx.addReferenceObj("timeZone", timeZone), timeZone.getClass)
       val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]])
       (c, evPrim, evNull) =>
-<<<<<<< HEAD
         code"""
-          scala.Option $longOpt =
-=======
-          s"""
           scala.Option $longOpt =
->>>>>>> Fix
             org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $tz);
           if ($longOpt.isDefined()) {
             $evPrim = ((Long) $longOpt.get()).longValue();
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 945d6e5fc025..41bd3de0ebf0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -1605,10 +1605,28 @@ object CodeGenerator extends Logging {
 
   def primitiveTypeName(dt: DataType): String = primitiveTypeName(javaType(dt))
 
   def getClassName(cls: Class[_]): String = {
-    try {
-      Option(cls.getCanonicalName).getOrElse(cls.getName)
-    } catch {
-      case err: InternalError => cls.getName
+    val getNameValue = cls.getName
+    // `getName` and `getCanonicalName` return different names for inner classes, e.g.,
+    //
+    // scala> classOf[PrefixComparators.BinaryPrefixComparator].getName
+    // o.a.s.util.collection.unsafe.sort.PrefixComparators$BinaryPrefixComparator
+    // scala> classOf[PrefixComparators.BinaryPrefixComparator].getCanonicalName
+    // o.a.s.util.collection.unsafe.sort.PrefixComparators.BinaryPrefixComparator
+    //
+    // Janino can handle both forms for casts and imports, though JDK Java compilers cannot handle
+    // the former one. So, this method basically uses `getCanonicalName`. If `getCanonicalName`
+    // returns null or throws an exception (see SPARK-24216), it uses `getName`.
+    //
+    // Note that there is only one exception; Janino cannot handle the `getCanonicalName` form
+    // for package objects, so we need to use `getName` in that case.
+    if (getNameValue.matches(""".*\.package\$.*""")) {
+      getNameValue
+    } else {
+      try {
+        Option(cls.getCanonicalName).getOrElse(getNameValue)
+      } catch {
+        case err: InternalError => cls.getName
+      }
     }
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index a47246c319ba..46b64520ce2a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -1619,7 +1619,7 @@ case class ArraysOverlap(left: Expression, right: Expression)
     val getFromSmaller = CodeGenerator.getValue(smaller, elementType, i)
     val getFromBigger = CodeGenerator.getValue(bigger, elementType, i)
     val javaElementClass = CodeGenerator.boxedType(elementType)
-    val javaSet = classOf[java.util.HashSet[_]].getCanonicalName
+    val javaSet = classOf[java.util.HashSet[_]].getName
     val set = ctx.freshName("set")
     val addToSetFromSmallerCode = nullSafeElementCodegen(
       smaller, i, s"$set.add($getFromSmaller);", s"${ev.isNull} = true;")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 62d523338263..043d1df84e19 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -1585,7 +1585,7 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     // Resolves setters before compilation
-    require(resolvedSetters.nonEmpty)
+    require(setters.isEmpty || resolvedSetters.nonEmpty)
 
     val instanceGen = beanInstance.genCode(ctx)
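The package-object guard added in the last patch deserves an illustration: a class declared inside a Scala package object is compiled as a member of a synthetic class literally named `package`, so its canonical name contains the Java keyword `package` and cannot appear in generated Java source at all, while the binary name from getName still works for Janino. A sketch, with a made-up `demo` package (naming assumed for Scala 2.12):

    package object demo {
      class InPkgObject
    }

    object PackageObjectNames {
      def main(args: Array[String]): Unit = {
        // Binary name: the `$` keeps `package` out of dotted position, and
        // this is the form matched by the patch's guard regex `.*\.package\$.*`.
        println(classOf[demo.InPkgObject].getName)          // demo.package$InPkgObject
        // Canonical name: contains `.package.`, which no Java parser accepts.
        println(classOf[demo.InPkgObject].getCanonicalName) // demo.package.InPkgObject
      }
    }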