diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java
index 6a52a5b0e066..a1385ae2eb1c 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java
@@ -31,7 +31,7 @@
  * for each incoming record, we should call `reset` of BufferHolder instance before write the record
  * and reuse the data buffer.
  */
-final class BufferHolder {
+public final class BufferHolder {
 
   private static final int ARRAY_MAX = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH;
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 100b9cfd70f5..b16da75f1d49 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -897,7 +897,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
     case StringType =>
       val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]])
       (c, evPrim, evNull) =>
         code"""
-          scala.Option $intOpt = 
+          scala.Option $intOpt =
            org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c);
           if ($intOpt.isDefined()) {
             $evPrim = ((Integer) $intOpt.get()).intValue();
@@ -991,7 +991,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
       val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]])
       (c, evPrim, evNull) =>
         code"""
-          scala.Option $longOpt = 
+          scala.Option $longOpt =
            org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $tz);
           if ($longOpt.isDefined()) {
             $evPrim = ((Long) $longOpt.get()).longValue();
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 536276b5cb29..e52ce130b154 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -188,9 +188,9 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val childCode = child.child.genCode(ctx)
     val input = childCode.value
-    val BinaryPrefixCmp = classOf[BinaryPrefixComparator].getName
-    val DoublePrefixCmp = classOf[DoublePrefixComparator].getName
-    val StringPrefixCmp = classOf[StringPrefixComparator].getName
+    val BinaryPrefixCmp = classOf[BinaryPrefixComparator].getCanonicalName
+    val DoublePrefixCmp = classOf[DoublePrefixComparator].getCanonicalName
+    val StringPrefixCmp = classOf[StringPrefixComparator].getCanonicalName
     val prefixCode = child.child.dataType match {
       case BooleanType => s"$input ? 1L : 0L"
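
For context (a standalone sketch, not part of the patch): the prefix comparators above are nested classes, so their binary and canonical names differ. Janino accepts the `$`-separated binary name in generated Java source, but javac resolves nested types only through the dotted canonical name, which is what motivates the `getCanonicalName` switch. The same distinction can be seen with a JDK nested type:

    object NestedNameDemo {
      def main(args: Array[String]): Unit = {
        val entryCls = classOf[java.util.Map.Entry[_, _]]
        // Binary name: uses `$` between outer and nested class. Janino
        // accepts this form in casts/imports; javac does not.
        println(entryCls.getName)           // java.util.Map$Entry
        // Canonical name: uses `.`, valid Java source for both compilers.
        println(entryCls.getCanonicalName)  // java.util.Map.Entry
      }
    }
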
1L : 0L" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 2c56456cd4da..41bd3de0ebf0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -133,7 +133,7 @@ class CodegenContext { def addReferenceObj(objName: String, obj: Any, className: String = null): String = { val idx = references.length references += obj - val clsName = Option(className).getOrElse(obj.getClass.getName) + val clsName = Option(className).getOrElse(getClassName(obj.getClass)) s"(($clsName) references[$idx] /* $objName */)" } @@ -1604,6 +1604,32 @@ object CodeGenerator extends Logging { def primitiveTypeName(dt: DataType): String = primitiveTypeName(javaType(dt)) + def getClassName(cls: Class[_]): String = { + val getNameValue = cls.getName + // `getName` and `getCanonicalName` return different names for inner classes, e.g., + // + // scala> classOf[PrefixComparators.BinaryPrefixComparator].getName + // o.a.s.util.collection.unsafe.sort.PrefixComparators$BinaryPrefixComparator + // scala> classOf[PrefixComparators.BinaryPrefixComparator].getCanonicalName + // o.a.s.util.collection.unsafe.sort.PrefixComparators.BinaryPrefixComparator + // + // Janino can handle both forms for casts and imports though, JDK Java compilers cannot handle + // the former one. So, this method basically uses `getCanonicalName`. If `getCanonicalName` + // returns null or throws an exception (See SPARK-24216), it uses `getName`. + // + // Note that there is only one exception; Janino cannot handle the `getCanonicalName` form + // for package objects, so we need to use `getName` for the case. + if (getNameValue.matches(""".*\.package\$.*""")) { + getNameValue + } else { + try { + Option(cls.getCanonicalName).getOrElse(getNameValue) + } catch { + case err: InternalError => cls.getName + } + } + } + /** * Returns the Java type for a DataType. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index cf9796ef1948..46b64520ce2a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -2826,7 +2826,7 @@ case class Sequence(
       val arr = ctx.freshName("arr")
       val arrElemType = CodeGenerator.javaType(dataType.elementType)
       s"""
-        |final $arrElemType[] $arr = null;
+        |$arrElemType[] $arr = null;
         |${impl.genCode(ctx, startGen.value, stopGen.value, stepGen.value, arr, arrElemType)}
         |${ev.value} = UnsafeArrayData.fromPrimitiveArray($arr);
       """.stripMargin
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index f95798d64db1..1a45e0b5fbe2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1048,7 +1048,7 @@ case class StringToTimestampWithoutTimezone(child: Expression, timeZoneId: Optio
       |${CodeGenerator.JAVA_BOOLEAN} ${ev.isNull} = true;
       |${CodeGenerator.JAVA_LONG} ${ev.value} = ${CodeGenerator.defaultValue(TimestampType)};
       |if (!${eval.isNull}) {
-      |  scala.Option $longOpt = $dtu.stringToTimestamp(${eval.value}, $tz, true); 
+      |  scala.Option $longOpt = $dtu.stringToTimestamp(${eval.value}, $tz, true);
       |  if ($longOpt.isDefined()) {
       |    ${ev.value} = ((Long) $longOpt.get()).longValue();
       |    ${ev.isNull} = false;
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 3189e6841a52..043d1df84e19 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -36,7 +36,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData}
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
 import org.apache.spark.util.Utils
 
 /**
@@ -219,8 +218,8 @@ case class StaticInvoke(
     propagateNull: Boolean = true,
     returnNullable: Boolean = true) extends InvokeLike {
 
-  val objectName = staticObject.getName.stripSuffix("$")
-  val cls = if (staticObject.getName == objectName) {
+  val objectName = staticObject.getCanonicalName.stripSuffix("$")
+  val cls = if (staticObject.getCanonicalName == objectName) {
     staticObject
   } else {
     Utils.classForName(objectName)
@@ -434,7 +433,7 @@ case class NewInstance(
     propagateNull: Boolean,
     dataType: DataType,
     outerPointer: Option[() => AnyRef]) extends InvokeLike {
-  private val className = cls.getName
+  private val className = CodeGenerator.getClassName(cls)
 
   override def nullable: Boolean = needNullCheck
 
@@ -1335,7 +1334,7 @@ case class ExternalMapToCatalyst private(
     val (defineEntries, defineKeyValue) = child.dataType match {
       case ObjectType(cls) if classOf[java.util.Map[_, _]].isAssignableFrom(cls) =>
         val javaIteratorCls = classOf[java.util.Iterator[_]].getName
-        val javaMapEntryCls = classOf[java.util.Map.Entry[_, _]].getName
+        val javaMapEntryCls = classOf[java.util.Map.Entry[_, _]].getCanonicalName
 
         val defineEntries =
           s"final $javaIteratorCls $entries = ${inputMap.value}.entrySet().iterator();"
 
@@ -1585,6 +1584,9 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp
   }
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    // Resolves setters before compilation
+    require(setters.isEmpty || resolvedSetters.nonEmpty)
+
     val instanceGen = beanInstance.genCode(ctx)
     val javaBeanInstance = ctx.freshName("javaBean")
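
For context (a standalone sketch, not part of the patch): `StaticInvoke` above receives a Scala object's module class, whose name ends in `$`, while the generated Java source must call the static forwarders on the `$`-less class; that is what `stripSuffix("$")` recovers. Switching to `getCanonicalName` additionally keeps the stripped name javac-resolvable when the object is nested:

    object StaticInvokeNameDemo {
      def main(args: Array[String]): Unit = {
        val moduleCls = org.apache.spark.sql.catalyst.util.DateTimeUtils.getClass
        // The module class itself: ...catalyst.util.DateTimeUtils$
        println(moduleCls.getCanonicalName)
        // The class whose static forwarders the generated Java code invokes:
        // ...catalyst.util.DateTimeUtils
        println(moduleCls.getCanonicalName.stripSuffix("$"))
      }
    }
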
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index 9434ceb7cd16..f7c5f67dbf74 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -318,7 +318,8 @@ case class SampleExec(
         v => s"""
           | $v = new $samplerClass($lowerBound, $upperBound, false);
           | $v.setSeed(${seed}L + partitionIndex);
-         """.stripMargin.trim)
+         """.stripMargin.trim,
+        forceInline = true)
 
       s"""
         | if ($sampler.sample() != 0) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
index 392ca13724bc..eeb731fbc38b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
@@ -79,7 +79,7 @@ case class LocalLimitExec(limit: Int, child: SparkPlan) extends UnaryExecNode wi
     ctx.addNewFunction("stopEarly", s"""
       @Override
-      protected boolean stopEarly() {
+      public boolean stopEarly() {
        return $stopEarly;
      }
     """, inlineToOuterClass = true)
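
For context (a sketch assuming the patch above is applied, not part of it): `CodeGenerator.getClassName` can be sanity-checked against plain JDK types, where the expected names are unambiguous:

    object GetClassNameCheck {
      def main(args: Array[String]): Unit = {
        import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
        // Nested type: the dotted canonical form is returned, which javac accepts.
        assert(CodeGenerator.getClassName(classOf[java.util.Map.Entry[_, _]]) == "java.util.Map.Entry")
        // Top-level class: binary and canonical names coincide.
        assert(CodeGenerator.getClassName(classOf[String]) == "java.lang.String")
      }
    }
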