diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 500ff447a975..de1173b1fc3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.Row +import org.apache.spark.sql.types._ /** * User-defined function. @@ -45,1017 +47,132 @@ case class ScalaUDF( override def toString: String = s"UDF(${children.mkString(", ")})" - // scalastyle:off line.size.limit + // The dataType used in output expression encoder + // The return values of UDF will be encoded in a field in an internal row + def getDataType(): StructType = StructType(StructField("_c0", dataType) :: Nil) - /** This method has been generated by this script - - (1 to 22).map { x => - val anys = (1 to x).map(x => "Any").reduce(_ + ", " + _) - val childs = (0 to x - 1).map(x => s"val child$x = children($x)").reduce(_ + "\n " + _) - val converters = (0 to x - 1).map(x => s"lazy val converter$x = CatalystTypeConverters.createToScalaConverter(child$x.dataType)").reduce(_ + "\n " + _) - val evals = (0 to x - 1).map(x => s"converter$x(child$x.eval(input))").reduce(_ + ",\n " + _) - - s"""case $x => - val func = function.asInstanceOf[($anys) => Any] - $childs - $converters - (input: InternalRow) => { - func( - $evals) - } - """ - }.foreach(println) - - */ - - // Accessors used in genCode - def userDefinedFunc(): AnyRef = function - def getChildren(): Seq[Expression] = children - - private[this] val f = children.size match { - case 0 => - val func = function.asInstanceOf[() => Any] - (input: InternalRow) => { - func() - } - - case 1 => - val func = function.asInstanceOf[(Any) => Any] - val child0 = children(0) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input))) - } - - case 2 => - val func = function.asInstanceOf[(Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input))) - } - - case 3 => - val func = function.asInstanceOf[(Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input))) - } - - case 4 => - val func = function.asInstanceOf[(Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input))) - } - - case 5 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input))) - } - - case 6 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input))) - } - - case 7 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input))) - } - - case 8 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input))) - } - - case 9 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input))) - } - - case 10 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input))) - } - - case 11 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input))) - } - - case 12 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input))) - } - - case 13 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input))) - } - - case 14 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input))) - } - - case 15 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input))) - } - - case 16 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input))) + lazy val inputSchema: StructType = { + val fields = if (inputTypes == Nil) { + // from the deprecated callUDF codepath + children.zipWithIndex.map { case (e, i) => + StructField(s"_c$i", e.dataType) } - - case 17 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input))) - } - - case 18 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input))) - } - - case 19 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - val child18 = children(18) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - lazy val converter18 = CatalystTypeConverters.createToScalaConverter(child18.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input)), - converter18(child18.eval(input))) + } else { + inputTypes.zipWithIndex.map { case (t, i) => + StructField(s"_c$i", t) } - - case 20 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - val child18 = children(18) - val child19 = children(19) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - lazy val converter18 = CatalystTypeConverters.createToScalaConverter(child18.dataType) - lazy val converter19 = CatalystTypeConverters.createToScalaConverter(child19.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input)), - converter18(child18.eval(input)), - converter19(child19.eval(input))) - } - - case 21 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - val child18 = children(18) - val child19 = children(19) - val child20 = children(20) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - lazy val converter18 = CatalystTypeConverters.createToScalaConverter(child18.dataType) - lazy val converter19 = CatalystTypeConverters.createToScalaConverter(child19.dataType) - lazy val converter20 = CatalystTypeConverters.createToScalaConverter(child20.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input)), - converter18(child18.eval(input)), - converter19(child19.eval(input)), - converter20(child20.eval(input))) - } - - case 22 => - val func = function.asInstanceOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any] - val child0 = children(0) - val child1 = children(1) - val child2 = children(2) - val child3 = children(3) - val child4 = children(4) - val child5 = children(5) - val child6 = children(6) - val child7 = children(7) - val child8 = children(8) - val child9 = children(9) - val child10 = children(10) - val child11 = children(11) - val child12 = children(12) - val child13 = children(13) - val child14 = children(14) - val child15 = children(15) - val child16 = children(16) - val child17 = children(17) - val child18 = children(18) - val child19 = children(19) - val child20 = children(20) - val child21 = children(21) - lazy val converter0 = CatalystTypeConverters.createToScalaConverter(child0.dataType) - lazy val converter1 = CatalystTypeConverters.createToScalaConverter(child1.dataType) - lazy val converter2 = CatalystTypeConverters.createToScalaConverter(child2.dataType) - lazy val converter3 = CatalystTypeConverters.createToScalaConverter(child3.dataType) - lazy val converter4 = CatalystTypeConverters.createToScalaConverter(child4.dataType) - lazy val converter5 = CatalystTypeConverters.createToScalaConverter(child5.dataType) - lazy val converter6 = CatalystTypeConverters.createToScalaConverter(child6.dataType) - lazy val converter7 = CatalystTypeConverters.createToScalaConverter(child7.dataType) - lazy val converter8 = CatalystTypeConverters.createToScalaConverter(child8.dataType) - lazy val converter9 = CatalystTypeConverters.createToScalaConverter(child9.dataType) - lazy val converter10 = CatalystTypeConverters.createToScalaConverter(child10.dataType) - lazy val converter11 = CatalystTypeConverters.createToScalaConverter(child11.dataType) - lazy val converter12 = CatalystTypeConverters.createToScalaConverter(child12.dataType) - lazy val converter13 = CatalystTypeConverters.createToScalaConverter(child13.dataType) - lazy val converter14 = CatalystTypeConverters.createToScalaConverter(child14.dataType) - lazy val converter15 = CatalystTypeConverters.createToScalaConverter(child15.dataType) - lazy val converter16 = CatalystTypeConverters.createToScalaConverter(child16.dataType) - lazy val converter17 = CatalystTypeConverters.createToScalaConverter(child17.dataType) - lazy val converter18 = CatalystTypeConverters.createToScalaConverter(child18.dataType) - lazy val converter19 = CatalystTypeConverters.createToScalaConverter(child19.dataType) - lazy val converter20 = CatalystTypeConverters.createToScalaConverter(child20.dataType) - lazy val converter21 = CatalystTypeConverters.createToScalaConverter(child21.dataType) - (input: InternalRow) => { - func( - converter0(child0.eval(input)), - converter1(child1.eval(input)), - converter2(child2.eval(input)), - converter3(child3.eval(input)), - converter4(child4.eval(input)), - converter5(child5.eval(input)), - converter6(child6.eval(input)), - converter7(child7.eval(input)), - converter8(child8.eval(input)), - converter9(child9.eval(input)), - converter10(child10.eval(input)), - converter11(child11.eval(input)), - converter12(child12.eval(input)), - converter13(child13.eval(input)), - converter14(child14.eval(input)), - converter15(child15.eval(input)), - converter16(child16.eval(input)), - converter17(child17.eval(input)), - converter18(child18.eval(input)), - converter19(child19.eval(input)), - converter20(child20.eval(input)), - converter21(child21.eval(input))) - } - } - - // scalastyle:on line.size.limit - - // Generate codes used to convert the arguments to Scala type for user-defined functions - private[this] def genCodeForConverter(ctx: CodegenContext, index: Int): String = { - val converterClassName = classOf[Any => Any].getName - val typeConvertersClassName = CatalystTypeConverters.getClass.getName + ".MODULE$" - val expressionClassName = classOf[Expression].getName - val scalaUDFClassName = classOf[ScalaUDF].getName - - val converterTerm = ctx.freshName("converter") - val expressionIdx = ctx.references.size - 1 - ctx.addMutableState(converterClassName, converterTerm, - s"this.$converterTerm = ($converterClassName)$typeConvertersClassName" + - s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" + - s"references[$expressionIdx]).getChildren().apply($index))).dataType());") - converterTerm + } + StructType(fields) } override def genCode( ctx: CodegenContext, ev: ExprCode): String = { - ctx.references += this - val scalaUDFClassName = classOf[ScalaUDF].getName + val scalaUDFObject = ctx.addReferenceObj("scalaUDF", this, scalaUDFClassName) + val converterClassName = classOf[Any => Any].getName val typeConvertersClassName = CatalystTypeConverters.getClass.getName + ".MODULE$" val expressionClassName = classOf[Expression].getName - - // Generate codes used to convert the returned value of user-defined functions to Catalyst type - val catalystConverterTerm = ctx.freshName("catalystConverter") - val catalystConverterTermIdx = ctx.references.size - 1 - ctx.addMutableState(converterClassName, catalystConverterTerm, - s"this.$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + - s".createToCatalystConverter((($scalaUDFClassName)references" + - s"[$catalystConverterTermIdx]).dataType());") + val expressionEncoderClassName = classOf[ExpressionEncoder[Row]].getName + val rowEncoderClassName = RowEncoder.getClass.getName + ".MODULE$" + val structTypeClassName = StructType.getClass.getName + ".MODULE$" + val rowClassName = Row.getClass.getName + ".MODULE$" + val rowClass = classOf[Row].getName + val internalRowClassName = classOf[InternalRow].getName + // scalastyle:off + // JavaConversions has been banned for implicit conversion between Java and Scala types. + // However, we are not going to use it in Scala side but use it in generated Java codes. + // JavaConverters doesn't provide simple and direct method to call for the purpose here. + // So we turn off scalastyle here temporarily. + val javaConversionClassName = scala.collection.JavaConversions.getClass.getName + ".MODULE$" + // scalastyle:on + + // Generate code for input encoder + val inputExpressionEncoderTerm = ctx.freshName("inputExpressionEncoder") + ctx.addMutableState(expressionEncoderClassName, inputExpressionEncoderTerm, + s"this.$inputExpressionEncoderTerm = ($expressionEncoderClassName)$rowEncoderClassName" + + s".apply($scalaUDFObject.inputSchema());") + + // Generate code for output encoder + val outputExpressionEncoderTerm = ctx.freshName("outputExpressionEncoder") + ctx.addMutableState(expressionEncoderClassName, outputExpressionEncoderTerm, + s"this.$outputExpressionEncoderTerm = ($expressionEncoderClassName)$rowEncoderClassName" + + s".apply($scalaUDFObject.getDataType());") val resultTerm = ctx.freshName("result") - // This must be called before children expressions' codegen - // because ctx.references is used in genCodeForConverter - val converterTerms = children.indices.map(genCodeForConverter(ctx, _)) - // Initialize user-defined function val funcClassName = s"scala.Function${children.size}" val funcTerm = ctx.freshName("udf") - val funcExpressionIdx = ctx.references.size - 1 ctx.addMutableState(funcClassName, funcTerm, - s"this.$funcTerm = ($funcClassName)((($scalaUDFClassName)references" + - s"[$funcExpressionIdx]).userDefinedFunc());") + s"this.$funcTerm = ($funcClassName)($scalaUDFObject.function());") // codegen for children expressions val evals = children.map(_.gen(ctx)) + val (converters, evalsArgs) = evals.zipWithIndex.map { case (eval, i) => + val argTerm = ctx.freshName("arg") + val convert = if (ctx.isPrimitiveType(children(i).dataType)) { + s"${ctx.boxedType(children(i).dataType)} $argTerm = ${eval.isNull} ? null " + + s": new ${ctx.boxedType(children(i).dataType)}(${eval.value});" + } else { + s"${ctx.boxedType(children(i).dataType)} $argTerm = ${eval.isNull} ? null " + + s": (${ctx.boxedType(children(i).dataType)}) ${eval.value};" + } + (convert, argTerm) + }.unzip + val evalsAsSeq = s"$javaConversionClassName.collectionAsScalaIterable" + + s"(java.util.Arrays.asList(${evalsArgs.mkString(", ")})).toList()" + + // Encode children expression results to Scala objects + val inputInternalRowTerm = ctx.freshName("inputRow") + val inputInternalRow = s"$rowClass $inputInternalRowTerm = " + + s"($rowClass)$inputExpressionEncoderTerm.fromRow(InternalRow.fromSeq($evalsAsSeq));" // Generate the codes for expressions and calling user-defined function // We need to get the boxedType of dataType's javaType here. Because for the dataType // such as IntegerType, its javaType is `int` and the returned type of user-defined // function is Object. Trying to convert an Object to `int` will cause casting exception. val evalCode = evals.map(_.code).mkString - val (converters, funcArguments) = converterTerms.zipWithIndex.map { case (converter, i) => - val eval = evals(i) - val argTerm = ctx.freshName("arg") - val convert = s"Object $argTerm = ${eval.isNull} ? null : $converter.apply(${eval.value});" - (convert, argTerm) - }.unzip + val funcArguments = (0 until children.size).map { i => + s"$inputInternalRowTerm.get($i)" + }.mkString(", ") + + val rowParametersTerm = ctx.freshName("rowParameters") + val innerRow = s"$rowClass $rowParametersTerm = $rowClassName.apply(" + + s"$javaConversionClassName.collectionAsScalaIterable" + + s"(java.util.Arrays.asList($funcTerm.apply($funcArguments))).toList());" + + // Encode Scala objects of UDF return values to Spark SQL internal row + val internalRowTerm = ctx.freshName("internalRow") + val internalRow = s"$internalRowClassName $internalRowTerm = ($internalRowClassName)" + + s"${outputExpressionEncoderTerm}.toRow($rowParametersTerm).copy();" + + // UDF return values are encoded as the field 0 as StructType in the internal row + // We extract it back + val udfDataType = s"$scalaUDFObject.dataType()" val callFunc = s"${ctx.boxedType(dataType)} $resultTerm = " + - s"(${ctx.boxedType(dataType)})${catalystConverterTerm}" + - s".apply($funcTerm.apply(${funcArguments.mkString(", ")}));" + s"(${ctx.boxedType(dataType)}) $internalRowTerm.get(0, $udfDataType);" - s""" - $evalCode + evalCode + s""" ${converters.mkString("\n")} + ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + + $inputInternalRow + $innerRow + $internalRow $callFunc boolean ${ev.isNull} = $resultTerm == null; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { ${ev.value} = $resultTerm; } """ } - private[this] val converter = CatalystTypeConverters.createToCatalystConverter(dataType) - - override def eval(input: InternalRow): Any = converter(f(input)) + override def eval(input: InternalRow): Any = + throw new UnsupportedOperationException("Only code-generated evaluation is supported.") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index ec950332c5f6..ad8844369a72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.test.SQLTestData._ @@ -193,8 +194,21 @@ class UDFSuite extends QueryTest with SharedSQLContext { assert(sql("SELECT intExpected(1.0)").head().getInt(0) === 1) } + test("udf in interpreted projection") { + sqlContext.udf.register("testDataFunc", (n: Int, s: String) => { (n, s) }) + + // TakeOrderedAndProject uses InterpretedProjection to do projection + // So this SQL will call interpreted version of ScalaUDF + checkAnswer( + sql(""" + | SELECT testDataFunc(t.key, t.value) FROM + | (SELECT key, value FROM testData ORDER BY key) t LIMIT 100 + """.stripMargin).toDF(), testData.select(struct("key", "value"))) + } + test("udf in different types") { sqlContext.udf.register("testDataFunc", (n: Int, s: String) => { (n, s) }) + sqlContext.udf.register("testDataFunc2", (ns: Row) => { (ns.getInt(0), ns.getString(1)) }) sqlContext.udf.register("decimalDataFunc", (a: java.math.BigDecimal, b: java.math.BigDecimal) => { (a, b) }) sqlContext.udf.register("binaryDataFunc", (a: Array[Byte], b: Int) => { (a, b) }) @@ -208,6 +222,9 @@ class UDFSuite extends QueryTest with SharedSQLContext { checkAnswer( sql("SELECT tmp.t.* FROM (SELECT testDataFunc(key, value) AS t from testData) tmp").toDF(), testData) + checkAnswer( + sql("SELECT testDataFunc2(s) AS t from complexData").toDF(), + complexData.select("s")) checkAnswer( sql(""" | SELECT tmp.t.* FROM