@@ -20,6 +20,8 @@ package org.apache.spark.sql.hive
import java.lang.reflect.InvocationTargetException
import java.util.Locale

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._
import scala.util.{Failure, Success, Try}
import scala.util.control.NonFatal

@@ -29,11 +31,15 @@ import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry}
import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, GenericUDF, GenericUDTF}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.{FunctionIdentifier, ScalaReflection}
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.{Cast, Expression}
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.aggregate.ScalaAggregator
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
import org.apache.spark.sql.types.{DecimalType, DoubleType}
import org.apache.spark.util.Utils
@@ -119,10 +125,41 @@ private[sql] class HiveSessionCatalog(
try {
super.makeFunctionExpression(name, clazz, input)
} catch {
// If `super.makeFunctionExpression` throws `InvalidUDFClassException`, we construct
// a Hive UDF/UDAF/UDTF with the function definition. Otherwise, we just rethrow it.
// If `super.makeFunctionExpression` throws `InvalidUDFClassException`, we try to construct
// a ScalaAggregator or a Hive UDF/UDAF/UDTF with the function definition. Otherwise,
// we just rethrow it.
case _: InvalidUDFClassException =>
makeHiveFunctionExpression(name, clazz, input)
val clsForAggregator = classOf[Aggregator[_, _, _]]
if (clsForAggregator.isAssignableFrom(clazz)) {
val aggregator =
clazz.getConstructor().newInstance().asInstanceOf[Aggregator[Any, Any, Any]]
// Construct the input encoder
val mirror = runtimeMirror(clazz.getClassLoader)
val classType = mirror.classSymbol(clazz)
val baseClassType = typeOf[Aggregator[_, _, _]].typeSymbol.asClass
val baseType = internal.thisType(classType).baseType(baseClassType)
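// The first type argument of Aggregator[IN, BUF, OUT], as seen from the
// concrete subclass, is the aggregator's input type IN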
val tpe = baseType.typeArgs.head
val cls = mirror.runtimeClass(tpe)
Review comment (Contributor): did you copy the code above from somewhere?

Reply (Contributor, author): The code references

    val serializer = ScalaReflection.serializerForType(tpe)

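// Build serializer/deserializer expressions for the reflected input type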
val serializer = ScalaReflection.serializerForType(tpe)
val deserializer = ScalaReflection.deserializerForType(tpe)
val inputEncoder = new ExpressionEncoder[Any](
serializer,
deserializer,
ClassTag(cls))

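// Wrap the Aggregator in a ScalaAggregator expression; the buffer encoder comes
// from the Aggregator itself, and the output encoder is derived from the
// Aggregator inside ScalaAggregator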
val e = new ScalaAggregator[Any, Any, Any](input, aggregator, inputEncoder,
aggregator.bufferEncoder.asInstanceOf[ExpressionEncoder[Any]],
aggregatorName = Some(name))

// Check input argument size
if (e.inputTypes.size != input.size) {
throw QueryCompilationErrors.invalidFunctionArgumentsError(
name, e.inputTypes.size.toString, input.size)
}
e
} else {
makeHiveFunctionExpression(name, clazz, input)
}
case NonFatal(e) => throw e
}
}
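
For context, the new branch applies to any class that extends Aggregator and has a no-arg constructor. A minimal sketch of such a class (the name SumLong is hypothetical, not part of this change) that CREATE FUNCTION could now point at:

import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.expressions.Aggregator

// Hypothetical no-arg Aggregator summing Longs; the makeFunctionExpression
// branch above would wrap it in a ScalaAggregator once registered via
// CREATE FUNCTION ... AS 'SumLong'.
class SumLong extends Aggregator[Long, Long, Long] {
  def zero: Long = 0L                               // initial buffer value
  def reduce(buf: Long, in: Long): Long = buf + in  // fold one input row
  def merge(b1: Long, b2: Long): Long = b1 + b2     // combine partial buffers
  def finish(buf: Long): Long = buf                 // final result
  def bufferEncoder: Encoder[Long] = Encoders.scalaLong
  def outputEncoder: Encoder[Long] = Encoders.scalaLong
}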
@@ -25,6 +25,8 @@ import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.types.{NullType, StructType}

class MyDoubleAverage extends MyDoubleAvgAggBase
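
A note on the helper: MyDoubleAvgAggBase is defined elsewhere in the Hive test sources. Judging from the expected answer of 102.0 for the inputs 1, 2, 3 in the new test below, it computes the average plus 100. A rough sketch of the shape such a base class could have (an assumption for illustration, not the actual helper):

import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.expressions.Aggregator

// Assumed shape: a (sum, count) buffer whose result is average + 100,
// so avg(1, 2, 3) = 2.0 yields the 102.0 checked in the test below.
abstract class MyDoubleAvgAggBase extends Aggregator[Double, (Double, Long), Double] {
  def zero: (Double, Long) = (0.0, 0L)
  def reduce(b: (Double, Long), a: Double): (Double, Long) = (b._1 + a, b._2 + 1L)
  def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) =
    (b1._1 + b2._1, b1._2 + b2._2)
  def finish(b: (Double, Long)): Double =
    if (b._2 == 0L) Double.NaN else b._1 / b._2 + 100.0
  def bufferEncoder: Encoder[(Double, Long)] =
    Encoders.tuple(Encoders.scalaDouble, Encoders.scalaLong)
  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}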

/**
* A test suite for Hive view related functionality.
*/
@@ -179,4 +181,24 @@ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton {
}
}
}

test("SPARK-37018: Spark SQL should support create function with Aggregator") {
val avgFuncClass = "org.apache.spark.sql.hive.execution.MyDoubleAverage"
val functionName = "test_udf"
withTempDatabase { dbName =>
withUserDefinedFunction(
s"default.$functionName" -> false,
s"$dbName.$functionName" -> false,
functionName -> true) {
// create a function in default database
sql("USE DEFAULT")
sql(s"CREATE FUNCTION $functionName AS '$avgFuncClass'")
Review comment (Contributor): Can we add a basic test to make sure the function can actually be called? And can we use compatible input types to test the implicit cast, and incompatible input types to make sure the type check works? (A hedged sketch of such checks follows the suite below.)

// create a view using a function in 'default' database
withView("v1") {
sql(s"CREATE VIEW v1 AS SELECT $functionName(col1) AS func FROM VALUES (1), (2), (3)")
checkAnswer(sql("SELECT * FROM v1"), Seq(Row(102.0)))
}
}
}
}
}
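
As a rough illustration of the checks requested in the review comment above (hypothetical test bodies, not part of this diff; assumes the suite's usual checkAnswer helper, ScalaTest's intercept, and org.apache.spark.sql.AnalysisException are in scope):

// Hedged sketch: integer inputs should be implicitly cast to the
// aggregator's DOUBLE input type, so the call still succeeds.
checkAnswer(
  sql(s"SELECT $functionName(col1) FROM VALUES (1), (2), (3)"),
  Row(102.0))

// Hedged sketch: a wrong argument count should trip the new
// invalidFunctionArgumentsError check in makeFunctionExpression.
intercept[AnalysisException] {
  sql(s"SELECT $functionName(col1, col1) FROM VALUES (1), (2)").collect()
}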