@@ -30,12 +30,13 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
 import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, SessionCatalog}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ExpressionInfo}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DecimalType, DoubleType}
 import org.apache.spark.util.Utils
 
 
@@ -163,6 +164,19 @@ private[sql] class HiveSessionCatalog(
   }
 
   override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
+    try {
+      lookupFunction0(name, children)
+    } catch {
+      case NonFatal(_) =>
+        // SPARK-16228 ExternalCatalog may recognize `double`-type only.
+        val newChildren = children.map { child =>
+          if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child
+        }
+        lookupFunction0(name, newChildren)
+    }
+  }
Review comment (Contributor): @dongjoon-hyun What is the reason that we need to catch an exception instead of letting the analyzer do the job?
+
+  private def lookupFunction0(name: FunctionIdentifier, children: Seq[Expression]): Expression = {
     // TODO: Once lookupFunction accepts a FunctionIdentifier, we should refactor this method to
     // if (super.functionExists(name)) {
     //   super.lookupFunction(name, children)
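As a hedged illustration of the retry in lookupFunction above (not part of the patch, and assuming a Spark build with Catalyst on the classpath, e.g. a spark-shell), the same rewrite applied to a standalone argument list; the value names are illustrative only:

```scala
import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, Literal}
import org.apache.spark.sql.types.{DecimalType, DoubleType}

// args stands in for the resolved children of a Hive UDAF call such as
// percentile(value, 0.5); the bare 0.5 parses as a decimal literal.
val args: Seq[Expression] = Seq(Literal(1), Literal(BigDecimal("0.5")))

// Same predicate as the patch: wrap decimal-typed children in a cast to double.
val rewritten = args.map { child =>
  if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child
}
// The decimal literal is now Cast(..., DoubleType), so the retried lookupFunction0
// call hands Hive a double-typed percentage argument it can resolve.
```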
@@ -142,6 +142,13 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
       sql("SELECT array(max(key), max(key)) FROM src").collect().toSeq)
   }
 
+  test("SPARK-16228 Percentile needs explicit cast to double") {
+    sql("select percentile(value, cast(0.5 as double)) from values 1,2,3 T(value)")
+    sql("select percentile_approx(value, cast(0.5 as double)) from values 1.0,2.0,3.0 T(value)")
+    sql("select percentile(value, 0.5) from values 1,2,3 T(value)")
+    sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0 T(value)")
+  }
+
Review comment: This doesn't cover all interfaces of percentile. E.g. it misses the case

sql("select percentile(value, array(0.5,0.99)) from values 1,2,3 T(value)")

which still throws the error outlined in https://issues.apache.org/jira/browse/SPARK-16228?focusedCommentId=15673869&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15673869

Should I report this as a bug? Thanks.
   test("Generic UDAF aggregates") {
     checkAnswer(sql("SELECT ceiling(percentile_approx(key, 0.99999D)) FROM src LIMIT 1"),
       sql("SELECT max(key) FROM src LIMIT 1").collect().toSeq)
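On the reviewer's coverage note above, a minimal sketch (again assuming a spark-shell with Catalyst available) of why the array form slips past the patch's per-child check; CreateArray and the literal values here are only for illustration:

```scala
import org.apache.spark.sql.catalyst.expressions.{CreateArray, Literal}
import org.apache.spark.sql.types.DecimalType

// The second argument of percentile(value, array(0.5, 0.99)) resolves to an array
// whose *elements* are decimals, so the argument itself is an ArrayType, not a DecimalType.
val percentages = CreateArray(Seq(Literal(BigDecimal("0.50")), Literal(BigDecimal("0.99"))))

// The patch only rewrites children whose own dataType is a DecimalType, so this
// child is left untouched and the Hive resolver still sees decimal array elements.
assert(!percentages.dataType.isInstanceOf[DecimalType])
```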