Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {

object ExtractableLiteral {
def unapply(expr: Expression): Option[String] = expr match {
case Literal(null, _) => None // `null`s can be cast as other types; we want to avoid NPEs.
case Literal(value, _: IntegralType) => Some(value.toString)
case Literal(value, _: StringType) => Some(quoteStringLiteral(value.toString))
case _ => None
Expand All @@ -606,7 +607,23 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {

object ExtractableLiterals {
def unapply(exprs: Seq[Expression]): Option[Seq[String]] = {
val extractables = exprs.map(ExtractableLiteral.unapply)
// SPARK-24879: The Hive metastore filter parser does not support "null", but we still want
// to push down as many predicates as we can while still maintaining correctness.
// In SQL, the `IN` expression evaluates as follows:
// > `1 in (2, NULL)` -> NULL
// > `1 in (1, NULL)` -> true
// > `1 in (2)` -> false
// Since Hive metastore filters are NULL-intolerant binary operations joined only by
// `AND` and `OR`, we can treat `NULL` as `false` and thus rewrite `1 in (2, NULL)` as
// `1 in (2)`.
// If the Hive metastore begins supporting NULL-tolerant predicates and Spark starts
// pushing down these predicates, then this optimization will become incorrect and need
// to be changed.
val extractables = exprs
.filter {
case Literal(null, _) => false
case _ => true
}.map(ExtractableLiteral.unapply)
if (extractables.nonEmpty && extractables.forall(_.isDefined)) {
Some(extractables.map(_.get))
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,20 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest {
(Literal("p2\" and q=\"q2") === a("stringcol", StringType)) :: Nil,
"""stringcol = 'p1" and q="q1' and 'p2" and q="q2' = stringcol""")

filterTest("SPARK-24879 null literals should be ignored for IN constructs",
(a("intcol", IntegerType) in (Literal(1), Literal(null))) :: Nil,
"(intcol = 1)")

// Applying the predicate `x IN (NULL)` should return an empty set, but since this optimization
// will be applied by Catalyst, this filter converter does not need to account for this.
filterTest("SPARK-24879 IN predicates with only NULLs will not cause a NPE",
(a("intcol", IntegerType) in Literal(null)) :: Nil,
"")

filterTest("typecast null literals should not be pushed down in simple predicates",
(a("intcol", IntegerType) === Literal(null, IntegerType)) :: Nil,
"")

private def filterTest(name: String, filters: Seq[Expression], result: String) = {
test(name) {
withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> "true") {
Expand Down