diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index ed088648bc20..8ccb17ce3592 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -769,7 +769,9 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { case InSet(child, values) if useAdvanced && values.size > inSetThreshold => val dataType = child.dataType - val sortedValues = values.toSeq.sorted(TypeUtils.getInterpretedOrdering(dataType)) + // Skipping null values here is safe; see ExtractableLiterals for details. + val sortedValues = values.filter(_ != null).toSeq + .sorted(TypeUtils.getInterpretedOrdering(dataType)) convert(And(GreaterThanOrEqual(child, Literal(sortedValues.head, dataType)), LessThanOrEqual(child, Literal(sortedValues.last, dataType)))) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala index 12ed0e530529..6962f9dd6b18 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala @@ -179,5 +179,13 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { } } + test("SPARK-34515: Fix NPE if InSet contains null value during getPartitionsByFilter") { + withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD.key -> "2") { + val filter = InSet(a("p", IntegerType), Set(null, 1, 2)) + val converted = shim.convertFilters(testTable, Seq(filter), conf.sessionLocalTimeZone) + assert(converted == "(p >= 1 and p <= 2)") + } + } + private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)() }