apache · joyhaldar · Nov 15, 2025 · Nov 16, 2025 · Nov 17, 2025 · Nov 17, 2025
diff --git a/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
@@ -327,6 +327,32 @@ public <T> Boolean eq(Bound<T> term, Literal<T> lit) {
     public <T> Boolean notEq(Bound<T> term, Literal<T> lit) {
       // because the bounds are not necessarily a min or max value, this cannot be answered using
       // them. notEq(col, X) with (X, Y) doesn't guarantee that X is a value in col.
+      // However, when min == max and the file has no nulls or NaN values, every row holds that
+      // one value, so the file can be pruned if that value equals the literal.
+      int id = term.ref().fieldId();
+      if (mayContainNull(id)) {
+        // null rows satisfy notEq, so the file has to be read
+        return ROWS_MIGHT_MATCH;
+      }
+
+      T lower = lowerBound(term);
+      T upper = upperBound(term);
+      if (lower == null || upper == null || NaNUtil.isNaN(lower) || NaNUtil.isNaN(upper)) {
+        return ROWS_MIGHT_MATCH;
+      }
+
+      // NaN rows satisfy notEq as well. For floating-point bounds a missing NaN count is not
+      // proof that the file has no NaN values, so prune only when the count is known to be 0.
+      boolean floatingPoint = lower instanceof Float || lower instanceof Double;
+      boolean nanCountKnown = nanCounts != null && nanCounts.containsKey(id);
+      if (nanCountKnown ? nanCounts.get(id) != 0 : floatingPoint) {
+        return ROWS_MIGHT_MATCH;
+      }
+
+      if (lower.equals(upper) && lit.comparator().compare(lower, lit.value()) == 0) {
+        return ROWS_CANNOT_MATCH;
+      }
+
       return ROWS_MIGHT_MATCH;
     }
 
@@ -381,6 +404,32 @@ public <T> Boolean in(Bound<T> term, Set<T> literalSet) {
     public <T> Boolean notIn(Bound<T> term, Set<T> literalSet) {
       // because the bounds are not necessarily a min or max value, this cannot be answered using
       // them. notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X is a value in col.
+      // However, when min == max and the file has no nulls or NaN values, every row holds that
+      // one value, so the file can be pruned if that value is in the exclusion set.
+      int id = term.ref().fieldId();
+      if (mayContainNull(id)) {
+        // null rows satisfy notIn, so the file has to be read
+        return ROWS_MIGHT_MATCH;
+      }
+
+      T lower = lowerBound(term);
+      T upper = upperBound(term);
+      if (lower == null || upper == null || NaNUtil.isNaN(lower) || NaNUtil.isNaN(upper)) {
+        return ROWS_MIGHT_MATCH;
+      }
+
+      // NaN rows satisfy notIn as well. For floating-point bounds a missing NaN count is not
+      // proof that the file has no NaN values, so prune only when the count is known to be 0.
+      boolean floatingPoint = lower instanceof Float || lower instanceof Double;
+      boolean nanCountKnown = nanCounts != null && nanCounts.containsKey(id);
+      if (nanCountKnown ? nanCounts.get(id) != 0 : floatingPoint) {
+        return ROWS_MIGHT_MATCH;
+      }
+
+      if (lower.equals(upper) && literalSet.contains(lower)) {
+        return ROWS_CANNOT_MATCH;
+      }
+
       return ROWS_MIGHT_MATCH;
     }
 

diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestInclusiveMetricsEvaluator.java
@@ -970,4 +970,236 @@ public void testNotNullInNestedStruct() {
         .as("Should not read: optional_address.optional_street2 is optional")
         .isFalse();
   }
+
+  @Test
+  public void testNotEqWithSingleValue() {
+    // notEq may prune a file only when its lower and upper bounds are the same value as the
+    // literal and the column has no null or NaN rows; every other case must read the file.
+    DataFile rangeOfValues =
+        new TestDataFile(
+            "range_of_values.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(3, 10L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "aaa")),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "zzz")));
+
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("required", "aaa")).eval(rangeOfValues);
+    assertThat(shouldRead)
+        .as("Should read: file has range of values, optimization doesn't apply")
+        .isTrue();
+
+    DataFile singleValueFile =
+        new TestDataFile(
+            "single_value.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(3, 10L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("required", "abc")).eval(singleValueFile);
+    assertThat(shouldRead)
+        .as("Should prune: file contains single value equal to literal")
+        .isFalse();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("required", "def")).eval(singleValueFile);
+    assertThat(shouldRead)
+        .as("Should read: file contains single value not equal to literal")
+        .isTrue();
+
+    DataFile singleValueWithNulls =
+        new TestDataFile(
+            "single_value_nulls.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(3, 10L),
+            ImmutableMap.of(3, 2L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("required", "abc"))
+            .eval(singleValueWithNulls);
+    assertThat(shouldRead).as("Should read: file has nulls which match != predicate").isTrue();
+
+    DataFile singleValueWithNaN =
+        new TestDataFile(
+            "single_value_nan.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(9, 10L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, 2L),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), 5.0F)),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), 5.0F)));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("no_nans", 5.0F)).eval(singleValueWithNaN);
+    assertThat(shouldRead).as("Should read: file has NaN values which match != predicate").isTrue();
+
+    DataFile singleValueNaNBounds =
+        new TestDataFile(
+            "single_value_nan_bounds.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(9, 10L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), Float.NaN)),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), Float.NaN)));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("no_nans", 5.0F)).eval(singleValueNaNBounds);
+    assertThat(shouldRead).as("Should read: bounds are NaN").isTrue();
+
+    DataFile singleFloatFile =
+        new TestDataFile(
+            "single_float_value.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(9, 10L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), 5.0F)),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), 5.0F)));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("no_nans", 5.0F)).eval(singleFloatFile);
+    assertThat(shouldRead)
+        .as("Should prune: single float value equals literal and NaN count is zero")
+        .isFalse();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notEqual("no_nans", 10.0F)).eval(singleFloatFile);
+    assertThat(shouldRead).as("Should read: single float value does not equal literal").isTrue();
+  }
+
+  @Test
+  public void testNotInWithSingleValue() {
+    // notIn may prune a file only when its lower and upper bounds are the same value, that value
+    // is in the exclusion list, and the column has no null or NaN rows.
+    DataFile rangeOfValues =
+        new TestDataFile(
+            "range_of_values.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(3, 10L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "aaa")),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "zzz")));
+
+    boolean shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("required", "aaa", "bbb")).eval(rangeOfValues);
+    assertThat(shouldRead)
+        .as("Should read: file has range of values, optimization doesn't apply")
+        .isTrue();
+
+    DataFile singleValueFile =
+        new TestDataFile(
+            "single_value.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(3, 10L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("required", "abc", "def"))
+            .eval(singleValueFile);
+    assertThat(shouldRead)
+        .as("Should prune: file contains single value in exclusion list")
+        .isFalse();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("required", "def", "ghi"))
+            .eval(singleValueFile);
+    assertThat(shouldRead)
+        .as("Should read: file contains single value not in exclusion list")
+        .isTrue();
+
+    DataFile singleValueWithNulls =
+        new TestDataFile(
+            "single_value_nulls.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(3, 10L),
+            ImmutableMap.of(3, 2L),
+            ImmutableMap.of(3, 0L),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
+            ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("required", "abc", "def"))
+            .eval(singleValueWithNulls);
+    assertThat(shouldRead).as("Should read: file has nulls which match NOT IN predicate").isTrue();
+
+    DataFile singleValueWithNaN =
+        new TestDataFile(
+            "single_value_nan.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(9, 10L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, 2L),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), 5.0F)),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), 5.0F)));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("no_nans", 5.0F, 10.0F))
+            .eval(singleValueWithNaN);
+    assertThat(shouldRead)
+        .as("Should read: file has NaN values which match NOT IN predicate")
+        .isTrue();
+
+    DataFile singleValueNaNBounds =
+        new TestDataFile(
+            "single_value_nan_bounds.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(9, 10L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), Float.NaN)),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), Float.NaN)));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("no_nans", 5.0F, 10.0F))
+            .eval(singleValueNaNBounds);
+    assertThat(shouldRead).as("Should read: bounds are NaN").isTrue();
+
+    DataFile singleFloatValueFile =
+        new TestDataFile(
+            "single_float_value.avro",
+            Row.of(),
+            10,
+            ImmutableMap.of(9, 10L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, 0L),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), 5.0F)),
+            ImmutableMap.of(9, toByteBuffer(Types.FloatType.get(), 5.0F)));
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("no_nans", 5.0F, 10.0F))
+            .eval(singleFloatValueFile);
+    assertThat(shouldRead)
+        .as("Should prune: single float value is in exclusion list and NaN count is zero")
+        .isFalse();
+
+    shouldRead =
+        new InclusiveMetricsEvaluator(SCHEMA, notIn("no_nans", 10.0F, 20.0F))
+            .eval(singleFloatValueFile);
+    assertThat(shouldRead).as("Should read: single float value is not in exclusion list").isTrue();
+  }
 }
diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
@@ -461,8 +461,7 @@ public void testUnpartitionedYears() throws Exception {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    // notEq can't be answered using column bounds because they are not exact
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
   }
 
   @TestTemplate
@@ -771,7 +770,7 @@ public void testUnpartitionedTruncateString() throws Exception {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
 
     // NOT NotEqual
     builder = scanBuilder();
@@ -990,7 +989,7 @@ public void testUnpartitionedOr() throws Exception {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
   }
 
   @TestTemplate

diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
@@ -461,8 +461,7 @@ public void testUnpartitionedYears() throws Exception {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    // notEq can't be answered using column bounds because they are not exact
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
   }
 
   @TestTemplate
@@ -771,7 +770,7 @@ public void testUnpartitionedTruncateString() throws Exception {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
 
     // NOT NotEqual
     builder = scanBuilder();
@@ -990,7 +989,7 @@ public void testUnpartitionedOr() throws Exception {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
   }
 
   @TestTemplate

diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkScan.java
@@ -461,8 +461,7 @@ public void testUnpartitionedYears() throws Exception {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    // notEq can't be answered using column bounds because they are not exact
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
   }
 
   @TestTemplate
@@ -771,7 +770,7 @@ public void testUnpartitionedTruncateString() throws Exception {
     pushFilters(builder, predicate);
     Batch scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
 
     // NOT NotEqual
     builder = scanBuilder();
@@ -990,7 +989,7 @@ public void testUnpartitionedOr() throws Exception {
     pushFilters(builder, predicate);
     scan = builder.build().toBatch();
 
-    assertThat(scan.planInputPartitions()).hasSize(10);
+    assertThat(scan.planInputPartitions()).hasSize(5);
   }
 
   @TestTemplate