Skip to content

Commit aa3733f

Browse files
authored
Fix pushdown of non-partition predicates within NOT (#51)
1 parent 9f73cd4 commit aa3733f

File tree

2 files changed

+41
-7
lines changed

2 files changed

+41
-7
lines changed

hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveExpressions.java

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
class HiveExpressions {
3535

36+
private static final Expression REMOVED = (Expression) () -> null;
37+
3638
private HiveExpressions() {}
3739

3840
/**
@@ -48,9 +50,18 @@ private HiveExpressions() {}
4850
*/
4951
static Expression simplifyPartitionFilter(Expression expr, Set<String> partitionColumnNames) {
5052
try {
51-
Expression partitionPredicatesOnly = ExpressionVisitors.visit(expr,
53+
// Pushing down NOTs is critical for the correctness of RemoveNonPartitionPredicates
54+
// e.g. consider a predicate on a partition field (P) and a predicate on a non-partition field (NP)
55+
// With simply ignoring NP, NOT(P and NP) will be written to NOT(P)
56+
// However the correct behaviour is NOT(P and NP) => NOT(P) OR NOT(NP) => True
57+
Expression notPushedDown = Expressions.rewriteNot(expr);
58+
Expression partitionPredicatesOnly = ExpressionVisitors.visit(notPushedDown,
5259
new RemoveNonPartitionPredicates(partitionColumnNames));
53-
return ExpressionVisitors.visit(partitionPredicatesOnly, new RewriteUnsupportedOperators());
60+
if (partitionPredicatesOnly == REMOVED) {
61+
return Expressions.alwaysTrue();
62+
} else {
63+
return ExpressionVisitors.visit(partitionPredicatesOnly, new RewriteUnsupportedOperators());
64+
}
5465
} catch (Exception e) {
5566
throw new RuntimeException("Error while processing expression: " + expr, e);
5667
}
@@ -93,17 +104,27 @@ public Expression alwaysFalse() {
93104

94105
@Override
95106
public Expression not(Expression result) {
96-
return Expressions.not(result);
107+
return (result == REMOVED) ? REMOVED : Expressions.not(result);
97108
}
98109

99110
@Override
100111
public Expression and(Expression leftResult, Expression rightResult) {
101-
return Expressions.and(leftResult, rightResult);
112+
// if one of the children is a non partition predicate, we can ignore it as it will be applied as a post-scan
113+
// filter
114+
if (leftResult == REMOVED && rightResult == REMOVED) {
115+
return REMOVED;
116+
} else if (leftResult == REMOVED) {
117+
return rightResult;
118+
} else if (rightResult == REMOVED) {
119+
return leftResult;
120+
} else {
121+
return Expressions.and(leftResult, rightResult);
122+
}
102123
}
103124

104125
@Override
105126
public Expression or(Expression leftResult, Expression rightResult) {
106-
return Expressions.or(leftResult, rightResult);
127+
return (leftResult == REMOVED || rightResult == REMOVED) ? REMOVED : Expressions.or(leftResult, rightResult);
107128
}
108129

109130
@Override
@@ -113,8 +134,7 @@ public <T> Expression predicate(BoundPredicate<T> pred) {
113134

114135
@Override
115136
public <T> Expression predicate(UnboundPredicate<T> pred) {
116-
return (partitionColumnNamesLowerCase.contains(pred.ref().name().toLowerCase())) ? pred
117-
: Expressions.alwaysTrue();
137+
return (partitionColumnNamesLowerCase.contains(pred.ref().name().toLowerCase())) ? pred : REMOVED;
118138
}
119139
}
120140

hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestHiveExpressions.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,20 @@ public void testSimplifyRemoveAlwaysFalseChildren() {
105105
Assert.assertEquals(expected.toString(), simplifyPartitionFilter(input, ImmutableSet.of("pcol")).toString());
106106
}
107107

108+
@Test
109+
public void testSimplifyRemoveNonPartitionColumnsWithinNot1() {
110+
Expression input = and(not(equal("nonpcol", "1")), equal("pcol", "1"));
111+
Expression expected = equal("pcol", "1");
112+
Assert.assertEquals(expected.toString(), simplifyPartitionFilter(input, ImmutableSet.of("pcol")).toString());
113+
}
114+
115+
@Test
116+
public void testSimplifyRemoveNonPartitionColumnsWithinNot2() {
117+
Expression input = not(and(equal("nonpcol", "1"), equal("pcol", "1")));
118+
Expression expected = alwaysTrue();
119+
Assert.assertEquals(expected.toString(), simplifyPartitionFilter(input, ImmutableSet.of("pcol")).toString());
120+
}
121+
108122
@Test
109123
public void testToPartitionFilterStringEscapeStringLiterals() {
110124
Expression input = equal("pcol", "s'1");

0 commit comments

Comments
 (0)