Skip to content

Commit daf0620

Browse files
moulimukherjee and rdblue
authored and committed
Add projectStrict for Dates and Timestamps (#283)
1 parent 1596d61 commit daf0620

File tree

7 files changed

+658
-35
lines changed

7 files changed

+658
-35
lines changed

api/src/main/java/org/apache/iceberg/expressions/Projections.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -221,9 +221,10 @@ public <T> Expression predicate(BoundPredicate<T> pred) {
221221
// similarly, if partitioning by day(ts) and hour(ts), the more restrictive
222222
// projection should be used. ts = 2019-01-01T01:00:00 produces day=2019-01-01 and
223223
// hour=2019-01-01-01. the value will be in 2019-01-01-01 and not in 2019-01-01-02.
224-
result = Expressions.and(
225-
result,
226-
((Transform<T, ?>) part.transform()).project(part.name(), pred));
224+
UnboundPredicate<?> inclusiveProjection = ((Transform<T, ?>) part.transform()).project(part.name(), pred);
225+
if (inclusiveProjection != null) {
226+
result = Expressions.and(result, inclusiveProjection);
227+
}
227228
}
228229

229230
return result;
@@ -251,9 +252,10 @@ public <T> Expression predicate(BoundPredicate<T> pred) {
251252
// any timestamp where either projection predicate is true must match the original
252253
// predicate. For example, ts = 2019-01-01T03:00:00 matches the hour projection but not
253254
// the day, but does match the original predicate.
254-
result = Expressions.or(
255-
result,
256-
((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred));
255+
UnboundPredicate<?> strictProjection = ((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred);
256+
if (strictProjection != null) {
257+
result = Expressions.or(result, strictProjection);
258+
}
257259
}
258260

259261
return result;

api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,9 @@
1919

2020
package org.apache.iceberg.expressions;
2121

22-
import com.google.common.collect.Iterables;
23-
import com.google.common.collect.Lists;
2422
import java.io.Serializable;
2523
import java.util.Comparator;
2624
import java.util.List;
27-
import java.util.Objects;
2825
import org.apache.iceberg.PartitionField;
2926
import org.apache.iceberg.PartitionSpec;
3027
import org.apache.iceberg.StructLike;
@@ -200,42 +197,66 @@ public <T> Expression notEq(BoundReference<T> ref, Literal<T> lit) {
200197
@Override
201198
@SuppressWarnings("unchecked")
202199
public <T> Expression predicate(BoundPredicate<T> pred) {
203-
// Get the strict projection of this predicate in partition data, then use it to determine
204-
// whether to return the original predicate. The strict projection returns true iff the
205-
// original predicate would have returned true, so the predicate can be eliminated if the
206-
// strict projection evaluates to true.
200+
/**
201+
* Get the strict projection and inclusive projection of this predicate in partition data,
202+
* then use them to determine whether to return the original predicate. The strict projection
203+
* returns true only if the original predicate would have returned true, so the predicate can be
204+
* eliminated if the strict projection evaluates to true. Similarly the inclusive projection
205+
* returns false only if the original predicate would have returned false, so the predicate can
206+
* also be eliminated if the inclusive projection evaluates to false.
207+
*/
208+
207209
//
208210
// If the strict projection is not true and the inclusive projection is not false, then return the predicate.
209211
List<PartitionField> parts = spec.getFieldsBySourceId(pred.ref().fieldId());
210212
if (parts == null) {
211213
return pred; // not associated with a partition field, can't be evaluated
212214
}
213215

214-
List<UnboundPredicate<?>> strictProjections = Lists.transform(parts,
215-
part -> ((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred));
216+
for (PartitionField part : parts) {
216217

217-
if (Iterables.all(strictProjections, Objects::isNull)) {
218-
// if there are no strict projections, the predicate must be in the residual
219-
return pred;
220-
}
218+
// checking the strict projection
219+
UnboundPredicate<?> strictProjection = ((Transform<T, ?>) part.transform()).projectStrict(part.name(), pred);
220+
Expression strictResult = null;
221+
222+
if (strictProjection != null) {
223+
Expression bound = strictProjection.bind(spec.partitionType(), caseSensitive);
224+
if (bound instanceof BoundPredicate) {
225+
strictResult = super.predicate((BoundPredicate<?>) bound);
226+
} else {
227+
// if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse
228+
strictResult = bound;
229+
}
230+
}
221231

222-
Expression result = Expressions.alwaysFalse();
223-
for (UnboundPredicate<?> strictProjection : strictProjections) {
224-
if (strictProjection == null) {
225-
continue;
232+
if (strictResult != null && strictResult.op() == Expression.Operation.TRUE) {
233+
// If strict is true, returning true
234+
return Expressions.alwaysTrue();
226235
}
227236

228-
Expression bound = strictProjection.bind(spec.partitionType(), caseSensitive);
229-
if (bound instanceof BoundPredicate) {
230-
// evaluate the bound predicate, which will return alwaysTrue or alwaysFalse
231-
result = Expressions.or(result, super.predicate((BoundPredicate<?>) bound));
232-
} else {
233-
// update the result expression with the non-predicate residual (e.g. alwaysTrue)
234-
result = Expressions.or(result, bound);
237+
// checking the inclusive projection
238+
UnboundPredicate<?> inclusiveProjection = ((Transform<T, ?>) part.transform()).project(part.name(), pred);
239+
Expression inclusiveResult = null;
240+
if (inclusiveProjection != null) {
241+
Expression boundInclusive = inclusiveProjection.bind(spec.partitionType(), caseSensitive);
242+
if (boundInclusive instanceof BoundPredicate) {
243+
// using predicate method specific to inclusive
244+
inclusiveResult = super.predicate((BoundPredicate<?>) boundInclusive);
245+
} else {
246+
// if the result is not a predicate, then it must be a constant like alwaysTrue or alwaysFalse
247+
inclusiveResult = boundInclusive;
248+
}
235249
}
250+
251+
if (inclusiveResult != null && inclusiveResult.op() == Expression.Operation.FALSE) {
252+
// If inclusive is false, returning false
253+
return Expressions.alwaysFalse();
254+
}
255+
236256
}
237257

238-
return result;
258+
// neither the strict nor the inclusive projection was conclusive, returning the original pred
259+
return pred;
239260
}
240261

241262
@Override

api/src/main/java/org/apache/iceberg/transforms/Dates.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,11 @@ public UnboundPredicate<Integer> project(String fieldName, BoundPredicate<Intege
7373
}
7474

7575
@Override
76-
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Integer> predicate) {
77-
return null;
76+
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Integer> pred) {
77+
if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
78+
return Expressions.predicate(pred.op(), fieldName);
79+
}
80+
return ProjectionUtil.truncateIntegerStrict(fieldName, pred, this);
7881
}
7982

8083
@Override

api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,90 @@ static <T> UnboundPredicate<T> truncateInteger(
5252
}
5353
}
5454

55+
static UnboundPredicate<Integer> truncateIntegerStrict(
56+
String name, BoundPredicate<Integer> pred, Transform<Integer, Integer> transform) {
57+
int boundary = pred.literal().value();
58+
switch (pred.op()) {
59+
case LT:
60+
// predicate would be <= the previous partition
61+
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
62+
case LT_EQ:
63+
// Checking if the literal is at the upper partition boundary
64+
if (transform.apply(boundary + 1).equals(transform.apply(boundary))) {
65+
// Literal is not at upper boundary, for eg: 2019-07-02T02:12:34.0000
66+
// the predicate can be <= 2019-07-01
67+
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
68+
} else {
69+
// Literal is at the upper boundary, for eg: 2019-07-02T23:59:59.99999
70+
// the predicate can be <= 2019-07-02
71+
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
72+
}
73+
case GT:
74+
// predicate would be >= the next partition
75+
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
76+
case GT_EQ:
77+
// Checking if the literal is at the lower partition boundary
78+
if (transform.apply(boundary - 1).equals(transform.apply(boundary))) {
79+
// Literal is not at lower boundary, for eg: 2019-07-02T02:12:34.0000
80+
// the predicate can be >= 2019-07-03
81+
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
82+
} else {
83+
// Literal was at the lower boundary, for eg: 2019-07-02T00:00:00.0000
84+
// the predicate can be >= 2019-07-02
85+
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
86+
}
87+
case NOT_EQ:
88+
return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
89+
case EQ:
90+
// there is no predicate that guarantees equality because adjacent ints transform to the same value
91+
return null;
92+
default:
93+
return null;
94+
}
95+
}
96+
97+
static UnboundPredicate<Integer> truncateLongStrict(
98+
String name, BoundPredicate<Long> pred, Transform<Long, Integer> transform) {
99+
long boundary = pred.literal().value();
100+
switch (pred.op()) {
101+
case LT:
102+
// predicate would be <= the previous partition
103+
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
104+
case LT_EQ:
105+
// Checking if the literal is at the upper partition boundary
106+
if (transform.apply(boundary + 1L).equals(transform.apply(boundary))) {
107+
// Literal is not at upper boundary, for eg: 2019-07-02T02:12:34.0000
108+
// the predicate can be <= 2019-07-01
109+
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary) - 1);
110+
} else {
111+
// Literal is at the upper boundary, for eg: 2019-07-02T23:59:59.99999
112+
// the predicate can be <= 2019-07-02
113+
return predicate(Expression.Operation.LT_EQ, name, transform.apply(boundary));
114+
}
115+
case GT:
116+
// predicate would be >= the next partition
117+
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
118+
case GT_EQ:
119+
// Checking if the literal is at the lower partition boundary
120+
if (transform.apply(boundary - 1L).equals(transform.apply(boundary))) {
121+
// Literal is not at lower boundary, for eg: 2019-07-02T02:12:34.0000
122+
// the predicate can be >= 2019-07-03
123+
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary) + 1);
124+
} else {
125+
// Literal was at the lower boundary, for eg: 2019-07-02T00:00:00.0000
126+
// the predicate can be >= 2019-07-02
127+
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
128+
}
129+
case NOT_EQ:
130+
return predicate(Expression.Operation.NOT_EQ, name, transform.apply(boundary));
131+
case EQ:
132+
// there is no predicate that guarantees equality because adjacent longs transform to the same value
133+
return null;
134+
default:
135+
return null;
136+
}
137+
}
138+
55139
static <T> UnboundPredicate<T> truncateLong(
56140
String name, BoundPredicate<Long> pred, Transform<Long, T> transform) {
57141
long boundary = pred.literal().value();

api/src/main/java/org/apache/iceberg/transforms/Timestamps.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,11 @@ public UnboundPredicate<Integer> project(String fieldName, BoundPredicate<Long>
7676
}
7777

7878
@Override
79-
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Long> predicate) {
80-
return null;
79+
public UnboundPredicate<Integer> projectStrict(String fieldName, BoundPredicate<Long> pred) {
80+
if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
81+
return Expressions.predicate(pred.op(), fieldName);
82+
}
83+
return ProjectionUtil.truncateLongStrict(fieldName, pred, this);
8184
}
8285

8386
@Override

0 commit comments

Comments
 (0)