Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -328,18 +328,31 @@ private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean vis
Column<T> filterColumn = ud.getColumn();
ColumnChunkMetaData columnChunk = getColumnChunk(filterColumn.getColumnPath());
U udp = ud.getUserDefinedPredicate();

if (columnChunk == null) {
// The column isn't in this file, so all of its values are null.
// Let's run the UDP with a null value to see whether it keeps null or not.
if (inverted) {
return udp.keep(null);
} else {
return !udp.keep(null);
}
}

Statistics<T> stats = columnChunk.getStatistics();

if (stats.isEmpty()) {
// we have no statistics available, we cannot drop any chunks
return false;
return BLOCK_MIGHT_MATCH;
}

if (isAllNulls(columnChunk)) {
// there is no min max, there is nothing
// else we can say about this chunk, we
// cannot drop it.
return false;
// Let's run the UDP with a null value to see whether it keeps null or not.
if (inverted) {
return udp.keep(null);
} else {
return !udp.keep(null);
}
}

org.apache.parquet.filter2.predicate.Statistics<T> udpStats =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ private static ColumnChunkMetaData getDoubleColumnMeta(DoubleStatistics stats, l
private static final IntColumn intColumn = intColumn("int.column");
private static final DoubleColumn doubleColumn = doubleColumn("double.column");
private static final BinaryColumn missingColumn = binaryColumn("missing");
private static final IntColumn missingColumn2 = intColumn("missing.int");

private static final IntStatistics intStats = new IntStatistics();
private static final IntStatistics nullIntStats = new IntStatistics();
Expand Down Expand Up @@ -269,7 +270,10 @@ public static class SevensAndEightsUdp extends UserDefinedPredicate<Integer> {

/**
 * Decides whether a single value is kept by this predicate.
 *
 * <p>In this test predicate only a {@code null} probe is expected to reach
 * this method; a {@code null} value is kept. Any non-null argument is treated
 * as a test failure.
 *
 * @param value the value to test; expected to be {@code null}
 * @return {@code true} when {@code value} is {@code null}
 * @throws RuntimeException if {@code value} is not {@code null}
 */
@Override
public boolean keep(Integer value) {
  // A null value is kept. The former unconditional throw is removed because
  // it made this branch unreachable.
  if (value == null) {
    return true;
  }
  throw new RuntimeException("this method should not be called with value != null");
}

@Override
Expand All @@ -283,11 +287,27 @@ public boolean inverseCanDrop(Statistics<Integer> statistics) {
}
}

/**
 * Variant of {@link SevensAndEightsUdp} whose {@code keep} rejects
 * {@code null} instead of accepting it.
 */
public static class DropNullUdp extends SevensAndEightsUdp {
  /**
   * Rejects a {@code null} value; any non-null argument is treated as a
   * test failure.
   *
   * @param value the value to test; expected to be {@code null}
   * @return {@code false} when {@code value} is {@code null}
   * @throws RuntimeException if {@code value} is not {@code null}
   */
  @Override
  public boolean keep(Integer value) {
    if (value != null) {
      throw new RuntimeException("this method should not be called with value != null");
    }
    // Null is the only accepted input, and it is not kept.
    return false;
  }
}

@Test
public void testUdp() {
FilterPredicate pred = userDefined(intColumn, SevensAndEightsUdp.class);
FilterPredicate invPred = LogicalInverseRewriter.rewrite(not(userDefined(intColumn, SevensAndEightsUdp.class)));

FilterPredicate udpDropMissingColumn = userDefined(missingColumn2, DropNullUdp.class);
FilterPredicate invUdpDropMissingColumn = LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, DropNullUdp.class)));

FilterPredicate udpKeepMissingColumn = userDefined(missingColumn2, SevensAndEightsUdp.class);
FilterPredicate invUdpKeepMissingColumn = LogicalInverseRewriter.rewrite(not(userDefined(missingColumn2, SevensAndEightsUdp.class)));

IntStatistics seven = new IntStatistics();
seven.setMinMax(7, 7);

Expand Down Expand Up @@ -320,6 +340,58 @@ public void testUdp() {
assertFalse(canDrop(invPred, Arrays.asList(
getIntColumnMeta(neither, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

// udpDropMissingColumn drops null column.
assertTrue(canDrop(udpDropMissingColumn, Arrays.asList(
getIntColumnMeta(seven, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

assertTrue(canDrop(udpDropMissingColumn, Arrays.asList(
getIntColumnMeta(eight, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

assertTrue(canDrop(udpDropMissingColumn, Arrays.asList(
getIntColumnMeta(neither, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

// invUdpDropMissingColumn (i.e., not(udpDropMissingColumn)) keeps null column.
assertFalse(canDrop(invUdpDropMissingColumn, Arrays.asList(
getIntColumnMeta(seven, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

assertFalse(canDrop(invUdpDropMissingColumn, Arrays.asList(
getIntColumnMeta(eight, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

assertFalse(canDrop(invUdpDropMissingColumn, Arrays.asList(
getIntColumnMeta(neither, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

// udpKeepMissingColumn keeps null column.
assertFalse(canDrop(udpKeepMissingColumn, Arrays.asList(
getIntColumnMeta(seven, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

assertFalse(canDrop(udpKeepMissingColumn, Arrays.asList(
getIntColumnMeta(eight, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

assertFalse(canDrop(udpKeepMissingColumn, Arrays.asList(
getIntColumnMeta(neither, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

// invUdpKeepMissingColumn (i.e., not(udpKeepMissingColumn)) drops null column.
assertTrue(canDrop(invUdpKeepMissingColumn, Arrays.asList(
getIntColumnMeta(seven, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

assertTrue(canDrop(invUdpKeepMissingColumn, Arrays.asList(
getIntColumnMeta(eight, 177L),
getDoubleColumnMeta(doubleStats, 177L))));

assertTrue(canDrop(invUdpKeepMissingColumn, Arrays.asList(
getIntColumnMeta(neither, 177L),
getDoubleColumnMeta(doubleStats, 177L))));
}

@Test
Expand Down