2222import java .util .HashSet ;
2323import java .util .List ;
2424
25- import org .apache .parquet .io .api .Binary ;
2625import org .junit .Test ;
2726
2827import org .apache .parquet .column .Encoding ;
@@ -87,14 +86,18 @@ private static ColumnChunkMetaData getDoubleColumnMeta(DoubleStatistics stats, l
8786
8887 private static final IntStatistics intStats = new IntStatistics ();
8988 private static final IntStatistics nullIntStats = new IntStatistics ();
89+ private static final IntStatistics missingMinMaxIntStats = new IntStatistics ();
9090 private static final DoubleStatistics doubleStats = new DoubleStatistics ();
91+ private static final DoubleStatistics missingMinMaxDoubleStats = new DoubleStatistics ();
9192
9293 static {
9394 intStats .setMinMax (10 , 100 );
9495 doubleStats .setMinMax (10 , 100 );
9596
9697 nullIntStats .setMinMax (0 , 0 );
9798 nullIntStats .setNumNulls (177 );
99+
100+ missingMinMaxDoubleStats .setNumNulls (100 );
98101 }
99102
100103 private static final List <ColumnChunkMetaData > columnMetas = Arrays .asList (
@@ -105,6 +108,9 @@ private static ColumnChunkMetaData getDoubleColumnMeta(DoubleStatistics stats, l
105108 getIntColumnMeta (nullIntStats , 177L ), // column of all nulls
106109 getDoubleColumnMeta (doubleStats , 177L ));
107110
111+ private static final List <ColumnChunkMetaData > missingMinMaxColumnMetas = Arrays .asList (
112+ getIntColumnMeta (missingMinMaxIntStats , 177L ), // missing min/max, no null values => stats is empty
113+ getDoubleColumnMeta (missingMinMaxDoubleStats , 177L )); // missing min/max, some null values
108114
109115 @ Test
110116 public void testEqNonNull () {
@@ -116,6 +122,9 @@ public void testEqNonNull() {
116122 // drop columns of all nulls when looking for non-null value
117123 assertTrue (canDrop (eq (intColumn , 0 ), nullColumnMetas ));
118124 assertTrue (canDrop (eq (missingColumn , fromString ("any" )), columnMetas ));
125+
126+ assertFalse (canDrop (eq (intColumn , 50 ), missingMinMaxColumnMetas ));
127+ assertFalse (canDrop (eq (doubleColumn , 50.0 ), missingMinMaxColumnMetas ));
119128 }
120129
121130 @ Test
@@ -137,6 +146,9 @@ public void testEqNull() {
137146 getDoubleColumnMeta (doubleStats , 177L ))));
138147
139148 assertFalse (canDrop (eq (missingColumn , null ), columnMetas ));
149+
150+ assertFalse (canDrop (eq (intColumn , null ), missingMinMaxColumnMetas ));
151+ assertFalse (canDrop (eq (doubleColumn , null ), missingMinMaxColumnMetas ));
140152 }
141153
142154 @ Test
@@ -163,6 +175,9 @@ public void testNotEqNonNull() {
163175 getDoubleColumnMeta (doubleStats , 177L ))));
164176
165177 assertFalse (canDrop (notEq (missingColumn , fromString ("any" )), columnMetas ));
178+
179+ assertFalse (canDrop (notEq (intColumn , 50 ), missingMinMaxColumnMetas ));
180+ assertFalse (canDrop (notEq (doubleColumn , 50.0 ), missingMinMaxColumnMetas ));
166181 }
167182
168183 @ Test
@@ -192,6 +207,9 @@ public void testNotEqNull() {
192207 getDoubleColumnMeta (doubleStats , 177L ))));
193208
194209 assertTrue (canDrop (notEq (missingColumn , null ), columnMetas ));
210+
211+ assertFalse (canDrop (notEq (intColumn , null ), missingMinMaxColumnMetas ));
212+ assertFalse (canDrop (notEq (doubleColumn , null ), missingMinMaxColumnMetas ));
195213 }
196214
197215 @ Test
@@ -205,6 +223,9 @@ public void testLt() {
205223 assertTrue (canDrop (lt (intColumn , 7 ), nullColumnMetas ));
206224
207225 assertTrue (canDrop (lt (missingColumn , fromString ("any" )), columnMetas ));
226+
227+ assertFalse (canDrop (lt (intColumn , 0 ), missingMinMaxColumnMetas ));
228+ assertFalse (canDrop (lt (doubleColumn , 0.0 ), missingMinMaxColumnMetas ));
208229 }
209230
210231 @ Test
@@ -218,6 +239,9 @@ public void testLtEq() {
218239 assertTrue (canDrop (ltEq (intColumn , 7 ), nullColumnMetas ));
219240
220241 assertTrue (canDrop (ltEq (missingColumn , fromString ("any" )), columnMetas ));
242+
243+ assertFalse (canDrop (ltEq (intColumn , -1 ), missingMinMaxColumnMetas ));
244+ assertFalse (canDrop (ltEq (doubleColumn , -0.1 ), missingMinMaxColumnMetas ));
221245 }
222246
223247 @ Test
@@ -231,6 +255,9 @@ public void testGt() {
231255 assertTrue (canDrop (gt (intColumn , 7 ), nullColumnMetas ));
232256
233257 assertTrue (canDrop (gt (missingColumn , fromString ("any" )), columnMetas ));
258+
259+ assertFalse (canDrop (gt (intColumn , 0 ), missingMinMaxColumnMetas ));
260+ assertFalse (canDrop (gt (doubleColumn , 0.0 ), missingMinMaxColumnMetas ));
234261 }
235262
236263 @ Test
@@ -244,6 +271,9 @@ public void testGtEq() {
244271 assertTrue (canDrop (gtEq (intColumn , 7 ), nullColumnMetas ));
245272
246273 assertTrue (canDrop (gtEq (missingColumn , fromString ("any" )), columnMetas ));
274+
275+ assertFalse (canDrop (gtEq (intColumn , 1 ), missingMinMaxColumnMetas ));
276+ assertFalse (canDrop (gtEq (doubleColumn , 0.1 ), missingMinMaxColumnMetas ));
247277 }
248278
249279 @ Test
@@ -297,6 +327,26 @@ public boolean keep(Integer value) {
297327 }
298328 }
299329
330+ public static class AllPositiveUdp extends UserDefinedPredicate <Double > {
331+ @ Override
332+ public boolean keep (Double value ) {
333+ if (value == null ) {
334+ return true ;
335+ }
336+ throw new RuntimeException ("this method should not be called with value != null" );
337+ }
338+
339+ @ Override
340+ public boolean canDrop (Statistics <Double > statistics ) {
341+ return statistics .getMin () <= 0.0 ;
342+ }
343+
344+ @ Override
345+ public boolean inverseCanDrop (Statistics <Double > statistics ) {
346+ return statistics .getMin () > 0.0 ;
347+ }
348+ }
349+
300350 @ Test
301351 public void testUdp () {
302352 FilterPredicate pred = userDefined (intColumn , SevensAndEightsUdp .class );
@@ -308,6 +358,8 @@ public void testUdp() {
308358 FilterPredicate udpKeepMissingColumn = userDefined (missingColumn2 , SevensAndEightsUdp .class );
309359 FilterPredicate invUdpKeepMissingColumn = LogicalInverseRewriter .rewrite (not (userDefined (missingColumn2 , SevensAndEightsUdp .class )));
310360
361+ FilterPredicate allPositivePred = userDefined (doubleColumn , AllPositiveUdp .class );
362+
311363 IntStatistics seven = new IntStatistics ();
312364 seven .setMinMax (7 , 7 );
313365
@@ -392,6 +444,8 @@ public void testUdp() {
392444 assertTrue (canDrop (invUdpKeepMissingColumn , Arrays .asList (
393445 getIntColumnMeta (neither , 177L ),
394446 getDoubleColumnMeta (doubleStats , 177L ))));
447+
448+ assertFalse (canDrop (allPositivePred , missingMinMaxColumnMetas ));
395449 }
396450
397451 @ Test
0 commit comments