@@ -67,11 +67,13 @@ private ColumnChunkMetaData getColumnChunk(ColumnPath columnPath) {
6767 }
6868
6969 // is this column chunk composed entirely of nulls?
70+ // assumes the column chunk's statistics is not empty
7071 private boolean isAllNulls (ColumnChunkMetaData column ) {
7172 return column .getStatistics ().getNumNulls () == column .getValueCount ();
7273 }
7374
7475 // are there any nulls in this column chunk?
76+ // assumes the column chunk's statistics is not empty
7577 private boolean hasNulls (ColumnChunkMetaData column ) {
7678 return column .getStatistics ().getNumNulls () > 0 ;
7779 }
@@ -84,11 +86,7 @@ public <T extends Comparable<T>> Boolean visit(Eq<T> eq) {
8486 Statistics <T > stats = columnChunk .getStatistics ();
8587
8688 if (stats .isEmpty ()) {
87- // Do not filter in case of empty statistics
88- // pre-statistics files will have no such info
89- // best not to filter these.
90- // Also, in case of all nulls in a column, empty statistics object
91- // is returned. This fixes the read path because of that bug
89+ // we have no statistics available, we cannot drop any chunks
9290 return false ;
9391 }
9492
@@ -116,11 +114,7 @@ public <T extends Comparable<T>> Boolean visit(NotEq<T> notEq) {
116114 Statistics <T > stats = columnChunk .getStatistics ();
117115
118116 if (stats .isEmpty ()) {
119- // Do not filter in case of empty statistics
120- // pre-statistics files will have no such info
121- // best not to filter these.
122- // Also, in case of all nulls in a column, empty statistics object
123- // is returned. This fixes the read path because of that bug
117+ // we have no statistics available, we cannot drop any chunks
124118 return false ;
125119 }
126120
@@ -136,6 +130,13 @@ public <T extends Comparable<T>> Boolean visit(NotEq<T> notEq) {
136130 return false ;
137131 }
138132
133+ if (isAllNulls (columnChunk )) {
134+ // there is no min max, there is nothing
135+ // else we can say about this chunk, we
136+ // cannot drop it.
137+ return false ;
138+ }
139+
139140 // drop if this is a column where min = max = value
140141 return value .compareTo (stats .genericGetMin ()) == 0 && value .compareTo (stats .genericGetMax ()) == 0 ;
141142 }
@@ -148,11 +149,7 @@ public <T extends Comparable<T>> Boolean visit(Lt<T> lt) {
148149 Statistics <T > stats = columnChunk .getStatistics ();
149150
150151 if (stats .isEmpty ()) {
151- // Do not filter in case of empty statistics
152- // pre-statistics files will have no such info
153- // best not to filter these.
154- // Also, in case of all nulls in a column, empty statistics object
155- // is returned. This fixes the read path because of that bug
152+ // we have no statistics available, we cannot drop any chunks
156153 return false ;
157154 }
158155
@@ -174,11 +171,7 @@ public <T extends Comparable<T>> Boolean visit(LtEq<T> ltEq) {
174171 Statistics <T > stats = columnChunk .getStatistics ();
175172
176173 if (stats .isEmpty ()) {
177- // Do not filter in case of empty statistics
178- // pre-statistics files will have no such info
179- // best not to filter these.
180- // Also, in case of all nulls in a column, empty statistics object
181- // is returned. This fixes the read path because of that bug
174+ // we have no statistics available, we cannot drop any chunks
182175 return false ;
183176 }
184177
@@ -200,11 +193,7 @@ public <T extends Comparable<T>> Boolean visit(Gt<T> gt) {
200193 Statistics <T > stats = columnChunk .getStatistics ();
201194
202195 if (stats .isEmpty ()) {
203- // Do not filter in case of empty statistics
204- // pre-statistics files will have no such info
205- // best not to filter these.
206- // Also, in case of all nulls in a column, empty statistics object
207- // is returned. This fixes the read path because of that bug
196+ // we have no statistics available, we cannot drop any chunks
208197 return false ;
209198 }
210199
@@ -226,11 +215,7 @@ public <T extends Comparable<T>> Boolean visit(GtEq<T> gtEq) {
226215 Statistics <T > stats = columnChunk .getStatistics ();
227216
228217 if (stats .isEmpty ()) {
229- // Do not filter in case of empty statistics
230- // pre-statistics files will have no such info
231- // best not to filter these.
232- // Also, in case of all nulls in a column, empty statistics object
233- // is returned. This fixes the read path because of that bug
218+ // we have no statistics available, we cannot drop any chunks
234219 return false ;
235220 }
236221
@@ -269,12 +254,16 @@ private <T extends Comparable<T>, U extends UserDefinedPredicate<T>> Boolean vis
269254 ColumnChunkMetaData columnChunk = getColumnChunk (filterColumn .getColumnPath ());
270255 U udp = ud .getUserDefinedPredicate ();
271256 Statistics <T > stats = columnChunk .getStatistics ();
257+
272258 if (stats .isEmpty ()) {
273- // Do not filter in case of empty statistics
274- // pre-statistics files will have no such info
275- // best not to filter these.
276- // Also, in case of all nulls in a column, empty statistics object
277- // is returned. This fixes the read path because of that bug
259+ // we have no statistics available, we cannot drop any chunks
260+ return false ;
261+ }
262+
263+ if (isAllNulls (columnChunk )) {
264+ // there is no min max, there is nothing
265+ // else we can say about this chunk, we
266+ // cannot drop it.
278267 return false ;
279268 }
280269
0 commit comments