diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java index 9b03f82b19..dc1d649c39 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilter.java @@ -123,6 +123,12 @@ public > Boolean visit(Eq eq) { filterColumn.getColumnPath(); + if (value == null) { + // the dictionary contains only non-null values so isn't helpful. this + // could check the column stats, but the StatisticsFilter is responsible + return BLOCK_MIGHT_MATCH; + } + try { Set dictSet = expandDictionary(meta); if (dictSet != null && !dictSet.contains(value)) { @@ -150,6 +156,12 @@ public > Boolean visit(NotEq notEq) { filterColumn.getColumnPath(); + if (value == null) { + // the dictionary contains only non-null values so isn't helpful. this + // could check the column stats, but the StatisticsFilter is responsible + return BLOCK_MIGHT_MATCH; + } + try { Set dictSet = expandDictionary(meta); if (dictSet != null && dictSet.size() == 1 && dictSet.contains(value)) { diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java index 754da686b9..35b944dee0 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/dictionarylevel/DictionaryFilterTest.java @@ -193,6 +193,9 @@ public void testEqBinary() throws Exception { assertTrue("Should drop block for upper case letters", canDrop(eq(b, Binary.fromString("A")), ccmd, dictionaries)); + + assertFalse("Should not drop block for null", + canDrop(eq(b, null), ccmd, dictionaries)); } @Test @@ -211,6 +214,9 @@ public void testNotEqBinary() throws Exception { assertFalse("Should not drop block with a known value", canDrop(notEq(b, Binary.fromString("B")), ccmd, dictionaries)); + + assertFalse("Should not drop block for null", + canDrop(notEq(b, null), ccmd, dictionaries)); } @Test