diff --git a/crates/polars-io/src/parquet/read/predicates.rs b/crates/polars-io/src/parquet/read/predicates.rs
index a3269341c1a3..5cadd92cef8a 100644
--- a/crates/polars-io/src/parquet/read/predicates.rs
+++ b/crates/polars-io/src/parquet/read/predicates.rs
@@ -25,12 +25,12 @@ pub(crate) fn collect_statistics(
     let stats = schema
         .iter_values()
         .map(|field| {
-            let iter = md.columns_under_root_iter(&field.name).unwrap();
-
-            Ok(if iter.len() == 0 {
-                ColumnStats::new(field.into(), None, None, None)
-            } else {
-                ColumnStats::from_arrow_stats(deserialize(field, iter)?, field)
+            let iter = md.columns_under_root_iter(&field.name);
+            Ok(match iter {
+                Some(x) if { x.len() > 0 } => {
+                    ColumnStats::from_arrow_stats(deserialize(field, x)?, field)
+                },
+                _ => ColumnStats::new(field.into(), None, None, None),
             })
         })
         .collect::<PolarsResult<Vec<_>>>()?;
diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py
index 71061fa6a4d5..3e335dc0f4f9 100644
--- a/py-polars/tests/unit/io/test_parquet.py
+++ b/py-polars/tests/unit/io/test_parquet.py
@@ -2710,3 +2710,24 @@ def test_boolean_slice_pushdown_20314() -> None:
 
     f.seek(0)
     assert pl.scan_parquet(f).slice(2, 1).collect().item()
+
+
+def test_allow_missing_columns_predicate_pushdown_20361() -> None:
+    f1 = io.BytesIO()
+    f2 = io.BytesIO()
+
+    pl.DataFrame({"a": [1, 2], "b": [1, 3], "c": [False, False]}).write_parquet(f1)
+    pl.DataFrame({"a": [4, 5], "c": [True, False]}).write_parquet(f2)
+
+    f1.seek(0)
+    f2.seek(0)
+
+    df = (
+        pl.scan_parquet([f1, f2], allow_missing_columns=True)  # type: ignore[arg-type]
+        .filter((pl.col.a == pl.col.b) | pl.col.c)
+        .collect()
+    )
+
+    assert_frame_equal(
+        df, pl.DataFrame({"a": [1, 4], "b": [1, None], "c": [False, True]})
+    )