Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Accept i64 for u32
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jun 21, 2022
1 parent d1ab4ef commit 6c69c6b
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 16 deletions.
31 changes: 23 additions & 8 deletions src/io/parquet/read/deserialize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,14 +188,29 @@ where
}
UInt32 => {
init.push(InitNested::Primitive(field.is_nullable));
types.pop();
primitive::iter_to_arrays_nested(
columns.pop().unwrap(),
init,
field.data_type().clone(),
chunk_size,
|x: i32| x as u32,
)
let type_ = types.pop().unwrap();
match type_.physical_type {
PhysicalType::Int32 => primitive::iter_to_arrays_nested(
columns.pop().unwrap(),
init,
field.data_type().clone(),
chunk_size,
|x: i32| x as u32,
),
// Some parquet writers store Arrow's UInt32 using the Int64 physical type.
PhysicalType::Int64 => primitive::iter_to_arrays_nested(
columns.pop().unwrap(),
init,
field.data_type().clone(),
chunk_size,
|x: i64| x as u32,
),
other => {
return Err(Error::nyi(format!(
"Deserializing UInt32 from {other:?}'s parquet"
)))
}
}
}
UInt64 => {
init.push(InitNested::Primitive(field.is_nullable));
Expand Down
27 changes: 21 additions & 6 deletions src/io/parquet/read/deserialize/simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,27 @@ pub fn page_iter_to_arrays<'a, I: 'a + DataPages>(
chunk_size,
|x: i32| x as u16,
))),
UInt32 => dyn_iter(iden(primitive::Iter::new(
pages,
data_type,
chunk_size,
|x: i32| x as u32,
))),
UInt32 => match physical_type {
PhysicalType::Int32 => dyn_iter(iden(primitive::Iter::new(
pages,
data_type,
chunk_size,
|x: i32| x as u32,
))),
// Some parquet writers store Arrow's UInt32 using the Int64 physical type.
PhysicalType::Int64 => dyn_iter(iden(primitive::Iter::new(
pages,
data_type,
chunk_size,
|x: i64| x as u32,
))),
other => {
return Err(Error::NotYetImplemented(format!(
"Reading uin32 from {:?}-encoded parquet still not implemented",
other
)))
}
},
Int8 => dyn_iter(iden(primitive::Iter::new(
pages,
data_type,
Expand Down
12 changes: 11 additions & 1 deletion src/io/parquet/read/statistics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,17 @@ fn push(
}
UInt8 => primitive::push(from, min, max, |x: i32| Ok(x as u8)),
UInt16 => primitive::push(from, min, max, |x: i32| Ok(x as u16)),
UInt32 => primitive::push(from, min, max, |x: i32| Ok(x as u32)),
UInt32 => match physical_type {
// Some parquet writers store Arrow's UInt32 using the Int64 physical type.
ParquetPhysicalType::Int64 => primitive::push(from, min, max, |x: i64| Ok(x as u32)),
ParquetPhysicalType::Int32 => primitive::push(from, min, max, |x: i32| Ok(x as u32)),
other => {
return Err(Error::NotYetImplemented(format!(
"Can't decode UInt32 type from parquet type {:?}",
other
)))
}
},
Int32 => primitive::push(from, min, max, |x: i32| Ok(x as i32)),
Int64 | Date64 | Time64(_) | Duration(_) => {
primitive::push(from, min, max, |x: i64| Ok(x as i64))
Expand Down
6 changes: 5 additions & 1 deletion tests/it/io/parquet/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,15 @@ fn v1_timestamp_ms_nullable() -> Result<()> {
}

// Runs the pyarrow integration helper for the "uint32" column of the "basic"
// fixture and propagates its `Result`.
// NOTE(review): the second argument (1) presumably selects the parquet format
// version, matching the `v1_` prefix; the two `false` flags and the trailing
// `None` are helper options not visible here — confirm against
// `test_pyarrow_integration`.
#[test]
#[ignore] // pyarrow issue; see https://issues.apache.org/jira/browse/ARROW-12201
fn v1_u32_nullable() -> Result<()> {
test_pyarrow_integration("uint32", 1, "basic", false, false, None)
}

// Runs the pyarrow integration helper for the "uint32" column of the "basic"
// fixture and propagates its `Result`.
// NOTE(review): the second argument (2) presumably selects the parquet format
// version, matching the `v2_` prefix; the two `false` flags and the trailing
// `None` are helper options not visible here — confirm against
// `test_pyarrow_integration`.
#[test]
fn v2_u32_nullable() -> Result<()> {
test_pyarrow_integration("uint32", 2, "basic", false, false, None)
}

#[test]
fn v2_int64_nullable() -> Result<()> {
test_pyarrow_integration("int64", 2, "basic", false, false, None)
Expand Down

0 comments on commit 6c69c6b

Please sign in to comment.