Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added support for reading and writing float (Float32/Float64) dictionary arrays from/to parquet
Browse files: browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jan 19, 2022
1 parent b70483e commit 130292d
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 1 deletion.
6 changes: 6 additions & 0 deletions src/io/parquet/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,12 @@ fn dict_read<
Int64 | Date64 | Time64(_) | Duration(_) | Timestamp(_, _) => {
primitive::iter_to_dict_array::<K, _, _, _, _, _>(iter, metadata, data_type, |x: i64| x)
}
Float32 => {
primitive::iter_to_dict_array::<K, _, _, _, _, _>(iter, metadata, data_type, |x: f32| x)
}
Float64 => {
primitive::iter_to_dict_array::<K, _, _, _, _, _>(iter, metadata, data_type, |x: f64| x)
}
Utf8 => binary::iter_to_dict_array::<K, i32, _, _>(iter, metadata, data_type),
LargeUtf8 => binary::iter_to_dict_array::<K, i64, _, _>(iter, metadata, data_type),
other => Err(ArrowError::NotYetImplemented(format!(
Expand Down
2 changes: 2 additions & 0 deletions src/io/parquet/write/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ pub fn array_to_pages<K: DictionaryKey>(
DataType::UInt16 => dyn_prim!(u16, i32, array, options),
DataType::UInt32 => dyn_prim!(u32, i32, array, options),
DataType::UInt64 => dyn_prim!(i64, i64, array, options),
DataType::Float32 => dyn_prim!(f32, f32, array, options),
DataType::Float64 => dyn_prim!(f64, f64, array, options),
DataType::Utf8 => {
let values = array.values().as_any().downcast_ref().unwrap();

Expand Down
12 changes: 11 additions & 1 deletion tests/it/io/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -716,11 +716,21 @@ fn arrow_type() -> Result<()> {
let dt1 = DataType::Duration(TimeUnit::Second);
let array = PrimitiveArray::<i64>::from([Some(1), None, Some(2)]).to(dt1.clone());
let array2 = Utf8Array::<i64>::from([Some("a"), None, Some("bb")]);

let indices = PrimitiveArray::from_values((0..3u64).map(|x| x % 2));
let values = PrimitiveArray::from_slice([1.0f32, 3.0]);
let array3 = DictionaryArray::from_data(indices, std::sync::Arc::new(values));

let schema = Schema::from(vec![
Field::new("a1", dt1, true),
Field::new("a2", array2.data_type().clone(), true),
Field::new("a3", array3.data_type().clone(), true),
]);
let batch = Chunk::try_new(vec![Arc::new(array) as Arc<dyn Array>, Arc::new(array2)])?;
let batch = Chunk::try_new(vec![
Arc::new(array) as Arc<dyn Array>,
Arc::new(array2),
Arc::new(array3),
])?;

let r = integration_write(&schema, &[batch.clone()])?;

Expand Down

0 comments on commit 130292d

Please sign in to comment.