diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs index 743f7f483e27..b0493b6ce0d3 100644 --- a/arrow/src/util/display.rs +++ b/arrow/src/util/display.rs @@ -23,8 +23,9 @@ use std::sync::Arc; use crate::array::Array; use crate::datatypes::{ - ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type, - Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + ArrowNativeType, ArrowPrimitiveType, DataType, Field, Int16Type, Int32Type, + Int64Type, Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + UnionMode, }; use crate::{array, datatypes::IntervalUnit}; @@ -395,6 +396,7 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result union_to_string(column, row, field_vec, mode), _ => Err(ArrowError::InvalidArgumentError(format!( "Pretty printing not implemented for {:?} type", column.data_type() @@ -402,6 +404,42 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result Result { + let list = column + .as_any() + .downcast_ref::() + .ok_or_else(|| { + ArrowError::InvalidArgumentError( + "Repl error: could not convert union column to union array.".to_string(), + ) + })?; + let type_id = list.type_id(row); + let name = fields + .get(type_id as usize) + .ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "Repl error: could not get field name for type id: {} in union array.", + type_id, + )) + })? + .name(); + + let value = array_value_to_string( + &list.child(type_id), + match mode { + UnionMode::Dense => list.value_offset(row) as usize, + UnionMode::Sparse => row, + }, + )?; + + Ok(format!("{{{}={}}}", name, value)) +} /// Converts the value of the dictionary array at `row` to a String fn dict_array_value_to_string( colum: &array::ArrayRef, diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs index f7e05bc073c8..3fa2729ba412 100644 --- a/arrow/src/util/pretty.rs +++ b/arrow/src/util/pretty.rs @@ -109,17 +109,18 @@ mod tests { use crate::{ array::{ self, new_null_array, Array, Date32Array, Date64Array, - FixedSizeBinaryBuilder, Float16Array, PrimitiveBuilder, StringArray, - StringBuilder, StringDictionaryBuilder, StructArray, Time32MillisecondArray, - Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, - TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, + FixedSizeBinaryBuilder, Float16Array, Int32Array, PrimitiveBuilder, + StringArray, StringBuilder, StringDictionaryBuilder, StructArray, + Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, + Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, UnionArray, UnionBuilder, }, - datatypes::{DataType, Field, Int32Type, Schema}, + buffer::Buffer, + datatypes::{DataType, Field, Float64Type, Int32Type, Schema, UnionMode}, }; use super::*; - use crate::array::{DecimalArray, FixedSizeListBuilder, Int32Array}; + use crate::array::{DecimalArray, FixedSizeListBuilder}; use std::fmt::Write; use std::sync::Arc; @@ -647,6 +648,148 @@ mod tests { Ok(()) } + #[test] + fn test_pretty_format_dense_union() -> Result<()> { + let mut builder = UnionBuilder::new_dense(4); + builder.append::("a", 1).unwrap(); + builder.append::("b", 3.2234).unwrap(); + builder.append_null::("b").unwrap(); + builder.append_null::("a").unwrap(); + let union = builder.build().unwrap(); + + let schema = Schema::new(vec![Field::new( + "Teamsters", + DataType::Union( + vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Float64, false), + ], + UnionMode::Dense, + ), + false, + )]); + + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); + let table = pretty_format_batches(&[batch])?.to_string(); + let actual: Vec<&str> = table.lines().collect(); + let expected = vec![ + "+------------+", + "| Teamsters |", + "+------------+", + "| {a=1} |", + "| {b=3.2234} |", + "| {b=} |", + "| {a=} |", + "+------------+", + ]; + + assert_eq!(expected, actual); + Ok(()) + } + + #[test] + fn test_pretty_format_sparse_union() -> Result<()> { + let mut builder = UnionBuilder::new_sparse(4); + builder.append::("a", 1).unwrap(); + builder.append::("b", 3.2234).unwrap(); + builder.append_null::("b").unwrap(); + builder.append_null::("a").unwrap(); + let union = builder.build().unwrap(); + + let schema = Schema::new(vec![Field::new( + "Teamsters", + DataType::Union( + vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Float64, false), + ], + UnionMode::Sparse, + ), + false, + )]); + + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); + let table = pretty_format_batches(&[batch])?.to_string(); + let actual: Vec<&str> = table.lines().collect(); + let expected = vec![ + "+------------+", + "| Teamsters |", + "+------------+", + "| {a=1} |", + "| {b=3.2234} |", + "| {b=} |", + "| {a=} |", + "+------------+", + ]; + + assert_eq!(expected, actual); + Ok(()) + } + + #[test] + fn test_pretty_format_nested_union() -> Result<()> { + //Inner UnionArray + let mut builder = UnionBuilder::new_dense(5); + builder.append::("b", 1).unwrap(); + builder.append::("c", 3.2234).unwrap(); + builder.append_null::("c").unwrap(); + builder.append_null::("b").unwrap(); + builder.append_null::("c").unwrap(); + let inner = builder.build().unwrap(); + + let inner_field = Field::new( + "European Union", + DataType::Union( + vec![ + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Float64, false), + ], + UnionMode::Dense, + ), + false, + ); + + // Can't use UnionBuilder with non-primitive types, so manually build outer UnionArray + let a_array = Int32Array::from(vec![None, None, None, Some(1234), Some(23)]); + let type_ids = Buffer::from_slice_ref(&[1_i8, 1, 0, 0, 1]); + + let children: Vec<(Field, Arc)> = vec![ + (Field::new("a", DataType::Int32, true), Arc::new(a_array)), + (inner_field.clone(), Arc::new(inner)), + ]; + + let outer = UnionArray::try_new(type_ids, None, children).unwrap(); + + let schema = Schema::new(vec![Field::new( + "Teamsters", + DataType::Union( + vec![Field::new("a", DataType::Int32, true), inner_field], + UnionMode::Sparse, + ), + false, + )]); + + let batch = + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap(); + let table = pretty_format_batches(&[batch])?.to_string(); + let actual: Vec<&str> = table.lines().collect(); + let expected = vec![ + "+-----------------------------+", + "| Teamsters |", + "+-----------------------------+", + "| {European Union={b=1}} |", + "| {European Union={c=3.2234}} |", + "| {a=} |", + "| {a=1234} |", + "| {European Union={c=}} |", + "+-----------------------------+", + ]; + assert_eq!(expected, actual); + Ok(()) + } + #[test] fn test_writing_formatted_batches() -> Result<()> { // define a schema.