@@ -1033,15 +1033,15 @@ impl ArrowColumnWriterFactory {
10331033
10341034 match data_type {
10351035 _ if data_type. is_primitive ( ) => out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) ,
1036- ArrowDataType :: FixedSizeBinary ( _) | ArrowDataType :: Boolean | ArrowDataType :: Null => out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) ,
1036+ ArrowDataType :: FixedSizeBinary ( _) | ArrowDataType :: Boolean | ArrowDataType :: Null => {
1037+ out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?)
1038+ }
10371039 ArrowDataType :: LargeBinary
10381040 | ArrowDataType :: Binary
10391041 | ArrowDataType :: Utf8
10401042 | ArrowDataType :: LargeUtf8
10411043 | ArrowDataType :: BinaryView
1042- | ArrowDataType :: Utf8View => {
1043- out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?)
1044- }
1044+ | ArrowDataType :: Utf8View => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) ,
10451045 ArrowDataType :: List ( f)
10461046 | ArrowDataType :: LargeList ( f)
10471047 | ArrowDataType :: FixedSizeList ( f, _) => {
@@ -1058,21 +1058,30 @@ impl ArrowColumnWriterFactory {
10581058 self . get_arrow_column_writer ( f[ 1 ] . data_type ( ) , props, leaves, out) ?
10591059 }
10601060 _ => unreachable ! ( "invalid map type" ) ,
1061- }
1061+ } ,
10621062 ArrowDataType :: Dictionary ( _, value_type) => match value_type. as_ref ( ) {
1063- ArrowDataType :: Utf8 | ArrowDataType :: LargeUtf8 | ArrowDataType :: Binary | ArrowDataType :: LargeBinary => {
1064- out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?)
1065- }
1063+ ArrowDataType :: Utf8
1064+ | ArrowDataType :: LargeUtf8
1065+ | ArrowDataType :: Binary
1066+ | ArrowDataType :: LargeBinary => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) ,
10661067 ArrowDataType :: Utf8View | ArrowDataType :: BinaryView => {
10671068 out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?)
10681069 }
1069- ArrowDataType :: FixedSizeBinary ( _) => {
1070+ ArrowDataType :: FixedSizeBinary ( _) => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) ,
1071+ _ => out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) ,
1072+ } ,
1073+ // TODO: Tentative RunEndEncoded support, needs review. This arm dispatches on the values type exactly as the Dictionary arm above does.
1074+ ArrowDataType :: RunEndEncoded ( _run_ends, value_type) => match value_type. data_type ( ) {
1075+ ArrowDataType :: Utf8
1076+ | ArrowDataType :: LargeUtf8
1077+ | ArrowDataType :: Binary
1078+ | ArrowDataType :: LargeBinary => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) ,
1079+ ArrowDataType :: Utf8View | ArrowDataType :: BinaryView => {
10701080 out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?)
10711081 }
1072- _ => {
1073- out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?)
1074- }
1075- }
1082+ ArrowDataType :: FixedSizeBinary ( _) => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) ,
1083+ _ => out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) ,
1084+ } ,
10761085 _ => return Err ( ParquetError :: NYI (
10771086 format ! (
10781087 "Attempting to write an Arrow type {data_type:?} to parquet that is not yet implemented"
@@ -1166,6 +1175,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
11661175 write_primitive ( typed, array. values ( ) , levels)
11671176 }
11681177 } ,
1178+ ArrowDataType :: RunEndEncoded ( _run_ends, _value_type) => todo ! ( ) ,
11691179 _ => {
11701180 let array = arrow_cast:: cast ( column, & ArrowDataType :: Int32 ) ?;
11711181 let array = array. as_primitive :: < Int32Type > ( ) ;
@@ -1248,6 +1258,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
12481258 write_primitive ( typed, array. values ( ) , levels)
12491259 }
12501260 } ,
1261+ ArrowDataType :: RunEndEncoded ( _run_ends, _values) => todo ! ( ) ,
12511262 _ => {
12521263 let array = arrow_cast:: cast ( column, & ArrowDataType :: Int64 ) ?;
12531264 let array = array. as_primitive :: < Int64Type > ( ) ;
@@ -1324,6 +1335,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
13241335 let array = column. as_primitive :: < Float16Type > ( ) ;
13251336 get_float_16_array_slice ( array, indices)
13261337 }
1338+ ArrowDataType :: RunEndEncoded ( _run_ends, _values) => todo ! ( ) ,
13271339 _ => {
13281340 return Err ( ParquetError :: NYI (
13291341 "Attempting to write an Arrow type that is not yet implemented" . to_string ( ) ,
@@ -1494,6 +1506,7 @@ mod tests {
14941506 use arrow:: util:: pretty:: pretty_format_batches;
14951507 use arrow:: { array:: * , buffer:: Buffer } ;
14961508 use arrow_buffer:: { i256, IntervalDayTime , IntervalMonthDayNano , NullBuffer } ;
1509+ use arrow_ipc:: RunEndEncoded ;
14971510 use arrow_schema:: Fields ;
14981511 use half:: f16;
14991512 use num:: { FromPrimitive , ToPrimitive } ;
@@ -4293,4 +4306,35 @@ mod tests {
42934306 assert_eq ! ( get_dict_page_size( col0_meta) , 1024 * 1024 ) ;
42944307 assert_eq ! ( get_dict_page_size( col1_meta) , 1024 * 1024 * 4 ) ;
42954308 }
4309+
4310+ // TODO: Remove before merging. Temporary dictionary-array round-trip test, kept only as a baseline to compare against arrow_writer_run_end_encoded.
4311+ #[ test]
4312+ fn arrow_writer_string_dictionary_two ( ) {
4313+ let mut builder = StringDictionaryBuilder :: < Int32Type > :: new ( ) ;
4314+ builder. extend ( [ Some ( "alpha" ) , Some ( "alpha" ) , Some ( "beta" ) ] ) ;
4315+ let dict_array = builder. finish ( ) ;
4316+ println ! ( "dict_array type: {:?}" , dict_array. data_type( ) ) ;
4317+ let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new(
4318+ "dict" ,
4319+ dict_array. data_type( ) . clone( ) ,
4320+ dict_array. is_nullable( ) ,
4321+ ) ] ) ) ;
4322+ one_column_roundtrip_with_schema ( Arc :: new ( dict_array) , schema) ;
4323+ }
4324+
4325+ #[ test]
4326+ fn arrow_writer_run_end_encoded ( ) {
4327+ // Build a run-end-encoded string array with Int32 run ends from three values: alpha, alpha, beta.
4328+ let mut builder = StringRunBuilder :: < Int32Type > :: new ( ) ;
4329+ builder. extend ( [ Some ( "alpha" ) , Some ( "alpha" ) , Some ( "beta" ) ] ) ;
4330+ let run_array: RunArray < Int32Type > = builder. finish ( ) ;
4331+ println ! ( "run_array type: {:?}" , run_array. data_type( ) ) ;
4332+ let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new(
4333+ "ree" ,
4334+ run_array. data_type( ) . clone( ) ,
4335+ run_array. is_nullable( ) ,
4336+ ) ] ) ) ;
4337+ // NOTE(review): this round trip currently fails, presumably because the reader does not hand the column back as a RunArray. TODO confirm the cause.
4338+ one_column_roundtrip_with_schema ( Arc :: new ( run_array) , schema) ;
4339+ }
42964340}
0 commit comments