1717
1818//! Options related to how parquet files should be written
1919
20- use base64:: Engine ;
2120use std:: sync:: Arc ;
2221
2322use crate :: {
@@ -26,6 +25,7 @@ use crate::{
2625} ;
2726
2827use arrow:: datatypes:: Schema ;
28+ use parquet:: arrow:: encode_arrow_schema;
2929// TODO: handle once deprecated
3030#[ allow( deprecated) ]
3131use parquet:: {
@@ -166,30 +166,6 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
166166 }
167167}
168168
169- /// Encodes the Arrow schema into the IPC format, and base64 encodes it
170- ///
171- /// TODO: use extern parquet's private method, once publicly available.
172- /// Refer to <https://github.com/apache/arrow-rs/pull/6916>
173- fn encode_arrow_schema ( schema : & Arc < Schema > ) -> String {
174- let options = arrow_ipc:: writer:: IpcWriteOptions :: default ( ) ;
175- let mut dictionary_tracker = arrow_ipc:: writer:: DictionaryTracker :: new ( true ) ;
176- let data_gen = arrow_ipc:: writer:: IpcDataGenerator :: default ( ) ;
177- let mut serialized_schema = data_gen. schema_to_bytes_with_dictionary_tracker (
178- schema,
179- & mut dictionary_tracker,
180- & options,
181- ) ;
182-
183- // manually prepending the length to the schema as arrow uses the legacy IPC format
184- // TODO: change after addressing ARROW-9777
185- let schema_len = serialized_schema. ipc_message . len ( ) ;
186- let mut len_prefix_schema = Vec :: with_capacity ( schema_len + 8 ) ;
187- len_prefix_schema. append ( & mut vec ! [ 255u8 , 255 , 255 , 255 ] ) ;
188- len_prefix_schema. append ( ( schema_len as u32 ) . to_le_bytes ( ) . to_vec ( ) . as_mut ( ) ) ;
189- len_prefix_schema. append ( & mut serialized_schema. ipc_message ) ;
190-
191- base64:: prelude:: BASE64_STANDARD . encode ( & len_prefix_schema)
192- }
193169
194170impl ParquetOptions {
195171 /// Convert the global session options, [`ParquetOptions`], into a single write action's [`WriterPropertiesBuilder`].
@@ -526,7 +502,7 @@ mod tests {
526502 EnabledStatistics :: Chunk => "chunk" ,
527503 EnabledStatistics :: Page => "page" ,
528504 }
529- . into ( ) ,
505+ . into ( ) ,
530506 ) ,
531507 bloom_filter_fpp : bloom_filter_default_props. map ( |p| p. fpp ) ,
532508 bloom_filter_ndv : bloom_filter_default_props. map ( |p| p. ndv ) ,
@@ -671,7 +647,7 @@ mod tests {
671647 COL_NAME . into ( ) ,
672648 column_options_with_non_defaults ( & parquet_options) ,
673649 ) ]
674- . into ( ) ,
650+ . into ( ) ,
675651 key_value_metadata : [ ( key, value) ] . into ( ) ,
676652 crypto : Default :: default ( ) ,
677653 } ;
0 commit comments