1717
1818//! Options related to how parquet files should be written
1919
20- use base64:: Engine ;
2120use std:: sync:: Arc ;
2221
2322use crate :: {
@@ -26,6 +25,7 @@ use crate::{
2625} ;
2726
2827use arrow:: datatypes:: Schema ;
28+ use parquet:: arrow:: encode_arrow_schema;
2929// TODO: handle once deprecated
3030#[ allow( deprecated) ]
3131use parquet:: {
@@ -166,31 +166,6 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
166166 }
167167}
168168
169- /// Encodes the Arrow schema into the IPC format, and base64 encodes it
170- ///
171- /// TODO: use extern parquet's private method, once publicly available.
172- /// Refer to <https://github.com/apache/arrow-rs/pull/6916>
173- fn encode_arrow_schema ( schema : & Arc < Schema > ) -> String {
174- let options = arrow_ipc:: writer:: IpcWriteOptions :: default ( ) ;
175- let mut dictionary_tracker = arrow_ipc:: writer:: DictionaryTracker :: new ( true ) ;
176- let data_gen = arrow_ipc:: writer:: IpcDataGenerator :: default ( ) ;
177- let mut serialized_schema = data_gen. schema_to_bytes_with_dictionary_tracker (
178- schema,
179- & mut dictionary_tracker,
180- & options,
181- ) ;
182-
183- // manually prepending the length to the schema as arrow uses the legacy IPC format
184- // TODO: change after addressing ARROW-9777
185- let schema_len = serialized_schema. ipc_message . len ( ) ;
186- let mut len_prefix_schema = Vec :: with_capacity ( schema_len + 8 ) ;
187- len_prefix_schema. append ( & mut vec ! [ 255u8 , 255 , 255 , 255 ] ) ;
188- len_prefix_schema. append ( ( schema_len as u32 ) . to_le_bytes ( ) . to_vec ( ) . as_mut ( ) ) ;
189- len_prefix_schema. append ( & mut serialized_schema. ipc_message ) ;
190-
191- base64:: prelude:: BASE64_STANDARD . encode ( & len_prefix_schema)
192- }
193-
194169impl ParquetOptions {
195170 /// Convert the global session options, [`ParquetOptions`], into a single write action's [`WriterPropertiesBuilder`].
196171 ///
0 commit comments