Skip to content

Commit 4659d57

Browse files
committed
Using encode_arrow_schema from arrow-rs.
1 parent fdc54b7 commit 4659d57

File tree

3 files changed

+1
-28
lines changed

3 files changed

+1
-28
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ apache-avro = { version = "0.20", default-features = false, features = [
6060
], optional = true }
6161
arrow = { workspace = true }
6262
arrow-ipc = { workspace = true }
63-
base64 = "0.22.1"
6463
chrono = { workspace = true }
6564
half = { workspace = true }
6665
hashbrown = { workspace = true }

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
//! Options related to how parquet files should be written
1919
20-
use base64::Engine;
2120
use std::sync::Arc;
2221

2322
use crate::{
@@ -26,6 +25,7 @@ use crate::{
2625
};
2726

2827
use arrow::datatypes::Schema;
28+
use parquet::arrow::encode_arrow_schema;
2929
// TODO: handle once deprecated
3030
#[allow(deprecated)]
3131
use parquet::{
@@ -166,31 +166,6 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
166166
}
167167
}
168168

169-
/// Encodes the Arrow schema into the IPC format, and base64 encodes it
170-
///
171-
/// TODO: use extern parquet's private method, once publicly available.
172-
/// Refer to <https://github.com/apache/arrow-rs/pull/6916>
173-
fn encode_arrow_schema(schema: &Arc<Schema>) -> String {
174-
let options = arrow_ipc::writer::IpcWriteOptions::default();
175-
let mut dictionary_tracker = arrow_ipc::writer::DictionaryTracker::new(true);
176-
let data_gen = arrow_ipc::writer::IpcDataGenerator::default();
177-
let mut serialized_schema = data_gen.schema_to_bytes_with_dictionary_tracker(
178-
schema,
179-
&mut dictionary_tracker,
180-
&options,
181-
);
182-
183-
// manually prepending the length to the schema as arrow uses the legacy IPC format
184-
// TODO: change after addressing ARROW-9777
185-
let schema_len = serialized_schema.ipc_message.len();
186-
let mut len_prefix_schema = Vec::with_capacity(schema_len + 8);
187-
len_prefix_schema.append(&mut vec![255u8, 255, 255, 255]);
188-
len_prefix_schema.append((schema_len as u32).to_le_bytes().to_vec().as_mut());
189-
len_prefix_schema.append(&mut serialized_schema.ipc_message);
190-
191-
base64::prelude::BASE64_STANDARD.encode(&len_prefix_schema)
192-
}
193-
194169
impl ParquetOptions {
195170
/// Convert the global session options, [`ParquetOptions`], into a single write action's [`WriterPropertiesBuilder`].
196171
///

0 commit comments

Comments
 (0)