diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index cfad15550bcf..18e8a6fcfe77 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -18,6 +18,7 @@ //! Contains writer which writes arrow data into parquet data. use std::collections::VecDeque; +use std::fmt::Debug; use std::io::Write; use std::sync::Arc; @@ -92,6 +93,30 @@ pub struct ArrowWriter { max_row_group_size: usize, } +impl Debug for ArrowWriter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let buffered_batches = self.buffer.len(); + let mut buffered_memory = 0; + + for batch in self.buffer.iter() { + for arr in batch.iter() { + buffered_memory += arr.get_array_memory_size() + } + } + + f.debug_struct("ArrowWriter") + .field("writer", &self.writer) + .field( + "buffer", + &format!("{buffered_batches} , {buffered_memory} bytes"), + ) + .field("buffered_rows", &self.buffered_rows) + .field("arrow_schema", &self.arrow_schema) + .field("max_row_group_size", &self.max_row_group_size) + .finish() + } +} + impl ArrowWriter { /// Try to create a new Arrow writer /// diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index b4ae777bb131..93e8319b0c3e 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -21,6 +21,7 @@ use crate::bloom_filter::Sbbf; use crate::format as parquet; use crate::format::{ColumnIndex, OffsetIndex, RowGroup}; +use std::fmt::Debug; use std::io::{BufWriter, IoSlice, Read}; use std::{io::Write, sync::Arc}; use thrift::protocol::{TCompactOutputProtocol, TSerializable}; @@ -147,6 +148,18 @@ pub struct SerializedFileWriter { kv_metadatas: Vec, } +impl Debug for SerializedFileWriter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // implement Debug so this can be used with #[derive(Debug)] + // in client code rather than actually listing all the fields + f.debug_struct("SerializedFileWriter") + .field("descr", &self.descr) + .field("row_group_index", &self.row_group_index) + .field("kv_metadatas", &self.kv_metadatas) + .finish_non_exhaustive() + } +} + impl SerializedFileWriter { /// Creates new file writer. pub fn new(buf: W, schema: TypePtr, properties: WriterPropertiesPtr) -> Result {