This repository has been archived by the owner on Feb 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 224
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improved performance of writing to CSV (20-25%) (#382)
* Added bench of csv write.
* Optimized writing of CSV.
- Loading branch information
1 parent
3af5ffe
commit 4f8d793
Showing
13 changed files
with
248 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
use std::sync::Arc; | ||
|
||
use arrow2::util::bench_util::*; | ||
use criterion::{criterion_group, criterion_main, Criterion}; | ||
|
||
use arrow2::array::*; | ||
use arrow2::datatypes::*; | ||
use arrow2::error::Result; | ||
use arrow2::io::csv::write; | ||
use arrow2::record_batch::RecordBatch; | ||
|
||
fn write_batch(batch: &RecordBatch) -> Result<()> { | ||
let writer = &mut write::WriterBuilder::new().from_writer(vec![]); | ||
|
||
write::write_header(writer, batch.schema())?; | ||
|
||
let options = write::SerializeOptions::default(); | ||
write::write_batch(writer, batch, &options) | ||
} | ||
|
||
fn make_batch(array: impl Array + 'static) -> RecordBatch { | ||
let schema = Arc::new(Schema::new(vec![Field::new( | ||
"a", | ||
array.data_type().clone(), | ||
true, | ||
)])); | ||
RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap() | ||
} | ||
|
||
fn add_benchmark(c: &mut Criterion) { | ||
(10..=18).step_by(2).for_each(|log2_size| { | ||
let size = 2usize.pow(log2_size); | ||
|
||
let array = create_primitive_array::<i32>(size, DataType::Int32, 0.1); | ||
let batch = make_batch(array); | ||
|
||
c.bench_function(&format!("csv write i32 2^{}", log2_size), |b| { | ||
b.iter(|| write_batch(&batch)) | ||
}); | ||
|
||
let array = create_string_array::<i32>(size, 100, 0.1, 42); | ||
let batch = make_batch(array); | ||
|
||
c.bench_function(&format!("csv write utf8 2^{}", log2_size), |b| { | ||
b.iter(|| write_batch(&batch)) | ||
}); | ||
|
||
let array = create_primitive_array::<f64>(size, DataType::Float64, 0.1); | ||
let batch = make_batch(array); | ||
|
||
c.bench_function(&format!("csv write f64 2^{}", log2_size), |b| { | ||
b.iter(|| write_batch(&batch)) | ||
}); | ||
}); | ||
} | ||
|
||
criterion_group!(benches, add_benchmark); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
pub use streaming_iterator::StreamingIterator; | ||
|
||
/// A [`StreamingIterator`] with an internal buffer of [`Vec<u8>`] used to efficiently | ||
/// present items of type `T` as `&[u8]`. | ||
/// It is generic over the type `T` and the transformation `F: T -> &[u8]`. | ||
pub struct BufStreamingIterator<I, F, T> | ||
where | ||
I: Iterator<Item = T>, | ||
F: Fn(T, &mut Vec<u8>), | ||
{ | ||
iterator: I, | ||
f: F, | ||
buffer: Vec<u8>, | ||
is_valid: bool, | ||
} | ||
|
||
impl<I, F, T> BufStreamingIterator<I, F, T> | ||
where | ||
I: Iterator<Item = T>, | ||
F: Fn(T, &mut Vec<u8>), | ||
{ | ||
#[inline] | ||
pub fn new(iterator: I, f: F, buffer: Vec<u8>) -> Self { | ||
Self { | ||
iterator, | ||
f, | ||
buffer, | ||
is_valid: false, | ||
} | ||
} | ||
} | ||
|
||
impl<I, F, T> StreamingIterator for BufStreamingIterator<I, F, T> | ||
where | ||
I: Iterator<Item = T>, | ||
F: Fn(T, &mut Vec<u8>), | ||
{ | ||
type Item = [u8]; | ||
|
||
#[inline] | ||
fn advance(&mut self) { | ||
let a = self.iterator.next(); | ||
if let Some(a) = a { | ||
self.is_valid = true; | ||
self.buffer.clear(); | ||
(self.f)(a, &mut self.buffer); | ||
} else { | ||
self.is_valid = false; | ||
} | ||
} | ||
|
||
#[inline] | ||
fn get(&self) -> Option<&Self::Item> { | ||
if self.is_valid { | ||
Some(&self.buffer) | ||
} else { | ||
None | ||
} | ||
} | ||
|
||
#[inline] | ||
fn size_hint(&self) -> (usize, Option<usize>) { | ||
self.iterator.size_hint() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.