Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Fixed bug in writing csv with buffer resizing #965

Merged
merged 1 commit into from
Apr 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions src/io/csv/write/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,7 @@ fn new_utf8_serializer<'a, O: Offset>(
.delimiter(options.delimiter)
.build();

let resize = |local_buf: &mut Vec<u8>| {
let additional = local_buf.len();
let resize = |local_buf: &mut Vec<u8>, additional: usize| {
local_buf.extend(std::iter::repeat(0u8).take(additional))
};

Expand All @@ -236,16 +235,15 @@ fn new_utf8_serializer<'a, O: Offset>(
// This will ensure a csv parser will not read them as missing
// in a delimited field
Some("") => buf.extend_from_slice(b"\"\""),
Some(s) => loop {
// first write field
Some(s) => {
if s.len() * 3 > local_buf.len() {
resize(&mut local_buf, s.len() * 3)
}
match ser_writer.field(s.as_bytes(), &mut local_buf) {
(WriteResult::OutputFull, _, _) => resize(&mut local_buf),
// then on success write delimiter
// we need to make this call because we might need to end with quotes
(WriteResult::InputEmpty, _, n_out) => {
// the writer::delimiter call writes a maximum of 2 bytes
if local_buf.len() - n_out < 2 {
resize(&mut local_buf);
resize(&mut local_buf, 2);
}
match ser_writer.delimiter(&mut local_buf[n_out..]) {
(WriteResult::InputEmpty, n_out_delimiter) => {
Expand All @@ -256,10 +254,10 @@ fn new_utf8_serializer<'a, O: Offset>(
}
_ => unreachable!(),
}
break;
}
_ => unreachable!(),
}
},
}
_ => {}
}
},
Expand Down
19 changes: 19 additions & 0 deletions tests/it/io/csv/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,22 @@ fn write_escaping() {

assert_eq!(csv, "\"Acme co., Ltd.\"\n");
}

/// Checks that serializing a long field which also needs quoting (it contains
/// the delimiter) produces the complete quoted field, i.e. that the
/// serializer's local buffer is grown as required.
#[test]
fn write_escaping_resize_local_buf() {
    // One column holding one 64-byte value with an embedded comma.
    // NOTE(review): presumably chosen to exceed the serializer's initial
    // local buffer once quoted -- confirm against the serializer.
    let values =
        Utf8Array::<i32>::from_slice(&[
            "bar,123456789012345678901234567890123456789012345678901234567890",
        ]);
    let column: Arc<dyn Array> = Arc::new(values);
    let chunk = Chunk::new(vec![column]);

    // Serialize with default options into an in-memory sink.
    let mut sink = Vec::new();
    write_chunk(&mut sink, &chunk, &SerializeOptions::default()).unwrap();

    // The whole value must come back wrapped in quotes and newline-terminated.
    let written = std::str::from_utf8(&sink).unwrap();
    assert_eq!(
        written,
        "\"bar,123456789012345678901234567890123456789012345678901234567890\"\n"
    );
}