Skip to content

Commit

Permalink
Fixed bug in writing csv with buffer resizing (jorgecarleitao#965)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored and ygf11 committed Apr 28, 2022
1 parent 0d01207 commit 0718c9a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 10 deletions.
18 changes: 8 additions & 10 deletions src/io/csv/write/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,7 @@ fn new_utf8_serializer<'a, O: Offset>(
.delimiter(options.delimiter)
.build();

let resize = |local_buf: &mut Vec<u8>| {
let additional = local_buf.len();
let resize = |local_buf: &mut Vec<u8>, additional: usize| {
local_buf.extend(std::iter::repeat(0u8).take(additional))
};

Expand All @@ -236,16 +235,15 @@ fn new_utf8_serializer<'a, O: Offset>(
// This will ensure a csv parser will not read them as missing
// in a delimited field
Some("") => buf.extend_from_slice(b"\"\""),
Some(s) => loop {
// first write field
Some(s) => {
if s.len() < local_buf.len() * 3 {
resize(&mut local_buf, s.len() * 3)
}
match ser_writer.field(s.as_bytes(), &mut local_buf) {
(WriteResult::OutputFull, _, _) => resize(&mut local_buf),
// then on success write delimiter
// we need to make this call because we might need to end with quotes
(WriteResult::InputEmpty, _, n_out) => {
// the writer::delimiter call writes a maximum of 2 bytes
if local_buf.len() - n_out < 2 {
resize(&mut local_buf);
resize(&mut local_buf, 2);
}
match ser_writer.delimiter(&mut local_buf[n_out..]) {
(WriteResult::InputEmpty, n_out_delimiter) => {
Expand All @@ -256,10 +254,10 @@ fn new_utf8_serializer<'a, O: Offset>(
}
_ => unreachable!(),
}
break;
}
_ => unreachable!(),
}
},
}
_ => {}
}
},
Expand Down
19 changes: 19 additions & 0 deletions tests/it/io/csv/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,3 +346,22 @@ fn write_escaping() {

assert_eq!(csv, "\"Acme co., Ltd.\"\n");
}

#[test]
fn write_escaping_resize_local_buf() {
    // Regression check for the serializer's local buffer: the single value
    // below contains the delimiter, so it is expected to come out wrapped in
    // quotes, and it is long enough to exercise the buffer-growth path
    // (NOTE(review): the buffer's starting size is not visible from this test).
    const FIELD: &str = "bar,123456789012345678901234567890123456789012345678901234567890";

    // One-column chunk holding just that value.
    let values = Utf8Array::<i32>::from_slice(&[FIELD]);
    let column: Arc<dyn Array> = Arc::new(values);
    let columns = Chunk::new(vec![column]);

    // Serialize into an in-memory sink using the default options.
    let options = SerializeOptions::default();
    let mut sink = Vec::<u8>::new();
    write_chunk(&mut sink, &columns, &options).unwrap();

    // The output must be the original value, quoted, followed by a newline.
    let expected = format!("\"{}\"\n", FIELD);
    let csv = std::str::from_utf8(&sink).unwrap();
    assert_eq!(csv, expected.as_str());
}

0 comments on commit 0718c9a

Please sign in to comment.