Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Encoding of keys of structs
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Dec 24, 2021
1 parent fff420b commit fe16138
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 13 deletions.
29 changes: 16 additions & 13 deletions src/io/json/write/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,7 @@ fn utf8_serializer<'a, O: Offset>(
array.iter(),
|x, buf| {
if let Some(x) = x {
if x.as_bytes().is_ascii() {
buf.reserve(x.len() + 2);
buf.push(b'"');
buf.extend_from_slice(x.as_bytes());
buf.push(b'"');
} else {
// proper escaping requires this atm;
// todo: avoid this roundtrip over serde_json
serde_json::to_writer(buf, &Value::String(x.to_string())).unwrap();
}
utf8_serialize(x, buf)
} else {
buf.extend_from_slice(b"null")
}
Expand Down Expand Up @@ -145,6 +136,20 @@ fn list_serializer<'a, O: Offset>(
))
}

#[inline]
fn utf8_serialize(value: &str, buf: &mut Vec<u8>) {
if value.as_bytes().is_ascii() {
buf.reserve(value.len() + 2);
buf.push(b'"');
buf.extend_from_slice(value.as_bytes());
buf.push(b'"');
} else {
// it may contain reserved keywords: perform roundtrip for
// todo: avoid this roundtrip over serde_json
serde_json::to_writer(buf, &Value::String(value.to_string())).unwrap();
}
}

fn new_serializer<'a>(
array: &'a dyn Array,
) -> Box<dyn StreamingIterator<Item = [u8]> + 'a + Send + Sync> {
Expand Down Expand Up @@ -183,9 +188,7 @@ fn serialize_item<F: JsonFormat>(
buffer.push(b',');
}
first_item = false;
buffer.push(b'"');
buffer.extend(key.as_bytes());
buffer.push(b'"');
utf8_serialize(key, buffer);
buffer.push(b':');
buffer.extend(*value);
}
Expand Down
16 changes: 16 additions & 0 deletions tests/it/io/json/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,19 @@ fn write_list_of_struct() -> Result<()> {
);
Ok(())
}

#[test]
fn write_escaped_utf8() -> Result<()> {
let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, false)]);
let a = Utf8Array::<i32>::from(&vec![Some("a\na"), None]);

let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap();

let buf = write_batch(batch)?;

assert_eq!(
String::from_utf8(buf).unwrap().as_bytes(),
b"{\"c1\":\"a\na\"}\n{\"c1\":null}\n"
);
Ok(())
}

0 comments on commit fe16138

Please sign in to comment.