Skip to content
80 changes: 79 additions & 1 deletion arrow-json/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,9 @@ mod tests {

use serde_json::json;

use arrow_array::builder::{Int32Builder, Int64Builder, MapBuilder, StringBuilder};
use arrow_array::builder::{
FixedSizeBinaryBuilder, Int32Builder, Int64Builder, MapBuilder, StringBuilder,
};
use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
use arrow_data::ArrayData;

Expand Down Expand Up @@ -2137,4 +2139,80 @@ mod tests {

Ok(())
}

/// Writes a nullable `FixedSizeBinary(11)` column through the JSON array writer
/// and checks that each value is rendered as a lowercase hex string, once with
/// explicit nulls enabled and once with the default writer.
#[test]
fn test_writer_fixed_size_binary() {
    // Both sample payloads below are exactly this many bytes long.
    let byte_width = 11;
    let field = Field::new("bytes", DataType::FixedSizeBinary(byte_width), true);
    let schema = SchemaRef::new(Schema::new(vec![field]));

    // Column contents, in order: value, null, value.
    let mut column = FixedSizeBinaryBuilder::new(byte_width);
    column.append_value(b"hello world").unwrap();
    column.append_null();
    column.append_value(b"summer rain").unwrap();
    let columns = vec![Arc::new(column.finish()) as ArrayRef];
    let batch = RecordBatch::try_new(schema, columns).unwrap();

    // Explicit nulls requested: the null row keeps its key, mapped to `null`.
    let mut explicit_buf = Vec::new();
    let mut explicit_writer = WriterBuilder::new()
        .with_explicit_nulls(true)
        .build::<_, JsonArray>(&mut explicit_buf);
    explicit_writer.write(&batch).unwrap();
    explicit_writer.close().unwrap();
    let with_explicit_nulls: Value = serde_json::from_slice(&explicit_buf).unwrap();
    assert_eq!(
        json!([
            { "bytes": "68656c6c6f20776f726c64" },
            { "bytes": null },
            { "bytes": "73756d6d6572207261696e" }
        ]),
        with_explicit_nulls,
    );

    // Explicit nulls are off by default, so a plain `ArrayWriter` is enough
    // here; the null row is then expected to serialize as an empty object.
    let mut default_buf = Vec::new();
    let mut default_writer = ArrayWriter::new(&mut default_buf);
    default_writer.write(&batch).unwrap();
    default_writer.close().unwrap();
    let with_omitted_nulls: Value = serde_json::from_slice(&default_buf).unwrap();
    assert_eq!(
        json!([
            { "bytes": "68656c6c6f20776f726c64" },
            {},
            { "bytes": "73756d6d6572207261696e" }
        ]),
        with_omitted_nulls,
    );
}
}
38 changes: 31 additions & 7 deletions arrow-json/src/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,16 @@ fn make_encoder_impl<'a>(
DataType::Float64 => primitive_helper!(Float64Type),
DataType::Boolean => {
let array = array.as_boolean();
(Box::new(BooleanEncoder(array.clone())), array.nulls().cloned())
(Box::new(BooleanEncoder(array)), array.nulls().cloned())
}
DataType::Null => (Box::new(NullEncoder), array.logical_nulls()),
DataType::Utf8 => {
let array = array.as_string::<i32>();
(Box::new(StringEncoder(array.clone())) as _, array.nulls().cloned())
(Box::new(StringEncoder(array)) as _, array.nulls().cloned())
}
DataType::LargeUtf8 => {
let array = array.as_string::<i64>();
(Box::new(StringEncoder(array.clone())) as _, array.nulls().cloned())
(Box::new(StringEncoder(array)) as _, array.nulls().cloned())
}
DataType::List(_) => {
let array = array.as_list::<i32>();
Expand All @@ -99,6 +99,11 @@ fn make_encoder_impl<'a>(
(Box::new(MapEncoder::try_new(array, options)?) as _, array.nulls().cloned())
}

DataType::FixedSizeBinary(_) => {
let array = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
(Box::new(FixedSizeBinaryEncoder::new(array)) as _, array.nulls().cloned())
}

DataType::Struct(fields) => {
let array = array.as_struct();
let encoders = fields.iter().zip(array.columns()).map(|(field, array)| {
Expand Down Expand Up @@ -259,9 +264,9 @@ impl<N: PrimitiveEncode> Encoder for PrimitiveEncoder<N> {
}
}

struct BooleanEncoder(BooleanArray);
struct BooleanEncoder<'a>(&'a BooleanArray);

impl Encoder for BooleanEncoder {
impl<'a> Encoder for BooleanEncoder<'a> {
fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
match self.0.value(idx) {
true => out.extend_from_slice(b"true"),
Expand All @@ -270,9 +275,9 @@ impl Encoder for BooleanEncoder {
}
}

struct StringEncoder<O: OffsetSizeTrait>(GenericStringArray<O>);
struct StringEncoder<'a, O: OffsetSizeTrait>(&'a GenericStringArray<O>);

impl<O: OffsetSizeTrait> Encoder for StringEncoder<O> {
impl<'a, O: OffsetSizeTrait> Encoder for StringEncoder<'a, O> {
fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
encode_string(self.0.value(idx), out);
}
Expand Down Expand Up @@ -443,3 +448,22 @@ impl<'a> Encoder for MapEncoder<'a> {
out.push(b'}');
}
}

/// JSON encoder for the values of a [`FixedSizeBinaryArray`].
///
/// Every value is emitted as a double-quoted string of lowercase hexadecimal
/// digits, two digits per byte (for example the bytes `b"hi"` become `"6869"`).
struct FixedSizeBinaryEncoder<'a>(&'a FixedSizeBinaryArray);

impl<'a> FixedSizeBinaryEncoder<'a> {
    /// Creates an encoder that borrows `array`; no data is copied.
    fn new(array: &'a FixedSizeBinaryArray) -> Self {
        Self(array)
    }
}

impl<'a> Encoder for FixedSizeBinaryEncoder<'a> {
    /// Appends the value at `idx` to `out` as a quoted, lowercase hex string.
    ///
    /// This method does not consult the array's null buffer: it encodes
    /// whatever bytes the array reports for `idx`.
    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
        // Lookup table indexed by a 4-bit nibble; avoids running the `fmt`
        // machinery (and an `unwrap`) once per byte.
        const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

        let bytes = self.0.value(idx);
        // Two hex digits per byte, plus the opening and closing quotes.
        out.reserve(bytes.len() * 2 + 2);

        out.push(b'"');
        for &byte in bytes {
            out.push(HEX_DIGITS[usize::from(byte >> 4)]);
            out.push(HEX_DIGITS[usize::from(byte & 0x0f)]);
        }
        out.push(b'"');
    }
}