Skip to content

Commit

Permalink
Added support for byte vectors and slices to parquet_derive (apache#3864)
Browse files Browse the repository at this point in the history
  • Loading branch information
waymost authored and spebern committed Mar 25, 2023
1 parent ea2bd57 commit 8c60437
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 9 deletions.
10 changes: 10 additions & 0 deletions parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,16 @@ impl From<Vec<u8>> for ByteArray {
}
}

/// Conversion from a borrowed byte slice into an owned [`ByteArray`].
impl<'a> From<&'a [u8]> for ByteArray {
    /// Copies the bytes of `b` into a new buffer and wraps that buffer in a
    /// `ByteArray`; the result does not borrow from `b`.
    fn from(b: &'a [u8]) -> ByteArray {
        let owned = b.to_vec();
        let data = Some(ByteBufferPtr::new(owned));
        Self { data }
    }
}

impl<'a> From<&'a str> for ByteArray {
fn from(s: &'a str) -> ByteArray {
let mut v = Vec::new();
Expand Down
76 changes: 67 additions & 9 deletions parquet_derive/src/parquet_field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,18 +92,38 @@ impl Field {
Type::TypePath(_) => self.option_into_vals(),
_ => unimplemented!("Unsupported type encountered"),
},
Type::Vec(ref first_type) => match **first_type {
Type::TypePath(_) => self.option_into_vals(),
_ => unimplemented!("Unsupported type encountered"),
},
ref f => unimplemented!("Unsupported: {:#?}", f),
},
Type::Reference(_, ref first_type) => match **first_type {
Type::TypePath(_) => self.copied_direct_vals(),
Type::Option(ref second_type) => match **second_type {
Type::TypePath(_) => self.option_into_vals(),
Type::Reference(_, ref second_type) => match **second_type {
Type::TypePath(_) => self.option_into_vals(),
Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => self.option_into_vals(),
ref f => unimplemented!("Unsupported: {:#?}", f),
},
_ => unimplemented!("Unsupported type encountered"),
},
Type::Vec(ref first_type) => match **first_type {
Type::TypePath(_) => self.option_into_vals(),
_ => unimplemented!("Unsupported type encountered"),
},
ref f => unimplemented!("Unsupported: {:#?}", f),
},
Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => self.copied_direct_vals(),
ref f => unimplemented!("Unsupported: {:#?}", f),
},
ref f => unimplemented!("Unsupported: {:#?}", f),
},
Type::Vec(ref first_type) => match **first_type {
Type::TypePath(_) => self.copied_direct_vals(),
ref f => unimplemented!("Unsupported: {:#?}", f),
},
f => unimplemented!("Unsupported: {:#?}", f),
Expand All @@ -116,26 +136,55 @@ impl Field {
Type::Option(_) => unimplemented!("Unsupported nesting encountered"),
Type::Reference(_, ref second_type)
| Type::Vec(ref second_type)
| Type::Array(ref second_type) => match **second_type {
| Type::Array(ref second_type)
| Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
_ => unimplemented!("Unsupported nesting encountered"),
},
},
Type::Reference(_, ref first_type)
| Type::Vec(ref first_type)
| Type::Array(ref first_type) => match **first_type {
| Type::Array(ref first_type)
| Type::Slice(ref first_type) => match **first_type {
Type::TypePath(_) => None,
Type::Reference(_, ref second_type)
| Type::Vec(ref second_type)
Type::Vec(ref second_type)
| Type::Array(ref second_type)
| Type::Option(ref second_type) => match **second_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
| Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => None,
Type::Reference(_, ref third_type) => match **third_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
Type::TypePath(_) => None,
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
},
Type::Reference(_, ref second_type) | Type::Option(ref second_type) => {
match **second_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
Type::Vec(ref third_type)
| Type::Array(ref third_type)
| Type::Slice(ref third_type) => match **third_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
Type::Reference(_, ref fourth_type) => match **fourth_type {
Type::TypePath(_) => {
Some(self.optional_definition_levels())
}
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
},
Type::Reference(_, ref third_type) => match **third_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
Type::Slice(ref fourth_type) => match **fourth_type {
Type::TypePath(_) => {
Some(self.optional_definition_levels())
}
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
}
}
},
};

Expand Down Expand Up @@ -323,6 +372,7 @@ impl Field {
enum Type {
Array(Box<Type>),
Option(Box<Type>),
Slice(Box<Type>),
Vec(Box<Type>),
TypePath(syn::Type),
Reference(Option<syn::Lifetime>, Box<Type>),
Expand Down Expand Up @@ -374,6 +424,7 @@ impl Type {
Type::Option(ref first_type)
| Type::Vec(ref first_type)
| Type::Array(ref first_type)
| Type::Slice(ref first_type)
| Type::Reference(_, ref first_type) => {
Type::leaf_type_recursive_helper(first_type, Some(ty))
}
Expand All @@ -391,6 +442,7 @@ impl Type {
Type::Option(ref first_type)
| Type::Vec(ref first_type)
| Type::Array(ref first_type)
| Type::Slice(ref first_type)
| Type::Reference(_, ref first_type) => match **first_type {
Type::TypePath(ref type_) => type_,
_ => unimplemented!("leaf_type() should only return shallow types"),
Expand Down Expand Up @@ -443,7 +495,7 @@ impl Type {
}
}
}
Type::Vec(ref first_type) => {
Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
if let Type::TypePath(_) = **first_type {
if last_part == "u8" {
return BasicType::BYTE_ARRAY;
Expand Down Expand Up @@ -484,7 +536,7 @@ impl Type {
}
}
}
Type::Vec(ref first_type) => {
Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
if let Type::TypePath(_) = **first_type {
if last_part == "u8" {
return quote! { None };
Expand Down Expand Up @@ -572,6 +624,7 @@ impl Type {
syn::Type::Path(ref p) => Type::from_type_path(f, p),
syn::Type::Reference(ref tr) => Type::from_type_reference(f, tr),
syn::Type::Array(ref ta) => Type::from_type_array(f, ta),
syn::Type::Slice(ref ts) => Type::from_type_slice(f, ts),
other => unimplemented!(
"Unable to derive {:?} - it is currently an unsupported type\n{:#?}",
f.ident.as_ref().unwrap(),
Expand Down Expand Up @@ -622,6 +675,11 @@ impl Type {
let inner_type = Type::from_type(f, ta.elem.as_ref());
Type::Array(Box::new(inner_type))
}

/// Maps a `syn` slice type to [`Type::Slice`].
///
/// The slice's element type is converted through [`Type::from_type`] (using
/// the same field `f` for context) and boxed as the variant's payload.
fn from_type_slice(f: &syn::Field, ts: &syn::TypeSlice) -> Self {
    Type::Slice(Box::new(Type::from_type(f, ts.elem.as_ref())))
}
}

#[cfg(test)]
Expand Down
18 changes: 18 additions & 0 deletions parquet_derive_test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ struct ACompleteRecord<'a> {
pub borrowed_maybe_a_string: &'a Option<String>,
pub borrowed_maybe_a_str: &'a Option<&'a str>,
pub now: chrono::NaiveDateTime,
pub byte_vec: Vec<u8>,
pub maybe_byte_vec: Option<Vec<u8>>,
pub borrowed_byte_vec: &'a [u8],
pub borrowed_maybe_byte_vec: &'a Option<Vec<u8>>,
pub borrowed_maybe_borrowed_byte_vec: &'a Option<&'a [u8]>,
}

#[cfg(test)]
Expand Down Expand Up @@ -84,6 +89,11 @@ mod tests {
OPTIONAL BINARY borrowed_maybe_a_string (STRING);
OPTIONAL BINARY borrowed_maybe_a_str (STRING);
REQUIRED INT64 now (TIMESTAMP_MILLIS);
REQUIRED BINARY byte_vec;
OPTIONAL BINARY maybe_byte_vec;
REQUIRED BINARY borrowed_byte_vec;
OPTIONAL BINARY borrowed_maybe_byte_vec;
OPTIONAL BINARY borrowed_maybe_borrowed_byte_vec;
}";

let schema = Arc::new(parse_message_type(schema_str).unwrap());
Expand All @@ -92,6 +102,9 @@ mod tests {
let a_borrowed_string = "cool news".to_owned();
let maybe_a_string = Some("it's true, I'm a string".to_owned());
let maybe_a_str = Some(&a_str[..]);
let borrowed_byte_vec = vec![0x68, 0x69, 0x70];
let borrowed_maybe_byte_vec = Some(vec![0x71, 0x72]);
let borrowed_maybe_borrowed_byte_vec = Some(&borrowed_byte_vec[..]);

let drs: Vec<ACompleteRecord> = vec![ACompleteRecord {
a_bool: true,
Expand All @@ -115,6 +128,11 @@ mod tests {
borrowed_maybe_a_string: &maybe_a_string,
borrowed_maybe_a_str: &maybe_a_str,
now: chrono::Utc::now().naive_local(),
byte_vec: vec![0x65, 0x66, 0x67],
maybe_byte_vec: Some(vec![0x88, 0x89, 0x90]),
borrowed_byte_vec: &borrowed_byte_vec,
borrowed_maybe_byte_vec: &borrowed_maybe_byte_vec,
borrowed_maybe_borrowed_byte_vec: &borrowed_maybe_borrowed_byte_vec,
}];

let generated_schema = drs.as_slice().schema().unwrap();
Expand Down

0 comments on commit 8c60437

Please sign in to comment.