Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for byte vectors and slices to parquet_derive (#3864) #3878

Merged
merged 1 commit into from
Mar 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,16 @@ impl From<Vec<u8>> for ByteArray {
}
}

/// Builds a `ByteArray` from a borrowed byte slice.
///
/// The bytes are copied into a freshly allocated vector, which is then
/// wrapped in a `ByteBufferPtr`; the resulting array does not borrow from `b`.
impl<'a> From<&'a [u8]> for ByteArray {
    fn from(b: &'a [u8]) -> ByteArray {
        // Own the data so the array can outlive the borrowed slice.
        let owned = b.to_vec();
        let buffer = ByteBufferPtr::new(owned);
        Self { data: Some(buffer) }
    }
}

impl<'a> From<&'a str> for ByteArray {
fn from(s: &'a str) -> ByteArray {
let mut v = Vec::new();
Expand Down
76 changes: 67 additions & 9 deletions parquet_derive/src/parquet_field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,18 +92,38 @@ impl Field {
Type::TypePath(_) => self.option_into_vals(),
_ => unimplemented!("Unsupported type encountered"),
},
Type::Vec(ref first_type) => match **first_type {
Type::TypePath(_) => self.option_into_vals(),
_ => unimplemented!("Unsupported type encountered"),
},
ref f => unimplemented!("Unsupported: {:#?}", f),
},
Type::Reference(_, ref first_type) => match **first_type {
Type::TypePath(_) => self.copied_direct_vals(),
Type::Option(ref second_type) => match **second_type {
Type::TypePath(_) => self.option_into_vals(),
Type::Reference(_, ref second_type) => match **second_type {
Type::TypePath(_) => self.option_into_vals(),
Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => self.option_into_vals(),
ref f => unimplemented!("Unsupported: {:#?}", f),
},
_ => unimplemented!("Unsupported type encountered"),
},
Type::Vec(ref first_type) => match **first_type {
Type::TypePath(_) => self.option_into_vals(),
_ => unimplemented!("Unsupported type encountered"),
},
ref f => unimplemented!("Unsupported: {:#?}", f),
},
Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => self.copied_direct_vals(),
ref f => unimplemented!("Unsupported: {:#?}", f),
},
ref f => unimplemented!("Unsupported: {:#?}", f),
},
Type::Vec(ref first_type) => match **first_type {
Type::TypePath(_) => self.copied_direct_vals(),
ref f => unimplemented!("Unsupported: {:#?}", f),
},
f => unimplemented!("Unsupported: {:#?}", f),
Expand All @@ -116,26 +136,55 @@ impl Field {
Type::Option(_) => unimplemented!("Unsupported nesting encountered"),
Type::Reference(_, ref second_type)
| Type::Vec(ref second_type)
| Type::Array(ref second_type) => match **second_type {
| Type::Array(ref second_type)
| Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
_ => unimplemented!("Unsupported nesting encountered"),
},
},
Type::Reference(_, ref first_type)
| Type::Vec(ref first_type)
| Type::Array(ref first_type) => match **first_type {
| Type::Array(ref first_type)
| Type::Slice(ref first_type) => match **first_type {
Type::TypePath(_) => None,
Type::Reference(_, ref second_type)
| Type::Vec(ref second_type)
Type::Vec(ref second_type)
| Type::Array(ref second_type)
| Type::Option(ref second_type) => match **second_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
| Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => None,
Type::Reference(_, ref third_type) => match **third_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
Type::TypePath(_) => None,
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
},
Type::Reference(_, ref second_type) | Type::Option(ref second_type) => {
match **second_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
Type::Vec(ref third_type)
| Type::Array(ref third_type)
| Type::Slice(ref third_type) => match **third_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
Type::Reference(_, ref fourth_type) => match **fourth_type {
Type::TypePath(_) => {
Some(self.optional_definition_levels())
}
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
},
Type::Reference(_, ref third_type) => match **third_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
Type::Slice(ref fourth_type) => match **fourth_type {
Type::TypePath(_) => {
Some(self.optional_definition_levels())
}
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
}
}
},
};

Expand Down Expand Up @@ -323,6 +372,7 @@ impl Field {
enum Type {
Array(Box<Type>),
Option(Box<Type>),
Slice(Box<Type>),
Vec(Box<Type>),
TypePath(syn::Type),
Reference(Option<syn::Lifetime>, Box<Type>),
Expand Down Expand Up @@ -374,6 +424,7 @@ impl Type {
Type::Option(ref first_type)
| Type::Vec(ref first_type)
| Type::Array(ref first_type)
| Type::Slice(ref first_type)
| Type::Reference(_, ref first_type) => {
Type::leaf_type_recursive_helper(first_type, Some(ty))
}
Expand All @@ -391,6 +442,7 @@ impl Type {
Type::Option(ref first_type)
| Type::Vec(ref first_type)
| Type::Array(ref first_type)
| Type::Slice(ref first_type)
| Type::Reference(_, ref first_type) => match **first_type {
Type::TypePath(ref type_) => type_,
_ => unimplemented!("leaf_type() should only return shallow types"),
Expand Down Expand Up @@ -443,7 +495,7 @@ impl Type {
}
}
}
Type::Vec(ref first_type) => {
Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
if let Type::TypePath(_) = **first_type {
if last_part == "u8" {
return BasicType::BYTE_ARRAY;
Expand Down Expand Up @@ -484,7 +536,7 @@ impl Type {
}
}
}
Type::Vec(ref first_type) => {
Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
if let Type::TypePath(_) = **first_type {
if last_part == "u8" {
return quote! { None };
Expand Down Expand Up @@ -572,6 +624,7 @@ impl Type {
syn::Type::Path(ref p) => Type::from_type_path(f, p),
syn::Type::Reference(ref tr) => Type::from_type_reference(f, tr),
syn::Type::Array(ref ta) => Type::from_type_array(f, ta),
syn::Type::Slice(ref ts) => Type::from_type_slice(f, ts),
other => unimplemented!(
"Unable to derive {:?} - it is currently an unsupported type\n{:#?}",
f.ident.as_ref().unwrap(),
Expand Down Expand Up @@ -622,6 +675,11 @@ impl Type {
let inner_type = Type::from_type(f, ta.elem.as_ref());
Type::Array(Box::new(inner_type))
}

/// Converts a parsed slice type (`[T]`) into `Type::Slice`.
///
/// The element type `ts.elem` is converted through `Type::from_type`, with
/// the field `f` passed along unchanged, and the result is boxed as the
/// payload of the `Slice` variant.
fn from_type_slice(f: &syn::Field, ts: &syn::TypeSlice) -> Self {
    Type::Slice(Box::new(Type::from_type(f, ts.elem.as_ref())))
}
}

#[cfg(test)]
Expand Down
18 changes: 18 additions & 0 deletions parquet_derive_test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ struct ACompleteRecord<'a> {
pub borrowed_maybe_a_string: &'a Option<String>,
pub borrowed_maybe_a_str: &'a Option<&'a str>,
pub now: chrono::NaiveDateTime,
pub byte_vec: Vec<u8>,
pub maybe_byte_vec: Option<Vec<u8>>,
pub borrowed_byte_vec: &'a [u8],
pub borrowed_maybe_byte_vec: &'a Option<Vec<u8>>,
pub borrowed_maybe_borrowed_byte_vec: &'a Option<&'a [u8]>,
}

#[cfg(test)]
Expand Down Expand Up @@ -84,6 +89,11 @@ mod tests {
OPTIONAL BINARY borrowed_maybe_a_string (STRING);
OPTIONAL BINARY borrowed_maybe_a_str (STRING);
REQUIRED INT64 now (TIMESTAMP_MILLIS);
REQUIRED BINARY byte_vec;
OPTIONAL BINARY maybe_byte_vec;
REQUIRED BINARY borrowed_byte_vec;
OPTIONAL BINARY borrowed_maybe_byte_vec;
OPTIONAL BINARY borrowed_maybe_borrowed_byte_vec;
}";

let schema = Arc::new(parse_message_type(schema_str).unwrap());
Expand All @@ -92,6 +102,9 @@ mod tests {
let a_borrowed_string = "cool news".to_owned();
let maybe_a_string = Some("it's true, I'm a string".to_owned());
let maybe_a_str = Some(&a_str[..]);
let borrowed_byte_vec = vec![0x68, 0x69, 0x70];
let borrowed_maybe_byte_vec = Some(vec![0x71, 0x72]);
let borrowed_maybe_borrowed_byte_vec = Some(&borrowed_byte_vec[..]);

let drs: Vec<ACompleteRecord> = vec![ACompleteRecord {
a_bool: true,
Expand All @@ -115,6 +128,11 @@ mod tests {
borrowed_maybe_a_string: &maybe_a_string,
borrowed_maybe_a_str: &maybe_a_str,
now: chrono::Utc::now().naive_local(),
byte_vec: vec![0x65, 0x66, 0x67],
maybe_byte_vec: Some(vec![0x88, 0x89, 0x90]),
borrowed_byte_vec: &borrowed_byte_vec,
borrowed_maybe_byte_vec: &borrowed_maybe_byte_vec,
borrowed_maybe_borrowed_byte_vec: &borrowed_maybe_borrowed_byte_vec,
}];

let generated_schema = drs.as_slice().schema().unwrap();
Expand Down