diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 40d54c78ed1d..48ee7f89fc5d 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -199,6 +199,16 @@ impl From> for ByteArray { } } +impl<'a> From<&'a [u8]> for ByteArray { + fn from(b: &'a [u8]) -> ByteArray { + let mut v = Vec::new(); + v.extend_from_slice(b); + Self { + data: Some(ByteBufferPtr::new(v)), + } + } +} + impl<'a> From<&'a str> for ByteArray { fn from(s: &'a str) -> ByteArray { let mut v = Vec::new(); diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs index 48b6d3ac41b8..ea6878283a33 100644 --- a/parquet_derive/src/parquet_field.rs +++ b/parquet_derive/src/parquet_field.rs @@ -92,6 +92,10 @@ impl Field { Type::TypePath(_) => self.option_into_vals(), _ => unimplemented!("Unsupported type encountered"), }, + Type::Vec(ref first_type) => match **first_type { + Type::TypePath(_) => self.option_into_vals(), + _ => unimplemented!("Unsupported type encountered"), + }, ref f => unimplemented!("Unsupported: {:#?}", f), }, Type::Reference(_, ref first_type) => match **first_type { @@ -99,11 +103,27 @@ impl Field { Type::Option(ref second_type) => match **second_type { Type::TypePath(_) => self.option_into_vals(), Type::Reference(_, ref second_type) => match **second_type { + Type::TypePath(_) => self.option_into_vals(), + Type::Slice(ref second_type) => match **second_type { + Type::TypePath(_) => self.option_into_vals(), + ref f => unimplemented!("Unsupported: {:#?}", f), + }, + _ => unimplemented!("Unsupported type encountered"), + }, + Type::Vec(ref first_type) => match **first_type { Type::TypePath(_) => self.option_into_vals(), _ => unimplemented!("Unsupported type encountered"), }, ref f => unimplemented!("Unsupported: {:#?}", f), }, + Type::Slice(ref second_type) => match **second_type { + Type::TypePath(_) => self.copied_direct_vals(), + ref f => unimplemented!("Unsupported: {:#?}", f), + }, + ref f => unimplemented!("Unsupported: {:#?}", f), + }, + Type::Vec(ref first_type) => match **first_type { + Type::TypePath(_) => self.copied_direct_vals(), ref f => unimplemented!("Unsupported: {:#?}", f), }, f => unimplemented!("Unsupported: {:#?}", f), @@ -116,26 +136,55 @@ impl Field { Type::Option(_) => unimplemented!("Unsupported nesting encountered"), Type::Reference(_, ref second_type) | Type::Vec(ref second_type) - | Type::Array(ref second_type) => match **second_type { + | Type::Array(ref second_type) + | Type::Slice(ref second_type) => match **second_type { Type::TypePath(_) => Some(self.optional_definition_levels()), _ => unimplemented!("Unsupported nesting encountered"), }, }, Type::Reference(_, ref first_type) | Type::Vec(ref first_type) - | Type::Array(ref first_type) => match **first_type { + | Type::Array(ref first_type) + | Type::Slice(ref first_type) => match **first_type { Type::TypePath(_) => None, - Type::Reference(_, ref second_type) - | Type::Vec(ref second_type) + Type::Vec(ref second_type) | Type::Array(ref second_type) - | Type::Option(ref second_type) => match **second_type { - Type::TypePath(_) => Some(self.optional_definition_levels()), + | Type::Slice(ref second_type) => match **second_type { + Type::TypePath(_) => None, Type::Reference(_, ref third_type) => match **third_type { - Type::TypePath(_) => Some(self.optional_definition_levels()), + Type::TypePath(_) => None, _ => unimplemented!("Unsupported definition encountered"), }, _ => unimplemented!("Unsupported definition encountered"), }, + Type::Reference(_, ref second_type) | Type::Option(ref second_type) => { + match **second_type { + Type::TypePath(_) => Some(self.optional_definition_levels()), + Type::Vec(ref third_type) + | Type::Array(ref third_type) + | Type::Slice(ref third_type) => match **third_type { + Type::TypePath(_) => Some(self.optional_definition_levels()), + Type::Reference(_, ref fourth_type) => match **fourth_type { + Type::TypePath(_) => { + Some(self.optional_definition_levels()) + } + _ => unimplemented!("Unsupported definition encountered"), + }, + _ => unimplemented!("Unsupported definition encountered"), + }, + Type::Reference(_, ref third_type) => match **third_type { + Type::TypePath(_) => Some(self.optional_definition_levels()), + Type::Slice(ref fourth_type) => match **fourth_type { + Type::TypePath(_) => { + Some(self.optional_definition_levels()) + } + _ => unimplemented!("Unsupported definition encountered"), + }, + _ => unimplemented!("Unsupported definition encountered"), + }, + _ => unimplemented!("Unsupported definition encountered"), + } + } }, }; @@ -323,6 +372,7 @@ impl Field { enum Type { Array(Box), Option(Box), + Slice(Box), Vec(Box), TypePath(syn::Type), Reference(Option, Box), @@ -374,6 +424,7 @@ impl Type { Type::Option(ref first_type) | Type::Vec(ref first_type) | Type::Array(ref first_type) + | Type::Slice(ref first_type) | Type::Reference(_, ref first_type) => { Type::leaf_type_recursive_helper(first_type, Some(ty)) } @@ -391,6 +442,7 @@ impl Type { Type::Option(ref first_type) | Type::Vec(ref first_type) | Type::Array(ref first_type) + | Type::Slice(ref first_type) | Type::Reference(_, ref first_type) => match **first_type { Type::TypePath(ref type_) => type_, _ => unimplemented!("leaf_type() should only return shallow types"), @@ -443,7 +495,7 @@ impl Type { } } } - Type::Vec(ref first_type) => { + Type::Vec(ref first_type) | Type::Slice(ref first_type) => { if let Type::TypePath(_) = **first_type { if last_part == "u8" { return BasicType::BYTE_ARRAY; @@ -484,7 +536,7 @@ impl Type { } } } - Type::Vec(ref first_type) => { + Type::Vec(ref first_type) | Type::Slice(ref first_type) => { if let Type::TypePath(_) = **first_type { if last_part == "u8" { return quote! { None }; @@ -572,6 +624,7 @@ impl Type { syn::Type::Path(ref p) => Type::from_type_path(f, p), syn::Type::Reference(ref tr) => Type::from_type_reference(f, tr), syn::Type::Array(ref ta) => Type::from_type_array(f, ta), + syn::Type::Slice(ref ts) => Type::from_type_slice(f, ts), other => unimplemented!( "Unable to derive {:?} - it is currently an unsupported type\n{:#?}", f.ident.as_ref().unwrap(), @@ -622,6 +675,11 @@ impl Type { let inner_type = Type::from_type(f, ta.elem.as_ref()); Type::Array(Box::new(inner_type)) } + + fn from_type_slice(f: &syn::Field, ts: &syn::TypeSlice) -> Self { + let inner_type = Type::from_type(f, ts.elem.as_ref()); + Type::Slice(Box::new(inner_type)) + } } #[cfg(test)] diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs index 746644793ff2..2aa174974aba 100644 --- a/parquet_derive_test/src/lib.rs +++ b/parquet_derive_test/src/lib.rs @@ -42,6 +42,11 @@ struct ACompleteRecord<'a> { pub borrowed_maybe_a_string: &'a Option, pub borrowed_maybe_a_str: &'a Option<&'a str>, pub now: chrono::NaiveDateTime, + pub byte_vec: Vec, + pub maybe_byte_vec: Option>, + pub borrowed_byte_vec: &'a [u8], + pub borrowed_maybe_byte_vec: &'a Option>, + pub borrowed_maybe_borrowed_byte_vec: &'a Option<&'a [u8]>, } #[cfg(test)] @@ -84,6 +89,11 @@ mod tests { OPTIONAL BINARY borrowed_maybe_a_string (STRING); OPTIONAL BINARY borrowed_maybe_a_str (STRING); REQUIRED INT64 now (TIMESTAMP_MILLIS); + REQUIRED BINARY byte_vec; + OPTIONAL BINARY maybe_byte_vec; + REQUIRED BINARY borrowed_byte_vec; + OPTIONAL BINARY borrowed_maybe_byte_vec; + OPTIONAL BINARY borrowed_maybe_borrowed_byte_vec; }"; let schema = Arc::new(parse_message_type(schema_str).unwrap()); @@ -92,6 +102,9 @@ mod tests { let a_borrowed_string = "cool news".to_owned(); let maybe_a_string = Some("it's true, I'm a string".to_owned()); let maybe_a_str = Some(&a_str[..]); + let borrowed_byte_vec = vec![0x68, 0x69, 0x70]; + let borrowed_maybe_byte_vec = Some(vec![0x71, 0x72]); + let borrowed_maybe_borrowed_byte_vec = Some(&borrowed_byte_vec[..]); let drs: Vec = vec![ACompleteRecord { a_bool: true, @@ -115,6 +128,11 @@ mod tests { borrowed_maybe_a_string: &maybe_a_string, borrowed_maybe_a_str: &maybe_a_str, now: chrono::Utc::now().naive_local(), + byte_vec: vec![0x65, 0x66, 0x67], + maybe_byte_vec: Some(vec![0x88, 0x89, 0x90]), + borrowed_byte_vec: &borrowed_byte_vec, + borrowed_maybe_byte_vec: &borrowed_maybe_byte_vec, + borrowed_maybe_borrowed_byte_vec: &borrowed_maybe_borrowed_byte_vec, }]; let generated_schema = drs.as_slice().schema().unwrap();