Skip to content

Commit b531d52

Browse files
committed
feat: add Struct Accessors to BoundReferences
1 parent 4e89ac7 commit b531d52

File tree

6 files changed

+241
-5
lines changed

6 files changed

+241
-5
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::spec::{Datum, Literal, PrimitiveType, Struct};
19+
use crate::{Error, ErrorKind};
20+
use serde_derive::{Deserialize, Serialize};
21+
use std::sync::Arc;
22+
23+
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
24+
pub struct StructAccessor {
25+
position: usize,
26+
r#type: PrimitiveType,
27+
inner: Option<Box<StructAccessor>>,
28+
}
29+
30+
pub(crate) type StructAccessorRef = Arc<StructAccessor>;
31+
32+
impl StructAccessor {
33+
pub(crate) fn new(position: usize, r#type: PrimitiveType) -> Self {
34+
StructAccessor {
35+
position,
36+
r#type,
37+
inner: None,
38+
}
39+
}
40+
41+
pub(crate) fn wrap(position: usize, inner: Box<StructAccessor>) -> Self {
42+
StructAccessor {
43+
position,
44+
r#type: inner.r#type().clone(),
45+
inner: Some(inner),
46+
}
47+
}
48+
49+
pub(crate) fn position(&self) -> usize {
50+
self.position
51+
}
52+
53+
pub(crate) fn r#type(&self) -> &PrimitiveType {
54+
&self.r#type
55+
}
56+
57+
pub(crate) fn get<'a>(&'a self, container: &'a Struct) -> crate::Result<Datum> {
58+
match &self.inner {
59+
None => {
60+
if let Literal::Primitive(literal) = &container[self.position] {
61+
Ok(Datum::new(self.r#type().clone(), literal.clone()))
62+
} else {
63+
Err(Error::new(
64+
ErrorKind::Unexpected,
65+
"Expected Literal to be Primitive",
66+
))
67+
}
68+
}
69+
Some(inner) => {
70+
if let Literal::Struct(wrapped) = &container[self.position] {
71+
inner.get(wrapped)
72+
} else {
73+
Err(Error::new(
74+
ErrorKind::Unexpected,
75+
"Nested accessor should only be wrapping a Struct",
76+
))
77+
}
78+
}
79+
}
80+
}
81+
}
82+
83+
#[cfg(test)]
mod tests {
    use crate::expr::accessor::StructAccessor;
    use crate::spec::{Datum, Literal, PrimitiveType, Struct};

    /// A leaf accessor reads the primitive stored at its own position.
    #[test]
    fn test_single_level_accessor() {
        let accessor = StructAccessor::new(1, PrimitiveType::Boolean);

        assert_eq!(accessor.r#type(), &PrimitiveType::Boolean);
        assert_eq!(accessor.position(), 1);

        let test_struct =
            Struct::from_iter(vec![Some(Literal::bool(false)), Some(Literal::bool(true))]);

        assert_eq!(accessor.get(&test_struct).unwrap(), Datum::bool(true));
    }

    /// A wrapping accessor steps into the struct at its position, then delegates to the
    /// inner accessor.
    #[test]
    fn test_nested_accessor() {
        let nested_accessor = StructAccessor::new(1, PrimitiveType::Boolean);
        let accessor = StructAccessor::wrap(2, Box::new(nested_accessor));

        assert_eq!(accessor.r#type(), &PrimitiveType::Boolean);
        // The wrapper reports its own position (2), not the inner accessor's (1).
        assert_eq!(accessor.position(), 2);

        let nested_test_struct =
            Struct::from_iter(vec![Some(Literal::bool(false)), Some(Literal::bool(true))]);

        let test_struct = Struct::from_iter(vec![
            Some(Literal::bool(false)),
            Some(Literal::bool(false)),
            Some(Literal::Struct(nested_test_struct)),
        ]);

        assert_eq!(accessor.get(&test_struct).unwrap(), Datum::bool(true));
    }
}

crates/iceberg/src/expr/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ mod term;
2222
use std::fmt::{Display, Formatter};
2323

2424
pub use term::*;
25+
pub(crate) mod accessor;
2526
mod predicate;
2627

2728
use crate::spec::SchemaRef;

crates/iceberg/src/expr/term.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::fmt::{Display, Formatter};
2121

2222
use fnv::FnvHashSet;
2323

24+
use crate::expr::accessor::{StructAccessor, StructAccessorRef};
2425
use crate::expr::Bind;
2526
use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression, UnaryExpression};
2627
use crate::spec::{Datum, NestedField, NestedFieldRef, SchemaRef};
@@ -333,7 +334,19 @@ impl Bind for Reference {
333334
format!("Field {} not found in schema", self.name),
334335
)
335336
})?;
336-
Ok(BoundReference::new(self.name.clone(), field.clone()))
337+
338+
let accessor = schema.accessor_for_field_id(field.id).ok_or_else(|| {
339+
Error::new(
340+
ErrorKind::DataInvalid,
341+
format!("Accessor for Field {} not found", self.name),
342+
)
343+
})?;
344+
345+
Ok(BoundReference::new(
346+
self.name.clone(),
347+
field.clone(),
348+
accessor.clone(),
349+
))
337350
}
338351
}
339352

@@ -344,21 +357,32 @@ pub struct BoundReference {
344357
// For example, if the field is `a.b.c`, then `field.name` is `c`, but `column_name` is `a.b.c`.
345358
column_name: String,
346359
field: NestedFieldRef,
360+
accessor: StructAccessorRef,
347361
}
348362

349363
impl BoundReference {
350364
/// Creates a new bound reference.
351-
pub fn new(name: impl Into<String>, field: NestedFieldRef) -> Self {
365+
pub fn new(
366+
name: impl Into<String>,
367+
field: NestedFieldRef,
368+
accessor: StructAccessorRef,
369+
) -> Self {
352370
Self {
353371
column_name: name.into(),
354372
field,
373+
accessor,
355374
}
356375
}
357376

358377
/// Return the field of this reference.
359378
pub fn field(&self) -> &NestedField {
360379
&self.field
361380
}
381+
382+
/// Get this BoundReference's Accessor
383+
pub fn accessor(&self) -> &StructAccessor {
384+
&self.accessor
385+
}
362386
}
363387

364388
impl Display for BoundReference {
@@ -374,6 +398,7 @@ pub type BoundTerm = BoundReference;
374398
mod tests {
375399
use std::sync::Arc;
376400

401+
use crate::expr::accessor::StructAccessor;
377402
use crate::expr::{Bind, BoundReference, Reference};
378403
use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};
379404

@@ -397,9 +422,11 @@ mod tests {
397422
let schema = table_schema_simple();
398423
let reference = Reference::new("bar").bind(schema, true).unwrap();
399424

425+
let accessor_ref = Arc::new(StructAccessor::new(1, PrimitiveType::Int));
400426
let expected_ref = BoundReference::new(
401427
"bar",
402428
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
429+
accessor_ref.clone(),
403430
);
404431

405432
assert_eq!(expected_ref, reference);
@@ -410,9 +437,11 @@ mod tests {
410437
let schema = table_schema_simple();
411438
let reference = Reference::new("BAR").bind(schema, false).unwrap();
412439

440+
let accessor_ref = Arc::new(StructAccessor::new(1, PrimitiveType::Int));
413441
let expected_ref = BoundReference::new(
414442
"BAR",
415443
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
444+
accessor_ref.clone(),
416445
);
417446

418447
assert_eq!(expected_ref, reference);

crates/iceberg/src/spec/schema.rs

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
//! This module defines schema in iceberg.
1919
2020
use crate::error::Result;
21+
use crate::expr::accessor::StructAccessor;
2122
use crate::spec::datatypes::{
2223
ListType, MapType, NestedFieldRef, PrimitiveType, StructType, Type, LIST_FILED_NAME,
2324
MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME,
@@ -55,6 +56,8 @@ pub struct Schema {
5556
name_to_id: HashMap<String, i32>,
5657
lowercase_name_to_id: HashMap<String, i32>,
5758
id_to_name: HashMap<i32, String>,
59+
60+
field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>,
5861
}
5962

6063
impl PartialEq for Schema {
@@ -105,6 +108,8 @@ impl SchemaBuilder {
105108
pub fn build(self) -> Result<Schema> {
106109
let highest_field_id = self.fields.iter().map(|f| f.id).max().unwrap_or(0);
107110

111+
let field_id_to_accessor = self.build_accessors();
112+
108113
let r#struct = StructType::new(self.fields);
109114
let id_to_field = index_by_id(&r#struct)?;
110115

@@ -137,9 +142,61 @@ impl SchemaBuilder {
137142
name_to_id,
138143
lowercase_name_to_id,
139144
id_to_name,
145+
146+
field_id_to_accessor,
140147
})
141148
}
142149

150+
fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> {
151+
let mut map = HashMap::new();
152+
153+
for (pos, field) in self.fields.iter().enumerate() {
154+
if let Type::Primitive(prim_type) = field.field_type.as_ref() {
155+
// add an accessor for this field
156+
let accessor = Arc::new(StructAccessor::new(pos, prim_type.clone()));
157+
map.insert(field.id, accessor.clone());
158+
159+
if let Type::Struct(nested) = field.field_type.as_ref() {
160+
// add accessors for nested fields
161+
for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
162+
let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor));
163+
map.insert(field_id, new_accessor.clone());
164+
}
165+
}
166+
}
167+
}
168+
169+
map
170+
}
171+
172+
fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Box<StructAccessor>)> {
173+
let mut results = vec![];
174+
for (pos, field) in fields.iter().enumerate() {
175+
match field.field_type.as_ref() {
176+
Type::Primitive(prim_type) => {
177+
let accessor = Box::new(StructAccessor::new(pos, prim_type.clone()));
178+
results.push((field.id, accessor));
179+
}
180+
Type::Struct(nested) => {
181+
let nested_accessors = Self::build_accessors_nested(nested.fields());
182+
183+
let wrapped_nested_accessors =
184+
nested_accessors.into_iter().map(|(id, accessor)| {
185+
let new_accessor = Box::new(StructAccessor::wrap(pos, accessor));
186+
(id, new_accessor.clone())
187+
});
188+
189+
results.extend(wrapped_nested_accessors);
190+
}
191+
_ => {
192+
todo!()
193+
}
194+
}
195+
}
196+
197+
results
198+
}
199+
143200
fn validate_identifier_ids(
144201
r#struct: &StructType,
145202
id_to_field: &HashMap<i32, NestedFieldRef>,
@@ -264,6 +321,11 @@ impl Schema {
264321
pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> {
265322
self.id_to_name.get(&field_id).map(String::as_str)
266323
}
324+
325+
/// Get an accessor for retrieving data in a struct
326+
pub fn accessor_for_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
327+
self.field_id_to_accessor.get(&field_id).cloned()
328+
}
267329
}
268330

269331
impl Display for Schema {
@@ -381,7 +443,7 @@ pub fn visit_schema<V: SchemaVisitor>(schema: &Schema, visitor: &mut V) -> Resul
381443
visitor.schema(schema, result)
382444
}
383445

384-
/// Creates an field id to field map.
446+
/// Creates a field id to field map.
385447
pub fn index_by_id(r#struct: &StructType) -> Result<HashMap<i32, NestedFieldRef>> {
386448
struct IndexById(HashMap<i32, NestedFieldRef>);
387449

crates/iceberg/src/spec/values.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121

2222
use std::fmt::{Display, Formatter};
23+
use std::ops::Index;
2324
use std::str::FromStr;
2425
use std::{any::Any, collections::BTreeMap};
2526

@@ -141,6 +142,11 @@ impl From<Datum> for Literal {
141142
}
142143

143144
impl Datum {
145+
/// Creates a `Datum` from a `PrimitiveType` and a `PrimitiveLiteral`
146+
pub(crate) fn new(r#type: PrimitiveType, literal: PrimitiveLiteral) -> Self {
147+
Datum { r#type, literal }
148+
}
149+
144150
/// Creates a boolean value.
145151
///
146152
/// Example:
@@ -1143,6 +1149,14 @@ impl Struct {
11431149
}
11441150
}
11451151

1152+
impl Index<usize> for Struct {
1153+
type Output = Literal;
1154+
1155+
fn index(&self, idx: usize) -> &Self::Output {
1156+
&self.fields[idx]
1157+
}
1158+
}
1159+
11461160
/// An iterator that moves out of a struct.
11471161
pub struct StructValueIntoIter {
11481162
null_bitmap: bitvec::boxed::IntoIter,

0 commit comments

Comments
 (0)