Skip to content

Commit 2f463cb

Browse files
committed
feat: add Struct Accessors to BoundReferences
1 parent 301a0af commit 2f463cb

File tree

5 files changed

+167
-3
lines changed

5 files changed

+167
-3
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::spec::{Literal, Struct, Type};
19+
use serde_derive::{Deserialize, Serialize};
20+
use std::sync::Arc;
21+
22+
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
23+
enum InnerOrType {
24+
Inner(Arc<StructAccessor>),
25+
Type(Type),
26+
}
27+
28+
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
29+
pub struct StructAccessor {
30+
position: i32,
31+
inner_or_type: InnerOrType,
32+
}
33+
34+
pub(crate) type StructAccessorRef = Arc<StructAccessor>;
35+
36+
impl StructAccessor {
37+
pub(crate) fn new(position: i32, r#type: Type) -> Self {
38+
StructAccessor {
39+
position,
40+
inner_or_type: InnerOrType::Type(r#type),
41+
}
42+
}
43+
44+
pub(crate) fn wrap(position: i32, inner: StructAccessorRef) -> Self {
45+
StructAccessor {
46+
position,
47+
inner_or_type: InnerOrType::Inner(inner),
48+
}
49+
}
50+
51+
pub fn position(&self) -> i32 {
52+
self.position
53+
}
54+
55+
fn r#type(&self) -> &Type {
56+
match &self.inner_or_type {
57+
InnerOrType::Inner(inner) => inner.r#type(),
58+
InnerOrType::Type(r#type) => r#type,
59+
}
60+
}
61+
62+
fn get<'a>(&'a self, container: &'a Struct) -> &Literal {
63+
match &self.inner_or_type {
64+
InnerOrType::Inner(inner) => match container.get(self.position) {
65+
Literal::Struct(wrapped) => inner.get(wrapped),
66+
_ => {
67+
panic!("Nested accessor should only be wrapping a Struct")
68+
}
69+
},
70+
InnerOrType::Type(_) => container.get(self.position),
71+
}
72+
}
73+
}

crates/iceberg/src/expr/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ mod term;
2222
use std::fmt::{Display, Formatter};
2323

2424
pub use term::*;
25+
pub(crate) mod accessor;
2526
mod predicate;
2627

2728
use crate::spec::SchemaRef;

crates/iceberg/src/expr/term.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::fmt::{Display, Formatter};
2121

2222
use fnv::FnvHashSet;
2323

24+
use crate::expr::accessor::{StructAccessor, StructAccessorRef};
2425
use crate::expr::Bind;
2526
use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression, UnaryExpression};
2627
use crate::spec::{Datum, NestedField, NestedFieldRef, SchemaRef};
@@ -333,7 +334,19 @@ impl Bind for Reference {
333334
format!("Field {} not found in schema", self.name),
334335
)
335336
})?;
336-
Ok(BoundReference::new(self.name.clone(), field.clone()))
337+
338+
let accessor = schema.accessor_for_field_id(field.id).ok_or_else(|| {
339+
Error::new(
340+
ErrorKind::DataInvalid,
341+
format!("Accessor for Field {} not found", self.name),
342+
)
343+
})?;
344+
345+
Ok(BoundReference::new(
346+
self.name.clone(),
347+
field.clone(),
348+
accessor.clone(),
349+
))
337350
}
338351
}
339352

@@ -344,21 +357,32 @@ pub struct BoundReference {
344357
// For example, if the field is `a.b.c`, then `field.name` is `c`, but `original_name` is `a.b.c`.
345358
column_name: String,
346359
field: NestedFieldRef,
360+
accessor: StructAccessorRef,
347361
}
348362

349363
impl BoundReference {
350364
/// Creates a new bound reference.
351-
pub fn new(name: impl Into<String>, field: NestedFieldRef) -> Self {
365+
pub fn new(
366+
name: impl Into<String>,
367+
field: NestedFieldRef,
368+
accessor: StructAccessorRef,
369+
) -> Self {
352370
Self {
353371
column_name: name.into(),
354372
field,
373+
accessor,
355374
}
356375
}
357376

358377
/// Return the field of this reference.
359378
pub fn field(&self) -> &NestedField {
360379
&self.field
361380
}
381+
382+
/// Get this BoundReference's Accessor
383+
pub fn accessor(&self) -> &StructAccessor {
384+
&self.accessor
385+
}
362386
}
363387

364388
impl Display for BoundReference {
@@ -374,6 +398,7 @@ pub type BoundTerm = BoundReference;
374398
mod tests {
375399
use std::sync::Arc;
376400

401+
use crate::expr::accessor::StructAccessor;
377402
use crate::expr::{Bind, BoundReference, Reference};
378403
use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};
379404

@@ -397,9 +422,11 @@ mod tests {
397422
let schema = table_schema_simple();
398423
let reference = Reference::new("bar").bind(schema, true).unwrap();
399424

425+
let accessor_ref = Arc::new(StructAccessor::new(1, Type::Primitive(PrimitiveType::Int)));
400426
let expected_ref = BoundReference::new(
401427
"bar",
402428
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
429+
accessor_ref.clone(),
403430
);
404431

405432
assert_eq!(expected_ref, reference);
@@ -410,9 +437,11 @@ mod tests {
410437
let schema = table_schema_simple();
411438
let reference = Reference::new("BAR").bind(schema, false).unwrap();
412439

440+
let accessor_ref = Arc::new(StructAccessor::new(1, Type::Primitive(PrimitiveType::Int)));
413441
let expected_ref = BoundReference::new(
414442
"BAR",
415443
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
444+
accessor_ref.clone(),
416445
);
417446

418447
assert_eq!(expected_ref, reference);

crates/iceberg/src/spec/schema.rs

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
//! This module defines schema in iceberg.
1919
2020
use crate::error::Result;
21+
use crate::expr::accessor::StructAccessor;
2122
use crate::spec::datatypes::{
2223
ListType, MapType, NestedFieldRef, PrimitiveType, StructType, Type, LIST_FILED_NAME,
2324
MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME,
@@ -55,6 +56,8 @@ pub struct Schema {
5556
name_to_id: HashMap<String, i32>,
5657
lowercase_name_to_id: HashMap<String, i32>,
5758
id_to_name: HashMap<i32, String>,
59+
60+
field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>,
5861
}
5962

6063
impl PartialEq for Schema {
@@ -105,6 +108,8 @@ impl SchemaBuilder {
105108
pub fn build(self) -> Result<Schema> {
106109
let highest_field_id = self.fields.iter().map(|f| f.id).max().unwrap_or(0);
107110

111+
let field_id_to_accessor = self.build_accessors();
112+
108113
let r#struct = StructType::new(self.fields);
109114
let id_to_field = index_by_id(&r#struct)?;
110115

@@ -137,9 +142,55 @@ impl SchemaBuilder {
137142
name_to_id,
138143
lowercase_name_to_id,
139144
id_to_name,
145+
146+
field_id_to_accessor,
140147
})
141148
}
142149

150+
fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> {
151+
let mut map = HashMap::new();
152+
153+
for (pos, field) in self.fields.iter().enumerate() {
154+
// add an accessor for this field
155+
156+
let accessor = Arc::new(StructAccessor::new(pos as i32, *field.field_type.clone()));
157+
map.insert(field.id, accessor.clone());
158+
159+
if let Type::Struct(nested) = field.field_type.as_ref() {
160+
// add accessors for nested fields
161+
for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
162+
let new_accessor = Arc::new(StructAccessor::wrap(pos as i32, accessor));
163+
map.insert(field_id, new_accessor.clone());
164+
}
165+
}
166+
}
167+
168+
map
169+
}
170+
171+
fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Arc<StructAccessor>)> {
172+
let mut results = vec![];
173+
for (pos, field) in fields.iter().enumerate() {
174+
if let Type::Struct(nested) = field.field_type.as_ref() {
175+
let nested_accessors = Self::build_accessors_nested(nested.fields());
176+
177+
let wrapped_nested_accessors =
178+
nested_accessors.into_iter().map(|(id, accessor)| {
179+
let new_accessor = Arc::new(StructAccessor::wrap(pos as i32, accessor));
180+
(id, new_accessor.clone())
181+
});
182+
183+
results.extend(wrapped_nested_accessors);
184+
}
185+
186+
let accessor = Arc::new(StructAccessor::new(pos as i32, *field.field_type.clone()));
187+
188+
results.push((field.id, accessor.clone()));
189+
}
190+
191+
results
192+
}
193+
143194
fn validate_identifier_ids(
144195
r#struct: &StructType,
145196
id_to_field: &HashMap<i32, NestedFieldRef>,
@@ -264,6 +315,11 @@ impl Schema {
264315
pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> {
265316
self.id_to_name.get(&field_id).map(String::as_str)
266317
}
318+
319+
/// Get an accessor for retrieving data in a struct
320+
pub fn accessor_for_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
321+
self.field_id_to_accessor.get(&field_id).cloned()
322+
}
267323
}
268324

269325
impl Display for Schema {
@@ -381,7 +437,7 @@ pub fn visit_schema<V: SchemaVisitor>(schema: &Schema, visitor: &mut V) -> Resul
381437
visitor.schema(schema, result)
382438
}
383439

384-
/// Creates an field id to field map.
440+
/// Creates a field id to field map.
385441
pub fn index_by_id(r#struct: &StructType) -> Result<HashMap<i32, NestedFieldRef>> {
386442
struct IndexById(HashMap<i32, NestedFieldRef>);
387443

crates/iceberg/src/spec/values.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,6 +1141,11 @@ impl Struct {
11411141
},
11421142
)
11431143
}
1144+
1145+
/// Gets a ref to the field at `position` within the `Struct`
1146+
pub fn get(&self, position: i32) -> &Literal {
1147+
&self.fields[position as usize]
1148+
}
11441149
}
11451150

11461151
/// An iterator that moves out of a struct.

0 commit comments

Comments
 (0)