|
18 | 18 | //! This module defines schema in iceberg. |
19 | 19 |
|
20 | 20 | use crate::error::Result; |
| 21 | +use crate::expr::accessor::StructAccessor; |
21 | 22 | use crate::spec::datatypes::{ |
22 | 23 | ListType, MapType, NestedFieldRef, PrimitiveType, StructType, Type, LIST_FILED_NAME, |
23 | 24 | MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, |
@@ -55,6 +56,8 @@ pub struct Schema { |
55 | 56 | name_to_id: HashMap<String, i32>, |
56 | 57 | lowercase_name_to_id: HashMap<String, i32>, |
57 | 58 | id_to_name: HashMap<i32, String>, |
| 59 | + |
| 60 | + field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>, |
58 | 61 | } |
59 | 62 |
|
60 | 63 | impl PartialEq for Schema { |
@@ -105,6 +108,8 @@ impl SchemaBuilder { |
105 | 108 | pub fn build(self) -> Result<Schema> { |
106 | 109 | let highest_field_id = self.fields.iter().map(|f| f.id).max().unwrap_or(0); |
107 | 110 |
|
| 111 | + let field_id_to_accessor = self.build_accessors(); |
| 112 | + |
108 | 113 | let r#struct = StructType::new(self.fields); |
109 | 114 | let id_to_field = index_by_id(&r#struct)?; |
110 | 115 |
|
@@ -137,9 +142,61 @@ impl SchemaBuilder { |
137 | 142 | name_to_id, |
138 | 143 | lowercase_name_to_id, |
139 | 144 | id_to_name, |
| 145 | + |
| 146 | + field_id_to_accessor, |
140 | 147 | }) |
141 | 148 | } |
142 | 149 |
|
| 150 | + fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> { |
| 151 | + let mut map = HashMap::new(); |
| 152 | + |
| 153 | + for (pos, field) in self.fields.iter().enumerate() { |
| 154 | + if let Type::Primitive(prim_type) = field.field_type.as_ref() { |
| 155 | + // add an accessor for this field |
| 156 | + let accessor = Arc::new(StructAccessor::new(pos, prim_type.clone())); |
| 157 | + map.insert(field.id, accessor.clone()); |
| 158 | + |
| 159 | + if let Type::Struct(nested) = field.field_type.as_ref() { |
| 160 | + // add accessors for nested fields |
| 161 | + for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) { |
| 162 | + let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor)); |
| 163 | + map.insert(field_id, new_accessor.clone()); |
| 164 | + } |
| 165 | + } |
| 166 | + } |
| 167 | + } |
| 168 | + |
| 169 | + map |
| 170 | + } |
| 171 | + |
| 172 | + fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Box<StructAccessor>)> { |
| 173 | + let mut results = vec![]; |
| 174 | + for (pos, field) in fields.iter().enumerate() { |
| 175 | + match field.field_type.as_ref() { |
| 176 | + Type::Primitive(prim_type) => { |
| 177 | + let accessor = Box::new(StructAccessor::new(pos, prim_type.clone())); |
| 178 | + results.push((field.id, accessor)); |
| 179 | + } |
| 180 | + Type::Struct(nested) => { |
| 181 | + let nested_accessors = Self::build_accessors_nested(nested.fields()); |
| 182 | + |
| 183 | + let wrapped_nested_accessors = |
| 184 | + nested_accessors.into_iter().map(|(id, accessor)| { |
| 185 | + let new_accessor = Box::new(StructAccessor::wrap(pos, accessor)); |
| 186 | + (id, new_accessor.clone()) |
| 187 | + }); |
| 188 | + |
| 189 | + results.extend(wrapped_nested_accessors); |
| 190 | + } |
| 191 | + _ => { |
| 192 | + todo!() |
| 193 | + } |
| 194 | + } |
| 195 | + } |
| 196 | + |
| 197 | + results |
| 198 | + } |
| 199 | + |
143 | 200 | fn validate_identifier_ids( |
144 | 201 | r#struct: &StructType, |
145 | 202 | id_to_field: &HashMap<i32, NestedFieldRef>, |
@@ -264,6 +321,11 @@ impl Schema { |
264 | 321 | pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> { |
265 | 322 | self.id_to_name.get(&field_id).map(String::as_str) |
266 | 323 | } |
| 324 | + |
| 325 | + /// Get an accessor for retrieving data in a struct |
| 326 | + pub fn accessor_for_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> { |
| 327 | + self.field_id_to_accessor.get(&field_id).cloned() |
| 328 | + } |
267 | 329 | } |
268 | 330 |
|
269 | 331 | impl Display for Schema { |
@@ -381,7 +443,7 @@ pub fn visit_schema<V: SchemaVisitor>(schema: &Schema, visitor: &mut V) -> Resul |
381 | 443 | visitor.schema(schema, result) |
382 | 444 | } |
383 | 445 |
|
384 | | -/// Creates an field id to field map. |
| 446 | +/// Creates a field id to field map. |
385 | 447 | pub fn index_by_id(r#struct: &StructType) -> Result<HashMap<i32, NestedFieldRef>> { |
386 | 448 | struct IndexById(HashMap<i32, NestedFieldRef>); |
387 | 449 |
|
|
0 commit comments