|
20 | 20 |
|
21 | 21 | use std::{ |
22 | 22 | any::Any, borrow::Cow, collections::HashMap, fmt::Debug, fmt::Formatter, |
23 | | - fmt::Result as FmtResult, marker::PhantomData, str::FromStr, sync::Arc, |
| 23 | + fmt::Result as FmtResult, marker::PhantomData, mem::size_of, sync::Arc, vec, |
24 | 24 | }; |
25 | 25 |
|
26 | 26 | use arrow::{ |
@@ -1129,6 +1129,8 @@ pub struct ExtendedColumnProjector { |
1129 | 1129 | projected_metadata_indexes: Vec<usize>, |
1130 | 1130 | /// The schema of the table once the projection was applied. |
1131 | 1131 | projected_schema: SchemaRef, |
| 1132 | + /// Mapping between the column name and the metadata column. |
| 1133 | + metadata_map: HashMap<String, MetadataColumn>, |
1132 | 1134 | } |
1133 | 1135 |
|
1134 | 1136 | impl ExtendedColumnProjector { |
@@ -1162,11 +1164,17 @@ impl ExtendedColumnProjector { |
1162 | 1164 | projected_metadata_indexes.sort(); |
1163 | 1165 | } |
1164 | 1166 |
|
| 1167 | + let mut metadata_map = HashMap::new(); |
| 1168 | + for metadata_col in metadata_cols.iter() { |
| 1169 | + metadata_map.insert(metadata_col.name().to_string(), metadata_col.clone()); |
| 1170 | + } |
| 1171 | + |
1165 | 1172 | Self { |
1166 | 1173 | key_buffer_cache: Default::default(), |
1167 | 1174 | projected_partition_indexes, |
1168 | 1175 | projected_metadata_indexes, |
1169 | 1176 | projected_schema, |
| 1177 | + metadata_map, |
1170 | 1178 | } |
1171 | 1179 | } |
1172 | 1180 |
|
@@ -1238,8 +1246,11 @@ impl ExtendedColumnProjector { |
1238 | 1246 | for &sidx in &self.projected_metadata_indexes { |
1239 | 1247 | // Get the metadata column type from the field name |
1240 | 1248 | let field_name = self.projected_schema.field(sidx).name(); |
1241 | | - let metadata_col = MetadataColumn::from_str(field_name).map_err(|e| { |
1242 | | - DataFusionError::Execution(format!("Invalid metadata column: {}", e)) |
| 1249 | + let metadata_col = self.metadata_map.get(field_name).ok_or_else(|| { |
| 1250 | + DataFusionError::Execution(format!( |
| 1251 | + "Invalid metadata column: {}", |
| 1252 | + field_name |
| 1253 | + )) |
1243 | 1254 | })?; |
1244 | 1255 |
|
1245 | 1256 | // Convert metadata to scalar value based on the column type |
@@ -2416,7 +2427,7 @@ mod tests { |
2416 | 2427 | fn test_projected_schema_with_metadata_col() { |
2417 | 2428 | let file_schema = aggr_test_schema(); |
2418 | 2429 | let metadata_cols = vec![ |
2419 | | - MetadataColumn::Location, |
| 2430 | + MetadataColumn::Location(None), |
2420 | 2431 | MetadataColumn::Size, |
2421 | 2432 | MetadataColumn::LastModified, |
2422 | 2433 | ]; |
@@ -2450,7 +2461,7 @@ mod tests { |
2450 | 2461 | #[test] |
2451 | 2462 | fn test_projected_schema_with_projection_and_metadata_cols() { |
2452 | 2463 | let file_schema = aggr_test_schema(); |
2453 | | - let metadata_cols = vec![MetadataColumn::Location, MetadataColumn::Size]; |
| 2464 | + let metadata_cols = vec![MetadataColumn::Location(None), MetadataColumn::Size]; |
2454 | 2465 |
|
2455 | 2466 | // Create projection that includes only the first two columns from file schema plus metadata |
2456 | 2467 | let file_schema_len = file_schema.fields().len(); |
@@ -2492,7 +2503,7 @@ mod tests { |
2492 | 2503 | wrap_partition_type_in_dict(DataType::Int32), |
2493 | 2504 | ), |
2494 | 2505 | ]); |
2495 | | - let metadata_cols = vec![MetadataColumn::Location, MetadataColumn::Size]; |
| 2506 | + let metadata_cols = vec![MetadataColumn::Location(None), MetadataColumn::Size]; |
2496 | 2507 |
|
2497 | 2508 | // Create config with partition and metadata columns |
2498 | 2509 | let conf = FileScanConfigBuilder::new( |
@@ -2528,7 +2539,7 @@ mod tests { |
2528 | 2539 |
|
2529 | 2540 | // Create metadata columns |
2530 | 2541 | let metadata_cols = vec![ |
2531 | | - MetadataColumn::Location, |
| 2542 | + MetadataColumn::Location(None), |
2532 | 2543 | MetadataColumn::Size, |
2533 | 2544 | MetadataColumn::LastModified, |
2534 | 2545 | ]; |
@@ -2619,7 +2630,7 @@ mod tests { |
2619 | 2630 | ]; |
2620 | 2631 |
|
2621 | 2632 | // Create metadata columns |
2622 | | - let metadata_cols = vec![MetadataColumn::Location, MetadataColumn::Size]; |
| 2633 | + let metadata_cols = vec![MetadataColumn::Location(None), MetadataColumn::Size]; |
2623 | 2634 |
|
2624 | 2635 | // Create test object metadata |
2625 | 2636 | let object_meta = create_test_object_meta("bucket/file.parquet", 1024); |
|
0 commit comments