File tree Expand file tree Collapse file tree 4 files changed +28
-4
lines changed Expand file tree Collapse file tree 4 files changed +28
-4
lines changed Original file line number Diff line number Diff line change @@ -135,6 +135,10 @@ impl FileFormat for AvroFormat {
135135        Ok ( Arc :: new ( merged_schema) ) 
136136    } 
137137
138+     async  fn  transform_schema ( & self ,  schema :  SchemaRef )  -> Result < SchemaRef >  { 
139+         Ok ( schema) 
140+     } 
141+ 
138142    async  fn  infer_stats ( 
139143        & self , 
140144        _state :  & dyn  Session , 
Original file line number Diff line number Diff line change @@ -393,6 +393,10 @@ impl FileFormat for CsvFormat {
393393        Ok ( Arc :: new ( merged_schema) ) 
394394    } 
395395
396+     async  fn  transform_schema ( & self ,  schema :  SchemaRef )  -> Result < SchemaRef >  { 
397+         Ok ( schema) 
398+     } 
399+ 
396400    async  fn  infer_stats ( 
397401        & self , 
398402        _state :  & dyn  Session , 
Original file line number Diff line number Diff line change @@ -233,6 +233,10 @@ impl FileFormat for JsonFormat {
233233        Ok ( Arc :: new ( schema) ) 
234234    } 
235235
236+     async  fn  transform_schema ( & self ,  schema :  SchemaRef )  -> Result < SchemaRef >  { 
237+         Ok ( schema) 
238+     } 
239+ 
236240    async  fn  infer_stats ( 
237241        & self , 
238242        _state :  & dyn  Session , 
Original file line number Diff line number Diff line change @@ -359,19 +359,29 @@ impl FileFormat for ParquetFormat {
359359            Schema :: try_merge ( schemas) 
360360        } ?; 
361361
362+         self . transform_schema ( Arc :: new ( schema) ) . await 
363+     } 
364+ 
365+     /// `transform_schema` for the Parquet format applies up to two steps: 
366+ /// 
367+ /// 1. If `binary_as_string` is enabled, transform the schema so that any binary types are strings. 
368+ /// See [transform_binary_to_string] for details. 
369+ /// 
370+ /// 2. If `force_view_types` is enabled, transform the schema to use view types for Utf8 and Binary. 
371+ /// See [transform_schema_to_view] for details. 
372+ async  fn  transform_schema ( & self ,  schema :  SchemaRef )  -> Result < SchemaRef >  { 
362373        let  schema = if  self . binary_as_string ( )  { 
363-             transform_binary_to_string ( & schema) 
374+             Arc :: new ( transform_binary_to_string ( schema. as_ref ( ) ) ) 
364375        }  else  { 
365376            schema
366377        } ; 
367378
368379        let  schema = if  self . force_view_types ( )  { 
369-             transform_schema_to_view ( & schema) 
380+             Arc :: new ( transform_schema_to_view ( schema. as_ref ( ) ) ) 
370381        }  else  { 
371382            schema
372383        } ; 
373- 
374-         Ok ( Arc :: new ( schema) ) 
384+         Ok ( schema) 
375385    } 
376386
377387    async  fn  infer_stats ( 
@@ -598,6 +608,8 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
598608} 
599609
600610/// Transform a schema so that any binary types are strings 
611+ /// 
612+ /// See [ParquetFormat::binary_as_string] for details 
601613pub  fn  transform_binary_to_string ( schema :  & Schema )  -> Schema  { 
602614    let  transformed_fields:  Vec < Arc < Field > >  = schema
603615        . fields 
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments