1919
2020use std:: sync:: Arc ;
2121
22+ use arrow:: array:: Array ;
2223use arrow:: datatypes:: { DataType as ArrowType , Schema , SchemaRef } ;
2324use arrow:: error:: Result as ArrowResult ;
2425use arrow:: record_batch:: { RecordBatch , RecordBatchReader } ;
2526use arrow:: { array:: StructArray , error:: ArrowError } ;
2627
27- use crate :: arrow:: array_reader:: { build_array_reader, ArrayReader , StructArrayReader } ;
28+ use crate :: arrow:: array_reader:: { build_array_reader, ArrayReader } ;
2829use crate :: arrow:: schema:: parquet_to_arrow_schema;
2930use crate :: arrow:: schema:: {
3031 parquet_to_arrow_schema_by_columns, parquet_to_arrow_schema_by_root_columns,
3132} ;
32- use crate :: errors:: { ParquetError , Result } ;
33+ use crate :: errors:: Result ;
3334use crate :: file:: metadata:: { KeyValue , ParquetMetaData } ;
3435use crate :: file:: reader:: FileReader ;
3536
@@ -234,20 +235,10 @@ impl Iterator for ParquetRecordBatchReader {
234235 "Struct array reader should return struct array" . to_string ( ) ,
235236 )
236237 } ) ;
238+
237239 match struct_array {
238240 Err ( err) => Some ( Err ( err) ) ,
239- Ok ( e) => {
240- match RecordBatch :: try_new ( self . schema . clone ( ) , e. columns_ref ( ) ) {
241- Err ( err) => Some ( Err ( err) ) ,
242- Ok ( record_batch) => {
243- if record_batch. num_rows ( ) > 0 {
244- Some ( Ok ( record_batch) )
245- } else {
246- None
247- }
248- }
249- }
250- }
241+ Ok ( e) => ( e. len ( ) > 0 ) . then ( || Ok ( RecordBatch :: from ( e) ) ) ,
251242 }
252243 }
253244 }
@@ -265,12 +256,6 @@ impl ParquetRecordBatchReader {
265256 batch_size : usize ,
266257 array_reader : Box < dyn ArrayReader > ,
267258 ) -> Result < Self > {
268- // Check that array reader is struct array reader
269- array_reader
270- . as_any ( )
271- . downcast_ref :: < StructArrayReader > ( )
272- . ok_or_else ( || general_err ! ( "The input must be struct array reader!" ) ) ?;
273-
274259 let schema = match array_reader. get_data_type ( ) {
275260 ArrowType :: Struct ( ref fields) => Schema :: new ( fields. clone ( ) ) ,
276261 _ => unreachable ! ( "Struct array reader's data type is not struct!" ) ,
@@ -1386,4 +1371,26 @@ mod tests {
13861371 schema_without_metadata. as_ref( )
13871372 ) ;
13881373 }
1374+
/// Checks that reading `alltypes_plain.parquet` with an empty column list
/// still accounts for every row recorded in the file metadata: each batch
/// must have zero columns and at most 2 rows (the value passed as the
/// second argument when creating the record reader).
#[test]
fn test_empty_projection() {
    let testdata = arrow::util::test_util::parquet_test_data();
    let path = format!("{}/alltypes_plain.parquet", testdata);
    let file = File::open(&path).unwrap();
    let file_reader = SerializedFileReader::try_from(file).unwrap();
    // Row count as recorded in the file footer; the batches must add up to it.
    let expected_rows = file_reader.metadata().file_metadata().num_rows() as usize;

    let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader));
    let batch_reader = arrow_reader.get_record_reader_by_columns([], 2).unwrap();

    // Validate each batch as it is produced and sum up the row counts.
    let total_rows: usize = batch_reader
        .map(|maybe_batch| {
            let batch = maybe_batch.unwrap();
            assert_eq!(batch.num_columns(), 0);
            assert!(batch.num_rows() <= 2);
            batch.num_rows()
        })
        .sum();

    assert_eq!(total_rows, expected_rows);
}
13891396}
0 commit comments