@@ -82,7 +82,6 @@ use tokio::io::AsyncWriteExt;
8282/// ```
8383#[ derive( Debug , Clone , Default ) ]
8484pub struct CsvSource {
85- batch_size : Option < usize > ,
8685 file_schema : Option < SchemaRef > ,
8786 file_projection : Option < Vec < usize > > ,
8887 pub ( crate ) has_header : bool ,
@@ -159,8 +158,8 @@ impl CsvSource {
159158}
160159
161160impl CsvSource {
162- fn open < R : Read > ( & self , reader : R ) -> Result < csv:: Reader < R > > {
163- Ok ( self . builder ( ) . build ( reader) ?)
161+ fn open < R : Read > ( & self , reader : R , batch_size : usize ) -> Result < csv:: Reader < R > > {
162+ Ok ( self . builder ( ) . with_batch_size ( batch_size ) . build ( reader) ?)
164163 }
165164
166165 fn builder ( & self ) -> csv:: ReaderBuilder {
@@ -170,10 +169,6 @@ impl CsvSource {
170169 . expect ( "Schema must be set before initializing builder" ) ,
171170 ) )
172171 . with_delimiter ( self . delimiter )
173- . with_batch_size (
174- self . batch_size
175- . expect ( "Batch size must be set before initializing builder" ) ,
176- )
177172 . with_header ( self . has_header )
178173 . with_quote ( self . quote ) ;
179174 if let Some ( terminator) = self . terminator {
@@ -198,6 +193,7 @@ pub struct CsvOpener {
198193 config : Arc < CsvSource > ,
199194 file_compression_type : FileCompressionType ,
200195 object_store : Arc < dyn ObjectStore > ,
196+ batch_size : usize ,
201197}
202198
203199impl CsvOpener {
@@ -206,11 +202,13 @@ impl CsvOpener {
206202 config : Arc < CsvSource > ,
207203 file_compression_type : FileCompressionType ,
208204 object_store : Arc < dyn ObjectStore > ,
205+ batch_size : usize ,
209206 ) -> Self {
210207 Self {
211208 config,
212209 file_compression_type,
213210 object_store,
211+ batch_size,
214212 }
215213 }
216214}
@@ -227,24 +225,20 @@ impl FileSource for CsvSource {
227225 object_store : Arc < dyn ObjectStore > ,
228226 base_config : & FileScanConfig ,
229227 _partition : usize ,
228+ batch_size : usize ,
230229 ) -> Arc < dyn FileOpener > {
231230 Arc :: new ( CsvOpener {
232231 config : Arc :: new ( self . clone ( ) ) ,
233232 file_compression_type : base_config. file_compression_type ,
234233 object_store,
234+ batch_size,
235235 } )
236236 }
237237
238238 fn as_any ( & self ) -> & dyn Any {
239239 self
240240 }
241241
242- fn with_batch_size ( & self , batch_size : usize ) -> Arc < dyn FileSource > {
243- let mut conf = self . clone ( ) ;
244- conf. batch_size = Some ( batch_size) ;
245- Arc :: new ( conf)
246- }
247-
248242 fn with_schema ( & self , schema : SchemaRef ) -> Arc < dyn FileSource > {
249243 let mut conf = self . clone ( ) ;
250244 conf. file_schema = Some ( schema) ;
@@ -354,6 +348,7 @@ impl FileOpener for CsvOpener {
354348
355349 let store = Arc :: clone ( & self . object_store ) ;
356350 let terminator = self . config . terminator ;
351+ let batch_size = self . batch_size ;
357352
358353 Ok ( Box :: pin ( async move {
359354 // Current partition contains bytes [start_byte, end_byte) (might contain incomplete lines at boundaries)
@@ -392,7 +387,7 @@ impl FileOpener for CsvOpener {
392387 ) ?
393388 } ;
394389
395- Ok ( futures:: stream:: iter ( config. open ( decoder) ?) . boxed ( ) )
390+ Ok ( futures:: stream:: iter ( config. open ( decoder, batch_size ) ?) . boxed ( ) )
396391 }
397392 GetResultPayload :: Stream ( s) => {
398393 let decoder = config. builder ( ) . build_decoder ( ) ;
0 commit comments