@@ -397,12 +397,16 @@ where
397
397
}
398
398
}
399
399
400
- // This uses an adaptive system to extend the vector when it fills. We want to
401
- // avoid paying to allocate and zero a huge chunk of memory if the reader only
402
- // has 4 bytes while still making large reads if the reader does have a ton
403
- // of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
404
- // time is 4,500 times (!) slower than a default reservation size of 32 if the
405
- // reader has a very small amount of data to return.
400
+ // Here we must serve many masters with conflicting goals:
401
+ //
402
+ // - avoid allocating unless necessary
403
+ // - avoid overallocating if we know the exact size (#89165)
404
+ // - avoid passing large buffers to readers that always initialize the free capacity, even when they only perform short reads (#23815, #23820)
405
+ // - pass large buffers to readers that do not initialize the spare capacity; this can amortize per-call overheads
406
+ // - and finally, pass not-too-small and not-too-large buffers to Windows read APIs, because they manage to suffer from both problems
407
+ // at the same time, i.e. small reads suffer from syscall overhead, while all reads incur an initialization cost
408
+ // proportional to buffer size (#110650)
409
+ //
406
410
pub ( crate ) fn default_read_to_end < R : Read + ?Sized > (
407
411
r : & mut R ,
408
412
buf : & mut Vec < u8 > ,
@@ -412,20 +416,58 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
412
416
let start_cap = buf. capacity ( ) ;
413
417
// Optionally limit the maximum bytes read on each iteration.
414
418
// This adds an arbitrary fiddle factor to allow for more data than we expect.
415
- let max_read_size =
416
- size_hint. and_then ( |s| s. checked_add ( 1024 ) ?. checked_next_multiple_of ( DEFAULT_BUF_SIZE ) ) ;
419
+ let mut max_read_size = size_hint
420
+ . and_then ( |s| s. checked_add ( 1024 ) ?. checked_next_multiple_of ( DEFAULT_BUF_SIZE ) )
421
+ . unwrap_or ( DEFAULT_BUF_SIZE ) ;
417
422
418
423
let mut initialized = 0 ; // Extra initialized bytes from previous loop iteration
424
+
425
+ const PROBE_SIZE : usize = 32 ;
426
+
427
+ fn small_probe_read < R : Read + ?Sized > ( r : & mut R , buf : & mut Vec < u8 > ) -> Result < usize > {
428
+ let mut probe = [ 0u8 ; PROBE_SIZE ] ;
429
+
430
+ loop {
431
+ match r. read ( & mut probe) {
432
+ Ok ( n) => {
433
+ buf. extend_from_slice ( & probe[ ..n] ) ;
434
+ return Ok ( n) ;
435
+ }
436
+ Err ( ref e) if e. is_interrupted ( ) => continue ,
437
+ Err ( e) => return Err ( e) ,
438
+ }
439
+ }
440
+ }
441
+
442
+ // Avoid inflating empty/small vecs before we have determined that there's anything to read.
443
+ if ( size_hint. is_none ( ) || size_hint == Some ( 0 ) ) && buf. capacity ( ) - buf. len ( ) < PROBE_SIZE {
444
+ let read = small_probe_read ( r, buf) ?;
445
+
446
+ if read == 0 {
447
+ return Ok ( 0 ) ;
448
+ }
449
+ }
450
+
419
451
loop {
452
+ if buf. len ( ) == buf. capacity ( ) && buf. capacity ( ) == start_cap {
453
+ // The buffer might be an exact fit. Let's read into a probe buffer
454
+ // and see if it returns `Ok(0)`. If so, we've avoided an
455
+ // unnecessary doubling of the capacity. But if not, append the
456
+ // probe buffer to the primary buffer and let its capacity grow.
457
+ let read = small_probe_read ( r, buf) ?;
458
+
459
+ if read == 0 {
460
+ return Ok ( buf. len ( ) - start_len) ;
461
+ }
462
+ }
463
+
420
464
if buf. len ( ) == buf. capacity ( ) {
421
- buf. reserve ( 32 ) ; // buf is full, need more space
465
+ buf. reserve ( PROBE_SIZE ) ; // buf is full, need more space
422
466
}
423
467
424
468
let mut spare = buf. spare_capacity_mut ( ) ;
425
- if let Some ( size) = max_read_size {
426
- let len = cmp:: min ( spare. len ( ) , size) ;
427
- spare = & mut spare[ ..len]
428
- }
469
+ let buf_len = cmp:: min ( spare. len ( ) , max_read_size) ;
470
+ spare = & mut spare[ ..buf_len] ;
429
471
let mut read_buf: BorrowedBuf < ' _ > = spare. into ( ) ;
430
472
431
473
// SAFETY: These bytes were initialized but not filled in the previous loop
@@ -434,42 +476,44 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
434
476
}
435
477
436
478
let mut cursor = read_buf. unfilled ( ) ;
437
- match r. read_buf ( cursor. reborrow ( ) ) {
438
- Ok ( ( ) ) => { }
439
- Err ( e) if e. is_interrupted ( ) => continue ,
440
- Err ( e) => return Err ( e) ,
479
+ loop {
480
+ match r. read_buf ( cursor. reborrow ( ) ) {
481
+ Ok ( ( ) ) => break ,
482
+ Err ( e) if e. is_interrupted ( ) => continue ,
483
+ Err ( e) => return Err ( e) ,
484
+ }
441
485
}
442
486
443
- if cursor. written ( ) == 0 {
487
+ let unfilled_but_initialized = cursor. init_ref ( ) . len ( ) ;
488
+ let bytes_read = cursor. written ( ) ;
489
+ let was_fully_initialized = read_buf. init_len ( ) == buf_len;
490
+
491
+ if bytes_read == 0 {
444
492
return Ok ( buf. len ( ) - start_len) ;
445
493
}
446
494
447
495
// store how much was initialized but not filled
448
- initialized = cursor . init_ref ( ) . len ( ) ;
496
+ initialized = unfilled_but_initialized ;
449
497
450
498
// SAFETY: BorrowedBuf's invariants mean this much memory is initialized.
451
499
unsafe {
452
- let new_len = read_buf . filled ( ) . len ( ) + buf. len ( ) ;
500
+ let new_len = bytes_read + buf. len ( ) ;
453
501
buf. set_len ( new_len) ;
454
502
}
455
503
456
- if buf. len ( ) == buf. capacity ( ) && buf. capacity ( ) == start_cap {
457
- // The buffer might be an exact fit. Let's read into a probe buffer
458
- // and see if it returns `Ok(0)`. If so, we've avoided an
459
- // unnecessary doubling of the capacity. But if not, append the
460
- // probe buffer to the primary buffer and let its capacity grow.
461
- let mut probe = [ 0u8 ; 32 ] ;
462
-
463
- loop {
464
- match r. read ( & mut probe) {
465
- Ok ( 0 ) => return Ok ( buf. len ( ) - start_len) ,
466
- Ok ( n) => {
467
- buf. extend_from_slice ( & probe[ ..n] ) ;
468
- break ;
469
- }
470
- Err ( ref e) if e. is_interrupted ( ) => continue ,
471
- Err ( e) => return Err ( e) ,
472
- }
504
+ // Use heuristics to determine the max read size if no initial size hint was provided
505
+ if size_hint. is_none ( ) {
506
+ // The reader is returning short reads but it doesn't call ensure_init().
507
+ // In that case we no longer need to restrict read sizes to avoid
508
+ // initialization costs.
509
+ if !was_fully_initialized {
510
+ max_read_size = usize:: MAX ;
511
+ }
512
+
513
+ // We have passed a buffer at least as large as the current read-size limit and
514
+ // the reader filled it completely (no short read), so double the limit.
515
+ if buf_len >= max_read_size && bytes_read == buf_len {
516
+ max_read_size = max_read_size. saturating_mul ( 2 ) ;
473
517
}
474
518
}
475
519
}
0 commit comments