33// For the full copyright and license information, please view the LICENSE
44// file that was distributed with this source code.
55
6- // spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle Signum
6+ // spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch
7+ // spell-checker:ignore (ToDO) Uncategorized filehandle Signum memrchr
78// spell-checker:ignore (libs) kqueue
89// spell-checker:ignore (acronyms)
910// spell-checker:ignore (env/flags)
@@ -24,11 +25,12 @@ pub use args::uu_app;
2425use args:: { FilterMode , Settings , Signum , parse_args} ;
2526use chunks:: ReverseChunks ;
2627use follow:: Observer ;
28+ use memchr:: { memchr_iter, memrchr_iter} ;
2729use paths:: { FileExtTail , HeaderPrinter , Input , InputKind , MetadataExtTail } ;
2830use same_file:: Handle ;
2931use std:: cmp:: Ordering ;
3032use std:: fs:: File ;
31- use std:: io:: { self , BufRead , BufReader , BufWriter , Read , Seek , SeekFrom , Write , stdin, stdout} ;
33+ use std:: io:: { self , BufReader , BufWriter , ErrorKind , Read , Seek , SeekFrom , Write , stdin, stdout} ;
3234use std:: path:: { Path , PathBuf } ;
3335use uucore:: display:: Quotable ;
3436use uucore:: error:: { FromIo , UResult , USimpleError , get_exit_code, set_exit_code} ;
@@ -285,34 +287,42 @@ fn tail_stdin(
285287/// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
286288/// assert_eq!(i, 2);
287289/// ```
288- fn forwards_thru_file < R > (
289- reader : & mut R ,
290+ fn forwards_thru_file (
291+ reader : & mut impl Read ,
290292 num_delimiters : u64 ,
291293 delimiter : u8 ,
292- ) -> std:: io:: Result < usize >
293- where
294- R : Read ,
295- {
296- let mut reader = BufReader :: new ( reader ) ;
297-
298- let mut buf = vec ! [ ] ;
294+ ) -> std:: io:: Result < usize > {
295+ // If num_delimiters == 0, always return 0.
296+ if num_delimiters == 0 {
297+ return Ok ( 0 ) ;
298+ }
299+ // Use a 32K buffer.
300+ let mut buf = [ 0 ; 32 * 1024 ] ;
299301 let mut total = 0 ;
300- for _ in 0 ..num_delimiters {
301- match reader. read_until ( delimiter, & mut buf) {
302- Ok ( 0 ) => {
303- return Ok ( total) ;
304- }
302+ let mut count = 0 ;
303+ // Iterate through the input, using `count` to record the number of times `delimiter`
304+ // is seen. Once we find `num_delimiters` instances, return the offset of the byte
305+ // immediately following that delimiter.
306+ loop {
307+ match reader. read ( & mut buf) {
308+ // Ok(0) => EoF before we found `num_delimiters` instance of `delimiter`.
309+ // Return the total number of bytes read in that case.
310+ Ok ( 0 ) => return Ok ( total) ,
305311 Ok ( n) => {
312+ // Use memchr_iter since it greatly improves search performance.
313+ for offset in memchr_iter ( delimiter, & buf[ ..n] ) {
314+ count += 1 ;
315+ if count == num_delimiters {
316+ // Return offset of the byte after the `delimiter` instance.
317+ return Ok ( total + offset + 1 ) ;
318+ }
319+ }
306320 total += n;
307- buf. clear ( ) ;
308- continue ;
309- }
310- Err ( e) => {
311- return Err ( e) ;
312321 }
322+ Err ( e) if e. kind ( ) == ErrorKind :: Interrupted => continue ,
323+ Err ( e) => return Err ( e) ,
313324 }
314325 }
315- Ok ( total)
316326}
317327
318328/// Iterate over bytes in the file, in reverse, until we find the
@@ -322,35 +332,36 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) {
322332 // This variable counts the number of delimiters found in the file
323333 // so far (reading from the end of the file toward the beginning).
324334 let mut counter = 0 ;
325-
326- for ( block_idx , slice) in ReverseChunks :: new ( file) . enumerate ( ) {
335+ let mut first_slice = true ;
336+ for slice in ReverseChunks :: new ( file) {
327337 // Iterate over each byte in the slice in reverse order.
328- let mut iter = slice . iter ( ) . enumerate ( ) . rev ( ) ;
338+ let mut iter = memrchr_iter ( delimiter , & slice ) ;
329339
330340 // Ignore a trailing newline in the last block, if there is one.
331- if block_idx == 0 {
341+ if first_slice {
332342 if let Some ( c) = slice. last ( ) {
333343 if * c == delimiter {
334344 iter. next ( ) ;
335345 }
336346 }
347+ first_slice = false ;
337348 }
338349
339350 // For each byte, increment the count of the number of
340351 // delimiters found. If we have found more than the specified
341352 // number of delimiters, terminate the search and seek to the
342353 // appropriate location in the file.
343- for ( i , ch ) in iter {
344- if * ch == delimiter {
345- counter += 1 ;
346- if counter >= num_delimiters {
347- // After each iteration of the outer loop, the
348- // cursor in the file is at the *beginning* of the
349- // block, so seeking forward by `i + 1` bytes puts
350- // us right after the found delimiter.
351- file . seek ( SeekFrom :: Current ( ( i + 1 ) as i64 ) ) . unwrap ( ) ;
352- return ;
353- }
354+ for i in iter {
355+ counter += 1 ;
356+ if counter >= num_delimiters {
357+ // We should never over-count - assert that.
358+ assert_eq ! ( counter , num_delimiters ) ;
359+ // After each iteration of the outer loop, the
360+ // cursor in the file is at the *beginning* of the
361+ // block, so seeking forward by `i + 1` bytes puts
362+ // us right after the found delimiter.
363+ file . seek ( SeekFrom :: Current ( ( i + 1 ) as i64 ) ) . unwrap ( ) ;
364+ return ;
354365 }
355366 }
356367 }
0 commit comments