Skip to content

Commit 15bb29c

Browse files
authored
Merge pull request #7643 from karlmcdowall/tail_forwards_through_file
tail: Performance improvements
2 parents 3971bb3 + b264457 commit 15bb29c

File tree

1 file changed

+48
-37
lines changed

1 file changed

+48
-37
lines changed

src/uu/tail/src/tail.rs

Lines changed: 48 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6-
// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle Signum
6+
// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch
7+
// spell-checker:ignore (ToDO) Uncategorized filehandle Signum memrchr
78
// spell-checker:ignore (libs) kqueue
89
// spell-checker:ignore (acronyms)
910
// spell-checker:ignore (env/flags)
@@ -24,11 +25,12 @@ pub use args::uu_app;
2425
use args::{FilterMode, Settings, Signum, parse_args};
2526
use chunks::ReverseChunks;
2627
use follow::Observer;
28+
use memchr::{memchr_iter, memrchr_iter};
2729
use paths::{FileExtTail, HeaderPrinter, Input, InputKind, MetadataExtTail};
2830
use same_file::Handle;
2931
use std::cmp::Ordering;
3032
use std::fs::File;
31-
use std::io::{self, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write, stdin, stdout};
33+
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write, stdin, stdout};
3234
use std::path::{Path, PathBuf};
3335
use uucore::display::Quotable;
3436
use uucore::error::{FromIo, UResult, USimpleError, get_exit_code, set_exit_code};
@@ -285,34 +287,42 @@ fn tail_stdin(
285287
/// let i = forwards_thru_file(&mut reader, 2, b'\n').unwrap();
286288
/// assert_eq!(i, 2);
287289
/// ```
288-
fn forwards_thru_file<R>(
289-
reader: &mut R,
290+
fn forwards_thru_file(
291+
reader: &mut impl Read,
290292
num_delimiters: u64,
291293
delimiter: u8,
292-
) -> std::io::Result<usize>
293-
where
294-
R: Read,
295-
{
296-
let mut reader = BufReader::new(reader);
297-
298-
let mut buf = vec![];
294+
) -> std::io::Result<usize> {
295+
// If num_delimiters == 0, always return 0.
296+
if num_delimiters == 0 {
297+
return Ok(0);
298+
}
299+
// Use a 32K buffer.
300+
let mut buf = [0; 32 * 1024];
299301
let mut total = 0;
300-
for _ in 0..num_delimiters {
301-
match reader.read_until(delimiter, &mut buf) {
302-
Ok(0) => {
303-
return Ok(total);
304-
}
302+
let mut count = 0;
303+
// Iterate through the input, using `count` to record the number of times `delimiter`
304+
// is seen. Once we find `num_delimiters` instances, return the offset of the byte
305+
// immediately following that delimiter.
306+
loop {
307+
match reader.read(&mut buf) {
308+
// Ok(0) => EoF before we found `num_delimiters` instances of `delimiter`.
309+
// Return the total number of bytes read in that case.
310+
Ok(0) => return Ok(total),
305311
Ok(n) => {
312+
// Use memchr_iter since it greatly improves search performance.
313+
for offset in memchr_iter(delimiter, &buf[..n]) {
314+
count += 1;
315+
if count == num_delimiters {
316+
// Return offset of the byte after the `delimiter` instance.
317+
return Ok(total + offset + 1);
318+
}
319+
}
306320
total += n;
307-
buf.clear();
308-
continue;
309-
}
310-
Err(e) => {
311-
return Err(e);
312321
}
322+
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
323+
Err(e) => return Err(e),
313324
}
314325
}
315-
Ok(total)
316326
}
317327

318328
/// Iterate over bytes in the file, in reverse, until we find the
@@ -322,35 +332,36 @@ fn backwards_thru_file(file: &mut File, num_delimiters: u64, delimiter: u8) {
322332
// This variable counts the number of delimiters found in the file
323333
// so far (reading from the end of the file toward the beginning).
324334
let mut counter = 0;
325-
326-
for (block_idx, slice) in ReverseChunks::new(file).enumerate() {
335+
let mut first_slice = true;
336+
for slice in ReverseChunks::new(file) {
327337
// Iterate over the positions of `delimiter` in the slice, in reverse order.
328-
let mut iter = slice.iter().enumerate().rev();
338+
let mut iter = memrchr_iter(delimiter, &slice);
329339

330340
// Ignore a trailing newline in the last block, if there is one.
331-
if block_idx == 0 {
341+
if first_slice {
332342
if let Some(c) = slice.last() {
333343
if *c == delimiter {
334344
iter.next();
335345
}
336346
}
347+
first_slice = false;
337348
}
338349

339350
// For each delimiter found, increment the count of
340351
// delimiters seen. Once we have found the specified
341352
// number of delimiters, terminate the search and seek to the
342353
// appropriate location in the file.
343-
for (i, ch) in iter {
344-
if *ch == delimiter {
345-
counter += 1;
346-
if counter >= num_delimiters {
347-
// After each iteration of the outer loop, the
348-
// cursor in the file is at the *beginning* of the
349-
// block, so seeking forward by `i + 1` bytes puts
350-
// us right after the found delimiter.
351-
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
352-
return;
353-
}
354+
for i in iter {
355+
counter += 1;
356+
if counter >= num_delimiters {
357+
// We should never over-count - assert that.
358+
assert_eq!(counter, num_delimiters);
359+
// After each iteration of the outer loop, the
360+
// cursor in the file is at the *beginning* of the
361+
// block, so seeking forward by `i + 1` bytes puts
362+
// us right after the found delimiter.
363+
file.seek(SeekFrom::Current((i + 1) as i64)).unwrap();
364+
return;
354365
}
355366
}
356367
}

0 commit comments

Comments
 (0)