Skip to content

Commit f451bd4

Browse files
yuankunzhangnickorlow
authored andcommitted
split: fix a racing condition that causes issue uutils#7869
1 parent c8a240a commit f451bd4

File tree

2 files changed

+59
-18
lines changed

2 files changed

+59
-18
lines changed

src/uu/split/src/split.rs

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -834,13 +834,7 @@ struct LineChunkWriter<'a> {
834834
impl<'a> LineChunkWriter<'a> {
835835
fn new(chunk_size: u64, settings: &'a Settings) -> UResult<Self> {
836836
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?;
837-
let filename = filename_iterator
838-
.next()
839-
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
840-
if settings.verbose {
841-
println!("creating file {}", filename.quote());
842-
}
843-
let inner = settings.instantiate_current_writer(&filename, true)?;
837+
let inner = Self::start_new_chunk(settings, &mut filename_iterator)?;
844838
Ok(LineChunkWriter {
845839
settings,
846840
chunk_size,
@@ -850,6 +844,19 @@ impl<'a> LineChunkWriter<'a> {
850844
filename_iterator,
851845
})
852846
}
847+
848+
fn start_new_chunk(
849+
settings: &Settings,
850+
filename_iterator: &mut FilenameIterator,
851+
) -> io::Result<BufWriter<Box<dyn Write>>> {
852+
let filename = filename_iterator
853+
.next()
854+
.ok_or_else(|| io::Error::other("output file suffixes exhausted"))?;
855+
if settings.verbose {
856+
println!("creating file {}", filename.quote());
857+
}
858+
settings.instantiate_current_writer(&filename, true)
859+
}
853860
}
854861

855862
impl Write for LineChunkWriter<'_> {
@@ -869,14 +876,7 @@ impl Write for LineChunkWriter<'_> {
869876
// corresponding writer.
870877
if self.num_lines_remaining_in_current_chunk == 0 {
871878
self.num_chunks_written += 1;
872-
let filename = self
873-
.filename_iterator
874-
.next()
875-
.ok_or_else(|| io::Error::other("output file suffixes exhausted"))?;
876-
if self.settings.verbose {
877-
println!("creating file {}", filename.quote());
878-
}
879-
self.inner = self.settings.instantiate_current_writer(&filename, true)?;
879+
self.inner = Self::start_new_chunk(self.settings, &mut self.filename_iterator)?;
880880
self.num_lines_remaining_in_current_chunk = self.chunk_size;
881881
}
882882

@@ -889,9 +889,19 @@ impl Write for LineChunkWriter<'_> {
889889
self.num_lines_remaining_in_current_chunk -= 1;
890890
}
891891

892-
let num_bytes_written =
893-
custom_write(&buf[prev..buf.len()], &mut self.inner, self.settings)?;
894-
total_bytes_written += num_bytes_written;
892+
// There might be bytes remaining in the buffer, and we write
893+
// them to the current chunk. But first, we may need to rotate
894+
// the current chunk in case it has already reached its line
895+
// limit.
896+
if prev < buf.len() {
897+
if self.num_lines_remaining_in_current_chunk == 0 {
898+
self.inner = Self::start_new_chunk(self.settings, &mut self.filename_iterator)?;
899+
self.num_lines_remaining_in_current_chunk = self.chunk_size;
900+
}
901+
let num_bytes_written =
902+
custom_write(&buf[prev..buf.len()], &mut self.inner, self.settings)?;
903+
total_bytes_written += num_bytes_written;
904+
}
895905
Ok(total_bytes_written)
896906
}
897907

tests/by-util/test_split.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,15 @@ impl RandomFile {
120120
n -= 1;
121121
}
122122
}
123+
124+
/// Add n lines each of the given size.
125+
fn add_lines_with_line_size(&mut self, lines: usize, line_size: usize) {
126+
let mut n = lines;
127+
while n > 0 {
128+
writeln!(self.inner, "{}", random_chars(line_size)).unwrap();
129+
n -= 1;
130+
}
131+
}
123132
}
124133

125134
#[test]
@@ -430,6 +439,28 @@ fn test_split_lines_number() {
430439
.stderr_only("split: invalid number of lines: 'file'\n");
431440
}
432441

442+
/// Test interference between split line size and IO buffer capacity.
443+
/// See issue #7869.
444+
#[test]
445+
fn test_split_lines_interfere_with_io_buf_capacity() {
446+
let buf_capacity = BufWriter::new(Vec::new()).capacity();
447+
// We intentionally set the line size to be less than the IO write buffer
448+
// capacity. This is to trigger the condition where after the first split
449+
// file is written, there are still bytes left in the buffer. We then
450+
// test that those bytes are written to the next split file.
451+
let line_size = buf_capacity - 2;
452+
453+
let (at, mut ucmd) = at_and_ucmd!();
454+
let name = "split_lines_interfere_with_io_buf_capacity";
455+
RandomFile::new(&at, name).add_lines_with_line_size(2, line_size);
456+
ucmd.args(&["-l", "1", name]).succeeds();
457+
458+
// Note that `lines_size` doesn't take the trailing newline into account,
459+
// we add 1 for adjustment.
460+
assert_eq!(at.read("xaa").len(), line_size + 1);
461+
assert_eq!(at.read("xab").len(), line_size + 1);
462+
}
463+
433464
/// Test short lines option with value concatenated
434465
#[test]
435466
fn test_split_lines_short_concatenated_with_value() {

0 commit comments

Comments
 (0)