Skip to content

Commit 7a33b08

Browse files
QelxirosRenjiSann
authored and committed
csplit: don't add a newline if the file doesn't end with one (uutils#7901)
* csplit: don't add a newline if the file doesn't end with one * refactor test * refactor
1 parent 9384182 commit 7a33b08

File tree

2 files changed

+63
-21
lines changed

2 files changed

+63
-21
lines changed

src/uu/csplit/src/csplit.rs

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#![allow(rustdoc::private_intra_doc_links)]
77

88
use std::cmp::Ordering;
9-
use std::io::{self, BufReader};
9+
use std::io::{self, BufReader, ErrorKind};
1010
use std::{
1111
fs::{File, remove_file},
1212
io::{BufRead, BufWriter, Write},
@@ -71,6 +71,35 @@ impl CsplitOptions {
7171
}
7272
}
7373

74+
/// Iterator over the lines of a buffered reader that keeps line terminators.
///
/// Each item is the raw text read up to and including the next `\n` (so a
/// preceding `\r` is kept as well). If the input does not end with a newline,
/// the last item is yielded without one, which lets callers reproduce the
/// input byte-for-byte instead of appending a newline of their own.
#[derive(Debug)]
pub struct LinesWithNewlines<T> {
    inner: T,
}

impl<T: BufRead> LinesWithNewlines<T> {
    /// Wraps the reader `s` so that it can be iterated line by line.
    fn new(s: T) -> Self {
        Self { inner: s }
    }
}

impl<T: BufRead> Iterator for LinesWithNewlines<T> {
    type Item = io::Result<String>;

    /// Reads the next line, terminator included.
    ///
    /// Returns `None` once the reader is exhausted. A read failure is passed
    /// through unchanged, and a line that is not valid UTF-8 is reported as an
    /// [`ErrorKind::InvalidData`] error.
    fn next(&mut self) -> Option<Self::Item> {
        // A fresh buffer per call: its contents are handed to the caller as
        // the yielded `String`, so there is nothing to reuse.
        let mut buf = Vec::new();
        match self.inner.read_until(b'\n', &mut buf) {
            // Zero bytes read means end of input.
            Ok(0) => None,
            Ok(_) => Some(String::from_utf8(buf).map_err(|_| {
                io::Error::new(ErrorKind::InvalidData, "stream did not contain valid UTF-8")
            })),
            Err(e) => Some(Err(e)),
        }
    }
}
102+
74103
/// Splits a file into several files according to the command line patterns.
75104
///
76105
/// # Errors
@@ -87,8 +116,7 @@ pub fn csplit<T>(options: &CsplitOptions, patterns: &[String], input: T) -> Resu
87116
where
88117
T: BufRead,
89118
{
90-
let enumerated_input_lines = input
91-
.lines()
119+
let enumerated_input_lines = LinesWithNewlines::new(input)
92120
.map(|line| line.map_err_context(|| "read error".to_string()))
93121
.enumerate();
94122
let mut input_iter = InputSplitter::new(enumerated_input_lines);
@@ -243,7 +271,7 @@ impl SplitWriter<'_> {
243271
self.dev_null = true;
244272
}
245273

246-
/// Writes the line to the current split, appending a newline character.
274+
/// Writes the line to the current split.
247275
/// If [`self.dev_null`] is true, then the line is discarded.
248276
///
249277
/// # Errors
@@ -255,8 +283,7 @@ impl SplitWriter<'_> {
255283
Some(ref mut current_writer) => {
256284
let bytes = line.as_bytes();
257285
current_writer.write_all(bytes)?;
258-
current_writer.write_all(b"\n")?;
259-
self.size += bytes.len() + 1;
286+
self.size += bytes.len();
260287
}
261288
None => panic!("trying to write to a split that was not created"),
262289
}
@@ -321,11 +348,11 @@ impl SplitWriter<'_> {
321348

322349
let mut ret = Err(CsplitError::LineOutOfRange(pattern_as_str.to_string()));
323350
while let Some((ln, line)) = input_iter.next() {
324-
let l = line?;
351+
let line = line?;
325352
match n.cmp(&(&ln + 1)) {
326353
Ordering::Less => {
327354
assert!(
328-
input_iter.add_line_to_buffer(ln, l).is_none(),
355+
input_iter.add_line_to_buffer(ln, line).is_none(),
329356
"the buffer is big enough to contain 1 line"
330357
);
331358
ret = Ok(());
@@ -334,15 +361,15 @@ impl SplitWriter<'_> {
334361
Ordering::Equal => {
335362
assert!(
336363
self.options.suppress_matched
337-
|| input_iter.add_line_to_buffer(ln, l).is_none(),
364+
|| input_iter.add_line_to_buffer(ln, line).is_none(),
338365
"the buffer is big enough to contain 1 line"
339366
);
340367
ret = Ok(());
341368
break;
342369
}
343370
Ordering::Greater => (),
344371
}
345-
self.writeln(&l)?;
372+
self.writeln(&line)?;
346373
}
347374
self.finish_split();
348375
ret
@@ -379,23 +406,26 @@ impl SplitWriter<'_> {
379406
input_iter.set_size_of_buffer(1);
380407

381408
while let Some((ln, line)) = input_iter.next() {
382-
let l = line?;
383-
if regex.is_match(&l) {
409+
let line = line?;
410+
let l = line
411+
.strip_suffix("\r\n")
412+
.unwrap_or_else(|| line.strip_suffix('\n').unwrap_or(&line));
413+
if regex.is_match(l) {
384414
let mut next_line_suppress_matched = false;
385415
match (self.options.suppress_matched, offset) {
386416
// no offset, add the line to the next split
387417
(false, 0) => {
388418
assert!(
389-
input_iter.add_line_to_buffer(ln, l).is_none(),
419+
input_iter.add_line_to_buffer(ln, line).is_none(),
390420
"the buffer is big enough to contain 1 line"
391421
);
392422
}
393423
// a positive offset, some more lines need to be added to the current split
394-
(false, _) => self.writeln(&l)?,
424+
(false, _) => self.writeln(&line)?,
395425
// suppress matched option true, but there is a positive offset, so the line is printed
396426
(true, 1..) => {
397427
next_line_suppress_matched = true;
398-
self.writeln(&l)?;
428+
self.writeln(&line)?;
399429
}
400430
_ => (),
401431
};
@@ -424,7 +454,7 @@ impl SplitWriter<'_> {
424454
}
425455
return Ok(());
426456
}
427-
self.writeln(&l)?;
457+
self.writeln(&line)?;
428458
}
429459
} else {
430460
// With a negative offset we use a buffer to keep the lines within the offset.
@@ -435,21 +465,24 @@ impl SplitWriter<'_> {
435465
let offset_usize = -offset as usize;
436466
input_iter.set_size_of_buffer(offset_usize);
437467
while let Some((ln, line)) = input_iter.next() {
438-
let l = line?;
439-
if regex.is_match(&l) {
468+
let line = line?;
469+
let l = line
470+
.strip_suffix("\r\n")
471+
.unwrap_or_else(|| line.strip_suffix('\n').unwrap_or(&line));
472+
if regex.is_match(l) {
440473
for line in input_iter.shrink_buffer_to_size() {
441474
self.writeln(&line)?;
442475
}
443476
if self.options.suppress_matched {
444477
// since offset_usize is for sure greater than 0
445478
// the first element of the buffer should be removed and this
446479
// line inserted to be coherent with GNU implementation
447-
input_iter.add_line_to_buffer(ln, l);
480+
input_iter.add_line_to_buffer(ln, line);
448481
} else {
449482
// add 1 to the buffer size to make place for the matched line
450483
input_iter.set_size_of_buffer(offset_usize + 1);
451484
assert!(
452-
input_iter.add_line_to_buffer(ln, l).is_none(),
485+
input_iter.add_line_to_buffer(ln, line).is_none(),
453486
"should be big enough to hold every lines"
454487
);
455488
}
@@ -460,7 +493,7 @@ impl SplitWriter<'_> {
460493
}
461494
return Ok(());
462495
}
463-
if let Some(line) = input_iter.add_line_to_buffer(ln, l) {
496+
if let Some(line) = input_iter.add_line_to_buffer(ln, line) {
464497
self.writeln(&line)?;
465498
}
466499
}

tests/by-util/test_csplit.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,3 +1476,12 @@ fn test_directory_input_file() {
14761476
.fails_with_code(1)
14771477
.stderr_only("csplit: cannot open 'test_directory' for reading: Permission denied\n");
14781478
}
1479+
1480+
// Regression test: input piped on stdin ("-") whose last line has no
// trailing newline must not gain one when it is split.
// The expected stdout "2\n5\n" is presumably the byte size of each output
// file: 2 for "a\n" and 5 for "b\nc\nd" (it would be 6 if a newline were
// appended to the final line) -- TODO confirm against csplit's size output.
#[test]
1481+
fn test_stdin_no_trailing_newline() {
1482+
new_ucmd!()
1483+
.args(&["-", "2"])
1484+
.pipe_in("a\nb\nc\nd")
1485+
.succeeds()
1486+
.stdout_only("2\n5\n");
1487+
}

0 commit comments

Comments
 (0)