66#![ allow( rustdoc:: private_intra_doc_links) ]
77
88use std:: cmp:: Ordering ;
9- use std:: io:: { self , BufReader } ;
9+ use std:: io:: { self , BufReader , ErrorKind } ;
1010use std:: {
1111 fs:: { File , remove_file} ,
1212 io:: { BufRead , BufWriter , Write } ,
@@ -71,6 +71,35 @@ impl CsplitOptions {
7171 }
7272}
7373
/// Line iterator over a buffered reader that, unlike [`BufRead::lines`],
/// leaves the line terminator attached to every yielded line.
///
/// Each item is everything read up to and including the next `\n` byte.
/// The final line is yielded without a terminator when the input does not
/// end with one, so the input can be reproduced byte for byte by
/// concatenating the yielded lines.
pub struct LinesWithNewlines<T: BufRead> {
    inner: T,
}

impl<T: BufRead> LinesWithNewlines<T> {
    /// Wraps the reader `s`; nothing is read until the iterator is advanced.
    fn new(s: T) -> Self {
        LinesWithNewlines { inner: s }
    }
}

impl<T: BufRead> Iterator for LinesWithNewlines<T> {
    type Item = io::Result<String>;

    /// Reads the next line, terminator included.
    ///
    /// Returns `None` once the reader reports end of input (zero bytes
    /// read). A read failure is passed through as `Some(Err(_))`, and a line
    /// that is not valid UTF-8 is reported as an [`ErrorKind::InvalidData`]
    /// error.
    fn next(&mut self) -> Option<Self::Item> {
        let mut raw = Vec::new();

        let read = match self.inner.read_until(b'\n', &mut raw) {
            Ok(count) => count,
            Err(err) => return Some(Err(err)),
        };

        // Zero bytes read means the underlying reader is exhausted.
        if read == 0 {
            return None;
        }

        let line = match String::from_utf8(raw) {
            Ok(text) => Ok(text),
            Err(_) => Err(io::Error::new(
                ErrorKind::InvalidData,
                "stream did not contain valid UTF-8",
            )),
        };
        Some(line)
    }
}
102+
74103/// Splits a file into several files according to the command line patterns.
75104///
76105/// # Errors
@@ -87,8 +116,7 @@ pub fn csplit<T>(options: &CsplitOptions, patterns: &[String], input: T) -> Resu
87116where
88117 T : BufRead ,
89118{
90- let enumerated_input_lines = input
91- . lines ( )
119+ let enumerated_input_lines = LinesWithNewlines :: new ( input)
92120 . map ( |line| line. map_err_context ( || "read error" . to_string ( ) ) )
93121 . enumerate ( ) ;
94122 let mut input_iter = InputSplitter :: new ( enumerated_input_lines) ;
@@ -243,7 +271,7 @@ impl SplitWriter<'_> {
243271 self . dev_null = true ;
244272 }
245273
246- /// Writes the line to the current split, appending a newline character .
274+ /// Writes the line to the current split.
247275 /// If [`self.dev_null`] is true, then the line is discarded.
248276 ///
249277 /// # Errors
@@ -255,8 +283,7 @@ impl SplitWriter<'_> {
255283 Some ( ref mut current_writer) => {
256284 let bytes = line. as_bytes ( ) ;
257285 current_writer. write_all ( bytes) ?;
258- current_writer. write_all ( b"\n " ) ?;
259- self . size += bytes. len ( ) + 1 ;
286+ self . size += bytes. len ( ) ;
260287 }
261288 None => panic ! ( "trying to write to a split that was not created" ) ,
262289 }
@@ -321,11 +348,11 @@ impl SplitWriter<'_> {
321348
322349 let mut ret = Err ( CsplitError :: LineOutOfRange ( pattern_as_str. to_string ( ) ) ) ;
323350 while let Some ( ( ln, line) ) = input_iter. next ( ) {
324- let l = line?;
351+ let line = line?;
325352 match n. cmp ( & ( & ln + 1 ) ) {
326353 Ordering :: Less => {
327354 assert ! (
328- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
355+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
329356 "the buffer is big enough to contain 1 line"
330357 ) ;
331358 ret = Ok ( ( ) ) ;
@@ -334,15 +361,15 @@ impl SplitWriter<'_> {
334361 Ordering :: Equal => {
335362 assert ! (
336363 self . options. suppress_matched
337- || input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
364+ || input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
338365 "the buffer is big enough to contain 1 line"
339366 ) ;
340367 ret = Ok ( ( ) ) ;
341368 break ;
342369 }
343370 Ordering :: Greater => ( ) ,
344371 }
345- self . writeln ( & l ) ?;
372+ self . writeln ( & line ) ?;
346373 }
347374 self . finish_split ( ) ;
348375 ret
@@ -379,23 +406,26 @@ impl SplitWriter<'_> {
379406 input_iter. set_size_of_buffer ( 1 ) ;
380407
381408 while let Some ( ( ln, line) ) = input_iter. next ( ) {
382- let l = line?;
383- if regex. is_match ( & l) {
409+ let line = line?;
410+ let l = line
411+ . strip_suffix ( "\r \n " )
412+ . unwrap_or_else ( || line. strip_suffix ( '\n' ) . unwrap_or ( & line) ) ;
413+ if regex. is_match ( l) {
384414 let mut next_line_suppress_matched = false ;
385415 match ( self . options . suppress_matched , offset) {
386416 // no offset, add the line to the next split
387417 ( false , 0 ) => {
388418 assert ! (
389- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
419+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
390420 "the buffer is big enough to contain 1 line"
391421 ) ;
392422 }
393423 // a positive offset, some more lines need to be added to the current split
394- ( false , _) => self . writeln ( & l ) ?,
424+ ( false , _) => self . writeln ( & line ) ?,
395425 // suppress matched option true, but there is a positive offset, so the line is printed
396426 ( true , 1 ..) => {
397427 next_line_suppress_matched = true ;
398- self . writeln ( & l ) ?;
428+ self . writeln ( & line ) ?;
399429 }
400430 _ => ( ) ,
401431 } ;
@@ -424,7 +454,7 @@ impl SplitWriter<'_> {
424454 }
425455 return Ok ( ( ) ) ;
426456 }
427- self . writeln ( & l ) ?;
457+ self . writeln ( & line ) ?;
428458 }
429459 } else {
430460 // With a negative offset we use a buffer to keep the lines within the offset.
@@ -435,21 +465,24 @@ impl SplitWriter<'_> {
435465 let offset_usize = -offset as usize ;
436466 input_iter. set_size_of_buffer ( offset_usize) ;
437467 while let Some ( ( ln, line) ) = input_iter. next ( ) {
438- let l = line?;
439- if regex. is_match ( & l) {
468+ let line = line?;
469+ let l = line
470+ . strip_suffix ( "\r \n " )
471+ . unwrap_or_else ( || line. strip_suffix ( '\n' ) . unwrap_or ( & line) ) ;
472+ if regex. is_match ( l) {
440473 for line in input_iter. shrink_buffer_to_size ( ) {
441474 self . writeln ( & line) ?;
442475 }
443476 if self . options . suppress_matched {
444477 // since offset_usize is for sure greater than 0
445478 // the first element of the buffer should be removed and this
446479 // line inserted to be coherent with GNU implementation
447- input_iter. add_line_to_buffer ( ln, l ) ;
480+ input_iter. add_line_to_buffer ( ln, line ) ;
448481 } else {
449482 // add 1 to the buffer size to make place for the matched line
450483 input_iter. set_size_of_buffer ( offset_usize + 1 ) ;
451484 assert ! (
452- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
485+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
453486 "should be big enough to hold every lines"
454487 ) ;
455488 }
@@ -460,7 +493,7 @@ impl SplitWriter<'_> {
460493 }
461494 return Ok ( ( ) ) ;
462495 }
463- if let Some ( line) = input_iter. add_line_to_buffer ( ln, l ) {
496+ if let Some ( line) = input_iter. add_line_to_buffer ( ln, line ) {
464497 self . writeln ( & line) ?;
465498 }
466499 }
0 commit comments