@@ -27,10 +27,6 @@ pub(crate) enum LineEndings {
27
27
impl LineEndings {
28
28
/// Replaces `\r\n` with `\n` in-place in `src`.
29
29
pub ( crate ) fn normalize ( src : String ) -> ( String , LineEndings ) {
30
- if !src. as_bytes ( ) . contains ( & b'\r' ) {
31
- return ( src, LineEndings :: Unix ) ;
32
- }
33
-
34
30
// We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
35
31
// While we *can* call `as_mut_vec` and do surgery on the live string
36
32
// directly, let's rather steal the contents of `src`. This makes the code
@@ -39,10 +35,19 @@ impl LineEndings {
39
35
let mut buf = src. into_bytes ( ) ;
40
36
let mut gap_len = 0 ;
41
37
let mut tail = buf. as_mut_slice ( ) ;
38
+ let mut crlf_seen = false ;
39
+
40
+ let find_crlf = |src : & [ u8 ] | src. windows ( 2 ) . position ( |it| it == b"\r \n " ) ;
41
+
42
42
loop {
43
43
let idx = match find_crlf ( & tail[ gap_len..] ) {
44
- None => tail. len ( ) ,
45
- Some ( idx) => idx + gap_len,
44
+ None if crlf_seen => tail. len ( ) ,
45
+ // SAFETY: buf is unchanged and therefore still contains valid UTF-8 data
46
+ None => return ( unsafe { String :: from_utf8_unchecked ( buf) } , LineEndings :: Unix ) ,
47
+ Some ( idx) => {
48
+ crlf_seen = true ;
49
+ idx + gap_len
50
+ }
46
51
} ;
47
52
tail. copy_within ( gap_len..idx, 0 ) ;
48
53
tail = & mut tail[ idx - gap_len..] ;
@@ -54,15 +59,48 @@ impl LineEndings {
54
59
55
60
// Account for removed `\r`.
56
61
// After `set_len`, `buf` is guaranteed to contain utf-8 again.
57
- let new_len = buf. len ( ) - gap_len;
58
62
let src = unsafe {
63
+ let new_len = buf. len ( ) - gap_len;
59
64
buf. set_len ( new_len) ;
60
65
String :: from_utf8_unchecked ( buf)
61
66
} ;
62
- return ( src, LineEndings :: Dos ) ;
67
+ ( src, LineEndings :: Dos )
68
+ }
69
+ }
63
70
64
- fn find_crlf ( src : & [ u8 ] ) -> Option < usize > {
65
- src. windows ( 2 ) . position ( |it| it == b"\r \n " )
66
- }
71
#[cfg(test)]
mod tests {
    //! Unit tests for `LineEndings::normalize`.
    //!
    //! Each case feeds a string literal through `normalize` and checks both
    //! the reported line-ending kind and the returned text.

    use super::*;

    /// Input that only uses `\n` comes back unchanged and is reported as `Unix`.
    #[test]
    fn unix() {
        let src = "a\nb\nc\n\n\n\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Unix);
        assert_eq!(res, src);
    }

    /// Every `\r\n` pair is collapsed to `\n`, including leading, trailing and
    /// consecutive pairs, and the input is reported as `Dos`.
    #[test]
    fn dos() {
        let src = "\r\na\r\n\r\nb\r\nc\r\n\r\n\r\n\r\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Dos);
        assert_eq!(res, "\na\n\nb\nc\n\n\n\n");
    }

    /// A mix of `\r\n` and bare `\n` is reported as `Dos`; bare `\n` bytes are
    /// left where they are.
    #[test]
    fn mixed() {
        let src = "a\r\nb\r\nc\r\n\n\r\n\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Dos);
        assert_eq!(res, "a\nb\nc\n\n\n\n");
    }

    /// Text without any line break at all is reported as `Unix` and unchanged.
    #[test]
    fn none() {
        let src = "abc";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Unix);
        assert_eq!(res, src);
    }

    /// A `\r` that is not followed by `\n` is not a CRLF pair: the search for
    /// `\r\n` finds nothing, so the text is handed back untouched as `Unix`.
    #[test]
    fn lone_cr() {
        let src = "a\rb\r";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Unix);
        assert_eq!(res, src);
    }

    /// The empty string has no two-byte window to inspect and must round-trip
    /// as `Unix`.
    #[test]
    fn empty() {
        let src = "";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Unix);
        assert_eq!(res, src);
    }
}
0 commit comments