Skip to content

Commit 28afe57

Browse files
committed
Add tests for LineEndings::normalize
1 parent e468a1a commit 28afe57

File tree

1 file changed

+49
-11
lines changed

1 file changed

+49
-11
lines changed

crates/rust-analyzer/src/line_index.rs

+49-11
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,6 @@ pub(crate) enum LineEndings {
2727
impl LineEndings {
2828
/// Replaces `\r\n` with `\n` in-place in `src`.
2929
pub(crate) fn normalize(src: String) -> (String, LineEndings) {
30-
if !src.as_bytes().contains(&b'\r') {
31-
return (src, LineEndings::Unix);
32-
}
33-
3430
// We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
3531
// While we *can* call `as_mut_vec` and do surgery on the live string
3632
// directly, let's rather steal the contents of `src`. This makes the code
@@ -39,10 +35,19 @@ impl LineEndings {
3935
let mut buf = src.into_bytes();
4036
let mut gap_len = 0;
4137
let mut tail = buf.as_mut_slice();
38+
let mut crlf_seen = false;
39+
40+
let find_crlf = |src: &[u8]| src.windows(2).position(|it| it == b"\r\n");
41+
4242
loop {
4343
let idx = match find_crlf(&tail[gap_len..]) {
44-
None => tail.len(),
45-
Some(idx) => idx + gap_len,
44+
None if crlf_seen => tail.len(),
45+
// SAFETY: `buf` is unchanged and therefore still contains valid UTF-8 data
46+
None => return (unsafe { String::from_utf8_unchecked(buf) }, LineEndings::Unix),
47+
Some(idx) => {
48+
crlf_seen = true;
49+
idx + gap_len
50+
}
4651
};
4752
tail.copy_within(gap_len..idx, 0);
4853
tail = &mut tail[idx - gap_len..];
@@ -54,15 +59,48 @@ impl LineEndings {
5459

5560
// Account for removed `\r`.
5661
// After `set_len`, `buf` is guaranteed to contain utf-8 again.
57-
let new_len = buf.len() - gap_len;
5862
let src = unsafe {
63+
let new_len = buf.len() - gap_len;
5964
buf.set_len(new_len);
6065
String::from_utf8_unchecked(buf)
6166
};
62-
return (src, LineEndings::Dos);
67+
(src, LineEndings::Dos)
68+
}
69+
}
6370

64-
fn find_crlf(src: &[u8]) -> Option<usize> {
65-
src.windows(2).position(|it| it == b"\r\n")
66-
}
71+
#[cfg(test)]
72+
mod tests {
73+
use super::*;
74+
75+
#[test]
76+
fn unix() {
77+
let src = "a\nb\nc\n\n\n\n";
78+
let (res, endings) = LineEndings::normalize(src.into());
79+
assert_eq!(endings, LineEndings::Unix);
80+
assert_eq!(res, src);
81+
}
82+
83+
#[test]
84+
fn dos() {
85+
let src = "\r\na\r\n\r\nb\r\nc\r\n\r\n\r\n\r\n";
86+
let (res, endings) = LineEndings::normalize(src.into());
87+
assert_eq!(endings, LineEndings::Dos);
88+
assert_eq!(res, "\na\n\nb\nc\n\n\n\n");
89+
}
90+
91+
#[test]
92+
fn mixed() {
93+
let src = "a\r\nb\r\nc\r\n\n\r\n\n";
94+
let (res, endings) = LineEndings::normalize(src.into());
95+
assert_eq!(endings, LineEndings::Dos);
96+
assert_eq!(res, "a\nb\nc\n\n\n\n");
97+
}
98+
99+
#[test]
100+
fn none() {
101+
let src = "abc";
102+
let (res, endings) = LineEndings::normalize(src.into());
103+
assert_eq!(endings, LineEndings::Unix);
104+
assert_eq!(res, src);
67105
}
68106
}

0 commit comments

Comments
 (0)