Skip to content

Commit 6609c67

Browse files
committed
Auto merge of #96551 - ferrocene:pa-ignore-paths-when-abbreviating, r=Mark-Simulacrum
[compiletest] Ignore known paths when abbreviating output. To prevent out-of-memory conditions, compiletest limits the amount of output a test can generate, abbreviating it if the test emits more than a threshold. While the behavior is desirable, it also causes some issues (like #96229, #94322 and #92211). The latest one happened recently, when the `src/test/ui/numeric/numeric-cast.rs` test started to fail on systems where the path of the rust-lang/rust checkout is too long. This includes my own development machine and [LLVM's CI](#96362 (comment)). Rust's CI uses a pretty short directory name for the checkout, which hides this sort of problem until someone runs the test suite on their own computer. When developing the fix I tried to find the most targeted fix that would prevent this class of failures from happening in the future, deferring the decision on if/how to redesign abbreviation to a later date. The solution I came up with was to ignore known base paths when calculating whether the output exceeds the abbreviation threshold, which removes this kind of nondeterminism. This PR is best reviewed commit-by-commit.
2 parents 760237f + 8ea9598 commit 6609c67

File tree

3 files changed

+249
-60
lines changed

3 files changed

+249
-60
lines changed

src/tools/compiletest/src/read2.rs

+101-56
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,24 @@
11
// FIXME: This is a complete copy of `cargo/src/cargo/util/read2.rs`
22
// Consider unifying the read2() in libstd, cargo and this to prevent further code duplication.
33

4+
#[cfg(test)]
5+
mod tests;
6+
47
pub use self::imp::read2;
5-
use std::io;
8+
use std::io::{self, Write};
9+
use std::mem::replace;
610
use std::process::{Child, Output};
711

8-
pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
9-
use io::Write;
10-
use std::mem::replace;
11-
12-
const HEAD_LEN: usize = 160 * 1024;
13-
const TAIL_LEN: usize = 256 * 1024;
14-
15-
enum ProcOutput {
16-
Full(Vec<u8>),
17-
Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
18-
}
19-
20-
impl ProcOutput {
21-
fn extend(&mut self, data: &[u8]) {
22-
let new_self = match *self {
23-
ProcOutput::Full(ref mut bytes) => {
24-
bytes.extend_from_slice(data);
25-
let new_len = bytes.len();
26-
if new_len <= HEAD_LEN + TAIL_LEN {
27-
return;
28-
}
29-
let tail = bytes.split_off(new_len - TAIL_LEN).into_boxed_slice();
30-
let head = replace(bytes, Vec::new());
31-
let skipped = new_len - HEAD_LEN - TAIL_LEN;
32-
ProcOutput::Abbreviated { head, skipped, tail }
33-
}
34-
ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
35-
*skipped += data.len();
36-
if data.len() <= TAIL_LEN {
37-
tail[..data.len()].copy_from_slice(data);
38-
tail.rotate_left(data.len());
39-
} else {
40-
tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
41-
}
42-
return;
43-
}
44-
};
45-
*self = new_self;
46-
}
47-
48-
fn into_bytes(self) -> Vec<u8> {
49-
match self {
50-
ProcOutput::Full(bytes) => bytes,
51-
ProcOutput::Abbreviated { mut head, skipped, tail } => {
52-
write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
53-
head.extend_from_slice(&tail);
54-
head
55-
}
56-
}
57-
}
58-
}
59-
60-
let mut stdout = ProcOutput::Full(Vec::new());
61-
let mut stderr = ProcOutput::Full(Vec::new());
12+
pub fn read2_abbreviated(mut child: Child, filter_paths_from_len: &[String]) -> io::Result<Output> {
13+
let mut stdout = ProcOutput::new();
14+
let mut stderr = ProcOutput::new();
6215

6316
drop(child.stdin.take());
6417
read2(
6518
child.stdout.take().unwrap(),
6619
child.stderr.take().unwrap(),
6720
&mut |is_stdout, data, _| {
68-
if is_stdout { &mut stdout } else { &mut stderr }.extend(data);
21+
if is_stdout { &mut stdout } else { &mut stderr }.extend(data, filter_paths_from_len);
6922
data.clear();
7023
},
7124
)?;
@@ -74,6 +27,98 @@ pub fn read2_abbreviated(mut child: Child) -> io::Result<Output> {
7427
Ok(Output { status, stdout: stdout.into_bytes(), stderr: stderr.into_bytes() })
7528
}
7629

30+
/// Number of bytes kept verbatim from the start of an abbreviated output.
const HEAD_LEN: usize = 160 * 1024;
/// Number of bytes kept verbatim from the end of an abbreviated output.
const TAIL_LEN: usize = 256 * 1024;

// Whenever a path is filtered when counting the length of the output, we need to add some
// placeholder length to ensure a compiler emitting only filtered paths doesn't cause an OOM.
//
// 32 was chosen semi-arbitrarily: it was the highest power of two that still allowed the test
// suite to pass at the moment of implementing path filtering.
const FILTERED_PATHS_PLACEHOLDER_LEN: usize = 32;

/// Output captured from one stream of a child process, abbreviated once it grows too large.
enum ProcOutput {
    /// Everything received so far. `filtered_len` is the length of `bytes` with every occurrence
    /// of a filtered path counted as `FILTERED_PATHS_PLACEHOLDER_LEN` bytes instead of its real
    /// length.
    Full { bytes: Vec<u8>, filtered_len: usize },
    /// The first `HEAD_LEN` bytes and the last `TAIL_LEN` bytes received, with `skipped` bytes
    /// dropped in between.
    Abbreviated { head: Vec<u8>, skipped: usize, tail: Box<[u8]> },
}

impl ProcOutput {
    /// Creates an empty, not yet abbreviated output.
    fn new() -> Self {
        ProcOutput::Full { bytes: Vec::new(), filtered_len: 0 }
    }

    /// Appends `data` to the output, switching to the abbreviated representation once the
    /// filtered length exceeds `HEAD_LEN + TAIL_LEN`.
    ///
    /// Occurrences of the strings in `filter_paths_from_len` are counted as
    /// `FILTERED_PATHS_PLACEHOLDER_LEN` bytes each when measuring the output; the bytes
    /// themselves are always stored unmodified. Empty filter strings are ignored.
    fn extend(&mut self, data: &[u8], filter_paths_from_len: &[String]) {
        let new_self = match *self {
            ProcOutput::Full { ref mut bytes, ref mut filtered_len } => {
                let old_len = bytes.len();
                bytes.extend_from_slice(data);
                *filtered_len += data.len();

                // We had problems in the past with tests failing only in some environments,
                // due to the length of the base path pushing the output size over the limit.
                //
                // To make those failures deterministic across all environments we ignore known
                // paths when calculating the string length, while still including the full
                // path in the output. This could result in some output being larger than the
                // threshold, but it's better than having nondeterministic failures.
                //
                // The compiler emitting only excluded strings is addressed by adding a
                // placeholder size for each excluded segment, which will eventually reach
                // the configured threshold.
                for path in filter_paths_from_len {
                    let path_bytes = path.as_bytes();
                    // An empty filter can never match anything meaningful, and both
                    // `len() - 1` and `windows(0)` below would panic on it.
                    if path_bytes.is_empty() {
                        continue;
                    }

                    // We start matching `path_bytes.len() - 1` bytes into the previously loaded
                    // data, to account for the fact a path might be split across multiple
                    // `extend` calls. Starting from `- 1` avoids double-counting paths.
                    let scan_start = old_len.saturating_sub(path_bytes.len() - 1);
                    let matches = bytes[scan_start..]
                        .windows(path_bytes.len())
                        .filter(|window| *window == path_bytes)
                        .count();

                    // We can't just remove the length of the filtered path from the output
                    // length, otherwise a compiler emitting only filtered paths would OOM
                    // compiletest. Count a fixed placeholder length for each match instead.
                    //
                    // The placeholder is added before the subtraction and the subtraction
                    // saturates, so overlapping matches (which together cover more bytes than
                    // were actually received) cannot underflow the counter.
                    *filtered_len = (*filtered_len + matches * FILTERED_PATHS_PLACEHOLDER_LEN)
                        .saturating_sub(matches * path_bytes.len());
                }

                let new_len = bytes.len();
                // A filtered path shorter than the placeholder makes `filtered_len` grow faster
                // than the real buffer. Never abbreviate unless the buffer itself is larger
                // than the head and tail combined, otherwise the splits below would panic.
                if (*filtered_len).min(new_len) <= HEAD_LEN + TAIL_LEN {
                    return;
                }

                let mut head = replace(bytes, Vec::new());
                let mut middle = head.split_off(HEAD_LEN);
                let tail = middle.split_off(middle.len() - TAIL_LEN).into_boxed_slice();
                let skipped = new_len - HEAD_LEN - TAIL_LEN;
                ProcOutput::Abbreviated { head, skipped, tail }
            }
            ProcOutput::Abbreviated { ref mut skipped, ref mut tail, .. } => {
                *skipped += data.len();
                if data.len() <= TAIL_LEN {
                    // Overwrite the oldest bytes of the tail, then rotate them to the end so
                    // the tail stays in chronological order.
                    tail[..data.len()].copy_from_slice(data);
                    tail.rotate_left(data.len());
                } else {
                    // The new data alone fills the whole tail: keep only its last bytes.
                    tail.copy_from_slice(&data[(data.len() - TAIL_LEN)..]);
                }
                return;
            }
        };
        *self = new_self;
    }

    /// Consumes the output and returns its bytes, with a `SKIPPED n BYTES` marker between head
    /// and tail when the output was abbreviated.
    fn into_bytes(self) -> Vec<u8> {
        match self {
            ProcOutput::Full { bytes, .. } => bytes,
            ProcOutput::Abbreviated { mut head, skipped, tail } => {
                write!(&mut head, "\n\n<<<<<< SKIPPED {} BYTES >>>>>>\n\n", skipped).unwrap();
                head.extend_from_slice(&tail);
                head
            }
        }
    }
}
121+
77122
#[cfg(not(any(unix, windows)))]
78123
mod imp {
79124
use std::io::{self, Read};
+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
use crate::read2::{ProcOutput, FILTERED_PATHS_PLACEHOLDER_LEN, HEAD_LEN, TAIL_LEN};
2+
3+
/// A short input fed in one call comes back unchanged.
#[test]
fn test_abbreviate_short_string() {
    let input = b"Hello world!";

    let mut captured = ProcOutput::new();
    captured.extend(input, &[]);

    let bytes = captured.into_bytes();
    assert_eq!(input, &*bytes);
}
9+
10+
/// A short input fed in several calls is concatenated in order.
#[test]
fn test_abbreviate_short_string_multiple_steps() {
    let mut captured = ProcOutput::new();

    let chunks: [&[u8]; 2] = [b"Hello ", b"world!"];
    for chunk in chunks.iter() {
        captured.extend(chunk, &[]);
    }

    assert_eq!(b"Hello world!", &*captured.into_bytes());
}
19+
20+
/// An input 16 bytes over the limit, fed in one call, is cut down to head + marker + tail.
#[test]
fn test_abbreviate_long_string() {
    let mut captured = ProcOutput::new();
    captured.extend(&vec![b'.'; HEAD_LEN + TAIL_LEN + 16], &[]);

    let mut expected: Vec<u8> = Vec::new();
    expected.resize(HEAD_LEN, b'.');
    expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 16 BYTES >>>>>>\n\n");
    expected.resize(expected.len() + TAIL_LEN, b'.');

    // Compare the lengths first: a mismatch on the full buffers would dump hundreds of KBs
    // to the terminal.
    let actual = captured.into_bytes();
    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected, actual);
}
37+
38+
/// Output that crosses the limit over several calls keeps the head and the most recent tail.
#[test]
fn test_abbreviate_long_string_multiple_steps() {
    let mut captured = ProcOutput::new();

    // The first two chunks fill the buffer exactly up to the limit; the last two push it over
    // and exercise the tail rotation.
    let chunks =
        [vec![b'.'; HEAD_LEN], vec![b'.'; TAIL_LEN], vec![b'!'; 16], vec![b'?'; 16]];
    for chunk in chunks.iter() {
        captured.extend(chunk, &[]);
    }

    let mut expected: Vec<u8> = Vec::new();
    expected.resize(HEAD_LEN, b'.');
    expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 32 BYTES >>>>>>\n\n");
    expected.resize(expected.len() + (TAIL_LEN - 32), b'.');
    expected.resize(expected.len() + 16, b'!');
    expected.resize(expected.len() + 16, b'?');

    // Compare the lengths first: a mismatch on the full buffers would dump hundreds of KBs
    // to the terminal.
    let actual = captured.into_bytes();
    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected, actual);
}
60+
61+
/// Each filtered string is counted once, whether it sits inside one chunk or spans two.
#[test]
fn test_abbreviate_filterss_are_detected() {
    let filters = [String::from("foo"), String::from("quux")];
    let mut captured = ProcOutput::new();

    captured.extend(b"Hello foo", &filters);
    // "foo" from the previous chunk must not be counted a second time.
    captured.extend(b"! This is a qu", &filters);
    // "quux" is split between the previous chunk and this one and must still be found.
    captured.extend(b"ux.", &filters);

    let filtered_bytes: usize = filters.iter().map(String::len).sum();
    let placeholder_bytes = FILTERED_PATHS_PLACEHOLDER_LEN * filters.len();
    if let ProcOutput::Full { bytes, filtered_len } = &captured {
        assert_eq!(*filtered_len, bytes.len() + placeholder_bytes - filtered_bytes);
    } else {
        panic!("out should not be abbreviated");
    }

    assert_eq!(b"Hello foo! This is a quux.", &*captured.into_bytes());
}
83+
84+
/// A filtered string longer than the placeholder lets output larger than the limit through.
#[test]
fn test_abbreviate_filters_avoid_abbreviations() {
    let filters = ["a".repeat(64)];

    // Real length is HEAD_LEN + TAIL_LEN + 32, but the 64-byte filter is measured as the
    // placeholder length, bringing the filtered length down to exactly the limit.
    let mut expected: Vec<u8> = Vec::new();
    expected.resize(HEAD_LEN - FILTERED_PATHS_PLACEHOLDER_LEN, b'.');
    expected.extend_from_slice(filters[0].as_bytes());
    expected.resize(expected.len() + TAIL_LEN, b'.');

    let mut captured = ProcOutput::new();
    captured.extend(&expected, &filters);

    // Compare the lengths first: a mismatch on the full buffers would dump hundreds of KBs
    // to the terminal.
    let actual = captured.into_bytes();
    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected, actual);
}
101+
102+
/// Filtered strings still count for their placeholder length, so they can trigger abbreviation.
#[test]
fn test_abbreviate_filters_can_still_cause_abbreviations() {
    let filters = ["a".repeat(64)];

    // Exactly at the limit before the filtered string is appended.
    let mut input: Vec<u8> = Vec::new();
    input.resize(HEAD_LEN + TAIL_LEN, b'.');
    input.extend_from_slice(filters[0].as_bytes());

    let mut expected: Vec<u8> = Vec::new();
    expected.resize(HEAD_LEN, b'.');
    expected.extend_from_slice(b"\n\n<<<<<< SKIPPED 64 BYTES >>>>>>\n\n");
    expected.resize(expected.len() + (TAIL_LEN - 64), b'.');
    expected.resize(expected.len() + 64, b'a');

    let mut captured = ProcOutput::new();
    captured.extend(&input, &filters);

    // Compare the lengths first: a mismatch on the full buffers would dump hundreds of KBs
    // to the terminal.
    let actual = captured.into_bytes();
    assert_eq!(expected.len(), actual.len());
    assert_eq!(expected, actual);
}

src/tools/compiletest/src/runtest.rs

+25-4
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use std::hash::{Hash, Hasher};
2828
use std::io::prelude::*;
2929
use std::io::{self, BufReader};
3030
use std::path::{Path, PathBuf};
31-
use std::process::{Command, ExitStatus, Output, Stdio};
31+
use std::process::{Child, Command, ExitStatus, Output, Stdio};
3232
use std::str;
3333

3434
use glob::glob;
@@ -1745,6 +1745,28 @@ impl<'test> TestCx<'test> {
17451745
dylib
17461746
}
17471747

1748+
fn read2_abbreviated(&self, child: Child) -> Output {
1749+
let mut filter_paths_from_len = Vec::new();
1750+
let mut add_path = |path: &Path| {
1751+
let path = path.display().to_string();
1752+
let windows = path.replace("\\", "\\\\");
1753+
if windows != path {
1754+
filter_paths_from_len.push(windows);
1755+
}
1756+
filter_paths_from_len.push(path);
1757+
};
1758+
1759+
// List of paths that will not be measured when determining whether the output is larger
1760+
// than the output truncation threshold.
1761+
//
1762+
// Note: avoid adding a subdirectory of an already filtered directory here, otherwise the
1763+
// same slice of text will be double counted and the truncation might not happen.
1764+
add_path(&self.config.src_base);
1765+
add_path(&self.config.build_base);
1766+
1767+
read2_abbreviated(child, &filter_paths_from_len).expect("failed to read output")
1768+
}
1769+
17481770
fn compose_and_run(
17491771
&self,
17501772
mut command: Command,
@@ -1779,8 +1801,7 @@ impl<'test> TestCx<'test> {
17791801
child.stdin.as_mut().unwrap().write_all(input.as_bytes()).unwrap();
17801802
}
17811803

1782-
let Output { status, stdout, stderr } =
1783-
read2_abbreviated(child).expect("failed to read output");
1804+
let Output { status, stdout, stderr } = self.read2_abbreviated(child);
17841805

17851806
let result = ProcRes {
17861807
status,
@@ -2969,7 +2990,7 @@ impl<'test> TestCx<'test> {
29692990
}
29702991
}
29712992

2972-
let output = cmd.spawn().and_then(read2_abbreviated).expect("failed to spawn `make`");
2993+
let output = self.read2_abbreviated(cmd.spawn().expect("failed to spawn `make`"));
29732994
if !output.status.success() {
29742995
let res = ProcRes {
29752996
status: output.status,

0 commit comments

Comments
 (0)