Skip to content

Commit 2d99c88

Browse files
authored
Rollup merge of rust-lang#138454 - Kobzol:post-merge-workflow-fixes, r=jieyouxu
Improve post-merge workflow Contains various fixes for the post-merge workflow implemented in rust-lang#138013, which were suggested on Zulip. This PR changes the grouping of test diffs and ignores doctests, as they are too noisy. I'll post an example output (before/after this PR) in comments below. r? ``@jieyouxu``
2 parents 93647cf + 6ef465b commit 2d99c88

File tree

2 files changed

+144
-78
lines changed

2 files changed

+144
-78
lines changed

.github/workflows/post-merge.yml

+6-1
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,13 @@ jobs:
3535
3636
cd src/ci/citool
3737
38-
echo "Post-merge analysis result" > output.log
38+
printf "*This is an experimental post-merge analysis report. You can ignore it.*\n\n" > output.log
39+
printf "<details>\n<summary>Post-merge report</summary>\n\n" >> output.log
40+
3941
cargo run --release post-merge-report ${PARENT_COMMIT} ${{ github.sha }} >> output.log
42+
43+
printf "</details>\n" >> output.log
44+
4045
cat output.log
4146
4247
gh pr comment ${HEAD_PR} -F output.log

src/ci/citool/src/merge_report.rs

+138-77
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
use std::cmp::Reverse;
2-
use std::collections::HashMap;
1+
use std::collections::{HashMap, HashSet};
2+
use std::path::PathBuf;
33

44
use anyhow::Context;
5-
use build_helper::metrics::{JsonRoot, TestOutcome};
5+
use build_helper::metrics::{JsonRoot, TestOutcome, TestSuiteMetadata};
66

77
use crate::jobs::JobDatabase;
88
use crate::metrics::get_test_suites;
@@ -13,8 +13,10 @@ type JobName = String;
1313
/// Computes a post merge CI analysis report between the `parent` and `current` commits.
1414
pub fn post_merge_report(job_db: JobDatabase, parent: Sha, current: Sha) -> anyhow::Result<()> {
1515
let jobs = download_all_metrics(&job_db, &parent, &current)?;
16-
let diffs = aggregate_test_diffs(&jobs)?;
17-
report_test_changes(diffs);
16+
let aggregated_test_diffs = aggregate_test_diffs(&jobs)?;
17+
18+
println!("Comparing {parent} (base) -> {current} (this PR)\n");
19+
report_test_diffs(aggregated_test_diffs);
1820

1921
Ok(())
2022
}
@@ -54,7 +56,16 @@ Maybe it was newly added?"#,
5456
Ok(jobs)
5557
}
5658

59+
/// Downloads job metrics of the given job for the given commit.
60+
/// Caches the result on the local disk.
5761
fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
62+
let cache_path = PathBuf::from(".citool-cache").join(sha).join(job_name).join("metrics.json");
63+
if let Some(cache_entry) =
64+
std::fs::read_to_string(&cache_path).ok().and_then(|data| serde_json::from_str(&data).ok())
65+
{
66+
return Ok(cache_entry);
67+
}
68+
5869
let url = get_metrics_url(job_name, sha);
5970
let mut response = ureq::get(&url).call()?;
6071
if !response.status().is_success() {
@@ -68,6 +79,13 @@ fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
6879
.body_mut()
6980
.read_json()
7081
.with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;
82+
83+
// Ignore errors if cache cannot be created
84+
if std::fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
85+
if let Ok(serialized) = serde_json::to_string(&data) {
86+
let _ = std::fs::write(&cache_path, &serialized);
87+
}
88+
}
7189
Ok(data)
7290
}
7391

@@ -76,81 +94,80 @@ fn get_metrics_url(job_name: &str, sha: &str) -> String {
7694
format!("https://ci-artifacts.rust-lang.org/rustc-builds{suffix}/{sha}/metrics-{job_name}.json")
7795
}
7896

97+
/// Represents a difference in the outcome of tests between a base and a current commit.
98+
/// Maps test diffs to jobs that contained them.
99+
#[derive(Debug)]
100+
struct AggregatedTestDiffs {
101+
diffs: HashMap<TestDiff, Vec<JobName>>,
102+
}
103+
79104
fn aggregate_test_diffs(
80105
jobs: &HashMap<JobName, JobMetrics>,
81-
) -> anyhow::Result<Vec<AggregatedTestDiffs>> {
82-
let mut job_diffs = vec![];
106+
) -> anyhow::Result<AggregatedTestDiffs> {
107+
let mut diffs: HashMap<TestDiff, Vec<JobName>> = HashMap::new();
83108

84109
// Aggregate test suites
85110
for (name, metrics) in jobs {
86111
if let Some(parent) = &metrics.parent {
87112
let tests_parent = aggregate_tests(parent);
88113
let tests_current = aggregate_tests(&metrics.current);
89-
let test_diffs = calculate_test_diffs(tests_parent, tests_current);
90-
if !test_diffs.is_empty() {
91-
job_diffs.push((name.clone(), test_diffs));
114+
for diff in calculate_test_diffs(tests_parent, tests_current) {
115+
diffs.entry(diff).or_default().push(name.to_string());
92116
}
93117
}
94118
}
95119

96-
// Aggregate jobs with the same diff, as often the same diff will appear in many jobs
97-
let job_diffs: HashMap<Vec<(Test, TestOutcomeDiff)>, Vec<String>> =
98-
job_diffs.into_iter().fold(HashMap::new(), |mut acc, (job, diffs)| {
99-
acc.entry(diffs).or_default().push(job);
100-
acc
101-
});
120+
Ok(AggregatedTestDiffs { diffs })
121+
}
102122

103-
Ok(job_diffs
104-
.into_iter()
105-
.map(|(test_diffs, jobs)| AggregatedTestDiffs { jobs, test_diffs })
106-
.collect())
123+
#[derive(Eq, PartialEq, Hash, Debug)]
124+
enum TestOutcomeDiff {
125+
ChangeOutcome { before: TestOutcome, after: TestOutcome },
126+
Missing { before: TestOutcome },
127+
Added(TestOutcome),
107128
}
108129

109-
fn calculate_test_diffs(
110-
reference: TestSuiteData,
111-
current: TestSuiteData,
112-
) -> Vec<(Test, TestOutcomeDiff)> {
113-
let mut diffs = vec![];
130+
#[derive(Eq, PartialEq, Hash, Debug)]
131+
struct TestDiff {
132+
test: Test,
133+
diff: TestOutcomeDiff,
134+
}
135+
136+
fn calculate_test_diffs(parent: TestSuiteData, current: TestSuiteData) -> HashSet<TestDiff> {
137+
let mut diffs = HashSet::new();
114138
for (test, outcome) in &current.tests {
115-
match reference.tests.get(test) {
139+
match parent.tests.get(test) {
116140
Some(before) => {
117141
if before != outcome {
118-
diffs.push((
119-
test.clone(),
120-
TestOutcomeDiff::ChangeOutcome {
142+
diffs.insert(TestDiff {
143+
test: test.clone(),
144+
diff: TestOutcomeDiff::ChangeOutcome {
121145
before: before.clone(),
122146
after: outcome.clone(),
123147
},
124-
));
148+
});
125149
}
126150
}
127-
None => diffs.push((test.clone(), TestOutcomeDiff::Added(outcome.clone()))),
151+
None => {
152+
diffs.insert(TestDiff {
153+
test: test.clone(),
154+
diff: TestOutcomeDiff::Added(outcome.clone()),
155+
});
156+
}
128157
}
129158
}
130-
for (test, outcome) in &reference.tests {
159+
for (test, outcome) in &parent.tests {
131160
if !current.tests.contains_key(test) {
132-
diffs.push((test.clone(), TestOutcomeDiff::Missing { before: outcome.clone() }));
161+
diffs.insert(TestDiff {
162+
test: test.clone(),
163+
diff: TestOutcomeDiff::Missing { before: outcome.clone() },
164+
});
133165
}
134166
}
135167

136168
diffs
137169
}
138170

139-
/// Represents a difference in the outcome of tests between a base and a current commit.
140-
#[derive(Debug)]
141-
struct AggregatedTestDiffs {
142-
/// All jobs that had the exact same test diffs.
143-
jobs: Vec<String>,
144-
test_diffs: Vec<(Test, TestOutcomeDiff)>,
145-
}
146-
147-
#[derive(Eq, PartialEq, Hash, Debug)]
148-
enum TestOutcomeDiff {
149-
ChangeOutcome { before: TestOutcome, after: TestOutcome },
150-
Missing { before: TestOutcome },
151-
Added(TestOutcome),
152-
}
153-
154171
/// Aggregates test suite executions from all bootstrap invocations in a given CI job.
155172
#[derive(Default)]
156173
struct TestSuiteData {
@@ -160,6 +177,7 @@ struct TestSuiteData {
160177
#[derive(Hash, PartialEq, Eq, Debug, Clone)]
161178
struct Test {
162179
name: String,
180+
is_doctest: bool,
163181
}
164182

165183
/// Extracts all tests from the passed metrics and map them to their outcomes.
@@ -168,7 +186,10 @@ fn aggregate_tests(metrics: &JsonRoot) -> TestSuiteData {
168186
let test_suites = get_test_suites(&metrics);
169187
for suite in test_suites {
170188
for test in &suite.tests {
171-
let test_entry = Test { name: normalize_test_name(&test.name) };
189+
// Poor man's detection of doctests based on the "(line XYZ)" suffix
190+
let is_doctest = matches!(suite.metadata, TestSuiteMetadata::CargoPackage { .. })
191+
&& test.name.contains("(line");
192+
let test_entry = Test { name: normalize_test_name(&test.name), is_doctest };
172193
tests.insert(test_entry, test.outcome.clone());
173194
}
174195
}
@@ -181,16 +202,13 @@ fn normalize_test_name(name: &str) -> String {
181202
}
182203

183204
/// Prints test changes in Markdown format to stdout.
184-
fn report_test_changes(mut diffs: Vec<AggregatedTestDiffs>) {
205+
fn report_test_diffs(diff: AggregatedTestDiffs) {
185206
println!("## Test differences");
186-
if diffs.is_empty() {
207+
if diff.diffs.is_empty() {
187208
println!("No test diffs found");
188209
return;
189210
}
190211

191-
// Sort diffs in decreasing order by diff count
192-
diffs.sort_by_key(|entry| Reverse(entry.test_diffs.len()));
193-
194212
fn format_outcome(outcome: &TestOutcome) -> String {
195213
match outcome {
196214
TestOutcome::Passed => "pass".to_string(),
@@ -219,36 +237,79 @@ fn report_test_changes(mut diffs: Vec<AggregatedTestDiffs>) {
219237
}
220238
}
221239

222-
let max_diff_count = 10;
223-
let max_job_count = 5;
224-
let max_test_count = 10;
225-
226-
for diff in diffs.iter().take(max_diff_count) {
227-
let mut jobs = diff.jobs.clone();
228-
jobs.sort();
229-
230-
let jobs = jobs.iter().take(max_job_count).map(|j| format!("`{j}`")).collect::<Vec<_>>();
240+
fn format_job_group(group: u64) -> String {
241+
format!("**J{group}**")
242+
}
231243

232-
let extra_jobs = diff.jobs.len().saturating_sub(max_job_count);
233-
let suffix = if extra_jobs > 0 {
234-
format!(" (and {extra_jobs} {})", pluralize("other", extra_jobs))
235-
} else {
236-
String::new()
244+
// It would be quite noisy to repeat the jobs that contained the test changes after/next to
245+
// every test diff. At the same time, grouping the test diffs by
246+
// [unique set of jobs that contained them] also doesn't work well, because the test diffs
247+
// would have to be duplicated several times.
248+
// Instead, we create a set of unique job groups, and then print a job group after each test.
249+
// We then print the job groups at the end, as a sort of index.
250+
let mut grouped_diffs: Vec<(&TestDiff, u64)> = vec![];
251+
let mut job_list_to_group: HashMap<&[JobName], u64> = HashMap::new();
252+
let mut job_index: Vec<&[JobName]> = vec![];
253+
254+
let original_diff_count = diff.diffs.len();
255+
let diffs = diff
256+
.diffs
257+
.into_iter()
258+
.filter(|(diff, _)| !diff.test.is_doctest)
259+
.map(|(diff, mut jobs)| {
260+
jobs.sort();
261+
(diff, jobs)
262+
})
263+
.collect::<Vec<_>>();
264+
let doctest_count = original_diff_count.saturating_sub(diffs.len());
265+
266+
let max_diff_count = 100;
267+
for (diff, jobs) in diffs.iter().take(max_diff_count) {
268+
let jobs = &*jobs;
269+
let job_group = match job_list_to_group.get(jobs.as_slice()) {
270+
Some(id) => *id,
271+
None => {
272+
let id = job_index.len() as u64;
273+
job_index.push(jobs);
274+
job_list_to_group.insert(jobs, id);
275+
id
276+
}
237277
};
238-
println!("- {}{suffix}", jobs.join(","));
278+
grouped_diffs.push((diff, job_group));
279+
}
239280

240-
let extra_tests = diff.test_diffs.len().saturating_sub(max_test_count);
241-
for (test, outcome_diff) in diff.test_diffs.iter().take(max_test_count) {
242-
println!(" - {}: {}", test.name, format_diff(&outcome_diff));
243-
}
244-
if extra_tests > 0 {
245-
println!(" - (and {extra_tests} additional {})", pluralize("tests", extra_tests));
246-
}
281+
// Sort diffs by job group and test name
282+
grouped_diffs.sort_by(|(d1, g1), (d2, g2)| g1.cmp(&g2).then(d1.test.name.cmp(&d2.test.name)));
283+
284+
for (diff, job_group) in grouped_diffs {
285+
println!(
286+
"- `{}`: {} ({})",
287+
diff.test.name,
288+
format_diff(&diff.diff),
289+
format_job_group(job_group)
290+
);
247291
}
248292

249293
let extra_diffs = diffs.len().saturating_sub(max_diff_count);
250294
if extra_diffs > 0 {
251-
println!("\n(and {extra_diffs} additional {})", pluralize("diff", extra_diffs));
295+
println!("\n(and {extra_diffs} additional {})", pluralize("test diff", extra_diffs));
296+
}
297+
298+
if doctest_count > 0 {
299+
println!(
300+
"\nAdditionally, {doctest_count} doctest {} were found. These are ignored, as they are noisy.",
301+
pluralize("diff", doctest_count)
302+
);
303+
}
304+
305+
// Now print the job group index
306+
println!("\n**Job group index**\n");
307+
for (group, jobs) in job_index.into_iter().enumerate() {
308+
println!(
309+
"- {}: {}",
310+
format_job_group(group as u64),
311+
jobs.iter().map(|j| format!("`{j}`")).collect::<Vec<_>>().join(", ")
312+
);
252313
}
253314
}
254315

0 commit comments

Comments
 (0)