Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve post-merge workflow #138454

Merged
merged 8 commits into from
Mar 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/post-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,13 @@ jobs:

cd src/ci/citool

echo "Post-merge analysis result" > output.log
printf "*This is an experimental post-merge analysis report. You can ignore it.*\n\n" > output.log
printf "<details>\n<summary>Post-merge report</summary>\n\n" >> output.log

cargo run --release post-merge-report ${PARENT_COMMIT} ${{ github.sha }} >> output.log

printf "</details>\n" >> output.log
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
printf "</details>\n" >> output.log
printf "\n</details>\n" >> output.log

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the additional newline needed? The program output should end with a newline.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to always an empty line between the Markdown and HTML, but you already did it after the <summary> so maybe it's good enough?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GitHub seems to accept it fine. So feel free to ignore the suggestion.


cat output.log

gh pr comment ${HEAD_PR} -F output.log
215 changes: 138 additions & 77 deletions src/ci/citool/src/merge_report.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::cmp::Reverse;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;

use anyhow::Context;
use build_helper::metrics::{JsonRoot, TestOutcome};
use build_helper::metrics::{JsonRoot, TestOutcome, TestSuiteMetadata};

use crate::jobs::JobDatabase;
use crate::metrics::get_test_suites;
Expand All @@ -13,8 +13,10 @@ type JobName = String;
/// Computes a post merge CI analysis report between the `parent` and `current` commits.
pub fn post_merge_report(job_db: JobDatabase, parent: Sha, current: Sha) -> anyhow::Result<()> {
let jobs = download_all_metrics(&job_db, &parent, &current)?;
let diffs = aggregate_test_diffs(&jobs)?;
report_test_changes(diffs);
let aggregated_test_diffs = aggregate_test_diffs(&jobs)?;

println!("Comparing {parent} (base) -> {current} (this PR)\n");
report_test_diffs(aggregated_test_diffs);

Ok(())
}
Expand Down Expand Up @@ -54,7 +56,16 @@ Maybe it was newly added?"#,
Ok(jobs)
}

/// Downloads job metrics of the given job for the given commit.
/// Caches the result on the local disk.
fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
let cache_path = PathBuf::from(".citool-cache").join(sha).join(job_name).join("metrics.json");
if let Some(cache_entry) =
std::fs::read_to_string(&cache_path).ok().and_then(|data| serde_json::from_str(&data).ok())
{
return Ok(cache_entry);
}

let url = get_metrics_url(job_name, sha);
let mut response = ureq::get(&url).call()?;
if !response.status().is_success() {
Expand All @@ -68,6 +79,13 @@ fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
.body_mut()
.read_json()
.with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;

// Ignore errors if cache cannot be created
if std::fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
if let Ok(serialized) = serde_json::to_string(&data) {
let _ = std::fs::write(&cache_path, &serialized);
}
}
Ok(data)
}

Expand All @@ -76,81 +94,80 @@ fn get_metrics_url(job_name: &str, sha: &str) -> String {
format!("https://ci-artifacts.rust-lang.org/rustc-builds{suffix}/{sha}/metrics-{job_name}.json")
}

/// Represents a difference in the outcome of tests between a base and a current commit.
/// Maps test diffs to jobs that contained them.
#[derive(Debug)]
struct AggregatedTestDiffs {
diffs: HashMap<TestDiff, Vec<JobName>>,
}

fn aggregate_test_diffs(
jobs: &HashMap<JobName, JobMetrics>,
) -> anyhow::Result<Vec<AggregatedTestDiffs>> {
let mut job_diffs = vec![];
) -> anyhow::Result<AggregatedTestDiffs> {
let mut diffs: HashMap<TestDiff, Vec<JobName>> = HashMap::new();

// Aggregate test suites
for (name, metrics) in jobs {
if let Some(parent) = &metrics.parent {
let tests_parent = aggregate_tests(parent);
let tests_current = aggregate_tests(&metrics.current);
let test_diffs = calculate_test_diffs(tests_parent, tests_current);
if !test_diffs.is_empty() {
job_diffs.push((name.clone(), test_diffs));
for diff in calculate_test_diffs(tests_parent, tests_current) {
diffs.entry(diff).or_default().push(name.to_string());
}
}
}

// Aggregate jobs with the same diff, as often the same diff will appear in many jobs
let job_diffs: HashMap<Vec<(Test, TestOutcomeDiff)>, Vec<String>> =
job_diffs.into_iter().fold(HashMap::new(), |mut acc, (job, diffs)| {
acc.entry(diffs).or_default().push(job);
acc
});
Ok(AggregatedTestDiffs { diffs })
}

Ok(job_diffs
.into_iter()
.map(|(test_diffs, jobs)| AggregatedTestDiffs { jobs, test_diffs })
.collect())
#[derive(Eq, PartialEq, Hash, Debug)]
enum TestOutcomeDiff {
ChangeOutcome { before: TestOutcome, after: TestOutcome },
Missing { before: TestOutcome },
Added(TestOutcome),
}

fn calculate_test_diffs(
reference: TestSuiteData,
current: TestSuiteData,
) -> Vec<(Test, TestOutcomeDiff)> {
let mut diffs = vec![];
#[derive(Eq, PartialEq, Hash, Debug)]
struct TestDiff {
test: Test,
diff: TestOutcomeDiff,
}

fn calculate_test_diffs(parent: TestSuiteData, current: TestSuiteData) -> HashSet<TestDiff> {
let mut diffs = HashSet::new();
for (test, outcome) in &current.tests {
match reference.tests.get(test) {
match parent.tests.get(test) {
Some(before) => {
if before != outcome {
diffs.push((
test.clone(),
TestOutcomeDiff::ChangeOutcome {
diffs.insert(TestDiff {
test: test.clone(),
diff: TestOutcomeDiff::ChangeOutcome {
before: before.clone(),
after: outcome.clone(),
},
));
});
}
}
None => diffs.push((test.clone(), TestOutcomeDiff::Added(outcome.clone()))),
None => {
diffs.insert(TestDiff {
test: test.clone(),
diff: TestOutcomeDiff::Added(outcome.clone()),
});
}
}
}
for (test, outcome) in &reference.tests {
for (test, outcome) in &parent.tests {
if !current.tests.contains_key(test) {
diffs.push((test.clone(), TestOutcomeDiff::Missing { before: outcome.clone() }));
diffs.insert(TestDiff {
test: test.clone(),
diff: TestOutcomeDiff::Missing { before: outcome.clone() },
});
}
}

diffs
}

/// Represents a difference in the outcome of tests between a base and a current commit.
#[derive(Debug)]
struct AggregatedTestDiffs {
/// All jobs that had the exact same test diffs.
jobs: Vec<String>,
test_diffs: Vec<(Test, TestOutcomeDiff)>,
}

#[derive(Eq, PartialEq, Hash, Debug)]
enum TestOutcomeDiff {
ChangeOutcome { before: TestOutcome, after: TestOutcome },
Missing { before: TestOutcome },
Added(TestOutcome),
}

/// Aggregates test suite executions from all bootstrap invocations in a given CI job.
#[derive(Default)]
struct TestSuiteData {
Expand All @@ -160,6 +177,7 @@ struct TestSuiteData {
#[derive(Hash, PartialEq, Eq, Debug, Clone)]
struct Test {
name: String,
is_doctest: bool,
}

/// Extracts all tests from the passed metrics and map them to their outcomes.
Expand All @@ -168,7 +186,10 @@ fn aggregate_tests(metrics: &JsonRoot) -> TestSuiteData {
let test_suites = get_test_suites(&metrics);
for suite in test_suites {
for test in &suite.tests {
let test_entry = Test { name: normalize_test_name(&test.name) };
// Poor man's detection of doctests based on the "(line XYZ)" suffix
let is_doctest = matches!(suite.metadata, TestSuiteMetadata::CargoPackage { .. })
&& test.name.contains("(line");
let test_entry = Test { name: normalize_test_name(&test.name), is_doctest };
tests.insert(test_entry, test.outcome.clone());
}
}
Expand All @@ -181,16 +202,13 @@ fn normalize_test_name(name: &str) -> String {
}

/// Prints test changes in Markdown format to stdout.
fn report_test_changes(mut diffs: Vec<AggregatedTestDiffs>) {
fn report_test_diffs(diff: AggregatedTestDiffs) {
println!("## Test differences");
if diffs.is_empty() {
if diff.diffs.is_empty() {
println!("No test diffs found");
return;
}

// Sort diffs in decreasing order by diff count
diffs.sort_by_key(|entry| Reverse(entry.test_diffs.len()));

fn format_outcome(outcome: &TestOutcome) -> String {
match outcome {
TestOutcome::Passed => "pass".to_string(),
Expand Down Expand Up @@ -219,36 +237,79 @@ fn report_test_changes(mut diffs: Vec<AggregatedTestDiffs>) {
}
}

let max_diff_count = 10;
let max_job_count = 5;
let max_test_count = 10;

for diff in diffs.iter().take(max_diff_count) {
let mut jobs = diff.jobs.clone();
jobs.sort();

let jobs = jobs.iter().take(max_job_count).map(|j| format!("`{j}`")).collect::<Vec<_>>();
fn format_job_group(group: u64) -> String {
format!("**J{group}**")
}

let extra_jobs = diff.jobs.len().saturating_sub(max_job_count);
let suffix = if extra_jobs > 0 {
format!(" (and {extra_jobs} {})", pluralize("other", extra_jobs))
} else {
String::new()
// It would be quite noisy to repeat the jobs that contained the test changes after/next to
// every test diff. At the same time, grouping the test diffs by
// [unique set of jobs that contained them] also doesn't work well, because the test diffs
// would have to be duplicated several times.
// Instead, we create a set of unique job groups, and then print a job group after each test.
// We then print the job groups at the end, as a sort of index.
let mut grouped_diffs: Vec<(&TestDiff, u64)> = vec![];
let mut job_list_to_group: HashMap<&[JobName], u64> = HashMap::new();
let mut job_index: Vec<&[JobName]> = vec![];

let original_diff_count = diff.diffs.len();
let diffs = diff
.diffs
.into_iter()
.filter(|(diff, _)| !diff.test.is_doctest)
.map(|(diff, mut jobs)| {
jobs.sort();
(diff, jobs)
})
.collect::<Vec<_>>();
let doctest_count = original_diff_count.saturating_sub(diffs.len());

let max_diff_count = 100;
for (diff, jobs) in diffs.iter().take(max_diff_count) {
let jobs = &*jobs;
let job_group = match job_list_to_group.get(jobs.as_slice()) {
Some(id) => *id,
None => {
let id = job_index.len() as u64;
job_index.push(jobs);
job_list_to_group.insert(jobs, id);
id
}
};
println!("- {}{suffix}", jobs.join(","));
grouped_diffs.push((diff, job_group));
}

let extra_tests = diff.test_diffs.len().saturating_sub(max_test_count);
for (test, outcome_diff) in diff.test_diffs.iter().take(max_test_count) {
println!(" - {}: {}", test.name, format_diff(&outcome_diff));
}
if extra_tests > 0 {
println!(" - (and {extra_tests} additional {})", pluralize("tests", extra_tests));
}
// Sort diffs by job group and test name
grouped_diffs.sort_by(|(d1, g1), (d2, g2)| g1.cmp(&g2).then(d1.test.name.cmp(&d2.test.name)));

for (diff, job_group) in grouped_diffs {
println!(
"- `{}`: {} ({})",
diff.test.name,
format_diff(&diff.diff),
format_job_group(job_group)
);
}

let extra_diffs = diffs.len().saturating_sub(max_diff_count);
if extra_diffs > 0 {
println!("\n(and {extra_diffs} additional {})", pluralize("diff", extra_diffs));
println!("\n(and {extra_diffs} additional {})", pluralize("test diff", extra_diffs));
}

if doctest_count > 0 {
println!(
"\nAdditionally, {doctest_count} doctest {} were found. These are ignored, as they are noisy.",
pluralize("diff", doctest_count)
);
}

// Now print the job group index
println!("\n**Job group index**\n");
for (group, jobs) in job_index.into_iter().enumerate() {
println!(
"- {}: {}",
format_job_group(group as u64),
jobs.iter().map(|j| format!("`{j}`")).collect::<Vec<_>>().join(", ")
);
}
}

Expand Down
Loading