Skip to content

Commit a8cfc83

Browse files
committed
Auto merge of rust-lang#123246 - Kobzol:tarball-reproducible, r=Mark-Simulacrum
Make source tarball generation more reproducible

This PR performs several changes to source tarball generation (`x dist rustc-src`) in order to make it more reproducible (in light of the recent "xz backdoor"...). I want to follow up on it with making a separate CI workflow for generating the tarball.

After this PR, running this locally produces identical checksums:

```bash
$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz
$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz
```

r? `@Mark-Simulacrum`
2 parents 395f780 + 877e8d4 commit a8cfc83

File tree

3 files changed

+58
-29
lines changed

3 files changed

+58
-29
lines changed

src/bootstrap/src/core/build_steps/dist.rs

+17-3
Original file line numberDiff line numberDiff line change
@@ -995,9 +995,9 @@ impl Step for PlainSourceTarball {
995995
if builder.rust_info().is_managed_git_subrepository()
996996
|| builder.rust_info().is_from_tarball()
997997
{
998-
if builder.rust_info().is_managed_git_subrepository() {
999-
// Ensure we have the submodules checked out.
1000-
builder.update_submodule(Path::new("src/tools/cargo"));
998+
// Ensure we have all submodules from src and other directories checked out.
999+
for submodule in builder.get_all_submodules() {
1000+
builder.update_submodule(Path::new(submodule));
10011001
}
10021002

10031003
// Vendor all Cargo dependencies
@@ -1028,6 +1028,20 @@ impl Step for PlainSourceTarball {
10281028
builder.create(&cargo_config_dir.join("config.toml"), &config);
10291029
}
10301030

1031+
// Delete extraneous directories
1032+
// FIXME: if we're managed by git, we should probably instead ask git if the given path
1033+
// is managed by it?
1034+
for entry in walkdir::WalkDir::new(tarball.image_dir())
1035+
.follow_links(true)
1036+
.into_iter()
1037+
.filter_map(|e| e.ok())
1038+
{
1039+
if entry.path().is_dir() && entry.path().file_name() == Some(OsStr::new("__pycache__"))
1040+
{
1041+
t!(fs::remove_dir_all(entry.path()));
1042+
}
1043+
}
1044+
10311045
tarball.bare()
10321046
}
10331047
}

src/bootstrap/src/core/builder.rs

+32-23
Original file line numberDiff line numberDiff line change
@@ -554,29 +554,7 @@ impl<'a> ShouldRun<'a> {
554554
///
555555
/// [`path`]: ShouldRun::path
556556
pub fn paths(mut self, paths: &[&str]) -> Self {
557-
static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();
558-
559-
let init_submodules_paths = |src: &PathBuf| {
560-
let file = File::open(src.join(".gitmodules")).unwrap();
561-
562-
let mut submodules_paths = vec![];
563-
for line in BufReader::new(file).lines() {
564-
if let Ok(line) = line {
565-
let line = line.trim();
566-
567-
if line.starts_with("path") {
568-
let actual_path =
569-
line.split(' ').last().expect("Couldn't get value of path");
570-
submodules_paths.push(actual_path.to_owned());
571-
}
572-
}
573-
}
574-
575-
submodules_paths
576-
};
577-
578-
let submodules_paths =
579-
SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.builder.src));
557+
let submodules_paths = self.builder.get_all_submodules();
580558

581559
self.paths.insert(PathSet::Set(
582560
paths
@@ -2151,6 +2129,37 @@ impl<'a> Builder<'a> {
21512129
out
21522130
}
21532131

2132+
/// Return paths of all submodules managed by git.
2133+
/// If the current checkout is not managed by git, returns an empty slice.
2134+
pub fn get_all_submodules(&self) -> &[String] {
2135+
if !self.rust_info().is_managed_git_subrepository() {
2136+
return &[];
2137+
}
2138+
2139+
static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();
2140+
2141+
let init_submodules_paths = |src: &PathBuf| {
2142+
let file = File::open(src.join(".gitmodules")).unwrap();
2143+
2144+
let mut submodules_paths = vec![];
2145+
for line in BufReader::new(file).lines() {
2146+
if let Ok(line) = line {
2147+
let line = line.trim();
2148+
2149+
if line.starts_with("path") {
2150+
let actual_path =
2151+
line.split(' ').last().expect("Couldn't get value of path");
2152+
submodules_paths.push(actual_path.to_owned());
2153+
}
2154+
}
2155+
}
2156+
2157+
submodules_paths
2158+
};
2159+
2160+
&SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.src))
2161+
}
2162+
21542163
/// Ensure that a given step is built *only if it's supposed to be built by default*, returning
21552164
/// its output. This will cache the step, so it's safe (and good!) to call this as often as
21562165
/// needed to ensure that all dependencies are build.

src/tools/rust-installer/src/tarballer.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use anyhow::{bail, Context, Result};
22
use std::fs::{read_link, symlink_metadata};
33
use std::io::{BufWriter, Write};
44
use std::path::Path;
5-
use tar::{Builder, Header};
5+
use tar::{Builder, Header, HeaderMode};
66
use walkdir::WalkDir;
77

88
use crate::{
@@ -53,14 +53,19 @@ impl Tarballer {
5353
// Sort files by their suffix, to group files with the same name from
5454
// different locations (likely identical) and files with the same
5555
// extension (likely containing similar data).
56-
let (dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
56+
// Sorting of file and directory paths also helps with the reproducibility
57+
// of the resulting archive.
58+
let (mut dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
5759
.context("failed to collect file paths")?;
60+
dirs.sort();
5861
files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));
5962

6063
// Write the tar into both encoded files. We write all directories
6164
// first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
6265
let buf = BufWriter::with_capacity(1024 * 1024, encoder);
6366
let mut builder = Builder::new(buf);
67+
// Make uid, gid and mtime deterministic to improve reproducibility
68+
builder.mode(HeaderMode::Deterministic);
6469

6570
let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
6671
pool.install(move || {
@@ -91,7 +96,8 @@ impl Tarballer {
9196
fn append_path<W: Write>(builder: &mut Builder<W>, src: &Path, path: &String) -> Result<()> {
9297
let stat = symlink_metadata(src)?;
9398
let mut header = Header::new_gnu();
94-
header.set_metadata(&stat);
99+
header.set_metadata_in_mode(&stat, HeaderMode::Deterministic);
100+
95101
if stat.file_type().is_symlink() {
96102
let link = read_link(src)?;
97103
builder.append_link(&mut header, path, &link)?;

0 commit comments

Comments
 (0)