From bb5a8276be2d3dbc97d0f52e90db15455d542edf Mon Sep 17 00:00:00 2001 From: Jacob Kiesel Date: Sat, 22 Jun 2024 01:27:59 -0600 Subject: [PATCH] add unstable support for outputting file checksums for use in cargo --- Cargo.lock | 26 ++++ .../src/debuginfo/metadata.rs | 1 + compiler/rustc_interface/src/interface.rs | 3 +- compiler/rustc_interface/src/lib.rs | 1 + compiler/rustc_interface/src/passes.rs | 105 +++++++++++-- compiler/rustc_interface/src/tests.rs | 2 + compiler/rustc_metadata/src/rmeta/decoder.rs | 2 + .../src/ich/impls_syntax.rs | 2 + compiler/rustc_session/src/config.rs | 4 + compiler/rustc_session/src/options.rs | 22 ++- compiler/rustc_span/Cargo.toml | 1 + compiler/rustc_span/src/lib.rs | 146 +++++++++++++++++- compiler/rustc_span/src/source_map.rs | 16 +- compiler/rustc_span/src/source_map/tests.rs | 2 + compiler/rustc_span/src/tests.rs | 10 +- src/tools/tidy/src/deps.rs | 6 + 16 files changed, 321 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c1071ccbc226..6348046751a8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -199,6 +199,12 @@ dependencies = [ "object 0.36.4", ] +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + [[package]] name = "arrayvec" version = "0.7.6" @@ -262,6 +268,19 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "blake3" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d08263faac5cde2a4d52b513dadb80846023aade56fcd8fc99ba73ba8050e92" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -719,6 +738,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -4374,6 +4399,7 @@ dependencies = [ name = "rustc_span" version = "0.0.0" dependencies = [ + "blake3", "derive-where", "indexmap", "itoa", diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index 964b83c0fa07a..b7a6f80956dd3 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -630,6 +630,7 @@ pub(crate) fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFi rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5, rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1, rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256, + rustc_span::SourceFileHashAlgorithm::Blake3 => llvm::ChecksumKind::None, }; let hash_value = hex_encode(source_file.src_hash.hash_bytes()); diff --git a/compiler/rustc_interface/src/interface.rs b/compiler/rustc_interface/src/interface.rs index bd38b3c109a41..3920d3077d361 100644 --- a/compiler/rustc_interface/src/interface.rs +++ b/compiler/rustc_interface/src/interface.rs @@ -389,12 +389,13 @@ pub fn run_compiler(config: Config, f: impl FnOnce(&Compiler) -> R + Se let file_loader = config.file_loader.unwrap_or_else(|| Box::new(RealFileLoader)); let path_mapping = 
config.opts.file_path_mapping(); let hash_kind = config.opts.unstable_opts.src_hash_algorithm(&target); + let checksum_hash_kind = config.opts.unstable_opts.checksum_hash_algorithm(); util::run_in_thread_pool_with_globals( &early_dcx, config.opts.edition, config.opts.unstable_opts.threads, - SourceMapInputs { file_loader, path_mapping, hash_kind }, + SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind }, |current_gcx| { // The previous `early_dcx` can't be reused here because it doesn't // impl `Send`. Creating a new one is fine. diff --git a/compiler/rustc_interface/src/lib.rs b/compiler/rustc_interface/src/lib.rs index b81a740270163..1c4dda2a4367e 100644 --- a/compiler/rustc_interface/src/lib.rs +++ b/compiler/rustc_interface/src/lib.rs @@ -1,6 +1,7 @@ // tidy-alphabetical-start #![feature(decl_macro)] #![feature(file_buffered)] +#![feature(iter_intersperse)] #![feature(let_chains)] #![feature(try_blocks)] #![warn(unreachable_pub)] diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs index 84b320975bbab..b9b65755db5d2 100644 --- a/compiler/rustc_interface/src/passes.rs +++ b/compiler/rustc_interface/src/passes.rs @@ -32,8 +32,8 @@ use rustc_session::cstore::Untracked; use rustc_session::output::{collect_crate_types, filename_for_input, find_crate_name}; use rustc_session::search_paths::PathKind; use rustc_session::{Limit, Session}; -use rustc_span::FileName; use rustc_span::symbol::{Symbol, sym}; +use rustc_span::{FileName, SourceFileHash, SourceFileHashAlgorithm}; use rustc_target::spec::PanicStrategy; use rustc_trait_selection::traits; use tracing::{info, instrument}; @@ -417,15 +417,23 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P let result: io::Result<()> = try { // Build a list of files used to compile the output and // write Makefile-compatible dependency rules - let mut files: Vec<String> = sess + let mut files: Vec<(String, u64, Option<SourceFileHash>)> = sess .source_map() .files() .iter() .filter(|fmap| fmap.is_real_file()) .filter(|fmap| !fmap.is_imported()) - .map(|fmap| escape_dep_filename(&fmap.name.prefer_local().to_string())) + .map(|fmap| { ( escape_dep_filename(&fmap.name.prefer_local().to_string()), fmap.source_len.0 as u64, fmap.checksum_hash, ) }) .collect(); + let checksum_hash_algo = sess.opts.unstable_opts.checksum_hash_algorithm; + // Account for explicitly marked-to-track files // (e.g. accessed in proc macros). let file_depinfo = sess.psess.file_depinfo.borrow(); @@ -437,22 +445,58 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P // The entries will be used to declare dependencies between files in a // Makefile-like output, so the iteration order does not matter. 
+ fn hash_iter_files<P: AsRef<Path>>( + it: impl Iterator<Item = P>, + checksum_hash_algo: Option<SourceFileHashAlgorithm>, + ) -> impl Iterator<Item = (P, u64, Option<SourceFileHash>)> { + it.map(move |path| { + match checksum_hash_algo.and_then(|algo| { + fs::File::open(path.as_ref()) + .and_then(|mut file| { + SourceFileHash::new(algo, &mut file).map(|h| (file, h)) + }) + .and_then(|(file, h)| file.metadata().map(|m| (m.len(), h))) + .map_err(|e| { + tracing::error!( + "failed to compute checksum, omitting it from dep-info {} {e}", + path.as_ref().display() + ) + }) + .ok() + }) { + Some((file_len, checksum)) => (path, file_len, Some(checksum)), + None => (path, 0, None), + } + }) + } + #[allow(rustc::potential_query_instability)] - let extra_tracked_files = - file_depinfo.iter().map(|path_sym| normalize_path(PathBuf::from(path_sym.as_str()))); + let extra_tracked_files = hash_iter_files( + file_depinfo.iter().map(|path_sym| normalize_path(PathBuf::from(path_sym.as_str()))), + checksum_hash_algo, + ); files.extend(extra_tracked_files); // We also need to track used PGO profile files if let Some(ref profile_instr) = sess.opts.cg.profile_use { - files.push(normalize_path(profile_instr.as_path().to_path_buf())); + files.extend(hash_iter_files( + iter::once(normalize_path(profile_instr.as_path().to_path_buf())), + checksum_hash_algo, + )); } if let Some(ref profile_sample) = sess.opts.unstable_opts.profile_sample_use { - files.push(normalize_path(profile_sample.as_path().to_path_buf())); + files.extend(hash_iter_files( + iter::once(normalize_path(profile_sample.as_path().to_path_buf())), + checksum_hash_algo, + )); } // Debugger visualizer files for debugger_visualizer in tcx.debugger_visualizers(LOCAL_CRATE) { - files.push(normalize_path(debugger_visualizer.path.clone().unwrap())); + files.extend(hash_iter_files( + iter::once(normalize_path(debugger_visualizer.path.clone().unwrap())), + checksum_hash_algo, + )); } if sess.binary_dep_depinfo() { @@ -460,33 +504,54 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P if backend.contains('.') { // If the backend name contain a `.`, it is the path to an external dynamic // library. If not, it is not a path. - files.push(backend.to_string()); + files.extend(hash_iter_files( + iter::once(backend.to_string()), + checksum_hash_algo, + )); } } for &cnum in tcx.crates(()) { let source = tcx.used_crate_source(cnum); if let Some((path, _)) = &source.dylib { - files.push(escape_dep_filename(&path.display().to_string())); + files.extend(hash_iter_files( + iter::once(escape_dep_filename(&path.display().to_string())), + checksum_hash_algo, + )); } if let Some((path, _)) = &source.rlib { - files.push(escape_dep_filename(&path.display().to_string())); + files.extend(hash_iter_files( + iter::once(escape_dep_filename(&path.display().to_string())), + checksum_hash_algo, + )); } if let Some((path, _)) = &source.rmeta { - files.push(escape_dep_filename(&path.display().to_string())); + files.extend(hash_iter_files( + iter::once(escape_dep_filename(&path.display().to_string())), + checksum_hash_algo, + )); } } } let write_deps_to_file = |file: &mut dyn Write| -> io::Result<()> { for path in out_filenames { - writeln!(file, "{}: {}\n", path.display(), files.join(" "))?; + writeln!( + file, + "{}: {}\n", + path.display(), + files + .iter() + .map(|(path, _file_len, _checksum_hash_algo)| path.as_str()) + .intersperse(" ") + .collect::<String>() + )?; } // Emit a fake target for each input file to the compilation. This // prevents `make` from spitting out an error if a file is later // deleted. 
For more info see #28735 - for path in files { + for (path, _file_len, _checksum_hash_algo) in &files { writeln!(file, "{path}:")?; } @@ -510,6 +575,18 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P } } + // If the caller requested this information, add special comments about source file checksums. + // These are not necessarily the same checksums as those used in the debug files. + if sess.opts.unstable_opts.checksum_hash_algorithm().is_some() { + for (path, file_len, checksum_hash) in + files.iter().filter_map(|(path, file_len, hash_algo)| { + hash_algo.map(|hash_algo| (path, file_len, hash_algo)) + }) + { + writeln!(file, "# checksum:{checksum_hash} file_len:{file_len} {path}")?; + } + } + Ok(()) }; diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 895897eca6b8b..536ce154cd071 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -44,10 +44,12 @@ where let sysroot = filesearch::materialize_sysroot(sessopts.maybe_sysroot.clone()); let target = rustc_session::config::build_target_config(&early_dcx, &sessopts, &sysroot); let hash_kind = sessopts.unstable_opts.src_hash_algorithm(&target); + let checksum_hash_kind = sessopts.unstable_opts.checksum_hash_algorithm(); let sm_inputs = Some(SourceMapInputs { file_loader: Box::new(RealFileLoader) as _, path_mapping: sessopts.file_path_mapping(), hash_kind, + checksum_hash_kind, }); rustc_span::create_session_globals_then(DEFAULT_EDITION, sm_inputs, || { diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 2157324d5cc3c..f02fd2ab6fe3d 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -1702,6 +1702,7 @@ impl<'a> CrateMetadataRef<'a> { let rustc_span::SourceFile { mut name, src_hash, + checksum_hash, start_pos: original_start_pos, source_len, lines, @@ -1752,6 +1753,7 @@ impl<'a> CrateMetadataRef<'a> { let local_version = sess.source_map().new_imported_source_file( name, src_hash, + checksum_hash, stable_id, source_len.to_u32(), self.cnum, diff --git a/compiler/rustc_query_system/src/ich/impls_syntax.rs b/compiler/rustc_query_system/src/ich/impls_syntax.rs index 8d7a6e4fa9b0c..5e450979273ab 100644 --- a/compiler/rustc_query_system/src/ich/impls_syntax.rs +++ b/compiler/rustc_query_system/src/ich/impls_syntax.rs @@ -68,6 +68,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile { // Do not hash the source as it is not encoded src: _, ref src_hash, + // Already covered by `src_hash` above, so hashing this too would be redundant + checksum_hash: _, external_src: _, start_pos: _, source_len: _, diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index 0d293415aa9c7..d2c03e588ff36 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -1242,6 +1242,10 @@ impl UnstableOptions { } }) } + + pub fn checksum_hash_algorithm(&self) -> Option<SourceFileHashAlgorithm> { + self.checksum_hash_algorithm + } } // The type of entry function, so users can have their own entry functions diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 1de09b8be4d63..a1d85dfad32af 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -418,7 +418,8 @@ mod desc { "one of: `legacy`, `v0` (RFC 2603), or `hashed`"; pub(crate) const parse_opt_symbol_visibility: &str = "one of: `hidden`, `protected`, or `interposable`"; - pub(crate) const parse_src_file_hash: 
&str = "either `md5` or `sha1`"; + pub(crate) const parse_cargo_src_file_hash: &str = "one of `md5`, `sha1`, or `sha256`"; + pub(crate) const parse_src_file_hash: &str = "one of `md5`, `sha1`, or `sha256`"; pub(crate) const parse_relocation_model: &str = "one of supported relocation models (`rustc --print relocation-models`)"; pub(crate) const parse_code_model: &str = @@ -1288,6 +1289,23 @@ mod parse { true } + pub(crate) fn parse_cargo_src_file_hash( + slot: &mut Option, + v: Option<&str>, + ) -> bool { + match v.and_then(|s| SourceFileHashAlgorithm::from_str(s).ok()) { + Some(hash_kind) => { + if hash_kind.supported_in_cargo() { + *slot = Some(hash_kind); + } else { + return false; + } + } + _ => return false, + } + true + } + pub(crate) fn parse_target_feature(slot: &mut String, v: Option<&str>) -> bool { match v { Some(s) => { @@ -1688,6 +1706,8 @@ options! { "instrument control-flow architecture protection"), check_cfg_all_expected: bool = (false, parse_bool, [UNTRACKED], "show all expected values in check-cfg diagnostics (default: no)"), + checksum_hash_algorithm: Option = (None, parse_cargo_src_file_hash, [TRACKED], + "hash algorithm of source files used to check freshness in cargo (`sha256`)"), codegen_backend: Option = (None, parse_opt_string, [TRACKED], "the backend to use"), combine_cgu: bool = (false, parse_bool, [TRACKED], diff --git a/compiler/rustc_span/Cargo.toml b/compiler/rustc_span/Cargo.toml index 3fdfe77ead972..c52d1fcc07fe2 100644 --- a/compiler/rustc_span/Cargo.toml +++ b/compiler/rustc_span/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start +blake3 = "1.5.2" derive-where = "1.2.7" indexmap = { version = "2.0.0" } itoa = "1.0" diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 9dbdab84a81e1..5ab1caaa220a7 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -75,7 +75,9 @@ pub mod profiling; use std::borrow::Cow; use std::cmp::{self, Ordering}; +use std::fmt::Display; use std::hash::Hash; +use std::io::{self, Read}; use std::ops::{Add, Range, Sub}; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -1395,6 +1397,27 @@ pub enum SourceFileHashAlgorithm { Md5, Sha1, Sha256, + Blake3, +} + +impl SourceFileHashAlgorithm { + pub fn supported_in_cargo(&self) -> bool { + match self { + Self::Md5 | Self::Sha1 => false, + Self::Sha256 | Self::Blake3 => true, + } + } +} + +impl Display for SourceFileHashAlgorithm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + Self::Md5 => "md5", + Self::Sha1 => "sha1", + Self::Sha256 => "sha256", + Self::Blake3 => "blake3", + }) + } } impl FromStr for SourceFileHashAlgorithm { @@ -1405,12 +1428,13 @@ impl FromStr for SourceFileHashAlgorithm { "md5" => Ok(SourceFileHashAlgorithm::Md5), "sha1" => Ok(SourceFileHashAlgorithm::Sha1), "sha256" => Ok(SourceFileHashAlgorithm::Sha256), + "blake3" => Ok(SourceFileHashAlgorithm::Blake3), _ => Err(()), } } } -/// The hash of the on-disk source file used for debug info. +/// The hash of the on-disk source file used for debug info and cargo freshness checks. 
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] #[derive(HashStable_Generic, Encodable, Decodable)] pub struct SourceFileHash { @@ -1418,12 +1442,22 @@ pub struct SourceFileHash { value: [u8; 32], } +impl Display for SourceFileHash { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}=", self.kind)?; + for byte in self.value[0..self.hash_len()].into_iter() { + write!(f, "{byte:02x}")?; + } + Ok(()) + } +} + impl SourceFileHash { - pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash { + pub fn new_in_memory(kind: SourceFileHashAlgorithm, src: impl AsRef<[u8]>) -> SourceFileHash { let mut hash = SourceFileHash { kind, value: Default::default() }; let len = hash.hash_len(); let value = &mut hash.value[..len]; - let data = src.as_bytes(); + let data = src.as_ref(); match kind { SourceFileHashAlgorithm::Md5 => { value.copy_from_slice(&Md5::digest(data)); } @@ -1434,13 +1468,94 @@ impl SourceFileHash { SourceFileHashAlgorithm::Sha256 => { value.copy_from_slice(&Sha256::digest(data)); } - } + SourceFileHashAlgorithm::Blake3 => value.copy_from_slice(blake3::hash(data).as_bytes()), + }; hash } + pub fn new(kind: SourceFileHashAlgorithm, src: impl Read) -> Result<SourceFileHash, io::Error> { + let mut hash = SourceFileHash { kind, value: Default::default() }; + let len = hash.hash_len(); + let value = &mut hash.value[..len]; + // Buffer size is the recommended amount to fully leverage SIMD instructions on AVX-512 as per + // the blake3 documentation. + let mut buf = vec![0; 16 * 1024]; + + fn digest<T>( + mut hasher: T, + mut update: impl FnMut(&mut T, &[u8]), + finish: impl FnOnce(T, &mut [u8]), + mut src: impl Read, + buf: &mut [u8], + value: &mut [u8], + ) -> Result<(), io::Error> { + loop { + let bytes_read = src.read(buf)?; + if bytes_read == 0 { + break; + } + update(&mut hasher, &buf[0..bytes_read]); + } + finish(hasher, value); + Ok(()) + } + + match kind { + SourceFileHashAlgorithm::Sha256 => { + digest( + Sha256::new(), + |h, b| { + h.update(b); + }, + |h, out| out.copy_from_slice(&h.finalize()), + src, + &mut buf, + value, + )?; + } + SourceFileHashAlgorithm::Sha1 => { + digest( + Sha1::new(), + |h, b| { + h.update(b); + }, + |h, out| out.copy_from_slice(&h.finalize()), + src, + &mut buf, + value, + )?; + } + SourceFileHashAlgorithm::Md5 => { + digest( + Md5::new(), + |h, b| { + h.update(b); + }, + |h, out| out.copy_from_slice(&h.finalize()), + src, + &mut buf, + value, + )?; + } + SourceFileHashAlgorithm::Blake3 => { + digest( + blake3::Hasher::new(), + |h, b| { + h.update(b); + }, + |h, out| out.copy_from_slice(h.finalize().as_bytes()), + src, + &mut buf, + value, + )?; + } + } + Ok(hash) + } + /// Check if the stored hash matches the hash of the string. pub fn matches(&self, src: &str) -> bool { - Self::new(self.kind, src) == *self + Self::new_in_memory(self.kind, src.as_bytes()) == *self } /// The bytes of the hash. @@ -1453,7 +1568,7 @@ impl SourceFileHash { match self.kind { SourceFileHashAlgorithm::Md5 => 16, SourceFileHashAlgorithm::Sha1 => 20, - SourceFileHashAlgorithm::Sha256 => 32, + SourceFileHashAlgorithm::Sha256 | SourceFileHashAlgorithm::Blake3 => 32, } } } @@ -1509,6 +1624,10 @@ pub struct SourceFile { pub src: Option<Lrc<String>>, /// The source code's hash. pub src_hash: SourceFileHash, + /// Used to enable cargo to check whether a crate is fresh using checksums rather + /// than mtimes. This might be the same as `src_hash`; if the requested algorithm + /// is identical, it is not computed twice. 
+ pub checksum_hash: Option<SourceFileHash>, /// The external source code (used for external crates, which will have a `None` value as `self.src`. pub external_src: FreezeLock<ExternalSource>, @@ -1536,6 +1655,7 @@ impl Clone for SourceFile { name: self.name.clone(), src: self.src.clone(), src_hash: self.src_hash, + checksum_hash: self.checksum_hash, external_src: self.external_src.clone(), start_pos: self.start_pos, source_len: self.source_len, @@ -1552,6 +1672,7 @@ impl<S: SpanEncoder> Encodable<S> for SourceFile { fn encode(&self, s: &mut S) { self.name.encode(s); self.src_hash.encode(s); + self.checksum_hash.encode(s); // Do not encode `start_pos` as it's global state for this session. self.source_len.encode(s); @@ -1625,6 +1746,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile { fn decode(d: &mut D) -> SourceFile { let name: FileName = Decodable::decode(d); let src_hash: SourceFileHash = Decodable::decode(d); + let checksum_hash: Option<SourceFileHash> = Decodable::decode(d); let source_len: RelativeBytePos = Decodable::decode(d); let lines = { let num_lines: u32 = Decodable::decode(d); @@ -1650,6 +1772,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile { source_len, src: None, src_hash, + checksum_hash, // Unused - the metadata decoder will construct // a new SourceFile, filling in `external_src` properly external_src: FreezeLock::frozen(ExternalSource::Unneeded), @@ -1733,9 +1856,17 @@ impl SourceFile { name: FileName, mut src: String, hash_kind: SourceFileHashAlgorithm, + checksum_hash_kind: Option<SourceFileHashAlgorithm>, ) -> Result<SourceFile, OffsetOverflowError> { // Compute the file hash before any normalization. - let src_hash = SourceFileHash::new(hash_kind, &src); + let src_hash = SourceFileHash::new_in_memory(hash_kind, src.as_bytes()); + let checksum_hash = checksum_hash_kind.map(|checksum_hash_kind| { + if checksum_hash_kind == hash_kind { + src_hash + } else { + SourceFileHash::new_in_memory(checksum_hash_kind, src.as_bytes()) + } + }); let normalized_pos = normalize_src(&mut src); let stable_id = StableSourceFileId::from_filename_in_current_crate(&name); @@ -1748,6 +1879,7 @@ impl SourceFile { name, src: Some(Lrc::new(src)), src_hash, + checksum_hash, external_src: FreezeLock::frozen(ExternalSource::Unneeded), start_pos: BytePos::from_u32(0), source_len: RelativeBytePos::from_u32(source_len), diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 98447147d3e13..8a02330593711 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -175,6 +175,7 @@ pub struct SourceMapInputs { pub file_loader: Box<dyn FileLoader + Sync + Send>, pub path_mapping: FilePathMapping, pub hash_kind: SourceFileHashAlgorithm, + pub checksum_hash_kind: Option<SourceFileHashAlgorithm>, } pub struct SourceMap { @@ -187,6 +188,12 @@ pub struct SourceMap { /// The algorithm used for hashing the contents of each source file. hash_kind: SourceFileHashAlgorithm, + + /// Similar to `hash_kind`, but this algorithm is used for checksums that determine whether a crate is fresh. + /// `cargo` is the primary consumer of these checksums. + /// + /// If this is equal to `hash_kind`, the checksum is not computed twice. 
+ checksum_hash_kind: Option<SourceFileHashAlgorithm>, } impl SourceMap { @@ -195,17 +202,19 @@ impl SourceMap { file_loader: Box::new(RealFileLoader), path_mapping, hash_kind: SourceFileHashAlgorithm::Md5, + checksum_hash_kind: None, }) } pub fn with_inputs( - SourceMapInputs { file_loader, path_mapping, hash_kind }: SourceMapInputs, + SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind }: SourceMapInputs, ) -> SourceMap { SourceMap { files: Default::default(), file_loader: IntoDynSyncSend(file_loader), path_mapping, hash_kind, + checksum_hash_kind, } } @@ -307,7 +316,8 @@ impl SourceMap { match self.source_file_by_stable_id(stable_id) { Some(lrc_sf) => Ok(lrc_sf), None => { - let source_file = SourceFile::new(filename, src, self.hash_kind)?; + let source_file = + SourceFile::new(filename, src, self.hash_kind, self.checksum_hash_kind)?; // Let's make sure the file_id we generated above actually matches // the ID we generate for the SourceFile we just created. @@ -326,6 +336,7 @@ impl SourceMap { &self, filename: FileName, src_hash: SourceFileHash, + checksum_hash: Option<SourceFileHash>, stable_id: StableSourceFileId, source_len: u32, cnum: CrateNum, @@ -340,6 +351,7 @@ impl SourceMap { name: filename, src: None, src_hash, + checksum_hash, external_src: FreezeLock::new(ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, metadata_index, diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 0c818b94b85a3..360baec273d9a 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -229,6 +229,7 @@ fn t10() { let SourceFile { name, src_hash, + checksum_hash, source_len, lines, multibyte_chars, @@ -240,6 +241,7 @@ fn t10() { let imported_src_file = sm.new_imported_source_file( name, src_hash, + checksum_hash, stable_id, source_len.to_u32(), CrateNum::ZERO, diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs index 48fa786fb1c80..ed1db34463429 100644 --- a/compiler/rustc_span/src/tests.rs +++ b/compiler/rustc_span/src/tests.rs @@ -3,9 +3,13 @@ use super::*; #[test] fn test_lookup_line() { let source = "abcdefghijklm\nabcdefghij\n...".to_owned(); - let mut sf = - SourceFile::new(FileName::Anon(Hash64::ZERO), source, SourceFileHashAlgorithm::Sha256) - .unwrap(); + let mut sf = SourceFile::new( + FileName::Anon(Hash64::ZERO), + source, + SourceFileHashAlgorithm::Sha256, + Some(SourceFileHashAlgorithm::Sha256), + ) + .unwrap(); sf.start_pos = BytePos(3); assert_eq!(sf.lines(), &[RelativeBytePos(0), RelativeBytePos(14), RelativeBytePos(25)]); diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 49d5b71ff2ddf..1ffad06457f9b 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -88,7 +88,10 @@ pub(crate) const WORKSPACES: &[(&str, ExceptionList, Option<(&[&str], &[&str])>, const EXCEPTIONS: ExceptionList = &[ // tidy-alphabetical-start ("ar_archive_writer", "Apache-2.0 WITH LLVM-exception"), // rustc + ("arrayref", "BSD-2-Clause"), // rustc + ("blake3", "CC0-1.0 OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception"), // rustc ("colored", "MPL-2.0"), // rustfmt + ("constant_time_eq", "CC0-1.0 OR MIT-0 OR Apache-2.0"), // rustc ("dissimilar", "Apache-2.0"), // rustdoc, rustc_lexer (few tests) via expect-test, (dev deps) ("fluent-langneg", "Apache-2.0"), // rustc (fluent translations) ("instant", "BSD-3-Clause"), // rustc_driver/tracing-subscriber/parking_lot @@ -249,14 +252,17 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ 
"annotate-snippets", "anstyle", "ar_archive_writer", + "arrayref", "arrayvec", "autocfg", "bitflags", + "blake3", "block-buffer", "byteorder", // via ruzstd in object in thorin-dwp "cc", "cfg-if", "cfg_aliases", + "constant_time_eq", "cpufeatures", "crc32fast", "crossbeam-channel",