Skip to content

Commit 936eba3

Browse files
committed
Auto merge of #96867 - michaelwoerister:path-prefix-fixes-2, r=davidtwco
--remap-path-prefix: Fix duplicated path components in debuginfo This PR fixes an issue with `--remap-path-prefix` where path components could appear twice in the remapped version of the path (e.g. #78479). The underlying problem was that `--remap-path-prefix` is often used to map an absolute path to something that looks like a relative path, e.g.: ``` --remap-path-prefix=/home/calvin/.cargo/registry/src/github.com-1ecc6299db9ec823=crates.io", ``` and relative paths in debuginfo are interpreted as being relative to the compilation directory. So if Cargo invokes the compiler with `/home/calvin/.cargo/registry/src/github.com-1ecc6299db9ec823/some_crate-0.1.0/src/lib.rs` as input and `/home/calvin/.cargo/registry/src/github.com-1ecc6299db9ec823/some_crate-0.1.0` as the compiler's working directory, then debuginfo will state that the working directory was `crates.io/some_crate-0.1.0` and the file is question was `crates.io/some_crate-0.1.0/src/lib.rs`, which combined gives the path: ``` crates.io/some_crate-0.1.0/crates.io/some_crate-0.1.0/src/lib.rs ``` With this PR the compiler will detect this situation and set up debuginfo in LLVM in a way that makes it strip the duplicated path components when emitting DWARF. The PR also extracts the logic for making remapped paths absolute into a common helper function that is now used by debuginfo too (instead of just during crate metadata generation).
2 parents e5732a2 + 6411fef commit 936eba3

File tree

10 files changed

+463
-197
lines changed

10 files changed

+463
-197
lines changed

compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs

+95-67
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,21 @@ use rustc_middle::ty::subst::GenericArgKind;
3636
use rustc_middle::ty::{self, AdtKind, Instance, ParamEnv, Ty, TyCtxt, COMMON_VTABLE_ENTRIES};
3737
use rustc_session::config::{self, DebugInfo};
3838
use rustc_span::symbol::Symbol;
39+
use rustc_span::FileName;
3940
use rustc_span::FileNameDisplayPreference;
40-
use rustc_span::{self, SourceFile, SourceFileHash};
41+
use rustc_span::{self, SourceFile};
4142
use rustc_target::abi::{Align, Size};
4243
use smallvec::smallvec;
4344
use tracing::debug;
4445

4546
use libc::{c_longlong, c_uint};
4647
use std::borrow::Cow;
47-
use std::collections::hash_map::Entry;
4848
use std::fmt::{self, Write};
4949
use std::hash::{Hash, Hasher};
5050
use std::iter;
5151
use std::path::{Path, PathBuf};
5252
use std::ptr;
53+
use tracing::instrument;
5354

5455
impl PartialEq for llvm::Metadata {
5556
fn eq(&self, other: &Self) -> bool {
@@ -527,78 +528,105 @@ fn hex_encode(data: &[u8]) -> String {
527528
}
528529

529530
pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) -> &'ll DIFile {
530-
debug!("file_metadata: file_name: {:?}", source_file.name);
531-
532-
let hash = Some(&source_file.src_hash);
533-
let file_name = Some(source_file.name.prefer_remapped().to_string());
534-
let directory = if source_file.is_real_file() && !source_file.is_imported() {
535-
Some(
536-
cx.sess()
537-
.opts
538-
.working_dir
539-
.to_string_lossy(FileNameDisplayPreference::Remapped)
540-
.to_string(),
541-
)
542-
} else {
543-
// If the path comes from an upstream crate we assume it has been made
544-
// independent of the compiler's working directory one way or another.
545-
None
546-
};
547-
file_metadata_raw(cx, file_name, directory, hash)
548-
}
549-
550-
pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
551-
file_metadata_raw(cx, None, None, None)
552-
}
553-
554-
fn file_metadata_raw<'ll>(
555-
cx: &CodegenCx<'ll, '_>,
556-
file_name: Option<String>,
557-
directory: Option<String>,
558-
hash: Option<&SourceFileHash>,
559-
) -> &'ll DIFile {
560-
let key = (file_name, directory);
561-
562-
match debug_context(cx).created_files.borrow_mut().entry(key) {
563-
Entry::Occupied(o) => o.get(),
564-
Entry::Vacant(v) => {
565-
let (file_name, directory) = v.key();
566-
debug!("file_metadata: file_name: {:?}, directory: {:?}", file_name, directory);
567-
568-
let file_name = file_name.as_deref().unwrap_or("<unknown>");
569-
let directory = directory.as_deref().unwrap_or("");
570-
571-
let (hash_kind, hash_value) = match hash {
572-
Some(hash) => {
573-
let kind = match hash.kind {
574-
rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
575-
rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
576-
rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
577-
};
578-
(kind, hex_encode(hash.hash_bytes()))
531+
let cache_key = Some((source_file.name_hash, source_file.src_hash));
532+
return debug_context(cx)
533+
.created_files
534+
.borrow_mut()
535+
.entry(cache_key)
536+
.or_insert_with(|| alloc_new_file_metadata(cx, source_file));
537+
538+
#[instrument(skip(cx, source_file), level = "debug")]
539+
fn alloc_new_file_metadata<'ll>(
540+
cx: &CodegenCx<'ll, '_>,
541+
source_file: &SourceFile,
542+
) -> &'ll DIFile {
543+
debug!(?source_file.name);
544+
545+
let (directory, file_name) = match &source_file.name {
546+
FileName::Real(filename) => {
547+
let working_directory = &cx.sess().opts.working_dir;
548+
debug!(?working_directory);
549+
550+
let filename = cx
551+
.sess()
552+
.source_map()
553+
.path_mapping()
554+
.to_embeddable_absolute_path(filename.clone(), working_directory);
555+
556+
// Construct the absolute path of the file
557+
let abs_path = filename.remapped_path_if_available();
558+
debug!(?abs_path);
559+
560+
if let Ok(rel_path) =
561+
abs_path.strip_prefix(working_directory.remapped_path_if_available())
562+
{
563+
// If the compiler's working directory (which also is the DW_AT_comp_dir of
564+
// the compilation unit) is a prefix of the path we are about to emit, then
565+
// only emit the part relative to the working directory.
566+
// Because of path remapping we sometimes see strange things here: `abs_path`
567+
// might actually look like a relative path
568+
// (e.g. `<crate-name-and-version>/src/lib.rs`), so if we emit it without
569+
// taking the working directory into account, downstream tooling will
570+
// interpret it as `<working-directory>/<crate-name-and-version>/src/lib.rs`,
571+
// which makes no sense. Usually in such cases the working directory will also
572+
// be remapped to `<crate-name-and-version>` or some other prefix of the path
573+
// we are remapping, so we end up with
574+
// `<crate-name-and-version>/<crate-name-and-version>/src/lib.rs`.
575+
// By moving the working directory portion into the `directory` part of the
576+
// DIFile, we allow LLVM to emit just the relative path for DWARF, while
577+
// still emitting the correct absolute path for CodeView.
578+
(
579+
working_directory.to_string_lossy(FileNameDisplayPreference::Remapped),
580+
rel_path.to_string_lossy().into_owned(),
581+
)
582+
} else {
583+
("".into(), abs_path.to_string_lossy().into_owned())
579584
}
580-
None => (llvm::ChecksumKind::None, String::new()),
581-
};
585+
}
586+
other => ("".into(), other.prefer_remapped().to_string_lossy().into_owned()),
587+
};
582588

583-
let file_metadata = unsafe {
584-
llvm::LLVMRustDIBuilderCreateFile(
585-
DIB(cx),
586-
file_name.as_ptr().cast(),
587-
file_name.len(),
588-
directory.as_ptr().cast(),
589-
directory.len(),
590-
hash_kind,
591-
hash_value.as_ptr().cast(),
592-
hash_value.len(),
593-
)
594-
};
589+
let hash_kind = match source_file.src_hash.kind {
590+
rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
591+
rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
592+
rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
593+
};
594+
let hash_value = hex_encode(source_file.src_hash.hash_bytes());
595595

596-
v.insert(file_metadata);
597-
file_metadata
596+
unsafe {
597+
llvm::LLVMRustDIBuilderCreateFile(
598+
DIB(cx),
599+
file_name.as_ptr().cast(),
600+
file_name.len(),
601+
directory.as_ptr().cast(),
602+
directory.len(),
603+
hash_kind,
604+
hash_value.as_ptr().cast(),
605+
hash_value.len(),
606+
)
598607
}
599608
}
600609
}
601610

611+
pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
612+
debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(|| unsafe {
613+
let file_name = "<unknown>";
614+
let directory = "";
615+
let hash_value = "";
616+
617+
llvm::LLVMRustDIBuilderCreateFile(
618+
DIB(cx),
619+
file_name.as_ptr().cast(),
620+
file_name.len(),
621+
directory.as_ptr().cast(),
622+
directory.len(),
623+
llvm::ChecksumKind::None,
624+
hash_value.as_ptr().cast(),
625+
hash_value.len(),
626+
)
627+
})
628+
}
629+
602630
trait MsvcBasicName {
603631
fn msvc_basic_name(self) -> &'static str;
604632
}

compiler/rustc_codegen_llvm/src/debuginfo/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TypeFoldable};
3131
use rustc_session::config::{self, DebugInfo};
3232
use rustc_session::Session;
3333
use rustc_span::symbol::Symbol;
34-
use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, Span};
34+
use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span};
3535
use rustc_target::abi::Size;
3636

3737
use libc::c_uint;
@@ -61,7 +61,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> {
6161
llcontext: &'ll llvm::Context,
6262
llmod: &'ll llvm::Module,
6363
builder: &'ll mut DIBuilder<'ll>,
64-
created_files: RefCell<FxHashMap<(Option<String>, Option<String>), &'ll DIFile>>,
64+
created_files: RefCell<FxHashMap<Option<(u128, SourceFileHash)>, &'ll DIFile>>,
6565

6666
type_map: metadata::TypeMap<'ll, 'tcx>,
6767
namespace_map: RefCell<DefIdMap<&'ll DIScope>>,

compiler/rustc_metadata/src/rmeta/encoder.rs

+36-64
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,14 @@ use rustc_middle::ty::{self, SymbolName, Ty, TyCtxt};
3333
use rustc_serialize::{opaque, Encodable, Encoder};
3434
use rustc_session::config::CrateType;
3535
use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib};
36+
use rustc_span::hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind};
3637
use rustc_span::symbol::{sym, Ident, Symbol};
3738
use rustc_span::{
3839
self, DebuggerVisualizerFile, ExternalSource, FileName, SourceFile, Span, SyntaxContext,
3940
};
40-
use rustc_span::{
41-
hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind},
42-
RealFileName,
43-
};
4441
use rustc_target::abi::VariantIdx;
4542
use std::hash::Hash;
4643
use std::num::NonZeroUsize;
47-
use std::path::Path;
4844
use tracing::{debug, trace};
4945

5046
pub(super) struct EncodeContext<'a, 'tcx> {
@@ -490,6 +486,8 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
490486
// is done.
491487
let required_source_files = self.required_source_files.take().unwrap();
492488

489+
let working_directory = &self.tcx.sess.opts.working_dir;
490+
493491
let adapted = all_source_files
494492
.iter()
495493
.enumerate()
@@ -502,76 +500,50 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
502500
(!source_file.is_imported() || self.is_proc_macro)
503501
})
504502
.map(|(_, source_file)| {
505-
let mut adapted = match source_file.name {
506-
FileName::Real(ref realname) => {
507-
let mut adapted = (**source_file).clone();
508-
adapted.name = FileName::Real(match realname {
509-
RealFileName::LocalPath(path_to_file) => {
510-
// Prepend path of working directory onto potentially
511-
// relative paths, because they could become relative
512-
// to a wrong directory.
513-
// We include `working_dir` as part of the crate hash,
514-
// so it's okay for us to use it as part of the encoded
515-
// metadata.
516-
let working_dir = &self.tcx.sess.opts.working_dir;
517-
match working_dir {
518-
RealFileName::LocalPath(absolute) => {
519-
// Although neither working_dir or the file name were subject
520-
// to path remapping, the concatenation between the two may
521-
// be. Hence we need to do a remapping here.
522-
let joined = Path::new(absolute).join(path_to_file);
523-
let (joined, remapped) =
524-
source_map.path_mapping().map_prefix(joined);
525-
if remapped {
526-
RealFileName::Remapped {
527-
local_path: None,
528-
virtual_name: joined,
529-
}
530-
} else {
531-
RealFileName::LocalPath(joined)
532-
}
533-
}
534-
RealFileName::Remapped { local_path: _, virtual_name } => {
535-
// If working_dir has been remapped, then we emit
536-
// Remapped variant as the expanded path won't be valid
537-
RealFileName::Remapped {
538-
local_path: None,
539-
virtual_name: Path::new(virtual_name)
540-
.join(path_to_file),
541-
}
542-
}
543-
}
544-
}
545-
RealFileName::Remapped { local_path: _, virtual_name } => {
546-
RealFileName::Remapped {
547-
// We do not want any local path to be exported into metadata
548-
local_path: None,
549-
virtual_name: virtual_name.clone(),
550-
}
551-
}
552-
});
553-
adapted.name_hash = {
554-
let mut hasher: StableHasher = StableHasher::new();
555-
adapted.name.hash(&mut hasher);
556-
hasher.finish::<u128>()
557-
};
558-
Lrc::new(adapted)
503+
// At export time we expand all source file paths to absolute paths because
504+
// downstream compilation sessions can have a different compiler working
505+
// directory, so relative paths from this or any other upstream crate
506+
// won't be valid anymore.
507+
//
508+
// At this point we also erase the actual on-disk path and only keep
509+
// the remapped version -- as is necessary for reproducible builds.
510+
match source_file.name {
511+
FileName::Real(ref original_file_name) => {
512+
let adapted_file_name =
513+
source_map.path_mapping().to_embeddable_absolute_path(
514+
original_file_name.clone(),
515+
working_directory,
516+
);
517+
518+
if adapted_file_name != *original_file_name {
519+
let mut adapted: SourceFile = (**source_file).clone();
520+
adapted.name = FileName::Real(adapted_file_name);
521+
adapted.name_hash = {
522+
let mut hasher: StableHasher = StableHasher::new();
523+
adapted.name.hash(&mut hasher);
524+
hasher.finish::<u128>()
525+
};
526+
Lrc::new(adapted)
527+
} else {
528+
// Nothing to adapt
529+
source_file.clone()
530+
}
559531
}
560-
561532
// expanded code, not from a file
562533
_ => source_file.clone(),
563-
};
564-
534+
}
535+
})
536+
.map(|mut source_file| {
565537
// We're serializing this `SourceFile` into our crate metadata,
566538
// so mark it as coming from this crate.
567539
// This also ensures that we don't try to deserialize the
568540
// `CrateNum` for a proc-macro dependency - since proc macro
569541
// dependencies aren't loaded when we deserialize a proc-macro,
570542
// trying to remap the `CrateNum` would fail.
571543
if self.is_proc_macro {
572-
Lrc::make_mut(&mut adapted).cnum = LOCAL_CRATE;
544+
Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE;
573545
}
574-
adapted
546+
source_file
575547
})
576548
.collect::<Vec<_>>();
577549

compiler/rustc_span/src/lib.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -335,8 +335,8 @@ impl fmt::Display for FileNameDisplay<'_> {
335335
}
336336
}
337337

338-
impl FileNameDisplay<'_> {
339-
pub fn to_string_lossy(&self) -> Cow<'_, str> {
338+
impl<'a> FileNameDisplay<'a> {
339+
pub fn to_string_lossy(&self) -> Cow<'a, str> {
340340
match self.inner {
341341
FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref),
342342
_ => Cow::from(format!("{}", self)),
@@ -1153,7 +1153,7 @@ impl FromStr for SourceFileHashAlgorithm {
11531153
}
11541154

11551155
/// The hash of the on-disk source file used for debug info.
1156-
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1156+
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
11571157
#[derive(HashStable_Generic, Encodable, Decodable)]
11581158
pub struct SourceFileHash {
11591159
pub kind: SourceFileHashAlgorithm,

0 commit comments

Comments
 (0)