Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

--remap-path-prefix: Fix duplicated path components in debuginfo #96867

Merged
merged 4 commits into from
May 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 95 additions & 67 deletions compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,21 @@ use rustc_middle::ty::subst::GenericArgKind;
use rustc_middle::ty::{self, AdtKind, Instance, ParamEnv, Ty, TyCtxt, COMMON_VTABLE_ENTRIES};
use rustc_session::config::{self, DebugInfo};
use rustc_span::symbol::Symbol;
use rustc_span::FileName;
use rustc_span::FileNameDisplayPreference;
use rustc_span::{self, SourceFile, SourceFileHash};
use rustc_span::{self, SourceFile};
use rustc_target::abi::{Align, Size};
use smallvec::smallvec;
use tracing::debug;

use libc::{c_longlong, c_uint};
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::fmt::{self, Write};
use std::hash::{Hash, Hasher};
use std::iter;
use std::path::{Path, PathBuf};
use std::ptr;
use tracing::instrument;

impl PartialEq for llvm::Metadata {
fn eq(&self, other: &Self) -> bool {
Expand Down Expand Up @@ -527,78 +528,105 @@ fn hex_encode(data: &[u8]) -> String {
}

/// Returns the `DIFile` debuginfo node describing `source_file`.
///
/// The file name passed on is the remapped-preferred rendering of
/// `source_file.name`, and the file's source hash is always forwarded.
/// A directory (the session's working directory, rendered in its remapped
/// form) is supplied only for real files that are not imported; otherwise
/// no directory is given. Lookup/creation is delegated to `file_metadata_raw`.
pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) -> &'ll DIFile {
debug!("file_metadata: file_name: {:?}", source_file.name);

// The hash is always present for a concrete source file.
let hash = Some(&source_file.src_hash);
// Prefer the remapped name so that path remapping is reflected in debuginfo.
let file_name = Some(source_file.name.prefer_remapped().to_string());
let directory = if source_file.is_real_file() && !source_file.is_imported() {
Some(
cx.sess()
.opts
.working_dir
.to_string_lossy(FileNameDisplayPreference::Remapped)
.to_string(),
)
} else {
// If the path comes from an upstream crate we assume it has been made
// independent of the compiler's working directory one way or another.
None
};
file_metadata_raw(cx, file_name, directory, hash)
}

/// Returns the `DIFile` used when no source file is known.
///
/// Delegates to `file_metadata_raw` with no file name, no directory and no
/// hash.
pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
file_metadata_raw(cx, None, None, None)
}

fn file_metadata_raw<'ll>(
cx: &CodegenCx<'ll, '_>,
file_name: Option<String>,
directory: Option<String>,
hash: Option<&SourceFileHash>,
) -> &'ll DIFile {
let key = (file_name, directory);

match debug_context(cx).created_files.borrow_mut().entry(key) {
Entry::Occupied(o) => o.get(),
Entry::Vacant(v) => {
let (file_name, directory) = v.key();
debug!("file_metadata: file_name: {:?}, directory: {:?}", file_name, directory);

let file_name = file_name.as_deref().unwrap_or("<unknown>");
let directory = directory.as_deref().unwrap_or("");

let (hash_kind, hash_value) = match hash {
Some(hash) => {
let kind = match hash.kind {
rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
};
(kind, hex_encode(hash.hash_bytes()))
let cache_key = Some((source_file.name_hash, source_file.src_hash));
return debug_context(cx)
.created_files
.borrow_mut()
.entry(cache_key)
.or_insert_with(|| alloc_new_file_metadata(cx, source_file));

#[instrument(skip(cx, source_file), level = "debug")]
fn alloc_new_file_metadata<'ll>(
cx: &CodegenCx<'ll, '_>,
source_file: &SourceFile,
) -> &'ll DIFile {
debug!(?source_file.name);

let (directory, file_name) = match &source_file.name {
FileName::Real(filename) => {
let working_directory = &cx.sess().opts.working_dir;
debug!(?working_directory);

let filename = cx
.sess()
.source_map()
.path_mapping()
.to_embeddable_absolute_path(filename.clone(), working_directory);

// Construct the absolute path of the file
let abs_path = filename.remapped_path_if_available();
debug!(?abs_path);

if let Ok(rel_path) =
abs_path.strip_prefix(working_directory.remapped_path_if_available())
{
// If the compiler's working directory (which also is the DW_AT_comp_dir of
// the compilation unit) is a prefix of the path we are about to emit, then
// only emit the part relative to the working directory.
// Because of path remapping we sometimes see strange things here: `abs_path`
// might actually look like a relative path
// (e.g. `<crate-name-and-version>/src/lib.rs`), so if we emit it without
// taking the working directory into account, downstream tooling will
// interpret it as `<working-directory>/<crate-name-and-version>/src/lib.rs`,
// which makes no sense. Usually in such cases the working directory will also
// be remapped to `<crate-name-and-version>` or some other prefix of the path
// we are remapping, so we end up with
// `<crate-name-and-version>/<crate-name-and-version>/src/lib.rs`.
// By moving the working directory portion into the `directory` part of the
// DIFile, we allow LLVM to emit just the relative path for DWARF, while
// still emitting the correct absolute path for CodeView.
(
working_directory.to_string_lossy(FileNameDisplayPreference::Remapped),
rel_path.to_string_lossy().into_owned(),
)
} else {
("".into(), abs_path.to_string_lossy().into_owned())
}
None => (llvm::ChecksumKind::None, String::new()),
};
}
other => ("".into(), other.prefer_remapped().to_string_lossy().into_owned()),
};

let file_metadata = unsafe {
llvm::LLVMRustDIBuilderCreateFile(
DIB(cx),
file_name.as_ptr().cast(),
file_name.len(),
directory.as_ptr().cast(),
directory.len(),
hash_kind,
hash_value.as_ptr().cast(),
hash_value.len(),
)
};
let hash_kind = match source_file.src_hash.kind {
rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
};
let hash_value = hex_encode(source_file.src_hash.hash_bytes());

v.insert(file_metadata);
file_metadata
unsafe {
llvm::LLVMRustDIBuilderCreateFile(
DIB(cx),
file_name.as_ptr().cast(),
file_name.len(),
directory.as_ptr().cast(),
directory.len(),
hash_kind,
hash_value.as_ptr().cast(),
hash_value.len(),
)
}
}
}

/// Returns the placeholder `DIFile` used when no source file is available.
///
/// The node is stored in the debug context's `created_files` map under the
/// `None` key, so it is only built the first time that key is requested;
/// later calls return the cached entry. The placeholder is named
/// `<unknown>`, has an empty directory, and carries no checksum.
pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
    const UNKNOWN_FILE_NAME: &str = "<unknown>";
    const EMPTY_DIRECTORY: &str = "";
    const EMPTY_CHECKSUM: &str = "";

    debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(|| {
        // SAFETY: every pointer/length pair below is taken from the same
        // `&'static str`, so each pair describes a valid byte range.
        // NOTE(review): assumes the FFI call only reads those ranges — confirm
        // against the `LLVMRustDIBuilderCreateFile` wrapper.
        unsafe {
            llvm::LLVMRustDIBuilderCreateFile(
                DIB(cx),
                UNKNOWN_FILE_NAME.as_ptr().cast(),
                UNKNOWN_FILE_NAME.len(),
                EMPTY_DIRECTORY.as_ptr().cast(),
                EMPTY_DIRECTORY.len(),
                llvm::ChecksumKind::None,
                EMPTY_CHECKSUM.as_ptr().cast(),
                EMPTY_CHECKSUM.len(),
            )
        }
    })
}

/// Provides a fixed name string for the implementing type.
///
/// NOTE(review): presumably the MSVC/CodeView spelling of a basic type —
/// the implementors are not visible here; confirm against them.
trait MsvcBasicName {
/// Consumes `self` and returns the associated `'static` name.
fn msvc_basic_name(self) -> &'static str;
}
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TypeFoldable};
use rustc_session::config::{self, DebugInfo};
use rustc_session::Session;
use rustc_span::symbol::Symbol;
use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, Span};
use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span};
use rustc_target::abi::Size;

use libc::c_uint;
Expand Down Expand Up @@ -61,7 +61,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> {
llcontext: &'ll llvm::Context,
llmod: &'ll llvm::Module,
builder: &'ll mut DIBuilder<'ll>,
created_files: RefCell<FxHashMap<(Option<String>, Option<String>), &'ll DIFile>>,
created_files: RefCell<FxHashMap<Option<(u128, SourceFileHash)>, &'ll DIFile>>,

type_map: metadata::TypeMap<'ll, 'tcx>,
namespace_map: RefCell<DefIdMap<&'ll DIScope>>,
Expand Down
100 changes: 36 additions & 64 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,14 @@ use rustc_middle::ty::{self, SymbolName, Ty, TyCtxt};
use rustc_serialize::{opaque, Encodable, Encoder};
use rustc_session::config::CrateType;
use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib};
use rustc_span::hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind};
use rustc_span::symbol::{sym, Ident, Symbol};
use rustc_span::{
self, DebuggerVisualizerFile, ExternalSource, FileName, SourceFile, Span, SyntaxContext,
};
use rustc_span::{
hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind},
RealFileName,
};
use rustc_target::abi::VariantIdx;
use std::hash::Hash;
use std::num::NonZeroUsize;
use std::path::Path;
use tracing::{debug, trace};

pub(super) struct EncodeContext<'a, 'tcx> {
Expand Down Expand Up @@ -490,6 +486,8 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
// is done.
let required_source_files = self.required_source_files.take().unwrap();

let working_directory = &self.tcx.sess.opts.working_dir;

let adapted = all_source_files
.iter()
.enumerate()
Expand All @@ -502,76 +500,50 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
(!source_file.is_imported() || self.is_proc_macro)
})
.map(|(_, source_file)| {
let mut adapted = match source_file.name {
FileName::Real(ref realname) => {
let mut adapted = (**source_file).clone();
adapted.name = FileName::Real(match realname {
RealFileName::LocalPath(path_to_file) => {
// Prepend path of working directory onto potentially
// relative paths, because they could become relative
// to a wrong directory.
// We include `working_dir` as part of the crate hash,
// so it's okay for us to use it as part of the encoded
// metadata.
let working_dir = &self.tcx.sess.opts.working_dir;
match working_dir {
RealFileName::LocalPath(absolute) => {
// Although neither working_dir nor the file name was subject
// to path remapping, the concatenation between the two may
// be. Hence we need to do a remapping here.
let joined = Path::new(absolute).join(path_to_file);
let (joined, remapped) =
source_map.path_mapping().map_prefix(joined);
if remapped {
RealFileName::Remapped {
local_path: None,
virtual_name: joined,
}
} else {
RealFileName::LocalPath(joined)
}
}
RealFileName::Remapped { local_path: _, virtual_name } => {
// If working_dir has been remapped, then we emit
// Remapped variant as the expanded path won't be valid
RealFileName::Remapped {
local_path: None,
virtual_name: Path::new(virtual_name)
.join(path_to_file),
}
}
}
}
RealFileName::Remapped { local_path: _, virtual_name } => {
RealFileName::Remapped {
// We do not want any local path to be exported into metadata
local_path: None,
virtual_name: virtual_name.clone(),
}
}
});
adapted.name_hash = {
let mut hasher: StableHasher = StableHasher::new();
adapted.name.hash(&mut hasher);
hasher.finish::<u128>()
};
Lrc::new(adapted)
// At export time we expand all source file paths to absolute paths because
// downstream compilation sessions can have a different compiler working
// directory, so relative paths from this or any other upstream crate
// won't be valid anymore.
//
// At this point we also erase the actual on-disk path and only keep
// the remapped version -- as is necessary for reproducible builds.
match source_file.name {
FileName::Real(ref original_file_name) => {
let adapted_file_name =
source_map.path_mapping().to_embeddable_absolute_path(
original_file_name.clone(),
working_directory,
);

if adapted_file_name != *original_file_name {
let mut adapted: SourceFile = (**source_file).clone();
adapted.name = FileName::Real(adapted_file_name);
adapted.name_hash = {
let mut hasher: StableHasher = StableHasher::new();
adapted.name.hash(&mut hasher);
hasher.finish::<u128>()
};
Lrc::new(adapted)
} else {
// Nothing to adapt
source_file.clone()
}
}

// expanded code, not from a file
_ => source_file.clone(),
};

}
})
.map(|mut source_file| {
// We're serializing this `SourceFile` into our crate metadata,
// so mark it as coming from this crate.
// This also ensures that we don't try to deserialize the
// `CrateNum` for a proc-macro dependency - since proc macro
// dependencies aren't loaded when we deserialize a proc-macro,
// trying to remap the `CrateNum` would fail.
if self.is_proc_macro {
Lrc::make_mut(&mut adapted).cnum = LOCAL_CRATE;
Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE;
}
adapted
source_file
})
.collect::<Vec<_>>();

Expand Down
6 changes: 3 additions & 3 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,8 @@ impl fmt::Display for FileNameDisplay<'_> {
}
}

impl FileNameDisplay<'_> {
pub fn to_string_lossy(&self) -> Cow<'_, str> {
impl<'a> FileNameDisplay<'a> {
pub fn to_string_lossy(&self) -> Cow<'a, str> {
match self.inner {
FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref),
_ => Cow::from(format!("{}", self)),
Expand Down Expand Up @@ -1153,7 +1153,7 @@ impl FromStr for SourceFileHashAlgorithm {
}

/// The hash of the on-disk source file used for debug info.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct SourceFileHash {
pub kind: SourceFileHashAlgorithm,
Expand Down
Loading