Skip to content

Commit 468887e

Browse files
committed
Auto merge of rust-lang#100209 - cjgillot:source-file-index, r=estebank
Lazily decode SourceFile from metadata. Currently, source files from foreign crates are decoded up front from metadata, and spans from those crates are matched to their source file by binary search among those files. This PR changes the strategy by matching spans to files during encoding, which allows source files to be decoded on demand instead of up front. The on-disk format for spans becomes: `<tag> <position from start of file> <length> <file index> <crate (if foreign file)>`.
2 parents 6c943ba + 0d41f91 commit 468887e

File tree

8 files changed

+204
-224
lines changed

8 files changed

+204
-224
lines changed

compiler/rustc_metadata/src/rmeta/decoder.rs

+102-126
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ pub(crate) struct CrateMetadata {
9999
/// Proc macro descriptions for this crate, if it's a proc macro crate.
100100
raw_proc_macros: Option<&'static [ProcMacro]>,
101101
/// Source maps for code from the crate.
102-
source_map_import_info: OnceCell<Vec<ImportedSourceFile>>,
102+
source_map_import_info: Lock<Vec<Option<ImportedSourceFile>>>,
103103
/// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`.
104104
def_path_hash_map: DefPathHashMapRef<'static>,
105105
/// Likewise for ExpnHash.
@@ -143,7 +143,8 @@ pub(crate) struct CrateMetadata {
143143
}
144144

145145
/// Holds information about a rustc_span::SourceFile imported from another crate.
146-
/// See `imported_source_files()` for more information.
146+
/// See `imported_source_file()` for more information.
147+
#[derive(Clone)]
147148
struct ImportedSourceFile {
148149
/// This SourceFile's byte-offset within the source_map of its original crate
149150
original_start_pos: rustc_span::BytePos,
@@ -160,9 +161,6 @@ pub(super) struct DecodeContext<'a, 'tcx> {
160161
sess: Option<&'tcx Session>,
161162
tcx: Option<TyCtxt<'tcx>>,
162163

163-
// Cache the last used source_file for translating spans as an optimization.
164-
last_source_file_index: usize,
165-
166164
lazy_state: LazyState,
167165

168166
// Used for decoding interpret::AllocIds in a cached & thread-safe manner.
@@ -191,7 +189,6 @@ pub(super) trait Metadata<'a, 'tcx>: Copy {
191189
blob: self.blob(),
192190
sess: self.sess().or(tcx.map(|tcx| tcx.sess)),
193191
tcx,
194-
last_source_file_index: 0,
195192
lazy_state: LazyState::NoNode,
196193
alloc_decoding_session: self
197194
.cdata()
@@ -527,6 +524,9 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
527524
bug!("Cannot decode Span without Session.")
528525
};
529526

527+
// Index of the file in the corresponding crate's list of encoded files.
528+
let metadata_index = u32::decode(decoder);
529+
530530
// There are two possibilities here:
531531
// 1. This is a 'local span', which is located inside a `SourceFile`
532532
// that came from this crate. In this case, we use the source map data
@@ -553,10 +553,10 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
553553
// to be based on the *foreign* crate (e.g. crate C), not the crate
554554
// we are writing metadata for (e.g. crate B). This allows us to
555555
// treat the 'local' and 'foreign' cases almost identically during deserialization:
556-
// we can call `imported_source_files` for the proper crate, and binary search
556+
// we can call `imported_source_file` for the proper crate, and fetch the
557557
// source file directly using the file index decoded from the span.
558-
let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
559-
decoder.cdata().imported_source_files(sess)
558+
let source_file = if tag == TAG_VALID_SPAN_LOCAL {
559+
decoder.cdata().imported_source_file(metadata_index, sess)
560560
} else {
561561
// When we encode a proc-macro crate, all `Span`s should be encoded
562562
// with `TAG_VALID_SPAN_LOCAL`
@@ -577,60 +577,30 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
577577
cnum
578578
);
579579

580-
// Decoding 'foreign' spans should be rare enough that it's
581-
// not worth it to maintain a per-CrateNum cache for `last_source_file_index`.
582-
// We just set it to 0, to ensure that we don't try to access something out
583-
// of bounds for our initial 'guess'
584-
decoder.last_source_file_index = 0;
585-
586580
let foreign_data = decoder.cdata().cstore.get_crate_data(cnum);
587-
foreign_data.imported_source_files(sess)
581+
foreign_data.imported_source_file(metadata_index, sess)
588582
};
589583

590-
let source_file = {
591-
// Optimize for the case that most spans within a translated item
592-
// originate from the same source_file.
593-
let last_source_file = &imported_source_files[decoder.last_source_file_index];
594-
595-
if lo >= last_source_file.original_start_pos && lo <= last_source_file.original_end_pos
596-
{
597-
last_source_file
598-
} else {
599-
let index = imported_source_files
600-
.binary_search_by_key(&lo, |source_file| source_file.original_start_pos)
601-
.unwrap_or_else(|index| index - 1);
602-
603-
// Don't try to cache the index for foreign spans,
604-
// as this would require a map from CrateNums to indices
605-
if tag == TAG_VALID_SPAN_LOCAL {
606-
decoder.last_source_file_index = index;
607-
}
608-
&imported_source_files[index]
609-
}
610-
};
611-
612-
// Make sure our binary search above is correct.
584+
// Make sure our span is well-formed.
613585
debug_assert!(
614-
lo >= source_file.original_start_pos && lo <= source_file.original_end_pos,
615-
"Bad binary search: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
586+
lo + source_file.original_start_pos <= source_file.original_end_pos,
587+
"Malformed encoded span: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
616588
lo,
617589
source_file.original_start_pos,
618590
source_file.original_end_pos
619591
);
620592

621-
// Make sure we correctly filtered out invalid spans during encoding
593+
// Make sure we correctly filtered out invalid spans during encoding.
622594
debug_assert!(
623-
hi >= source_file.original_start_pos && hi <= source_file.original_end_pos,
624-
"Bad binary search: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
595+
hi + source_file.original_start_pos <= source_file.original_end_pos,
596+
"Malformed encoded span: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
625597
hi,
626598
source_file.original_start_pos,
627599
source_file.original_end_pos
628600
);
629601

630-
let lo =
631-
(lo + source_file.translated_source_file.start_pos) - source_file.original_start_pos;
632-
let hi =
633-
(hi + source_file.translated_source_file.start_pos) - source_file.original_start_pos;
602+
let lo = lo + source_file.translated_source_file.start_pos;
603+
let hi = hi + source_file.translated_source_file.start_pos;
634604

635605
// Do not try to decode parent for foreign spans.
636606
Span::new(lo, hi, ctxt, None)
@@ -1482,7 +1452,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
14821452
///
14831453
/// Proc macro crates don't currently export spans, so this function does not have
14841454
/// to work for them.
1485-
fn imported_source_files(self, sess: &Session) -> &'a [ImportedSourceFile] {
1455+
fn imported_source_file(self, source_file_index: u32, sess: &Session) -> ImportedSourceFile {
14861456
fn filter<'a>(sess: &Session, path: Option<&'a Path>) -> Option<&'a Path> {
14871457
path.filter(|_| {
14881458
// Only spend time on further checks if we have what to translate *to*.
@@ -1570,90 +1540,96 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
15701540
}
15711541
};
15721542

1573-
self.cdata.source_map_import_info.get_or_init(|| {
1574-
let external_source_map = self.root.source_map.decode(self);
1575-
1576-
external_source_map
1577-
.map(|source_file_to_import| {
1578-
// We can't reuse an existing SourceFile, so allocate a new one
1579-
// containing the information we need.
1580-
let rustc_span::SourceFile {
1581-
mut name,
1582-
src_hash,
1583-
start_pos,
1584-
end_pos,
1585-
lines,
1586-
multibyte_chars,
1587-
non_narrow_chars,
1588-
normalized_pos,
1589-
name_hash,
1590-
..
1591-
} = source_file_to_import;
1592-
1593-
// If this file is under $sysroot/lib/rustlib/src/ but has not been remapped
1594-
// during rust bootstrapping by `remap-debuginfo = true`, and the user
1595-
// wish to simulate that behaviour by -Z simulate-remapped-rust-src-base,
1596-
// then we change `name` to a similar state as if the rust was bootstrapped
1597-
// with `remap-debuginfo = true`.
1598-
// This is useful for testing so that tests about the effects of
1599-
// `try_to_translate_virtual_to_real` don't have to worry about how the
1600-
// compiler is bootstrapped.
1601-
if let Some(virtual_dir) =
1602-
&sess.opts.unstable_opts.simulate_remapped_rust_src_base
1603-
{
1604-
if let Some(real_dir) = &sess.opts.real_rust_source_base_dir {
1605-
if let rustc_span::FileName::Real(ref mut old_name) = name {
1606-
if let rustc_span::RealFileName::LocalPath(local) = old_name {
1607-
if let Ok(rest) = local.strip_prefix(real_dir) {
1608-
*old_name = rustc_span::RealFileName::Remapped {
1609-
local_path: None,
1610-
virtual_name: virtual_dir.join(rest),
1611-
};
1612-
}
1543+
let mut import_info = self.cdata.source_map_import_info.lock();
1544+
for _ in import_info.len()..=(source_file_index as usize) {
1545+
import_info.push(None);
1546+
}
1547+
import_info[source_file_index as usize]
1548+
.get_or_insert_with(|| {
1549+
let source_file_to_import = self
1550+
.root
1551+
.source_map
1552+
.get(self, source_file_index)
1553+
.expect("missing source file")
1554+
.decode(self);
1555+
1556+
// We can't reuse an existing SourceFile, so allocate a new one
1557+
// containing the information we need.
1558+
let rustc_span::SourceFile {
1559+
mut name,
1560+
src_hash,
1561+
start_pos,
1562+
end_pos,
1563+
lines,
1564+
multibyte_chars,
1565+
non_narrow_chars,
1566+
normalized_pos,
1567+
name_hash,
1568+
..
1569+
} = source_file_to_import;
1570+
1571+
// If this file is under $sysroot/lib/rustlib/src/ but has not been remapped
1572+
// during rust bootstrapping by `remap-debuginfo = true`, and the user
1573+
// wishes to simulate that behaviour by -Z simulate-remapped-rust-src-base,
1574+
// then we change `name` to a similar state as if the rust was bootstrapped
1575+
// with `remap-debuginfo = true`.
1576+
// This is useful for testing so that tests about the effects of
1577+
// `try_to_translate_virtual_to_real` don't have to worry about how the
1578+
// compiler is bootstrapped.
1579+
if let Some(virtual_dir) = &sess.opts.unstable_opts.simulate_remapped_rust_src_base
1580+
{
1581+
if let Some(real_dir) = &sess.opts.real_rust_source_base_dir {
1582+
if let rustc_span::FileName::Real(ref mut old_name) = name {
1583+
if let rustc_span::RealFileName::LocalPath(local) = old_name {
1584+
if let Ok(rest) = local.strip_prefix(real_dir) {
1585+
*old_name = rustc_span::RealFileName::Remapped {
1586+
local_path: None,
1587+
virtual_name: virtual_dir.join(rest),
1588+
};
16131589
}
16141590
}
16151591
}
16161592
}
1593+
}
16171594

1618-
// If this file's path has been remapped to `/rustc/$hash`,
1619-
// we might be able to reverse that (also see comments above,
1620-
// on `try_to_translate_virtual_to_real`).
1621-
try_to_translate_virtual_to_real(&mut name);
1622-
1623-
let source_length = (end_pos - start_pos).to_usize();
1624-
1625-
let local_version = sess.source_map().new_imported_source_file(
1626-
name,
1627-
src_hash,
1628-
name_hash,
1629-
source_length,
1630-
self.cnum,
1631-
lines,
1632-
multibyte_chars,
1633-
non_narrow_chars,
1634-
normalized_pos,
1635-
start_pos,
1636-
end_pos,
1637-
);
1638-
debug!(
1639-
"CrateMetaData::imported_source_files alloc \
1595+
// If this file's path has been remapped to `/rustc/$hash`,
1596+
// we might be able to reverse that (also see comments above,
1597+
// on `try_to_translate_virtual_to_real`).
1598+
try_to_translate_virtual_to_real(&mut name);
1599+
1600+
let source_length = (end_pos - start_pos).to_usize();
1601+
1602+
let local_version = sess.source_map().new_imported_source_file(
1603+
name,
1604+
src_hash,
1605+
name_hash,
1606+
source_length,
1607+
self.cnum,
1608+
lines,
1609+
multibyte_chars,
1610+
non_narrow_chars,
1611+
normalized_pos,
1612+
start_pos,
1613+
source_file_index,
1614+
);
1615+
debug!(
1616+
"CrateMetaData::imported_source_files alloc \
16401617
source_file {:?} original (start_pos {:?} end_pos {:?}) \
16411618
translated (start_pos {:?} end_pos {:?})",
1642-
local_version.name,
1643-
start_pos,
1644-
end_pos,
1645-
local_version.start_pos,
1646-
local_version.end_pos
1647-
);
1619+
local_version.name,
1620+
start_pos,
1621+
end_pos,
1622+
local_version.start_pos,
1623+
local_version.end_pos
1624+
);
16481625

1649-
ImportedSourceFile {
1650-
original_start_pos: start_pos,
1651-
original_end_pos: end_pos,
1652-
translated_source_file: local_version,
1653-
}
1654-
})
1655-
.collect()
1656-
})
1626+
ImportedSourceFile {
1627+
original_start_pos: start_pos,
1628+
original_end_pos: end_pos,
1629+
translated_source_file: local_version,
1630+
}
1631+
})
1632+
.clone()
16571633
}
16581634

16591635
fn get_generator_diagnostic_data(
@@ -1716,7 +1692,7 @@ impl CrateMetadata {
17161692
trait_impls,
17171693
incoherent_impls: Default::default(),
17181694
raw_proc_macros,
1719-
source_map_import_info: OnceCell::new(),
1695+
source_map_import_info: Lock::new(Vec::new()),
17201696
def_path_hash_map,
17211697
expn_hash_map: Default::default(),
17221698
alloc_decoding_state,

compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,9 @@ impl CrateStore for CStore {
676676
}
677677

678678
fn import_source_files(&self, sess: &Session, cnum: CrateNum) {
679-
self.get_crate_data(cnum).imported_source_files(sess);
679+
let cdata = self.get_crate_data(cnum);
680+
for file_index in 0..cdata.root.source_map.size() {
681+
cdata.imported_source_file(file_index as u32, sess);
682+
}
680683
}
681684
}

0 commit comments

Comments
 (0)