Skip to content

Commit 3b3bb0e

Browse files
committed
Auto merge of #22235 - michaelwoerister:cross-crate-spans, r=michaelwoerister
This makes it possible to create proper debuginfo line information for items inlined from other crates (e.g. instantiations of generics). Only the codemap's 'metadata' is stored in a crate's metadata. That is, just the filename, the positions of line beginnings, etc., but not the actual source code itself.

Crate metadata size is increased by this change because spans in the encoded ASTs now take up space:

```
                 BEFORE      AFTER
libcore          36 MiB      39.6 MiB    +10%
libsyntax        51.1 MiB    60.5 MiB    +18.4%
libcollections   11.2 MiB    12.8 MiB    +14.3%
```

This only affects binaries containing metadata (rlibs and dylibs); executables should not be affected in size.

Fixes #19228 and probably #22226.
2 parents bdf6e4f + 2f88655 commit 3b3bb0e

File tree

12 files changed

+693
-172
lines changed

12 files changed

+693
-172
lines changed

Diff for: src/librustc/metadata/common.rs

+3
Original file line numberDiff line numberDiff line change
@@ -252,3 +252,6 @@ pub const tag_macro_def: uint = 0x9e;
252252
pub const tag_macro_def_body: uint = 0x9f;
253253

254254
pub const tag_paren_sugar: uint = 0xa0;
255+
256+
pub const tag_codemap: uint = 0xa1;
257+
pub const tag_codemap_filemap: uint = 0xa2;

Diff for: src/librustc/metadata/creader.rs

+133-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use syntax::ast;
2626
use syntax::abi;
2727
use syntax::attr;
2828
use syntax::attr::AttrMetaMethods;
29-
use syntax::codemap::{Span, mk_sp};
29+
use syntax::codemap::{self, Span, mk_sp, Pos};
3030
use syntax::parse;
3131
use syntax::parse::token::InternedString;
3232
use syntax::parse::token;
@@ -373,15 +373,17 @@ impl<'a> CrateReader<'a> {
373373
// Maintain a reference to the top most crate.
374374
let root = if root.is_some() { root } else { &crate_paths };
375375

376-
let cnum_map = self.resolve_crate_deps(root, lib.metadata.as_slice(), span);
376+
let loader::Library { dylib, rlib, metadata } = lib;
377377

378-
let loader::Library{ dylib, rlib, metadata } = lib;
378+
let cnum_map = self.resolve_crate_deps(root, metadata.as_slice(), span);
379+
let codemap_import_info = import_codemap(self.sess.codemap(), &metadata);
379380

380381
let cmeta = Rc::new( cstore::crate_metadata {
381382
name: name.to_string(),
382383
data: metadata,
383384
cnum_map: cnum_map,
384385
cnum: cnum,
386+
codemap_import_info: codemap_import_info,
385387
span: span,
386388
});
387389

@@ -586,3 +588,131 @@ impl<'a> CrateReader<'a> {
586588
}
587589
}
588590
}
591+
592+
/// Imports the codemap from an external crate into the codemap of the crate
593+
/// currently being compiled (the "local crate").
594+
///
595+
/// The import algorithm works analogous to how AST items are inlined from an
596+
/// external crate's metadata:
597+
/// For every FileMap in the external codemap an 'inline' copy is created in the
598+
/// local codemap. The correspondence relation between external and local
599+
/// FileMaps is recorded in the `ImportedFileMap` objects returned from this
600+
/// function. When an item from an external crate is later inlined into this
601+
/// crate, this correspondence information is used to translate the span
602+
/// information of the inlined item so that it refers the correct positions in
603+
/// the local codemap (see `astencode::DecodeContext::tr_span()`).
604+
///
605+
/// The import algorithm in the function below will reuse FileMaps already
606+
/// existing in the local codemap. For example, even if the FileMap of some
607+
/// source file of libstd gets imported many times, there will only ever be
608+
/// one FileMap object for the corresponding file in the local codemap.
609+
///
610+
/// Note that imported FileMaps do not actually contain the source code of the
611+
/// file they represent, just information about length, line breaks, and
612+
/// multibyte characters. This information is enough to generate valid debuginfo
613+
/// for items inlined from other crates.
614+
fn import_codemap(local_codemap: &codemap::CodeMap,
615+
metadata: &MetadataBlob)
616+
-> Vec<cstore::ImportedFileMap> {
617+
let external_codemap = decoder::get_imported_filemaps(metadata.as_slice());
618+
619+
let imported_filemaps = external_codemap.into_iter().map(|filemap_to_import| {
620+
// Try to find an existing FileMap that can be reused for the filemap to
621+
// be imported. A FileMap is reusable if it is exactly the same, just
622+
// positioned at a different offset within the codemap.
623+
let reusable_filemap = {
624+
local_codemap.files
625+
.borrow()
626+
.iter()
627+
.find(|fm| are_equal_modulo_startpos(&fm, &filemap_to_import))
628+
.map(|rc| rc.clone())
629+
};
630+
631+
match reusable_filemap {
632+
Some(fm) => {
633+
cstore::ImportedFileMap {
634+
original_start_pos: filemap_to_import.start_pos,
635+
original_end_pos: filemap_to_import.end_pos,
636+
translated_filemap: fm
637+
}
638+
}
639+
None => {
640+
// We can't reuse an existing FileMap, so allocate a new one
641+
// containing the information we need.
642+
let codemap::FileMap {
643+
name,
644+
start_pos,
645+
end_pos,
646+
lines,
647+
multibyte_chars,
648+
..
649+
} = filemap_to_import;
650+
651+
let source_length = (end_pos - start_pos).to_usize();
652+
653+
// Translate line-start positions and multibyte character
654+
// position into frame of reference local to file.
655+
// `CodeMap::new_imported_filemap()` will then translate those
656+
// coordinates to their new global frame of reference when the
657+
// offset of the FileMap is known.
658+
let lines = lines.into_inner().map_in_place(|pos| pos - start_pos);
659+
let multibyte_chars = multibyte_chars
660+
.into_inner()
661+
.map_in_place(|mbc|
662+
codemap::MultiByteChar {
663+
pos: mbc.pos + start_pos,
664+
bytes: mbc.bytes
665+
});
666+
667+
let local_version = local_codemap.new_imported_filemap(name,
668+
source_length,
669+
lines,
670+
multibyte_chars);
671+
cstore::ImportedFileMap {
672+
original_start_pos: start_pos,
673+
original_end_pos: end_pos,
674+
translated_filemap: local_version
675+
}
676+
}
677+
}
678+
}).collect();
679+
680+
return imported_filemaps;
681+
682+
fn are_equal_modulo_startpos(fm1: &codemap::FileMap,
683+
fm2: &codemap::FileMap)
684+
-> bool {
685+
if fm1.name != fm2.name {
686+
return false;
687+
}
688+
689+
let lines1 = fm1.lines.borrow();
690+
let lines2 = fm2.lines.borrow();
691+
692+
if lines1.len() != lines2.len() {
693+
return false;
694+
}
695+
696+
for (&line1, &line2) in lines1.iter().zip(lines2.iter()) {
697+
if (line1 - fm1.start_pos) != (line2 - fm2.start_pos) {
698+
return false;
699+
}
700+
}
701+
702+
let multibytes1 = fm1.multibyte_chars.borrow();
703+
let multibytes2 = fm2.multibyte_chars.borrow();
704+
705+
if multibytes1.len() != multibytes2.len() {
706+
return false;
707+
}
708+
709+
for (mb1, mb2) in multibytes1.iter().zip(multibytes2.iter()) {
710+
if (mb1.bytes != mb2.bytes) ||
711+
((mb1.pos - fm1.start_pos) != (mb2.pos - fm2.start_pos)) {
712+
return false;
713+
}
714+
}
715+
716+
true
717+
}
718+
}

Diff for: src/librustc/metadata/cstore.rs

+14-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use std::cell::RefCell;
2727
use std::rc::Rc;
2828
use flate::Bytes;
2929
use syntax::ast;
30-
use syntax::codemap::Span;
30+
use syntax::codemap;
3131
use syntax::parse::token::IdentInterner;
3232

3333
// A map from external crate numbers (as decoded from some crate file) to
@@ -41,12 +41,24 @@ pub enum MetadataBlob {
4141
MetadataArchive(loader::ArchiveMetadata),
4242
}
4343

44+
/// Holds information about a codemap::FileMap imported from another crate.
45+
/// See creader::import_codemap() for more information.
46+
pub struct ImportedFileMap {
47+
/// This FileMap's byte-offset within the codemap of its original crate
48+
pub original_start_pos: codemap::BytePos,
49+
/// The end of this FileMap within the codemap of its original crate
50+
pub original_end_pos: codemap::BytePos,
51+
/// The imported FileMap's representation within the local codemap
52+
pub translated_filemap: Rc<codemap::FileMap>
53+
}
54+
4455
pub struct crate_metadata {
4556
pub name: String,
4657
pub data: MetadataBlob,
4758
pub cnum_map: cnum_map,
4859
pub cnum: ast::CrateNum,
49-
pub span: Span,
60+
pub codemap_import_info: Vec<ImportedFileMap>,
61+
pub span: codemap::Span,
5062
}
5163

5264
#[derive(Copy, Debug, PartialEq, Clone)]

Diff for: src/librustc/metadata/decoder.rs

+16-1
Original file line numberDiff line numberDiff line change
@@ -1561,11 +1561,26 @@ pub fn is_associated_type(cdata: Cmd, id: ast::NodeId) -> bool {
15611561
}
15621562
}
15631563

1564-
15651564
pub fn is_default_trait<'tcx>(cdata: Cmd, id: ast::NodeId) -> bool {
15661565
let item_doc = lookup_item(id, cdata.data());
15671566
match item_family(item_doc) {
15681567
Family::DefaultImpl => true,
15691568
_ => false
15701569
}
15711570
}
1571+
1572+
pub fn get_imported_filemaps(metadata: &[u8]) -> Vec<codemap::FileMap> {
1573+
let crate_doc = rbml::Doc::new(metadata);
1574+
let cm_doc = reader::get_doc(crate_doc, tag_codemap);
1575+
1576+
let mut filemaps = vec![];
1577+
1578+
reader::tagged_docs(cm_doc, tag_codemap_filemap, |filemap_doc| {
1579+
let mut decoder = reader::Decoder::new(filemap_doc);
1580+
let filemap: codemap::FileMap = Decodable::decode(&mut decoder).unwrap();
1581+
filemaps.push(filemap);
1582+
true
1583+
});
1584+
1585+
return filemaps;
1586+
}

Diff for: src/librustc/metadata/encoder.rs

+30
Original file line numberDiff line numberDiff line change
@@ -1751,6 +1751,28 @@ fn encode_plugin_registrar_fn(ecx: &EncodeContext, rbml_w: &mut Encoder) {
17511751
}
17521752
}
17531753

1754+
fn encode_codemap(ecx: &EncodeContext, rbml_w: &mut Encoder) {
1755+
rbml_w.start_tag(tag_codemap);
1756+
let codemap = ecx.tcx.sess.codemap();
1757+
1758+
for filemap in &codemap.files.borrow()[..] {
1759+
1760+
if filemap.lines.borrow().len() == 0 || filemap.is_imported() {
1761+
// No need to export empty filemaps, as they can't contain spans
1762+
// that need translation.
1763+
// Also no need to re-export imported filemaps, as any downstream
1764+
// crate will import them from their original source.
1765+
continue;
1766+
}
1767+
1768+
rbml_w.start_tag(tag_codemap_filemap);
1769+
filemap.encode(rbml_w);
1770+
rbml_w.end_tag();
1771+
}
1772+
1773+
rbml_w.end_tag();
1774+
}
1775+
17541776
/// Serialize the text of the exported macros
17551777
fn encode_macro_defs(rbml_w: &mut Encoder,
17561778
krate: &ast::Crate) {
@@ -1968,6 +1990,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter,
19681990
lang_item_bytes: u64,
19691991
native_lib_bytes: u64,
19701992
plugin_registrar_fn_bytes: u64,
1993+
codemap_bytes: u64,
19711994
macro_defs_bytes: u64,
19721995
impl_bytes: u64,
19731996
misc_bytes: u64,
@@ -1982,6 +2005,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter,
19822005
lang_item_bytes: 0,
19832006
native_lib_bytes: 0,
19842007
plugin_registrar_fn_bytes: 0,
2008+
codemap_bytes: 0,
19852009
macro_defs_bytes: 0,
19862010
impl_bytes: 0,
19872011
misc_bytes: 0,
@@ -2047,6 +2071,11 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter,
20472071
encode_plugin_registrar_fn(&ecx, &mut rbml_w);
20482072
stats.plugin_registrar_fn_bytes = rbml_w.writer.tell().unwrap() - i;
20492073

2074+
// Encode codemap
2075+
i = rbml_w.writer.tell().unwrap();
2076+
encode_codemap(&ecx, &mut rbml_w);
2077+
stats.codemap_bytes = rbml_w.writer.tell().unwrap() - i;
2078+
20502079
// Encode macro definitions
20512080
i = rbml_w.writer.tell().unwrap();
20522081
encode_macro_defs(&mut rbml_w, krate);
@@ -2091,6 +2120,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter,
20912120
println!(" lang item bytes: {}", stats.lang_item_bytes);
20922121
println!(" native bytes: {}", stats.native_lib_bytes);
20932122
println!("plugin registrar bytes: {}", stats.plugin_registrar_fn_bytes);
2123+
println!(" codemap bytes: {}", stats.codemap_bytes);
20942124
println!(" macro def bytes: {}", stats.macro_defs_bytes);
20952125
println!(" impl bytes: {}", stats.impl_bytes);
20962126
println!(" misc bytes: {}", stats.misc_bytes);

Diff for: src/librustc/middle/astencode.rs

+49-4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ use syntax;
4242
use std::old_io::Seek;
4343
use std::num::FromPrimitive;
4444
use std::rc::Rc;
45+
use std::cell::Cell;
4546

4647
use rbml::reader;
4748
use rbml::writer::Encoder;
@@ -58,7 +59,9 @@ struct DecodeContext<'a, 'b, 'tcx: 'a> {
5859
tcx: &'a ty::ctxt<'tcx>,
5960
cdata: &'b cstore::crate_metadata,
6061
from_id_range: ast_util::IdRange,
61-
to_id_range: ast_util::IdRange
62+
to_id_range: ast_util::IdRange,
63+
// Cache the last used filemap for translating spans as an optimization.
64+
last_filemap_index: Cell<usize>,
6265
}
6366

6467
trait tr {
@@ -120,6 +123,8 @@ impl<'a, 'b, 'c, 'tcx> ast_map::FoldOps for &'a DecodeContext<'b, 'c, 'tcx> {
120123
}
121124
}
122125

126+
/// Decodes an item from its AST in the cdata's metadata and adds it to the
127+
/// ast-map.
123128
pub fn decode_inlined_item<'tcx>(cdata: &cstore::crate_metadata,
124129
tcx: &ty::ctxt<'tcx>,
125130
path: Vec<ast_map::PathElem>,
@@ -143,7 +148,8 @@ pub fn decode_inlined_item<'tcx>(cdata: &cstore::crate_metadata,
143148
cdata: cdata,
144149
tcx: tcx,
145150
from_id_range: from_id_range,
146-
to_id_range: to_id_range
151+
to_id_range: to_id_range,
152+
last_filemap_index: Cell::new(0)
147153
};
148154
let raw_ii = decode_ast(ast_doc);
149155
let ii = ast_map::map_decoded_item(&dcx.tcx.map, path, raw_ii, dcx);
@@ -234,8 +240,47 @@ impl<'a, 'b, 'tcx> DecodeContext<'a, 'b, 'tcx> {
234240
assert_eq!(did.krate, ast::LOCAL_CRATE);
235241
ast::DefId { krate: ast::LOCAL_CRATE, node: self.tr_id(did.node) }
236242
}
237-
pub fn tr_span(&self, _span: Span) -> Span {
238-
codemap::DUMMY_SP // FIXME (#1972): handle span properly
243+
244+
/// Translates a `Span` from an extern crate to the corresponding `Span`
245+
/// within the local crate's codemap. `creader::import_codemap()` will
246+
/// already have allocated any additionally needed FileMaps in the local
247+
/// codemap as a side-effect of creating the crate_metadata's
248+
/// `codemap_import_info`.
249+
pub fn tr_span(&self, span: Span) -> Span {
250+
let imported_filemaps = &self.cdata.codemap_import_info[..];
251+
252+
let filemap_index = {
253+
// Optimize for the case that most spans within a translated item
254+
// originate from the same filemap.
255+
let last_filemap_index = self.last_filemap_index.get();
256+
257+
if span.lo >= imported_filemaps[last_filemap_index].original_start_pos &&
258+
span.hi <= imported_filemaps[last_filemap_index].original_end_pos {
259+
last_filemap_index
260+
} else {
261+
let mut a = 0;
262+
let mut b = imported_filemaps.len();
263+
264+
while b - a > 1 {
265+
let m = (a + b) / 2;
266+
if imported_filemaps[m].original_start_pos > span.lo {
267+
b = m;
268+
} else {
269+
a = m;
270+
}
271+
}
272+
273+
self.last_filemap_index.set(a);
274+
a
275+
}
276+
};
277+
278+
let lo = (span.lo - imported_filemaps[filemap_index].original_start_pos) +
279+
imported_filemaps[filemap_index].translated_filemap.start_pos;
280+
let hi = (span.hi - imported_filemaps[filemap_index].original_start_pos) +
281+
imported_filemaps[filemap_index].translated_filemap.start_pos;
282+
283+
codemap::mk_sp(lo, hi)
239284
}
240285
}
241286

Diff for: src/librustc_driver/pretty.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,11 @@ pub fn pretty_print_input(sess: Session,
542542

543543
let src_name = driver::source_name(input);
544544
let src = sess.codemap().get_filemap(&src_name[..])
545-
.src.as_bytes().to_vec();
545+
.src
546+
.as_ref()
547+
.unwrap()
548+
.as_bytes()
549+
.to_vec();
546550
let mut rdr = MemReader::new(src);
547551

548552
let out = match ofile {

0 commit comments

Comments
 (0)