Skip to content

Commit 7c8bf23

Browse files
committed
Store metadata separately in rlib files
Right now, whenever an rlib file is linked against, all of the metadata from the rlib is pulled into the final staticlib or binary. The reason for this is that the metadata is currently stored in a section of the object file. Note that this is intentional for dynamic libraries in order to distribute metadata bundled with static libraries. This commit alters the situation for rlib libraries to instead store the metadata in a separate file in the archive. In doing so, when the archive is passed to the linker, none of the metadata will get pulled into the resulting executable. Furthermore, the metadata file is skipped when assembling rlibs into an archive. The snag in this implementation comes with multiple output formats. When generating a dylib, the metadata needs to be in the object file, but when generating an rlib this needs to be separate. In order to accomplish this, the metadata variable is inserted into an entirely separate LLVM Module which is then codegen'd into a different location (foo.metadata.o). This is then linked into dynamic libraries and silently ignored for rlib files. While changing how metadata is inserted into archives, I have also stopped compressing metadata when inserted into rlib files. We have wanted to stop compressing metadata, but the sections it creates in object files are apparently too large. Thankfully, if it's just an arbitrary file it doesn't matter how large it is. I have seen massive reductions in executable sizes, as well as staticlib output sizes (to confirm that this is all working).
1 parent 693ec73 commit 7c8bf23

File tree

7 files changed

+156
-78
lines changed

7 files changed

+156
-78
lines changed

src/librustc/back/archive.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ use std::str;
2020
use extra::tempfile::TempDir;
2121
use syntax::abi;
2222

23+
pub static METADATA_FILENAME: &'static str = "metadata";
24+
2325
pub struct Archive {
2426
priv sess: Session,
2527
priv dst: Path,
@@ -81,17 +83,22 @@ impl Archive {
8183
/// search in the relevant locations for a library named `name`.
8284
pub fn add_native_library(&mut self, name: &str) {
8385
let location = self.find_library(name);
84-
self.add_archive(&location, name);
86+
self.add_archive(&location, name, []);
8587
}
8688

8789
/// Adds all of the contents of the rlib at the specified path to this
8890
/// archive.
8991
pub fn add_rlib(&mut self, rlib: &Path) {
9092
let name = rlib.filename_str().unwrap().split('-').next().unwrap();
91-
self.add_archive(rlib, name);
93+
self.add_archive(rlib, name, [METADATA_FILENAME]);
94+
}
95+
96+
/// Adds an arbitrary file to this archive
97+
pub fn add_file(&mut self, file: &Path) {
98+
run_ar(self.sess, "r", None, [&self.dst, file]);
9299
}
93100

94-
fn add_archive(&mut self, archive: &Path, name: &str) {
101+
fn add_archive(&mut self, archive: &Path, name: &str, skip: &[&str]) {
95102
let loc = TempDir::new("rsar").unwrap();
96103

97104
// First, extract the contents of the archive to a temporary directory
@@ -106,6 +113,7 @@ impl Archive {
106113
let mut inputs = ~[];
107114
for file in files.iter() {
108115
let filename = file.filename_str().unwrap();
116+
if skip.iter().any(|s| *s == filename) { continue }
109117
let filename = format!("r-{}-{}", name, filename);
110118
let new_filename = file.with_filename(filename);
111119
fs::rename(file, &new_filename);

src/librustc/back/link.rs

+96-27
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99
// except according to those terms.
1010

1111

12-
use back::archive::Archive;
12+
use back::archive::{Archive, METADATA_FILENAME};
1313
use back::rpath;
14+
use driver::driver::CrateTranslation;
1415
use driver::session::Session;
1516
use driver::session;
1617
use lib::llvm::llvm;
@@ -191,10 +192,11 @@ pub mod write {
191192
use back::link::{output_type_assembly, output_type_bitcode};
192193
use back::link::{output_type_exe, output_type_llvm_assembly};
193194
use back::link::{output_type_object};
195+
use driver::driver::CrateTranslation;
194196
use driver::session::Session;
195197
use driver::session;
196198
use lib::llvm::llvm;
197-
use lib::llvm::{ModuleRef, ContextRef};
199+
use lib::llvm::ModuleRef;
198200
use lib;
199201

200202
use std::c_str::ToCStr;
@@ -204,10 +206,11 @@ pub mod write {
204206
use std::str;
205207

206208
pub fn run_passes(sess: Session,
207-
llcx: ContextRef,
208-
llmod: ModuleRef,
209+
trans: &CrateTranslation,
209210
output_type: output_type,
210211
output: &Path) {
212+
let llmod = trans.module;
213+
let llcx = trans.context;
211214
unsafe {
212215
llvm::LLVMInitializePasses();
213216

@@ -313,12 +316,23 @@ pub mod write {
313316
// context, so don't dispose
314317
jit::exec(sess, llcx, llmod, true);
315318
} else {
316-
// Create a codegen-specific pass manager to emit the actual
317-
// assembly or object files. This may not end up getting used,
318-
// but we make it anyway for good measure.
319-
let cpm = llvm::LLVMCreatePassManager();
320-
llvm::LLVMRustAddAnalysisPasses(tm, cpm, llmod);
321-
llvm::LLVMRustAddLibraryInfo(cpm, llmod);
319+
// A codegen-specific pass manager is used to generate object
320+
// files for an LLVM module.
321+
//
322+
// Apparently each of these pass managers is a one-shot kind of
323+
// thing, so we create a new one for each type of output. The
324+
// pass manager passed to the closure should be ensured to not
325+
// escape the closure itself, and the manager should only be
326+
// used once.
327+
fn with_codegen(tm: TargetMachineRef, llmod: ModuleRef,
328+
f: |PassManagerRef|) {
329+
let cpm = llvm::LLVMCreatePassManager();
330+
llvm::LLVMRustAddAnalysisPasses(tm, cpm, llmod);
331+
llvm::LLVMRustAddLibraryInfo(cpm, llmod);
332+
f(cpm);
333+
llvm::LLVMDisposePassManager(cpm);
334+
335+
}
322336

323337
match output_type {
324338
output_type_none => {}
@@ -329,21 +343,48 @@ pub mod write {
329343
}
330344
output_type_llvm_assembly => {
331345
output.with_c_str(|output| {
332-
llvm::LLVMRustPrintModule(cpm, llmod, output)
346+
with_codegen(tm, llmod, |cpm| {
347+
llvm::LLVMRustPrintModule(cpm, llmod, output);
348+
})
333349
})
334350
}
335351
output_type_assembly => {
336-
WriteOutputFile(sess, tm, cpm, llmod, output, lib::llvm::AssemblyFile);
352+
with_codegen(tm, llmod, |cpm| {
353+
WriteOutputFile(sess, tm, cpm, llmod, output,
354+
lib::llvm::AssemblyFile);
355+
});
356+
357+
// Windows will invoke this function with an assembly
358+
// output type when it's actually generating an object
359+
// file. This is because g++ is used to compile the
360+
// assembly instead of having LLVM directly output an
361+
// object file. Regardless, in this case, we're going to
362+
// possibly need a metadata file.
363+
if sess.opts.output_type != output_type_assembly {
364+
with_codegen(tm, trans.metadata_module, |cpm| {
365+
let out = output.with_extension("metadata.o");
366+
WriteOutputFile(sess, tm, cpm,
367+
trans.metadata_module, &out,
368+
lib::llvm::ObjectFile);
369+
})
370+
}
337371
}
338372
output_type_exe | output_type_object => {
339-
WriteOutputFile(sess, tm, cpm, llmod, output, lib::llvm::ObjectFile);
373+
with_codegen(tm, llmod, |cpm| {
374+
WriteOutputFile(sess, tm, cpm, llmod, output,
375+
lib::llvm::ObjectFile);
376+
});
377+
with_codegen(tm, trans.metadata_module, |cpm| {
378+
WriteOutputFile(sess, tm, cpm, trans.metadata_module,
379+
&output.with_extension("metadata.o"),
380+
lib::llvm::ObjectFile);
381+
})
340382
}
341383
}
342-
343-
llvm::LLVMDisposePassManager(cpm);
344384
}
345385

346386
llvm::LLVMRustDisposeTargetMachine(tm);
387+
llvm::LLVMDisposeModule(trans.metadata_module);
347388
// the jit takes ownership of these two items
348389
if !sess.opts.jit {
349390
llvm::LLVMDisposeModule(llmod);
@@ -895,10 +936,9 @@ pub fn get_cc_prog(sess: Session) -> ~str {
895936
/// Perform the linkage portion of the compilation phase. This will generate all
896937
/// of the requested outputs for this compilation session.
897938
pub fn link_binary(sess: Session,
898-
crate_types: &[~str],
939+
trans: &CrateTranslation,
899940
obj_filename: &Path,
900-
out_filename: &Path,
901-
lm: LinkMeta) {
941+
out_filename: &Path) {
902942
let outputs = if sess.opts.test {
903943
// If we're generating a test executable, then ignore all other output
904944
// styles at all other locations
@@ -908,7 +948,7 @@ pub fn link_binary(sess: Session,
908948
// look at what was in the crate file itself for generating output
909949
// formats.
910950
let mut outputs = sess.opts.outputs.clone();
911-
for ty in crate_types.iter() {
951+
for ty in trans.crate_types.iter() {
912952
if "bin" == *ty {
913953
outputs.push(session::OutputExecutable);
914954
} else if "dylib" == *ty || "lib" == *ty {
@@ -926,12 +966,13 @@ pub fn link_binary(sess: Session,
926966
};
927967

928968
for output in outputs.move_iter() {
929-
link_binary_output(sess, output, obj_filename, out_filename, lm);
969+
link_binary_output(sess, trans, output, obj_filename, out_filename);
930970
}
931971

932-
// Remove the temporary object file if we aren't saving temps
972+
// Remove the temporary object file and metadata if we aren't saving temps
933973
if !sess.opts.save_temps {
934974
fs::unlink(obj_filename);
975+
fs::unlink(&obj_filename.with_extension("metadata.o"));
935976
}
936977
}
937978

@@ -945,11 +986,11 @@ fn is_writeable(p: &Path) -> bool {
945986
}
946987

947988
fn link_binary_output(sess: Session,
989+
trans: &CrateTranslation,
948990
output: session::OutputStyle,
949991
obj_filename: &Path,
950-
out_filename: &Path,
951-
lm: LinkMeta) {
952-
let libname = output_lib_filename(lm);
992+
out_filename: &Path) {
993+
let libname = output_lib_filename(trans.link);
953994
let out_filename = match output {
954995
session::OutputRlib => {
955996
out_filename.with_filename(format!("lib{}.rlib", libname))
@@ -987,7 +1028,7 @@ fn link_binary_output(sess: Session,
9871028

9881029
match output {
9891030
session::OutputRlib => {
990-
link_rlib(sess, obj_filename, &out_filename);
1031+
link_rlib(sess, Some(trans), obj_filename, &out_filename);
9911032
}
9921033
session::OutputStaticlib => {
9931034
link_staticlib(sess, obj_filename, &out_filename);
@@ -1007,9 +1048,25 @@ fn link_binary_output(sess: Session,
10071048
// rlib primarily contains the object file of the crate, but it also contains
10081049
// all of the object files from native libraries. This is done by unzipping
10091050
// native libraries and inserting all of the contents into this archive.
1010-
fn link_rlib(sess: Session, obj_filename: &Path,
1051+
//
1052+
// Rather than putting the metadata in an object file section, rlibs
1053+
// contain the metadata in a separate file.
1054+
fn link_rlib(sess: Session,
1055+
trans: Option<&CrateTranslation>, // None == no metadata
1056+
obj_filename: &Path,
10111057
out_filename: &Path) -> Archive {
10121058
let mut a = Archive::create(sess, out_filename, obj_filename);
1059+
1060+
match trans {
1061+
Some(trans) => {
1062+
let metadata = obj_filename.with_filename(METADATA_FILENAME);
1063+
fs::File::create(&metadata).write(trans.metadata);
1064+
a.add_file(&metadata);
1065+
fs::unlink(&metadata);
1066+
}
1067+
None => {}
1068+
}
1069+
10131070
for &(ref l, kind) in cstore::get_used_libraries(sess.cstore).iter() {
10141071
match kind {
10151072
cstore::NativeStatic => {
@@ -1029,8 +1086,12 @@ fn link_rlib(sess: Session, obj_filename: &Path,
10291086
//
10301087
// Additionally, there's no way for us to link dynamic libraries, so we warn
10311088
// about all dynamic library dependencies that they're not linked in.
1089+
//
1090+
// There's no need to include metadata in a static archive, so make sure not to
1091+
// link in the metadata object file (and also don't prepare the archive with a
1092+
// metadata file).
10321093
fn link_staticlib(sess: Session, obj_filename: &Path, out_filename: &Path) {
1033-
let mut a = link_rlib(sess, obj_filename, out_filename);
1094+
let mut a = link_rlib(sess, None, obj_filename, out_filename);
10341095
a.add_native_library("morestack");
10351096

10361097
let crates = cstore::get_used_crates(sess.cstore, cstore::RequireStatic);
@@ -1111,6 +1172,14 @@ fn link_args(sess: Session,
11111172
~"-o", out_filename.as_str().unwrap().to_owned(),
11121173
obj_filename.as_str().unwrap().to_owned()]);
11131174

1175+
// When linking a dynamic library, we put the metadata into a section of the
1176+
// executable. This metadata is in a separate object file from the main
1177+
// object file, so we link that in here.
1178+
if dylib {
1179+
let metadata = obj_filename.with_extension("metadata.o");
1180+
args.push(metadata.as_str().unwrap().to_owned());
1181+
}
1182+
11141183
if sess.targ_cfg.os == abi::OsLinux {
11151184
// GNU-style linkers will use this to omit linking to libraries which
11161185
// don't actually fulfill any relocations, but only for libraries which

src/librustc/driver/driver.rs

+6-7
Original file line numberDiff line numberDiff line change
@@ -331,8 +331,10 @@ pub fn phase_3_run_analysis_passes(sess: Session,
331331
pub struct CrateTranslation {
332332
context: ContextRef,
333333
module: ModuleRef,
334+
metadata_module: ModuleRef,
334335
link: LinkMeta,
335336
crate_types: ~[~str],
337+
metadata: ~[u8],
336338
}
337339

338340
/// Run the translation phase to LLVM, after which the AST and analysis can
@@ -364,8 +366,7 @@ pub fn phase_5_run_llvm_passes(sess: Session,
364366

365367
time(sess.time_passes(), "LLVM passes", (), |_|
366368
link::write::run_passes(sess,
367-
trans.context,
368-
trans.module,
369+
trans,
369370
output_type,
370371
&asm_filename));
371372

@@ -378,8 +379,7 @@ pub fn phase_5_run_llvm_passes(sess: Session,
378379
} else {
379380
time(sess.time_passes(), "LLVM passes", (), |_|
380381
link::write::run_passes(sess,
381-
trans.context,
382-
trans.module,
382+
trans,
383383
sess.opts.output_type,
384384
&outputs.obj_filename));
385385
}
@@ -392,10 +392,9 @@ pub fn phase_6_link_output(sess: Session,
392392
outputs: &OutputFilenames) {
393393
time(sess.time_passes(), "linking", (), |_|
394394
link::link_binary(sess,
395-
trans.crate_types,
395+
trans,
396396
&outputs.obj_filename,
397-
&outputs.out_filename,
398-
trans.link));
397+
&outputs.out_filename));
399398
}
400399

401400
pub fn stop_after_phase_3(sess: Session) -> bool {

src/librustc/metadata/encoder.rs

+6-8
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ use middle::ty;
2121
use middle::typeck;
2222
use middle;
2323

24+
use std::cast;
2425
use std::hashmap::{HashMap, HashSet};
25-
use std::io::{Writer, Seek, Decorator};
2626
use std::io::mem::MemWriter;
27+
use std::io::{Writer, Seek, Decorator};
2728
use std::str;
29+
use std::util;
2830
use std::vec;
2931

30-
use extra::flate;
3132
use extra::serialize::Encodable;
3233
use extra;
3334

@@ -47,8 +48,6 @@ use syntax::parse::token;
4748
use syntax;
4849
use writer = extra::ebml::writer;
4950

50-
use std::cast;
51-
5251
// used by astencode:
5352
type abbrev_map = @mut HashMap<ty::t, tyencode::ty_abbrev>;
5453

@@ -1887,10 +1886,9 @@ pub fn encode_metadata(parms: EncodeParams, crate: &Crate) -> ~[u8] {
18871886
// remaining % 4 bytes.
18881887
wr.write(&[0u8, 0u8, 0u8, 0u8]);
18891888

1890-
let writer_bytes: &mut ~[u8] = wr.inner_mut_ref();
1891-
1892-
metadata_encoding_version.to_owned() +
1893-
flate::deflate_bytes(*writer_bytes)
1889+
// This is a horrible thing to do to the outer MemWriter, but thankfully we
1890+
// don't use it again so... it's ok right?
1891+
return util::replace(wr.inner_mut_ref(), ~[]);
18941892
}
18951893

18961894
// Get the encoded string for a type

0 commit comments

Comments
 (0)