Skip to content

Commit d3ac386

Browse files
committed
auto merge of #16139 : michaelwoerister/rust/rlib-bc-versioning, r=alexcrichton
Before this commit, the LLVM IR of exported items was simply zip-compressed and stored as an object file inside rlib archives. This commit adds a header to this "object" containing a file identifier and a format version number so the compiler can deal with changes in the way bytecode objects are stored within rlibs. While updating the format of bytecode objects, this commit also works around a problem in LLDB, which could not handle odd-sized objects within archives before mid-2014. Fixes #15950.
2 parents 2574160 + ff0fa8f commit d3ac386

File tree

2 files changed

+168
-38
lines changed

2 files changed

+168
-38
lines changed

src/librustc/back/link.rs

+88-15
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use std::char;
3232
use std::collections::HashSet;
3333
use std::io::{fs, TempDir, Command};
3434
use std::io;
35+
use std::mem;
3536
use std::ptr;
3637
use std::str;
3738
use std::string::String;
@@ -45,6 +46,36 @@ use syntax::attr::AttrMetaMethods;
4546
use syntax::codemap::Span;
4647
use syntax::parse::token;
4748

49+
// RLIB LLVM-BYTECODE OBJECT LAYOUT
50+
// Version 1
51+
// Bytes Data
52+
// 0..10 "RUST_OBJECT" encoded in ASCII
53+
// 11..14 format version as little-endian u32
54+
// 15..22 size in bytes of deflate compressed LLVM bitcode as
55+
// little-endian u64
56+
// 23.. compressed LLVM bitcode
57+
58+
// This is the "magic number" expected at the beginning of an LLVM bytecode
59+
// object in an rlib.
60+
pub static RLIB_BYTECODE_OBJECT_MAGIC: &'static [u8] = b"RUST_OBJECT";
61+
62+
// The version number this compiler will write to bytecode objects in rlibs
63+
pub static RLIB_BYTECODE_OBJECT_VERSION: u32 = 1;
64+
65+
// The offset in bytes the bytecode object format version number can be found at
66+
pub static RLIB_BYTECODE_OBJECT_VERSION_OFFSET: uint = 11;
67+
68+
// The offset in bytes the size of the compressed bytecode can be found at in
69+
// format version 1
70+
pub static RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET: uint =
71+
RLIB_BYTECODE_OBJECT_VERSION_OFFSET + 4;
72+
73+
// The offset in bytes the compressed LLVM bytecode can be found at in format
74+
// version 1
75+
pub static RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET: uint =
76+
RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET + 8;
77+
78+
4879
#[deriving(Clone, PartialEq, PartialOrd, Ord, Eq)]
4980
pub enum OutputType {
5081
OutputTypeBitcode,
@@ -1103,28 +1134,44 @@ fn link_rlib<'a>(sess: &'a Session,
11031134
// is never exactly 16 bytes long by adding a 16 byte extension to
11041135
// it. This is to work around a bug in LLDB that would cause it to
11051136
// crash if the name of a file in an archive was exactly 16 bytes.
1106-
let bc = obj_filename.with_extension("bc");
1107-
let bc_deflated = obj_filename.with_extension("bytecode.deflate");
1108-
match fs::File::open(&bc).read_to_end().and_then(|data| {
1109-
fs::File::create(&bc_deflated)
1110-
.write(match flate::deflate_bytes(data.as_slice()) {
1111-
Some(compressed) => compressed,
1112-
None => sess.fatal("failed to compress bytecode")
1113-
}.as_slice())
1114-
}) {
1137+
let bc_filename = obj_filename.with_extension("bc");
1138+
let bc_deflated_filename = obj_filename.with_extension("bytecode.deflate");
1139+
1140+
let bc_data = match fs::File::open(&bc_filename).read_to_end() {
1141+
Ok(buffer) => buffer,
1142+
Err(e) => sess.fatal(format!("failed to read bytecode: {}",
1143+
e).as_slice())
1144+
};
1145+
1146+
let bc_data_deflated = match flate::deflate_bytes(bc_data.as_slice()) {
1147+
Some(compressed) => compressed,
1148+
None => sess.fatal(format!("failed to compress bytecode from {}",
1149+
bc_filename.display()).as_slice())
1150+
};
1151+
1152+
let mut bc_file_deflated = match fs::File::create(&bc_deflated_filename) {
1153+
Ok(file) => file,
1154+
Err(e) => {
1155+
sess.fatal(format!("failed to create compressed bytecode \
1156+
file: {}", e).as_slice())
1157+
}
1158+
};
1159+
1160+
match write_rlib_bytecode_object_v1(&mut bc_file_deflated,
1161+
bc_data_deflated.as_slice()) {
11151162
Ok(()) => {}
11161163
Err(e) => {
11171164
sess.err(format!("failed to write compressed bytecode: \
1118-
{}",
1119-
e).as_slice());
1165+
{}", e).as_slice());
11201166
sess.abort_if_errors()
11211167
}
1122-
}
1123-
ab.add_file(&bc_deflated).unwrap();
1124-
remove(sess, &bc_deflated);
1168+
};
1169+
1170+
ab.add_file(&bc_deflated_filename).unwrap();
1171+
remove(sess, &bc_deflated_filename);
11251172
if !sess.opts.cg.save_temps &&
11261173
!sess.opts.output_types.contains(&OutputTypeBitcode) {
1127-
remove(sess, &bc);
1174+
remove(sess, &bc_filename);
11281175
}
11291176
}
11301177

@@ -1134,6 +1181,32 @@ fn link_rlib<'a>(sess: &'a Session,
11341181
ab
11351182
}
11361183

1184+
fn write_rlib_bytecode_object_v1<T: Writer>(writer: &mut T,
1185+
bc_data_deflated: &[u8])
1186+
-> ::std::io::IoResult<()> {
1187+
let bc_data_deflated_size: u64 = bc_data_deflated.as_slice().len() as u64;
1188+
1189+
try! { writer.write(RLIB_BYTECODE_OBJECT_MAGIC) };
1190+
try! { writer.write_le_u32(1) };
1191+
try! { writer.write_le_u64(bc_data_deflated_size) };
1192+
try! { writer.write(bc_data_deflated.as_slice()) };
1193+
1194+
let number_of_bytes_written_so_far =
1195+
RLIB_BYTECODE_OBJECT_MAGIC.len() + // magic id
1196+
mem::size_of_val(&RLIB_BYTECODE_OBJECT_VERSION) + // version
1197+
mem::size_of_val(&bc_data_deflated_size) + // data size field
1198+
bc_data_deflated_size as uint; // actual data
1199+
1200+
// If the number of bytes written to the object so far is odd, add a
1201+
// padding byte to make it even. This works around a crash bug in LLDB
1202+
// (see issue #15950)
1203+
if number_of_bytes_written_so_far % 2 == 1 {
1204+
try! { writer.write_u8(0) };
1205+
}
1206+
1207+
return Ok(());
1208+
}
1209+
11371210
// Create a static archive
11381211
//
11391212
// This is essentially the same thing as an rlib, but it also involves adding

src/librustc/back/lto.rs

+80-23
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ use util::common::time;
2020
use libc;
2121
use flate;
2222

23+
use std::mem;
24+
2325
pub fn run(sess: &session::Session, llmod: ModuleRef,
2426
tm: TargetMachineRef, reachable: &[String]) {
2527
if sess.opts.cg.prefer_dynamic {
@@ -57,36 +59,66 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
5759
let file = path.filename_str().unwrap();
5860
let file = file.slice(3, file.len() - 5); // chop off lib/.rlib
5961
debug!("reading {}", file);
60-
let bc = time(sess.time_passes(),
61-
format!("read {}.bytecode.deflate", name).as_slice(),
62-
(),
63-
|_| {
64-
archive.read(format!("{}.bytecode.deflate",
65-
file).as_slice())
66-
});
67-
let bc = bc.expect("missing compressed bytecode in archive!");
68-
let bc = time(sess.time_passes(),
69-
format!("inflate {}.bc", file).as_slice(),
70-
(),
71-
|_| {
72-
match flate::inflate_bytes(bc) {
73-
Some(bc) => bc,
74-
None => {
75-
sess.fatal(format!("failed to decompress \
76-
bc of `{}`",
77-
name).as_slice())
78-
}
79-
}
80-
});
81-
let ptr = bc.as_slice().as_ptr();
62+
let bc_encoded = time(sess.time_passes(),
63+
format!("read {}.bytecode.deflate", name).as_slice(),
64+
(),
65+
|_| {
66+
archive.read(format!("{}.bytecode.deflate",
67+
file).as_slice())
68+
});
69+
let bc_encoded = bc_encoded.expect("missing compressed bytecode in archive!");
70+
let bc_extractor = if is_versioned_bytecode_format(bc_encoded) {
71+
|_| {
72+
// Read the version
73+
let version = extract_bytecode_format_version(bc_encoded);
74+
75+
if version == 1 {
76+
// The only version existing so far
77+
let data_size = extract_compressed_bytecode_size_v1(bc_encoded);
78+
let compressed_data = bc_encoded.slice(
79+
link::RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET,
80+
link::RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET + data_size as uint);
81+
82+
match flate::inflate_bytes(compressed_data) {
83+
Some(inflated) => inflated,
84+
None => {
85+
sess.fatal(format!("failed to decompress bc of `{}`",
86+
name).as_slice())
87+
}
88+
}
89+
} else {
90+
sess.fatal(format!("Unsupported bytecode format version {}",
91+
version).as_slice())
92+
}
93+
}
94+
} else {
95+
// the object must be in the old, pre-versioning format, so simply
96+
// inflate everything and let LLVM decide if it can make sense of it
97+
|_| {
98+
match flate::inflate_bytes(bc_encoded) {
99+
Some(bc) => bc,
100+
None => {
101+
sess.fatal(format!("failed to decompress bc of `{}`",
102+
name).as_slice())
103+
}
104+
}
105+
}
106+
};
107+
108+
let bc_decoded = time(sess.time_passes(),
109+
format!("decode {}.bc", file).as_slice(),
110+
(),
111+
bc_extractor);
112+
113+
let ptr = bc_decoded.as_slice().as_ptr();
82114
debug!("linking {}", name);
83115
time(sess.time_passes(),
84116
format!("ll link {}", name).as_slice(),
85117
(),
86118
|()| unsafe {
87119
if !llvm::LLVMRustLinkInExternalBitcode(llmod,
88120
ptr as *const libc::c_char,
89-
bc.len() as libc::size_t) {
121+
bc_decoded.len() as libc::size_t) {
90122
link::llvm_err(sess,
91123
format!("failed to load bc of `{}`",
92124
name.as_slice()));
@@ -137,3 +169,28 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
137169
}
138170
debug!("lto done");
139171
}
172+
173+
fn is_versioned_bytecode_format(bc: &[u8]) -> bool {
174+
let magic_id_byte_count = link::RLIB_BYTECODE_OBJECT_MAGIC.len();
175+
return bc.len() > magic_id_byte_count &&
176+
bc.slice(0, magic_id_byte_count) == link::RLIB_BYTECODE_OBJECT_MAGIC;
177+
}
178+
179+
fn extract_bytecode_format_version(bc: &[u8]) -> u32 {
180+
return read_from_le_bytes::<u32>(bc, link::RLIB_BYTECODE_OBJECT_VERSION_OFFSET);
181+
}
182+
183+
fn extract_compressed_bytecode_size_v1(bc: &[u8]) -> u64 {
184+
return read_from_le_bytes::<u64>(bc, link::RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET);
185+
}
186+
187+
fn read_from_le_bytes<T: Int>(bytes: &[u8], position_in_bytes: uint) -> T {
188+
let byte_data = bytes.slice(position_in_bytes,
189+
position_in_bytes + mem::size_of::<T>());
190+
let data = unsafe {
191+
*(byte_data.as_ptr() as *const T)
192+
};
193+
194+
Int::from_le(data)
195+
}
196+

0 commit comments

Comments
 (0)