Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Further compress the in-memory representation of address maps #2324

Merged
merged 1 commit into from
Nov 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 62 additions & 33 deletions crates/cranelift/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,19 +229,28 @@ impl StackMapSink {
fn get_function_address_map<'data>(
context: &Context,
data: &FunctionBodyData<'data>,
body_len: usize,
body_len: u32,
isa: &dyn isa::TargetIsa,
) -> FunctionAddressMap {
// Generate artificial srcloc for function start/end to identify boundary
// within module.
let data = data.body.get_binary_reader();
let offset = data.original_position();
let len = data.bytes_remaining();
assert!((offset + len) <= u32::max_value() as usize);
let start_srcloc = ir::SourceLoc::new(offset as u32);
let end_srcloc = ir::SourceLoc::new((offset + len) as u32);

let instructions = if let Some(ref mcr) = &context.mach_compile_result {
// New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping
// tuples.
collect_address_maps(mcr.buffer.get_srclocs_sorted().into_iter().map(
|&MachSrcLoc { start, end, loc }| InstructionAddressMap {
srcloc: loc,
code_offset: start as usize,
code_len: (end - start) as usize,
},
))
collect_address_maps(
body_len,
mcr.buffer
.get_srclocs_sorted()
.into_iter()
.map(|&MachSrcLoc { start, end, loc }| (loc, start, (end - start))),
)
} else {
// Old-style backend: we need to traverse the instruction/encoding info in the function.
let func = &context.func;
Expand All @@ -250,28 +259,16 @@ fn get_function_address_map<'data>(

let encinfo = isa.encoding_info();
collect_address_maps(
body_len,
blocks
.into_iter()
.flat_map(|block| func.inst_offsets(block, &encinfo))
.map(|(offset, inst, size)| InstructionAddressMap {
srcloc: func.srclocs[inst],
code_offset: offset as usize,
code_len: size as usize,
}),
.map(|(offset, inst, size)| (func.srclocs[inst], offset, size)),
)
};

// Generate artificial srcloc for function start/end to identify boundary
// within module. Similar to FuncTranslator::cur_srcloc(): it will wrap around
// if byte code is larger than 4 GB.
let data = data.body.get_binary_reader();
let offset = data.original_position();
let len = data.bytes_remaining();
let start_srcloc = ir::SourceLoc::new(offset as u32);
let end_srcloc = ir::SourceLoc::new((offset + len) as u32);

FunctionAddressMap {
instructions,
instructions: instructions.into(),
start_srcloc,
end_srcloc,
body_offset: 0,
Expand All @@ -283,23 +280,54 @@ fn get_function_address_map<'data>(
// into a `FunctionAddressMap`. This will automatically coalesce adjacent
// instructions which map to the same original source position.
fn collect_address_maps(
iter: impl IntoIterator<Item = InstructionAddressMap>,
code_size: u32,
iter: impl IntoIterator<Item = (ir::SourceLoc, u32, u32)>,
) -> Vec<InstructionAddressMap> {
let mut iter = iter.into_iter();
let mut cur = match iter.next() {
let (mut cur_loc, mut cur_offset, mut cur_len) = match iter.next() {
Some(i) => i,
None => return Vec::new(),
};
let mut ret = Vec::new();
for item in iter {
if cur.code_offset + cur.code_len == item.code_offset && item.srcloc == cur.srcloc {
cur.code_len += item.code_len;
} else {
ret.push(cur);
cur = item;
for (loc, offset, len) in iter {
// If this instruction is adjacent to the previous and has the same
// source location then we can "coalesce" it with the current
// instruction.
if cur_offset + cur_len == offset && loc == cur_loc {
cur_len += len;
continue;
}

// Push an entry for the previous source item.
ret.push(InstructionAddressMap {
srcloc: cur_loc,
code_offset: cur_offset,
});
// And push a "dummy" entry if necessary to cover the gap, if any, between
// the end of the previous entry and the start of this one.
if cur_offset + cur_len != offset {
ret.push(InstructionAddressMap {
srcloc: ir::SourceLoc::default(),
code_offset: cur_offset + cur_len,
});
}
// Update our current location to get extended later or pushed on at
// the end.
cur_loc = loc;
cur_offset = offset;
cur_len = len;
}
ret.push(cur);
ret.push(InstructionAddressMap {
srcloc: cur_loc,
code_offset: cur_offset,
});
if cur_offset + cur_len != code_size {
ret.push(InstructionAddressMap {
srcloc: ir::SourceLoc::default(),
code_offset: cur_offset + cur_len,
});
}

return ret;
}

Expand Down Expand Up @@ -406,7 +434,8 @@ impl Compiler for Cranelift {
CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
})?;

let address_transform = get_function_address_map(&context, &input, code_buf.len(), isa);
let address_transform =
get_function_address_map(&context, &input, code_buf.len() as u32, isa);

let ranges = if tunables.debug_info {
let ranges = context.build_value_labels_ranges(isa).map_err(|error| {
Expand Down
40 changes: 28 additions & 12 deletions crates/debug/src/transform/address_transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ fn build_function_lookup(
let mut ranges_index = BTreeMap::new();
let mut current_range = Vec::new();
let mut last_gen_inst_empty = false;
for t in &ft.instructions {
for (i, t) in ft.instructions.iter().enumerate() {
if t.srcloc.is_default() {
continue;
}
Expand All @@ -111,8 +111,11 @@ fn build_function_lookup(
assert_le!(fn_start, offset);
assert_le!(offset, fn_end);

let inst_gen_start = t.code_offset;
let inst_gen_end = t.code_offset + t.code_len;
let inst_gen_start = t.code_offset as usize;
let inst_gen_end = match ft.instructions.get(i + 1) {
Some(i) => i.code_offset as usize,
None => ft.body_len as usize,
};

if last_wasm_pos > offset {
// Start new range.
Expand Down Expand Up @@ -149,7 +152,7 @@ fn build_function_lookup(
}
last_wasm_pos = offset;
}
let last_gen_addr = ft.body_offset + ft.body_len;
let last_gen_addr = ft.body_offset + ft.body_len as usize;
ranges_index.insert(range_wasm_start, ranges.len());
ranges.push(Range {
wasm_start: range_wasm_start,
Expand Down Expand Up @@ -193,13 +196,13 @@ fn build_function_addr_map(
for (_, f) in funcs {
let ft = &f.address_map;
let mut fn_map = Vec::new();
for t in &ft.instructions {
for t in ft.instructions.iter() {
if t.srcloc.is_default() {
continue;
}
let offset = get_wasm_code_offset(t.srcloc, code_section_offset);
fn_map.push(AddressMap {
generated: t.code_offset,
generated: t.code_offset as usize,
wasm: offset,
});
}
Expand All @@ -213,7 +216,7 @@ fn build_function_addr_map(

map.push(FunctionMap {
offset: ft.body_offset,
len: ft.body_len,
len: ft.body_len as usize,
wasm_start: get_wasm_code_offset(ft.start_srcloc, code_section_offset),
wasm_end: get_wasm_code_offset(ft.end_srcloc, code_section_offset),
addresses: fn_map.into_boxed_slice(),
Expand Down Expand Up @@ -605,6 +608,7 @@ mod tests {
use super::{build_function_lookup, get_wasm_code_offset, AddressTransform};
use gimli::write::Address;
use std::iter::FromIterator;
use std::mem;
use wasmtime_environ::entity::PrimaryMap;
use wasmtime_environ::ir::SourceLoc;
use wasmtime_environ::{CompiledFunction, WasmFileInfo};
Expand All @@ -626,14 +630,21 @@ mod tests {
InstructionAddressMap {
srcloc: SourceLoc::new(wasm_offset + 2),
code_offset: 5,
code_len: 3,
},
InstructionAddressMap {
srcloc: SourceLoc::default(),
code_offset: 8,
},
InstructionAddressMap {
srcloc: SourceLoc::new(wasm_offset + 7),
code_offset: 15,
code_len: 8,
},
],
InstructionAddressMap {
srcloc: SourceLoc::default(),
code_offset: 23,
},
]
.into(),
start_srcloc: SourceLoc::new(wasm_offset),
end_srcloc: SourceLoc::new(wasm_offset + 10),
body_offset: 0,
Expand Down Expand Up @@ -678,11 +689,16 @@ mod tests {
fn test_build_function_lookup_two_ranges() {
let mut input = create_simple_func(11);
// append instruction with same srcloc as input.instructions[0]
input.instructions.push(InstructionAddressMap {
let mut list = Vec::from(mem::take(&mut input.instructions));
list.push(InstructionAddressMap {
srcloc: SourceLoc::new(11 + 2),
code_offset: 23,
code_len: 3,
peterhuene marked this conversation as resolved.
Show resolved Hide resolved
});
list.push(InstructionAddressMap {
srcloc: SourceLoc::default(),
code_offset: 26,
});
input.instructions = list.into();
let (start, end, lookup) = build_function_lookup(&input, 1);
assert_eq!(10, start);
assert_eq!(20, end);
Expand Down
13 changes: 10 additions & 3 deletions crates/debug/src/transform/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1145,14 +1145,21 @@ mod tests {
InstructionAddressMap {
srcloc: SourceLoc::new(code_section_offset + 12),
code_offset: 5,
code_len: 3,
},
InstructionAddressMap {
srcloc: SourceLoc::default(),
code_offset: 8,
},
InstructionAddressMap {
srcloc: SourceLoc::new(code_section_offset + 17),
code_offset: 15,
code_len: 8,
},
],
InstructionAddressMap {
srcloc: SourceLoc::default(),
code_offset: 23,
},
]
.into(),
start_srcloc: SourceLoc::new(code_section_offset + 10),
end_srcloc: SourceLoc::new(code_section_offset + 20),
body_offset: 0,
Expand Down
22 changes: 12 additions & 10 deletions crates/environ/src/address_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,24 @@ use serde::{Deserialize, Serialize};
/// Single source location to generated address mapping.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct InstructionAddressMap {
/// Original source location.
/// Where in the source this instruction comes from.
pub srcloc: ir::SourceLoc,

/// Generated instructions offset.
pub code_offset: usize,

/// Generated instructions length.
pub code_len: usize,
/// Offset from the start of the function's compiled code to where this
/// instruction is located, or the region where it starts.
pub code_offset: u32,
}

/// Function and its instructions addresses mappings.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
pub struct FunctionAddressMap {
/// Instructions maps.
/// The array is sorted by the InstructionAddressMap::code_offset field.
pub instructions: Vec<InstructionAddressMap>,
/// An array of data for the instructions in this function, indicating where
/// each instruction maps back to in the original function.
///
/// This array is sorted least-to-greatest by the `code_offset` field.
/// Additionally the span of each `InstructionAddressMap` is implicitly the
/// gap between it and the next item in the array.
pub instructions: Box<[InstructionAddressMap]>,

/// Function start source location (normally declaration).
pub start_srcloc: ir::SourceLoc,
Expand All @@ -34,7 +36,7 @@ pub struct FunctionAddressMap {
pub body_offset: usize,

/// Generated function body length.
pub body_len: usize,
pub body_len: u32,
}

/// Memory definition offset in the VMContext structure.
Expand Down
17 changes: 3 additions & 14 deletions crates/wasmtime/src/frame_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl GlobalFrameInfo {
// Use our relative position from the start of the function to find the
// machine instruction that corresponds to `pc`, which then allows us to
// map that to a wasm original source location.
let rel_pos = pc - func.start;
let rel_pos = (pc - func.start) as u32;
let pos = match func
.instr_map
.instructions
Expand All @@ -77,19 +77,8 @@ impl GlobalFrameInfo {
// instructions cover `pc`.
Err(0) => None,

// This would be at the `nth` slot, so check `n-1` to see if we're
// part of that instruction. This happens due to the minus one when
// this function is called from trap symbolication, where we don't
// always get called with a `pc` that's an exact instruction
// boundary.
Err(n) => {
let instr = &func.instr_map.instructions[n - 1];
if instr.code_offset <= rel_pos && rel_pos < instr.code_offset + instr.code_len {
Some(n - 1)
} else {
None
}
}
// This would be at the `nth` slot, so we're at the `n-1`th slot.
Err(n) => Some(n - 1),
};

// In debug mode for now assert that we found a mapping for `pc` within
Expand Down