From 3508ecaac4ce91527c7faea49cbb6746370386b6 Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Fri, 20 Dec 2024 15:18:56 +0000
Subject: [PATCH] Unbreak `YKD_OPT=0`.

When adding support for loop peeling, we forgot that loops are only
peeled when optimisations are turned on. When they were turned off, we
generated code with no backjump, e.g.:

```
; %294: ptr = load %293
mov rsi, [rdi+0x588]
; header_end
[...]
; Deopt ID for guard 0
push rsi
mov rsi, 0x00
```

Notice that there is no `jmp` between `header_end` and the deopt code.
That meant that we fell through to whichever guard's deopt happened to
come first, with confusing results. For example, before this commit the
new test `ykd_opt_off` would print:

```
10
9
8
7: 7
```

even though the code shows this is clearly "impossible":

```c
if (i < 7)
  printf("7: ");
```

This commit not only fixes that, but brings a bit more order to the
three kinds of traces we can compile: "header only" traces (most tests,
and when `YKD_OPT=0`); "header and body" traces (with loop peeling); and
"side traces". A new `TraceKind` captures these, and allows us to
simplify quite a bit of code in the x64 backend, which no longer has to
guess what kind of trace it has. Perhaps inevitably, this also showed up
a few tests where we'd muddled header and body.
---
 tests/c/ykd_opt_off.c                         |  47 +++++
 ykrt/src/compile/jitc_yk/codegen/x64/mod.rs   | 160 ++++++++++--------
 ykrt/src/compile/jitc_yk/jit_ir/mod.rs        |  42 ++++-
 .../src/compile/jitc_yk/jit_ir/well_formed.rs |   4 +-
 ykrt/src/compile/jitc_yk/opt/mod.rs           |   5 +-
 ykrt/src/compile/jitc_yk/trace_builder.rs     |  12 +-
 6 files changed, 192 insertions(+), 78 deletions(-)
 create mode 100644 tests/c/ykd_opt_off.c

diff --git a/tests/c/ykd_opt_off.c b/tests/c/ykd_opt_off.c
new file mode 100644
index 000000000..931c6e2b9
--- /dev/null
+++ b/tests/c/ykd_opt_off.c
@@ -0,0 +1,47 @@
+// Run-time:
+//   env-var: YKD_OPT=0
+//   env-var: YKD_SERIALISE_COMPILATION=1
+//   stdout:
+//     10
+//     9
+//     8
+//     7
+//     7: 6
+//     7: 5
+//     7: 4
+//     7: 4: 3
+//     7: 4: 2
+//     7: 4: 1
+
+
+// Check that basic trace compilation works with optimisations turned off.
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <yk.h>
+#include <yk_testing.h>
+
+int main(int argc, char **argv) {
+  YkMT *mt = yk_mt_new(NULL);
+  yk_mt_hot_threshold_set(mt, 0);
+  yk_mt_sidetrace_threshold_set(mt, 1);
+  YkLocation loc = yk_location_new();
+
+  int i = 10;
+  NOOPT_VAL(loc);
+  NOOPT_VAL(i);
+  while (i > 0) {
+    yk_mt_control_point(mt, &loc);
+    if (i < 7)
+      printf("7: ");
+    if (i < 4)
+      printf("4: ");
+    printf("%d\n", i);
+    i--;
+  }
+  yk_location_drop(loc);
+  yk_mt_shutdown(mt);
+  return (EXIT_SUCCESS);
+}
diff --git a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
index 0baabc465..19de6d198 100644
--- a/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
+++ b/ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
@@ -23,7 +23,7 @@
 use super::{
     super::{
         int_signs::{SignExtend, Truncate},
-        jit_ir::{self, BinOp, FloatTy, Inst, InstIdx, Module, Operand, PtrAddInst, Ty},
+        jit_ir::{self, BinOp, FloatTy, Inst, InstIdx, Module, Operand, PtrAddInst, TraceKind, Ty},
         CompilationError,
     },
     reg_alloc::{self, VarLocation},
@@ -519,7 +519,7 @@ impl<'a> Assemble<'a> {
             jit_ir::Inst::Guard(i) => self.cg_guard(iidx, i),
             jit_ir::Inst::TraceHeaderStart => self.cg_header_start(),
             jit_ir::Inst::TraceHeaderEnd => {
-                self.cg_header_end();
+                self.cg_header_end(iidx);
                 in_header = false;
             }
             jit_ir::Inst::TraceBodyStart => self.cg_body_start(),
@@ -1791,12 +1791,11 @@ impl<'a> Assemble<'a> {
     /// # Arguments
     ///
     /// * `tgt_vars` - The target locations. If `None` use `self.loop_start_locs` instead.
-    fn write_jump_vars(&mut self, iidx: InstIdx, is_sidetrace: bool) {
-        let (tgt_vars, src_ops) = if is_sidetrace {
-            // Side-traces don't have a body and store these variables in `trace_header_end`.
-            (self.m.root_entry_vars(), self.m.trace_header_end())
-        } else {
-            (self.body_start_locs.as_slice(), self.m.trace_body_end())
+    fn write_jump_vars(&mut self, iidx: InstIdx) {
+        let (tgt_vars, src_ops) = match self.m.tracekind() {
+            TraceKind::HeaderOnly => (self.header_start_locs.as_slice(), self.m.trace_header_end()),
+            TraceKind::HeaderAndBody => (self.body_start_locs.as_slice(), self.m.trace_body_end()),
+            TraceKind::Sidetrace => (self.m.root_entry_vars(), self.m.trace_header_end()),
         };
         // If we pass in `None` use `self.loop_start_locs` instead. We need to do this since we
         // can't pass in `&self.loop_start_locs` directly due to borrowing restrictions.
@@ -1889,13 +1888,14 @@ impl<'a> Assemble<'a> {
     }
 
     fn cg_body_end(&mut self, iidx: InstIdx) {
+        debug_assert_eq!(self.m.tracekind(), TraceKind::HeaderAndBody);
         // Loop the JITted code if the `tloop_start` label is present (not relevant for IR created
         // by a test or a side-trace).
         let label = StaticLabel::global("tloop_start");
         match self.asm.labels().resolve_static(&label) {
             Ok(_) => {
                 // Found the label, emit a jump to it.
-                self.write_jump_vars(iidx, false);
+                self.write_jump_vars(iidx);
                 dynasm!(self.asm; jmp ->tloop_start);
             }
             Err(DynasmError::UnknownLabel(_)) => {
@@ -1917,9 +1917,10 @@ impl<'a> Assemble<'a> {
     }
 
     fn cg_sidetrace_end(&mut self, iidx: InstIdx, addr: *const libc::c_void) {
+        debug_assert_eq!(self.m.tracekind(), TraceKind::Sidetrace);
         // The end of a side-trace. Map live variables of this side-trace to the entry variables of
         // the root parent trace, then jump to it.
-        self.write_jump_vars(iidx, true);
+        self.write_jump_vars(iidx);
         self.ra.align_stack(SYSV_CALL_STACK_ALIGN);
 
         dynasm!(self.asm
@@ -1944,60 +1945,80 @@ impl<'a> Assemble<'a> {
             };
             self.header_start_locs.push(loc);
         }
-        dynasm!(self.asm; ->reentry:);
+        match self.m.tracekind() {
+            TraceKind::HeaderOnly => {
+                dynasm!(self.asm; ->tloop_start:);
+            }
+            TraceKind::HeaderAndBody => {
+                dynasm!(self.asm; ->reentry:);
+            }
+            TraceKind::Sidetrace => todo!(),
+        }
         self.prologue_offset = self.asm.offset();
     }
 
-    fn cg_header_end(&mut self) {
-        // FIXME: This is a bit of a roundabout way of doing things. Especially, since it means
-        // that the [ParamInst]s in the trace body are just placeholders. While, since a recent
-        // change, the register allocator makes sure the values automatically end up in the
-        // [VarLocation]s expected by the loop start, this only works for registers right now. We
-        // can extend this to spill locations as well, but won't be able to do so for variables
-        // that have become constants during the trace header. So we will always have to either
-        // update the [ParamInst]s of the trace body, which isn't ideal since it requires the
-        // [Module] the be mutable. Or we do what we do below just for constants.
-        let mut varlocs = Vec::new();
-        for var in self.m.trace_header_end().iter() {
-            let varloc = self.op_to_var_location(var.unpack(self.m));
-            varlocs.push(varloc);
-        }
-        // Reset the register allocator before priming it with information about the trace body
-        // inputs.
-        self.ra.reset();
-        for (i, op) in self.m.trace_body_start().iter().enumerate() {
-            // By definition these can only be variables.
-            let iidx = match op.unpack(self.m) {
-                Operand::Var(iidx) => iidx,
-                _ => panic!(),
-            };
-            let varloc = varlocs[i];
-
-            // Write the varlocations from the head jump to the body start.
-            // FIXME: This is copied verbatim from `cg_param` and can be reused.
-            match varloc {
-                VarLocation::Register(reg_alloc::Register::GP(reg)) => {
-                    self.ra.force_assign_inst_gp_reg(&mut self.asm, iidx, reg);
-                }
-                VarLocation::Register(reg_alloc::Register::FP(reg)) => {
-                    self.ra.force_assign_inst_fp_reg(iidx, reg);
-                }
-                VarLocation::Direct { frame_off, size: _ } => {
-                    self.ra.force_assign_inst_direct(iidx, frame_off);
-                }
-                VarLocation::Stack { frame_off, size: _ } => {
-                    self.ra
-                        .force_assign_inst_indirect(iidx, i32::try_from(frame_off).unwrap());
+    fn cg_header_end(&mut self, iidx: InstIdx) {
+        match self.m.tracekind() {
+            TraceKind::HeaderOnly => {
+                self.write_jump_vars(iidx);
+                dynasm!(self.asm; jmp ->tloop_start);
+            }
+            TraceKind::HeaderAndBody => {
+                // FIXME: This is a bit of a roundabout way of doing things. Especially since it means
+                // that the [ParamInst]s in the trace body are just placeholders. While, since a recent
+                // change, the register allocator makes sure the values automatically end up in the
+                // [VarLocation]s expected by the loop start, this only works for registers right now. We
+                // can extend this to spill locations as well, but won't be able to do so for variables
+                // that have become constants during the trace header. So we will always have to either
+                // update the [ParamInst]s of the trace body, which isn't ideal since it requires the
+                // [Module] to be mutable. Or we do what we do below just for constants.
+                let mut varlocs = Vec::new();
+                for var in self.m.trace_header_end().iter() {
+                    let varloc = self.op_to_var_location(var.unpack(self.m));
+                    varlocs.push(varloc);
                 }
-                VarLocation::ConstInt { bits, v } => {
-                    self.ra.assign_const(iidx, bits, v);
+                // Reset the register allocator before priming it with information about the trace body
+                // inputs.
+                self.ra.reset();
+                for (i, op) in self.m.trace_body_start().iter().enumerate() {
+                    // By definition these can only be variables.
+                    let iidx = match op.unpack(self.m) {
+                        Operand::Var(iidx) => iidx,
+                        _ => panic!(),
+                    };
+                    let varloc = varlocs[i];
+
+                    // Write the varlocations from the head jump to the body start.
+                    // FIXME: This is copied verbatim from `cg_param` and can be reused.
+                    match varloc {
+                        VarLocation::Register(reg_alloc::Register::GP(reg)) => {
+                            self.ra.force_assign_inst_gp_reg(&mut self.asm, iidx, reg);
+                        }
+                        VarLocation::Register(reg_alloc::Register::FP(reg)) => {
+                            self.ra.force_assign_inst_fp_reg(iidx, reg);
+                        }
+                        VarLocation::Direct { frame_off, size: _ } => {
+                            self.ra.force_assign_inst_direct(iidx, frame_off);
+                        }
+                        VarLocation::Stack { frame_off, size: _ } => {
+                            self.ra.force_assign_inst_indirect(
+                                iidx,
+                                i32::try_from(frame_off).unwrap(),
+                            );
+                        }
+                        VarLocation::ConstInt { bits, v } => {
+                            self.ra.assign_const(iidx, bits, v);
+                        }
+                        e => panic!("{:?}", e),
+                    }
                 }
-                e => panic!("{:?}", e),
             }
+            TraceKind::Sidetrace => todo!(),
         }
     }
 
     fn cg_body_start(&mut self) {
+        debug_assert_eq!(self.m.tracekind(), TraceKind::HeaderAndBody);
         debug_assert_eq!(self.body_start_locs.len(), 0);
         // Remember the locations of the live variables at the beginning of the trace loop. When we
         // loop back around here we need to write the live variables back into these same
@@ -2495,7 +2516,7 @@ enum Immediate {
 mod tests {
     use super::{Assemble, X64CompiledTrace};
     use crate::compile::{
-        jitc_yk::jit_ir::{self, Inst, Module, ParamIdx},
+        jitc_yk::jit_ir::{self, Inst, Module, ParamIdx, TraceKind},
         CompiledTrace,
     };
     use crate::location::{HotLocation, HotLocationKind};
@@ -3984,12 +4005,13 @@ mod tests {
         );
     }
 
+    #[should_panic]
     #[test]
     fn unterminated_trace() {
         codegen_and_test(
             "
           entry:
-            body_end []
+            header_end []
         ",
             "
               ...
@@ -4006,13 +4028,13 @@ mod tests {
         codegen_and_test(
             "
           entry:
-            body_start []
-            body_end []
+            header_start []
+            header_end []
         ",
             "
               ...
-              ; body_start []
-              ; body_end []
+              ; header_start []
+              ; header_end []
               jmp {{target}}
         ",
             false,
@@ -4025,20 +4047,20 @@ mod tests {
             "
           entry:
             %0: i8 = param 0
-            body_start [%0]
+            header_start [%0]
             %2: i8 = add %0, %0
             black_box %2
-            body_end [%0]
+            header_end [%0]
         ",
             "
               ...
               ; %0: i8 = param ...
               ...
-              ; body_start [%0]
+              ; header_start [%0]
               ; %2: i8 = add %0, %0
               {{_}} {{off}}: ...
               ...
-              ; body_end [%0]
+              ; header_end [%0]
               ...
               {{_}} {{_}}: jmp 0x00000000{{off}}
         ",
@@ -4531,16 +4553,16 @@ mod tests {
             "
           entry:
             %0: i8 = param 0
-            body_start [%0]
+            header_start [%0]
             %2: i8 = 42i8
-            body_end [%2]
+            header_end [%2]
         ",
             "
               ...
               ; %0: i8 = param ...
               ...
-              ; body_start [%0]
-              ; body_end [42i8]
+              ; header_start [%0]
+              ; header_end [42i8]
               mov r.64.x, 0x2a
               jmp ...
         ",
@@ -4578,7 +4600,7 @@ mod tests {
 
     #[test]
     fn cg_aliasing_params() {
-        let mut m = jit_ir::Module::new(0, 0).unwrap();
+        let mut m = jit_ir::Module::new(TraceKind::HeaderOnly, 0, 0).unwrap();
 
         // Create two trace paramaters whose locations alias.
         let loc = yksmp::Location::Register(13, 1, [].into());
diff --git a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
index 47e155c4a..266e95e2b 100644
--- a/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
+++ b/ykrt/src/compile/jitc_yk/jit_ir/mod.rs
@@ -114,6 +114,18 @@ use ykaddr::addr::symbol_to_ptr;
 // This is simple and can be shared across both IRs.
 pub(crate) use super::aot_ir::{BinOp, FloatPredicate, FloatTy, Predicate};
 
+/// What kind of trace does this module represent?
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub(crate) enum TraceKind {
+    /// A trace which contains only a header: the trace must loop back to the very start every
+    /// time.
+    HeaderOnly,
+    /// A trace with a header and a body: the trace must loop back to the start of the body.
+    HeaderAndBody,
+    /// A sidetrace: the trace must loop back to the root of the trace tree.
+    Sidetrace,
+}
+
 /// The `Module` is the top-level container for JIT IR.
 ///
 /// The IR is conceptually a list of word-sized instructions containing indices into auxiliary
@@ -125,6 +137,8 @@ pub(crate) use super::aot_ir::{BinOp, FloatPredicate, FloatTy, Predicate};
 /// - you may NOT remove an instruction.
 #[derive(Debug)]
 pub(crate) struct Module {
+    /// What kind of trace does this module represent?
+    tracekind: TraceKind,
     /// The ID of the compiled trace.
     ///
     /// See the [Self::ctr_id] method for details.
@@ -191,8 +205,28 @@ pub(crate) struct Module {
 
 impl Module {
     /// Create a new [Module].
-    pub(crate) fn new(ctr_id: u64, global_decls_len: usize) -> Result<Self, CompilationError> {
-        Self::new_internal(ctr_id, global_decls_len)
+    pub(crate) fn new(
+        tracekind: TraceKind,
+        ctr_id: u64,
+        global_decls_len: usize,
+    ) -> Result<Self, CompilationError> {
+        Self::new_internal(tracekind, ctr_id, global_decls_len)
+    }
+
+    /// Returns this module's current [TraceKind]. Note: this can change as a result of calling
+    /// [Self::set_tracekind]!
+    pub(crate) fn tracekind(&self) -> TraceKind {
+        self.tracekind
+    }
+
+    /// Sets this module's [TraceKind]. Currently the only transition allowed is from
+    /// [TraceKind::HeaderOnly] to [TraceKind::HeaderAndBody].
+    pub(crate) fn set_tracekind(&mut self, tracekind: TraceKind) {
+        match (self.tracekind, tracekind) {
+            (TraceKind::HeaderOnly, TraceKind::HeaderAndBody) => (),
+            (from, to) => panic!("Can't transition from a {from:?} trace to a {to:?} trace"),
+        }
+        self.tracekind = tracekind;
     }
 
     /// Returns the ID of the module.
@@ -207,10 +241,11 @@ impl Module {
 
     #[cfg(test)]
     pub(crate) fn new_testing() -> Self {
-        Self::new_internal(0, 0).unwrap()
+        Self::new_internal(TraceKind::HeaderOnly, 0, 0).unwrap()
     }
 
     pub(crate) fn new_internal(
+        tracekind: TraceKind,
         ctr_id: u64,
         global_decls_len: usize,
     ) -> Result<Self, CompilationError> {
@@ -250,6 +285,7 @@ impl Module {
         assert_eq!(global_decls_len, 0);
 
         Ok(Self {
+            tracekind,
             ctr_id,
             insts: Vec::new(),
             args: Vec::new(),
diff --git a/ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs b/ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs
index 23f6fee56..df9d49602 100644
--- a/ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs
+++ b/ykrt/src/compile/jitc_yk/jit_ir/well_formed.rs
@@ -294,7 +294,7 @@ impl Module {
 
 #[cfg(test)]
 mod tests {
-    use super::{BinOp, BinOpInst, Const, Inst, Module, Operand};
+    use super::{super::TraceKind, BinOp, BinOpInst, Const, Inst, Module, Operand};
 
     #[should_panic(expected = "Instruction at position 0 passing too few arguments")]
     #[test]
@@ -365,7 +365,7 @@ mod tests {
     fn cg_add_wrong_types() {
         // The parser will reject a binop with a result type different from either operand, so to
         // get the test we want, we can't use the parser.
-        let mut m = Module::new(0, 0).unwrap();
+        let mut m = Module::new(TraceKind::HeaderOnly, 0, 0).unwrap();
         let c1 = m.insert_const(Const::Int(m.int1_tyidx(), 0)).unwrap();
         let c2 = m.insert_const(Const::Int(m.int8_tyidx(), 0)).unwrap();
         m.push(Inst::BinOp(BinOpInst::new(
diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs
index 998222edd..108acfd19 100644
--- a/ykrt/src/compile/jitc_yk/opt/mod.rs
+++ b/ykrt/src/compile/jitc_yk/opt/mod.rs
@@ -10,7 +10,7 @@ use super::{
     int_signs::{SignExtend, Truncate},
     jit_ir::{
         BinOp, BinOpInst, Const, ConstIdx, ICmpInst, Inst, InstIdx, Module, Operand, Predicate,
-        PtrAddInst, Ty,
+        PtrAddInst, TraceKind, Ty,
     },
 };
 use crate::compile::CompilationError;
@@ -69,6 +69,9 @@ impl Opt {
             return Ok(self.m);
         }
 
+        debug_assert_eq!(self.m.tracekind(), TraceKind::HeaderOnly);
+        self.m.set_tracekind(TraceKind::HeaderAndBody);
+
         // Now that we've processed the trace header, duplicate it to create the loop body.
         let mut iidx_map = vec![InstIdx::max(); base];
         let skipping = self.m.iter_skipping_insts().collect::<Vec<_>>();
diff --git a/ykrt/src/compile/jitc_yk/trace_builder.rs b/ykrt/src/compile/jitc_yk/trace_builder.rs
index 36431fb17..221774e11 100644
--- a/ykrt/src/compile/jitc_yk/trace_builder.rs
+++ b/ykrt/src/compile/jitc_yk/trace_builder.rs
@@ -5,7 +5,7 @@
 use super::aot_ir::{self, BBlockId, BinOp, Module};
 use super::YkSideTraceInfo;
 use super::{
-    jit_ir::{self, Const, Operand, PackedOperand, ParamIdx},
+    jit_ir::{self, Const, Operand, PackedOperand, ParamIdx, TraceKind},
     AOT_MOD,
 };
 use crate::aotsmp::AOT_STACKMAPS;
@@ -49,13 +49,14 @@ impl TraceBuilder {
     /// - `mtrace`: The mapped trace.
     /// - `promotions`: Values promoted to constants during runtime.
     fn new(
+        tracekind: TraceKind,
         ctr_id: u64,
         aot_mod: &'static Module,
         promotions: Box<[u8]>,
     ) -> Result<Self, CompilationError> {
         Ok(Self {
             aot_mod,
-            jit_mod: jit_ir::Module::new(ctr_id, aot_mod.global_decls_len())?,
+            jit_mod: jit_ir::Module::new(tracekind, ctr_id, aot_mod.global_decls_len())?,
             local_map: HashMap::new(),
             cp_block: None,
             first_paraminst_idx: 0,
@@ -1426,5 +1427,10 @@ pub(super) fn build(
     sti: Option>,
     promotions: Box<[u8]>,
 ) -> Result {
-    TraceBuilder::new(ctr_id, aot_mod, promotions)?.build(ta_iter, sti)
+    let tracekind = if sti.is_some() {
+        TraceKind::Sidetrace
+    } else {
+        TraceKind::HeaderOnly
+    };
+    TraceBuilder::new(tracekind, ctr_id, aot_mod, promotions)?.build(ta_iter, sti)
 }
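
The control-flow rule this patch enforces boils down to a few lines. The sketch below is illustrative only: `Backjump`, `backjump` and the label strings are invented for this note rather than taken from yk's API, but the dispatch mirrors what `write_jump_vars` and `cg_header_end` now do on `Module::tracekind()` — every kind of trace ends in an explicit backjump, so codegen can no longer fall through into the first guard's deopt block.

```rust
// Illustrative sketch only: `TraceKind` mirrors the enum added in
// jit_ir/mod.rs, but `Backjump`, `backjump` and the label names are
// hypothetical and not part of yk's API.

#[derive(Copy, Clone, Debug, PartialEq)]
enum TraceKind {
    /// Header-only trace (e.g. `YKD_OPT=0`): loop back to the trace header.
    HeaderOnly,
    /// Peeled trace: loop back to the start of the body, not the header.
    HeaderAndBody,
    /// Side-trace: jump back to the root trace's entry point.
    Sidetrace,
}

#[derive(Debug, PartialEq)]
enum Backjump {
    /// A label bound inside the trace being compiled (`tloop_start` in the patch).
    Local(&'static str),
    /// The machine address of the root trace's entry point.
    RootEntry(usize),
}

/// Decide where the closing jump of a trace must go. Whatever the kind,
/// there is always *some* target: the "fall through into deopt" case no
/// longer exists.
fn backjump(kind: TraceKind, root_entry_addr: usize) -> Backjump {
    match kind {
        TraceKind::HeaderOnly => Backjump::Local("tloop_start bound at the header"),
        TraceKind::HeaderAndBody => Backjump::Local("tloop_start bound at the body"),
        TraceKind::Sidetrace => Backjump::RootEntry(root_entry_addr),
    }
}

fn main() {
    let root: usize = 0xdead_beef;
    for kind in [
        TraceKind::HeaderOnly,
        TraceKind::HeaderAndBody,
        TraceKind::Sidetrace,
    ] {
        // Every kind yields an explicit backjump target.
        println!("{kind:?} -> {:?}", backjump(kind, root));
    }
}
```

Driving this decision off a single `TraceKind` stored in the module, rather than an `is_sidetrace` flag plus guessing from which labels happen to have been emitted, is what lets the x64 backend drop its earlier special-casing.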