diff --git a/cranelift/codegen/meta/src/isa/arm64.rs b/cranelift/codegen/meta/src/isa/arm64.rs index 7fc17738bb27..9e1aac536422 100644 --- a/cranelift/codegen/meta/src/isa/arm64.rs +++ b/cranelift/codegen/meta/src/isa/arm64.rs @@ -5,13 +5,13 @@ use crate::shared::Definitions as SharedDefinitions; fn define_settings(_shared: &SettingGroup) -> SettingGroup { let mut setting = SettingGroupBuilder::new("arm64"); - let has_lse = setting.add_bool( + + setting.add_bool( "has_lse", "Has Large System Extensions (FEAT_LSE) support.", "", false, ); - setting.add_bool( "has_pauth", "Has Pointer authentication (FEAT_PAuth) support; enables the use of \ @@ -44,8 +44,13 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { "", false, ); + setting.add_bool( + "use_bti", + "Use Branch Target Identification (FEAT_BTI) instructions.", + "", + false, + ); - setting.add_predicate("use_lse", predicate!(has_lse)); setting.build() } diff --git a/cranelift/codegen/src/alias_analysis.rs b/cranelift/codegen/src/alias_analysis.rs index 53d3ba60cfc6..2e2087b0642f 100644 --- a/cranelift/codegen/src/alias_analysis.rs +++ b/cranelift/codegen/src/alias_analysis.rs @@ -237,7 +237,7 @@ impl<'a> AliasAnalysis<'a> { trace!("after inst{}: state is {:?}", inst.index(), state); } - visit_block_succs(self.func, block, |_inst, succ| { + visit_block_succs(self.func, block, |_inst, succ, _from_table| { let succ_first_inst = self .func .layout diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 8d36742979ce..6b6d76a6b2f9 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -129,8 +129,15 @@ pub fn has_memory_fence_semantics(op: Opcode) -> bool { } } -/// Visit all successors of a block with a given visitor closure. -pub(crate) fn visit_block_succs(f: &Function, block: Block, mut visit: F) { +/// Visit all successors of a block with a given visitor closure. The closure +/// arguments are the branch instruction that is used to reach the successor, +/// the successor block itself, and a flag indicating whether the block is +/// branched to via a table entry. +pub(crate) fn visit_block_succs( + f: &Function, + block: Block, + mut visit: F, +) { for inst in f.layout.block_likely_branches(block) { if f.dfg[inst].opcode().is_branch() { visit_branch_targets(f, inst, &mut visit); @@ -138,18 +145,20 @@ pub(crate) fn visit_block_succs(f: &Function, block: Bloc } } -fn visit_branch_targets(f: &Function, inst: Inst, visit: &mut F) { +fn visit_branch_targets(f: &Function, inst: Inst, visit: &mut F) { match f.dfg[inst].analyze_branch(&f.dfg.value_lists) { BranchInfo::NotABranch => {} BranchInfo::SingleDest(dest, _) => { - visit(inst, dest); + visit(inst, dest, false); } BranchInfo::Table(table, maybe_dest) => { if let Some(dest) = maybe_dest { - visit(inst, dest); + // The default block is reached via a direct conditional branch, + // so it is not part of the table. + visit(inst, dest, false); } for &dest in f.jump_tables[table].as_slice() { - visit(inst, dest); + visit(inst, dest, true); } } } diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 51587642431d..25491721b785 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -67,7 +67,11 @@ fn saved_reg_stack_size( /// point for the trait; it is never actually instantiated. 
pub struct AArch64MachineDeps; -impl IsaFlags for aarch64_settings::Flags {} +impl IsaFlags for aarch64_settings::Flags { + fn is_forward_edge_cfi_enabled(&self) -> bool { + self.use_bti() + } +} impl ABIMachineSpec for AArch64MachineDeps { type I = Inst; @@ -549,13 +553,21 @@ impl ABIMachineSpec for AArch64MachineDeps { }, }); } - } else if flags.unwind_info() && call_conv.extends_apple_aarch64() { - // The macOS unwinder seems to require this. - insts.push(Inst::Unwind { - inst: UnwindInst::Aarch64SetPointerAuth { - return_addresses: false, - }, - }); + } else { + if isa_flags.use_bti() { + insts.push(Inst::Bti { + targets: BranchTargetType::C, + }); + } + + if flags.unwind_info() && call_conv.extends_apple_aarch64() { + // The macOS unwinder seems to require this. + insts.push(Inst::Unwind { + inst: UnwindInst::Aarch64SetPointerAuth { + return_addresses: false, + }, + }); + } } insts diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 7a9e9c0173e3..bc54bb7d4928 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -880,6 +880,11 @@ ;; supported. (Xpaclri) + ;; Branch target identification; equivalent to a no-op if Branch Target + ;; Identification (FEAT_BTI) is not supported. + (Bti + (targets BranchTargetType)) + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This ;; controls how AMode::NominalSPOffset args are lowered. (VirtualSPOffsetAdj @@ -1568,6 +1573,15 @@ (B) )) +;; Branch target types +(type BranchTargetType + (enum + (None) + (C) + (J) + (JC) +)) + ;; Extractors for target features ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl pure sign_return_address_disabled () Unit) (extern constructor sign_return_address_disabled sign_return_address_disabled) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 8ba60b59ad21..718929dd0744 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -3332,6 +3332,16 @@ impl MachInstEmit for Inst { sink.put4(0xd503233f | key << 6); } &Inst::Xpaclri => sink.put4(0xd50320ff), + &Inst::Bti { targets } => { + let targets = match targets { + BranchTargetType::None => 0b00, + BranchTargetType::C => 0b01, + BranchTargetType::J => 0b10, + BranchTargetType::JC => 0b11, + }; + + sink.put4(0xd503241f | targets << 6); + } &Inst::VirtualSPOffsetAdj { offset } => { trace!( "virtual sp offset adjusted by {} -> {}", diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 55e1a8f82f09..6be6d1065b25 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -58,6 +58,13 @@ fn test_aarch64_binemit() { )); insns.push((Inst::Pacisp { key: APIKey::B }, "7F2303D5", "pacibsp")); insns.push((Inst::Xpaclri, "FF2003D5", "xpaclri")); + insns.push(( + Inst::Bti { + targets: BranchTargetType::J, + }, + "9F2403D5", + "bti j", + )); insns.push((Inst::Nop0, "", "nop-zero-len")); insns.push((Inst::Nop4, "1F2003D5", "nop")); insns.push((Inst::Csdb, "9F2203D5", "csdb")); diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 22b034d4e240..00d2a800b336 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -36,10 +36,10 @@ mod emit_tests; // Instructions (top level): 
definition pub use crate::isa::aarch64::lower::isle::generated_code::{ - ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, - FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp, - VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, - VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp, + ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1, + FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, + VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, + VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp, }; /// A floating-point unit (FPU) operation with two args, a register and an immediate. @@ -1072,6 +1072,7 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan // Neither LR nor SP is an allocatable register, so there is no need // to do anything. } + &Inst::Bti { .. } => {} &Inst::VirtualSPOffsetAdj { .. } => {} &Inst::ElfTlsGetAddr { rd, .. } => { @@ -1266,6 +1267,19 @@ impl MachInst for Inst { fn ref_type_regclass(_: &settings::Flags) -> RegClass { RegClass::Int } + + fn gen_block_start( + is_indirect_branch_target: bool, + is_forward_edge_cfi_enabled: bool, + ) -> Option { + if is_indirect_branch_target && is_forward_edge_cfi_enabled { + Some(Inst::Bti { + targets: BranchTargetType::J, + }) + } else { + None + } + } } //============================================================================= @@ -2700,7 +2714,7 @@ impl Inst { "csel {}, xzr, {}, hs ; ", "csdb ; ", "adr {}, pc+16 ; ", - "ldrsw {}, [{}, {}, LSL 2] ; ", + "ldrsw {}, [{}, {}, uxtw #2] ; ", "add {}, {}, {} ; ", "br {} ; ", "jt_entries {:?}" @@ -2812,6 +2826,16 @@ impl Inst { "paci".to_string() + key + "sp" } &Inst::Xpaclri => "xpaclri".to_string(), + &Inst::Bti { targets } => { + let targets = match targets { + BranchTargetType::None => "", + BranchTargetType::C => " c", + BranchTargetType::J => " j", + BranchTargetType::JC => " jc", + }; + + "bti".to_string() + targets + } &Inst::VirtualSPOffsetAdj { offset } => { state.virtual_sp_offset += offset; format!("virtual_sp_offset_adjust {}", offset) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index f8791d31c8de..deec6a2e078e 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -85,7 +85,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } fn use_lse(&mut self, _: Inst) -> Option<()> { - if self.isa_flags.use_lse() { + if self.isa_flags.has_lse() { Some(()) } else { None diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 4e4b045331f9..c9a09049cf6b 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -657,18 +657,20 @@ pub(crate) fn lower_branch( // emit_island // this forces an island at this point // // if the jumptable would push us past // // the deadline - // subs idx, #jt_size + // cmp idx, #jt_size // b.hs default + // csel vTmp2, xzr, idx, hs + // csdb // adr vTmp1, PC+16 - // ldr vTmp2, [vTmp1, idx, lsl #2] - // add vTmp2, vTmp2, vTmp1 - // br vTmp2 + // ldr vTmp2, [vTmp1, vTmp2, uxtw #2] + // add vTmp1, vTmp1, vTmp2 + // br vTmp1 // [jumptable offsets relative to JT base] let jt_size = targets.len() - 1; 
assert!(jt_size <= std::u32::MAX as usize); ctx.emit(Inst::EmitIsland { - needed_space: 4 * (6 + jt_size) as CodeOffset, + needed_space: 4 * (8 + jt_size) as CodeOffset, }); let ridx = put_input_in_reg( @@ -707,8 +709,10 @@ pub(crate) fn lower_branch( // Emit the compound instruction that does: // // b.hs default + // csel rB, xzr, rIndex, hs + // csdb // adr rA, jt - // ldrsw rB, [rA, rIndex, UXTW 2] + // ldrsw rB, [rA, rB, uxtw #2] // add rA, rA, rB // br rA // [jt entries] diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 0498875f2ef3..a132c470df20 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -115,6 +115,10 @@ impl TargetIsa for AArch64Backend { self.isa_flags.iter().collect() } + fn is_branch_protection_enabled(&self) -> bool { + self.isa_flags.use_bti() + } + fn dynamic_vector_bytes(&self, _dyn_ty: Type) -> u32 { 16 } diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 98364bb22f5d..e513987c6d59 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -226,6 +226,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// Get the ISA-dependent flag values that were used to make this trait object. fn isa_flags(&self) -> Vec; + /// Get a flag indicating whether branch protection is enabled. + fn is_branch_protection_enabled(&self) -> bool { + false + } + /// Get the ISA-dependent maximum vector register size, in bytes. fn dynamic_vector_bytes(&self, dynamic_ty: ir::Type) -> u32; diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index e205658365be..ab9369664d2b 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -286,7 +286,12 @@ impl StackAMode { } /// Trait implemented by machine-specific backend to represent ISA flags. -pub trait IsaFlags: Clone {} +pub trait IsaFlags: Clone { + /// Get a flag indicating whether forward-edge CFI is enabled. + fn is_forward_edge_cfi_enabled(&self) -> bool { + false + } +} /// Trait implemented by machine-specific backend to provide information about /// register assignments and to allow generating the specific instructions for @@ -1256,6 +1261,10 @@ impl Callee { } } + pub fn is_forward_edge_cfi_enabled(&self) -> bool { + self.isa_flags.is_forward_edge_cfi_enabled() + } + /// Get the calling convention implemented by this ABI object. pub fn call_conv(&self, sigs: &SigSet) -> isa::CallConv { sigs[self.sig].call_conv diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs index 4d1708dc4bb4..847cbf53f2ff 100644 --- a/cranelift/codegen/src/machinst/blockorder.rs +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -106,6 +106,8 @@ pub struct BlockLoweringOrder { /// which is used by VCode emission to sink the blocks at the last /// moment (when we actually emit bytes into the MachBuffer). cold_blocks: FxHashSet, + /// Lowered blocks that are indirect branch targets. + indirect_branch_targets: FxHashSet, } /// The origin of a block in the lowered block-order: either an original CLIF @@ -230,14 +232,20 @@ impl BlockLoweringOrder { // Cache the block successors to avoid re-examining branches below. 
let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new(); let mut block_succ_range = SecondaryMap::with_default((0, 0)); + let mut indirect_branch_target_clif_blocks = FxHashSet::default(); + for block in f.layout.blocks() { let block_succ_start = block_succs.len(); let mut succ_idx = 0; - visit_block_succs(f, block, |inst, succ| { + visit_block_succs(f, block, |inst, succ, from_table| { block_out_count[block] += 1; block_in_count[succ] += 1; block_succs.push((inst, succ_idx, succ)); succ_idx += 1; + + if from_table { + indirect_branch_target_clif_blocks.insert(succ); + } }); let block_succ_end = block_succs.len(); block_succ_range[block] = (block_succ_start, block_succ_end); @@ -432,6 +440,7 @@ impl BlockLoweringOrder { let mut cold_blocks = FxHashSet::default(); let mut lowered_succ_ranges = vec![]; let mut lb_to_bindex = FxHashMap::default(); + let mut indirect_branch_targets = FxHashSet::default(); for (block, succ_range) in rpo.into_iter() { let index = BlockIndex::new(lowered_order.len()); lb_to_bindex.insert(block, index); @@ -445,11 +454,19 @@ impl BlockLoweringOrder { if f.layout.is_cold(block) { cold_blocks.insert(index); } + + if indirect_branch_target_clif_blocks.contains(&block) { + indirect_branch_targets.insert(index); + } } LoweredBlock::Edge { pred, succ, .. } => { if f.layout.is_cold(pred) || f.layout.is_cold(succ) { cold_blocks.insert(index); } + + if indirect_branch_target_clif_blocks.contains(&succ) { + indirect_branch_targets.insert(index); + } } } } @@ -474,6 +491,7 @@ impl BlockLoweringOrder { lowered_succ_ranges, orig_map, cold_blocks, + indirect_branch_targets, }; trace!("BlockLoweringOrder: {:?}", result); result @@ -494,6 +512,12 @@ impl BlockLoweringOrder { pub fn is_cold(&self, block: BlockIndex) -> bool { self.cold_blocks.contains(&block) } + + /// Determine whether the given lowered block index is an indirect branch + /// target. + pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool { + self.indirect_branch_targets.contains(&block) + } } #[cfg(test)] diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index a2aa97064fa3..9fcb51e4c9dd 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -168,6 +168,16 @@ pub trait MachInst: Clone + Debug { /// Is this a safepoint? fn is_safepoint(&self) -> bool; + /// Generate an instruction that must appear at the beginning of a basic + /// block, if any. Note that the return value must not be subject to + /// register allocation. + fn gen_block_start( + _is_indirect_branch_target: bool, + _is_forward_edge_cfi_enabled: bool, + ) -> Option { + None + } + /// A label-use kind: a type that describes the types of label references that /// can occur in an instruction. 
type LabelUse: MachInstLabelUse; diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index bd458227b274..0c4f81b15a49 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -845,6 +845,8 @@ impl VCode { ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); } + let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); + for (block_order_idx, &block) in final_order.iter().enumerate() { trace!("emitting block {:?}", block); let new_offset = I::align_basic_block(buffer.cur_offset()); @@ -902,6 +904,13 @@ impl VCode { last_offset = Some(cur_offset); } + if let Some(block_start) = I::gen_block_start( + self.block_order.is_indirect_branch_target(block), + is_forward_edge_cfi_enabled, + ) { + do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state); + } + for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { match inst_or_edit { InstOrEdit::Inst(iix) => { diff --git a/cranelift/filetests/filetests/isa/aarch64/bti.clif b/cranelift/filetests/filetests/isa/aarch64/bti.clif new file mode 100644 index 000000000000..4e7ea3075f6e --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/bti.clif @@ -0,0 +1,111 @@ +test compile precise-output +set unwind_info=false +target aarch64 use_bti + +function %f1(i32) -> i32 { + jt0 = jump_table [block1, block2, block3] + +block0(v0: i32): + br_table v0, block4, jt0 + +block1: + v1 = iconst.i32 1 + jump block5(v1) + +block2: + v2 = iconst.i32 2 + jump block5(v2) + +block3: + v3 = iconst.i32 3 + jump block5(v3) + +block4: + v4 = iconst.i32 4 + jump block5(v4) + +block5(v5: i32): + v6 = iadd.i32 v0, v5 + return v6 +} + +; bti c +; block0: +; emit_island 44 +; subs wzr, w0, #3 +; b.hs label1 ; csel x1, xzr, x0, hs ; csdb ; adr x15, pc+16 ; ldrsw x1, [x15, x1, uxtw #2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] +; block1: +; movz x5, #4 +; b label2 +; block2: +; b label9 +; block3: +; bti j +; movz x5, #1 +; b label4 +; block4: +; b label9 +; block5: +; bti j +; movz x5, #2 +; b label6 +; block6: +; b label9 +; block7: +; bti j +; movz x5, #3 +; b label8 +; block8: +; b label9 +; block9: +; add w0, w0, w5 +; ret + +function %f2(i64) -> i64 { + jt0 = jump_table [block2] + +block0(v0: i64): + v1 = ireduce.i32 v0 + v2 = load.i64 notrap aligned table v0 + br_table v1, block1, jt0 + +block1: + return v2 + +block2: + v3 = iconst.i64 42 + v4 = iadd.i64 v2, v3 + return v4 +} + +; bti c +; block0: +; ldr x6, [x0] +; emit_island 36 +; subs wzr, w0, #1 +; b.hs label1 ; csel x8, xzr, x0, hs ; csdb ; adr x7, pc+16 ; ldrsw x8, [x7, x8, uxtw #2] ; add x7, x7, x8 ; br x7 ; jt_entries [Label(MachLabel(2))] +; block1: +; mov x0, x6 +; ret +; block2: +; bti j +; mov x0, x6 +; add x0, x0, #42 +; ret + +function %f3(i64) -> i64 { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; bti c +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; block0: +; ldr x4, 8 ; b 12 ; data TestCase(%g) + 0 +; blr x4 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif index efd0697d82e3..37f3798e46bb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif +++ b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif @@ -30,9 +30,9 @@ block5(v5: i32): } ; block0: -; emit_island 36 +; emit_island 44 ; subs wzr, w0, #3 -; b.hs label1 ; csel x1, xzr, x0, hs ; csdb ; adr x15, pc+16 ; ldrsw x1, [x15, x1, LSL 2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] +; b.hs label1 ; csel x1, xzr, x0, hs ; csdb ; adr x15, pc+16 ; ldrsw x1, [x15, x1, uxtw #2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] ; block1: ; movz x5, #4 ; b label2 diff --git a/cranelift/filetests/filetests/runtests/br_table.clif b/cranelift/filetests/filetests/runtests/br_table.clif index 0de2e5cd615b..4e4cbb49516e 100644 --- a/cranelift/filetests/filetests/runtests/br_table.clif +++ b/cranelift/filetests/filetests/runtests/br_table.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target aarch64 use_bti target x86_64 target s390x diff --git a/cranelift/jit/src/backend.rs b/cranelift/jit/src/backend.rs index 796a4e00c2ee..31158e90389d 100644 --- a/cranelift/jit/src/backend.rs +++ b/cranelift/jit/src/backend.rs @@ -1,6 +1,6 @@ //! Defines `JITModule`. -use crate::{compiled_blob::CompiledBlob, memory::Memory}; +use crate::{compiled_blob::CompiledBlob, memory::BranchProtection, memory::Memory}; use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::settings::Configurable; use cranelift_codegen::{self, ir, settings, MachReloc}; @@ -480,6 +480,12 @@ impl JITModule { ); } + let branch_protection = + if cfg!(target_arch = "aarch64") && use_bti(&builder.isa.isa_flags()) { + BranchProtection::BTI + } else { + BranchProtection::None + }; let mut module = Self { isa: builder.isa, hotswap_enabled: builder.hotswap_enabled, @@ -487,9 +493,10 @@ impl JITModule { lookup_symbols: builder.lookup_symbols, libcall_names: builder.libcall_names, memory: MemoryHandle { - code: Memory::new(), - readonly: Memory::new(), - writable: Memory::new(), + code: Memory::new(branch_protection), + // Branch protection is not applicable to non-executable memory. + readonly: Memory::new(BranchProtection::None), + writable: Memory::new(BranchProtection::None), }, declarations: ModuleDeclarations::default(), function_got_entries: SecondaryMap::new(), @@ -959,3 +966,10 @@ fn lookup_with_dlsym(name: &str) -> Option<*const u8> { None } } + +fn use_bti(isa_flags: &Vec) -> bool { + isa_flags + .iter() + .find(|&f| f.name == "use_bti") + .map_or(false, |f| f.as_bool().unwrap_or(false)) +} diff --git a/cranelift/jit/src/memory.rs b/cranelift/jit/src/memory.rs index 02f274c72ff4..a18f6ad3c1ff 100644 --- a/cranelift/jit/src/memory.rs +++ b/cranelift/jit/src/memory.rs @@ -104,6 +104,15 @@ impl Drop for PtrLen { // TODO: add a `Drop` impl for `cfg(target_os = "windows")` +/// Type of branch protection to apply to executable memory. +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum BranchProtection { + /// No protection. + None, + /// Use the Branch Target Identification extension of the Arm architecture. + BTI, +} + /// JIT memory manager. This manages pages of suitably aligned and /// accessible memory. 
Memory will be leaked by default to have /// function pointers remain valid for the remainder of the @@ -113,15 +122,17 @@ pub(crate) struct Memory { already_protected: usize, current: PtrLen, position: usize, + branch_protection: BranchProtection, } impl Memory { - pub(crate) fn new() -> Self { + pub(crate) fn new(branch_protection: BranchProtection) -> Self { Self { allocations: Vec::new(), already_protected: 0, current: PtrLen::new(), position: 0, + branch_protection, } } @@ -157,14 +168,35 @@ impl Memory { pub(crate) fn set_readable_and_executable(&mut self) { self.finish_current(); + let set_region_readable_and_executable = |ptr, len| { + if len != 0 { + if self.branch_protection == BranchProtection::BTI { + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + if std::arch::is_aarch64_feature_detected!("bti") { + let prot = libc::PROT_EXEC | libc::PROT_READ | /* PROT_BTI */ 0x10; + + unsafe { + if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 { + panic!("unable to make memory readable+executable"); + } + } + + return; + } + } + + unsafe { + region::protect(ptr, len, region::Protection::READ_EXECUTE) + .expect("unable to make memory readable+executable"); + } + } + }; + #[cfg(feature = "selinux-fix")] { for &PtrLen { ref map, ptr, len } in &self.allocations[self.already_protected..] { - if len != 0 && map.is_some() { - unsafe { - region::protect(ptr, len, region::Protection::READ_EXECUTE) - .expect("unable to make memory readable+executable"); - } + if map.is_some() { + set_region_readable_and_executable(ptr, len); } } } @@ -172,12 +204,7 @@ impl Memory { #[cfg(not(feature = "selinux-fix"))] { for &PtrLen { ptr, len } in &self.allocations[self.already_protected..] { - if len != 0 { - unsafe { - region::protect(ptr, len, region::Protection::READ_EXECUTE) - .expect("unable to make memory readable+executable"); - } - } + set_region_readable_and_executable(ptr, len); } } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 198b0e63037c..6e0c15bdc4da 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -459,6 +459,10 @@ impl wasmtime_environ::Compiler for Compiler { .collect() } + fn is_branch_protection_enabled(&self) -> bool { + self.isa.is_branch_protection_enabled() + } + #[cfg(feature = "component-model")] fn component_compiler(&self) -> &dyn wasmtime_environ::component::ComponentCompiler { self diff --git a/crates/environ/src/compilation.rs b/crates/environ/src/compilation.rs index 9fc56e75fa75..f4f6aa72d4d2 100644 --- a/crates/environ/src/compilation.rs +++ b/crates/environ/src/compilation.rs @@ -266,6 +266,9 @@ pub trait Compiler: Send + Sync { /// Same as [`Compiler::flags`], but ISA-specific (a cranelift-ism) fn isa_flags(&self) -> BTreeMap; + /// Get a flag indicating whether branch protection is enabled. + fn is_branch_protection_enabled(&self) -> bool; + /// Returns a suitable compiler usable for component-related compliations. 
/// /// Note that the `ComponentCompiler` trait can also be implemented for diff --git a/crates/fuzzing/src/generators/codegen_settings.rs b/crates/fuzzing/src/generators/codegen_settings.rs index 8c2ef4476edb..629a95d8fae3 100644 --- a/crates/fuzzing/src/generators/codegen_settings.rs +++ b/crates/fuzzing/src/generators/codegen_settings.rs @@ -127,6 +127,7 @@ impl<'a> Arbitrary<'a> for CodegenSettings { "aarch64" => { test: is_aarch64_feature_detected, + std: "bti" => clif: "use_bti", std: "lse" => clif: "has_lse", // even though the natural correspondence seems to be // between "paca" and "has_pauth", the latter has no effect diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index 5dfe1a111593..08ee895f7c37 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -40,7 +40,7 @@ pub struct Publish<'a> { pub obj: File<'a>, /// Reference to the entire `MmapVec` and its contents. - pub mmap: &'a [u8], + pub mmap: &'a MmapVec, /// Reference to just the text section of the object file, a subslice of /// `mmap`. @@ -87,7 +87,7 @@ impl CodeMemory { /// After this function executes all JIT code should be ready to execute. /// The various parsed results of the internals of the `MmapVec` are /// returned through the `Publish` structure. - pub fn publish(&mut self) -> Result> { + pub fn publish(&mut self, enable_branch_protection: bool) -> Result> { assert!(!self.published); self.published = true; @@ -159,7 +159,7 @@ impl CodeMemory { // read/execute, notably not using read/write/execute to prevent // modifications. self.mmap - .make_executable(text_range.clone()) + .make_executable(text_range.clone(), enable_branch_protection) .expect("unable to make memory executable"); #[cfg(all(target_arch = "aarch64", target_os = "linux"))] diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index ecc516c42f2d..4a0521a526a2 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -136,6 +136,9 @@ struct Metadata { /// Note that even if this flag is `true` sections may be missing if they /// weren't found in the original wasm module itself. has_wasm_debuginfo: bool, + + /// Whether or not branch protection is enabled. + is_branch_protection_enabled: bool, } /// Finishes compilation of the `translation` specified, producing the final @@ -160,6 +163,7 @@ pub fn finish_compile( funcs: PrimaryMap, trampolines: Vec, tunables: &Tunables, + is_branch_protection_enabled: bool, ) -> Result<(MmapVec, CompiledModuleInfo)> { let ModuleTranslation { mut module, @@ -265,6 +269,7 @@ pub fn finish_compile( has_unparsed_debuginfo, code_section_offset: debuginfo.wasm_file.code_section_offset, has_wasm_debuginfo: tunables.parse_wasm_debuginfo, + is_branch_protection_enabled, }, }; bincode::serialize_into(&mut bytes, &info)?; @@ -398,19 +403,10 @@ impl CompiledModule { profiler: &dyn ProfilingAgent, id_allocator: &CompiledModuleIdAllocator, ) -> Result { - // Transfer ownership of `obj` to a `CodeMemory` object which will - // manage permissions, such as the executable bit. Once it's located - // there we also publish it for being able to execute. Note that this - // step will also resolve pending relocations in the compiled image. 
- let mut code_memory = CodeMemory::new(mmap); - let code = code_memory - .publish() - .context("failed to publish code memory")?; - + let obj = File::parse(&mmap[..]).context("failed to parse internal elf file")?; + let opt_section = |name: &str| obj.section_by_name(name).and_then(|s| s.data().ok()); let section = |name: &str| { - code.obj - .section_by_name(name) - .and_then(|s| s.data().ok()) + opt_section(name) .ok_or_else(|| anyhow!("missing section `{}` in compilation artifacts", name)) }; @@ -422,35 +418,28 @@ impl CompiledModule { .context("failed to deserialize wasmtime module info")?, }; - let func_name_data = match code - .obj - .section_by_name(ELF_NAME_DATA) - .and_then(|s| s.data().ok()) - { - Some(data) => subslice_range(data, code.mmap), - None => 0..0, - }; - let mut ret = Self { module: Arc::new(info.module), funcs: info.funcs, trampolines: info.trampolines, - wasm_data: subslice_range(section(ELF_WASM_DATA)?, code.mmap), - address_map_data: code - .obj - .section_by_name(ELF_WASMTIME_ADDRMAP) - .and_then(|s| s.data().ok()) - .map(|slice| subslice_range(slice, code.mmap)) + wasm_data: subslice_range(section(ELF_WASM_DATA)?, &mmap), + address_map_data: opt_section(ELF_WASMTIME_ADDRMAP) + .map(|slice| subslice_range(slice, &mmap)) .unwrap_or(0..0), - trap_data: subslice_range(section(ELF_WASMTIME_TRAPS)?, code.mmap), - code: subslice_range(code.text, code.mmap), + func_name_data: opt_section(ELF_NAME_DATA) + .map(|slice| subslice_range(slice, &mmap)) + .unwrap_or(0..0), + trap_data: subslice_range(section(ELF_WASMTIME_TRAPS)?, &mmap), + code: subslice_range(section(".text")?, &mmap), dbg_jit_registration: None, - code_memory, + code_memory: CodeMemory::new(mmap), meta: info.meta, unique_id: id_allocator.alloc(), func_names: info.func_names, - func_name_data, }; + ret.code_memory + .publish(ret.meta.is_branch_protection_enabled) + .context("failed to publish code memory")?; ret.register_debug_and_profiling(profiler)?; Ok(ret) diff --git a/crates/runtime/src/mmap.rs b/crates/runtime/src/mmap.rs index a00a47c7dbbd..478a387c4327 100644 --- a/crates/runtime/src/mmap.rs +++ b/crates/runtime/src/mmap.rs @@ -412,7 +412,11 @@ impl Mmap { } /// Makes the specified `range` within this `Mmap` to be read/execute. 
- pub unsafe fn make_executable(&self, range: Range) -> Result<()> { + pub unsafe fn make_executable( + &self, + range: Range, + enable_branch_protection: bool, + ) -> Result<()> { assert!(range.start <= self.len()); assert!(range.end <= self.len()); assert!(range.start <= range.end); @@ -428,8 +432,15 @@ impl Mmap { use std::io; use windows_sys::Win32::System::Memory::*; + let flags = if enable_branch_protection { + // TODO: We use this check to avoid an unused variable warning, + // but some of the CFG-related flags might be applicable + PAGE_EXECUTE_READ + } else { + PAGE_EXECUTE_READ + }; let mut old = 0; - let result = VirtualProtect(base, len, PAGE_EXECUTE_READ, &mut old); + let result = VirtualProtect(base, len, flags, &mut old); if result == 0 { return Err(io::Error::last_os_error().into()); } @@ -438,8 +449,25 @@ impl Mmap { #[cfg(not(windows))] { use rustix::mm::{mprotect, MprotectFlags}; - mprotect(base, len, MprotectFlags::READ | MprotectFlags::EXEC)?; + + let flags = MprotectFlags::READ | MprotectFlags::EXEC; + let flags = if enable_branch_protection { + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + if std::arch::is_aarch64_feature_detected!("bti") { + MprotectFlags::from_bits_unchecked(flags.bits() | /* PROT_BTI */ 0x10) + } else { + flags + } + + #[cfg(not(all(target_arch = "aarch64", target_os = "linux")))] + flags + } else { + flags + }; + + mprotect(base, len, flags)?; } + Ok(()) } diff --git a/crates/runtime/src/mmap_vec.rs b/crates/runtime/src/mmap_vec.rs index 2eaedc5a3034..9249f2a5e132 100644 --- a/crates/runtime/src/mmap_vec.rs +++ b/crates/runtime/src/mmap_vec.rs @@ -102,9 +102,15 @@ impl MmapVec { } /// Makes the specified `range` within this `mmap` to be read/execute. - pub unsafe fn make_executable(&self, range: Range) -> Result<()> { - self.mmap - .make_executable(range.start + self.range.start..range.end + self.range.start) + pub unsafe fn make_executable( + &self, + range: Range, + enable_branch_protection: bool, + ) -> Result<()> { + self.mmap.make_executable( + range.start + self.range.start..range.end + self.range.start, + enable_branch_protection, + ) } /// Returns the underlying file that this mmap is mapping, if present. diff --git a/crates/wasmtime/src/component/component.rs b/crates/wasmtime/src/component/component.rs index f4afc9789f73..bac0eb4f7e09 100644 --- a/crates/wasmtime/src/component/component.rs +++ b/crates/wasmtime/src/component/component.rs @@ -164,7 +164,7 @@ impl Component { let static_modules = static_modules?; let (lowerings, always_trap, transcoders, trampolines, trampoline_obj) = trampolines?; let mut trampoline_obj = CodeMemory::new(trampoline_obj); - let code = trampoline_obj.publish()?; + let code = trampoline_obj.publish(engine.compiler().is_branch_protection_enabled())?; let text = wasmtime_jit::subslice_range(code.text, code.mmap); // This map is used to register all known tramplines in the diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 02747725ebe1..85c3aeb99bc5 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -462,6 +462,9 @@ impl Engine { "sign_return_address" => Some(true), // No effect on its own. "sign_return_address_with_bkey" => Some(true), + // The `BTI` instruction acts as a `NOP` when unsupported, so it + // is safe to enable it. + "use_bti" => Some(true), // fall through to the very bottom to indicate that support is // not enabled to test whether this feature is enabled on the // host. 
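Note on the `"use_bti" => Some(true)` entry above: BTI instructions sit in the A64 hint space, so they retire as plain NOPs on cores without FEAT_BTI, which is why the engine can unconditionally report the flag as compatible with the host. A small sanity-check sketch, not part of the patch (the helper and test names are made up), relating the encoding added in `emit.rs` to the expected bytes in `emit_tests.rs`:

    // Hypothetical helper mirroring the `Inst::Bti` emission above: the base
    // word is 0xd503241f and the target type occupies bits [7:6].
    fn bti_encoding(targets: u32) -> u32 {
        debug_assert!(targets <= 0b11);
        0xd503241f | (targets << 6)
    }

    #[test]
    fn bti_j_matches_emit_test() {
        // `bti j` uses targets == 0b10; the emit test expects the little-endian
        // byte string "9F2403D5", i.e. the word 0xd503249f.
        let word = bti_encoding(0b10);
        assert_eq!(word, 0xd503249f);
        assert_eq!(word.to_le_bytes(), [0x9f, 0x24, 0x03, 0xd5]);
    }
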
diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 43a8280b6cea..574f19dd728e 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -1,7 +1,7 @@ -use crate::Engine; use crate::{ signatures::SignatureCollection, types::{ExportType, ExternType, ImportType}, + Engine, }; use anyhow::{bail, Context, Result}; use once_cell::sync::OnceCell; @@ -328,18 +328,15 @@ impl Module { /// /// This is where compilation actually happens of WebAssembly modules and /// translation/parsing/validation of the binary input occurs. The actual - /// result here is a triple of: - /// - /// * The index into the second field of the "main module". The "main - /// module" in this case is the outermost module described by the `wasm` - /// input, and is here for the module linking proposal. - /// * A list of compilation artifacts for each module found within `wasm`. - /// Note that if module linking is disabled then this list will always - /// have a size of exactly 1. These pairs are returned by - /// `wasmtime_jit::finish_compile`. - /// * Type information about all the modules returned. All returned modules - /// have local type information with indices that refer to these returned + /// result here is a combination of: + /// + /// * The compilation artifacts for the module found within `wasm`, as + /// returned by `wasmtime_jit::finish_compile`. + /// * Type information about the module returned. All returned modules have + /// local type information with indices that refer to these returned /// tables. + /// * A boolean value indicating whether forward-edge CFI has been applied + /// to the compiled module. #[cfg(compiler)] pub(crate) fn build_artifacts( engine: &Engine, @@ -431,8 +428,14 @@ impl Module { // table lazy init. translation.try_func_table_init(); - let (mmap, info) = - wasmtime_jit::finish_compile(translation, obj, funcs, trampolines, tunables)?; + let (mmap, info) = wasmtime_jit::finish_compile( + translation, + obj, + funcs, + trampolines, + tunables, + engine.compiler().is_branch_protection_enabled(), + )?; Ok((mmap, Some(info))) } diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index cc9cd570e3ee..33fb6e12b40f 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -118,7 +118,7 @@ where // Copy the results of JIT compilation into executable memory, and this will // also take care of unwind table registration. let mut code_memory = CodeMemory::new(obj); - let code = code_memory.publish()?; + let code = code_memory.publish(engine.compiler().is_branch_protection_enabled())?; register_trampolines(engine.profiler(), &code.obj);
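For context, a minimal end-to-end sketch of opting into the new setting from an embedder. This is not taken from the patch: the function name is made up, and it assumes the `Builder::finish` signature that returns a `CodegenResult` (which depends on the Cranelift version in use).

    use cranelift_codegen::isa;
    use cranelift_codegen::settings::{self, Configurable};

    fn aarch64_isa_with_bti() -> anyhow::Result<Box<dyn isa::TargetIsa>> {
        // `use_bti` is the ISA-specific boolean added in `arm64.rs` above.
        let mut isa_builder = isa::lookup_by_name("aarch64-unknown-linux-gnu")?;
        isa_builder.set("use_bti", "true")?;

        let isa = isa_builder.finish(settings::Flags::new(settings::builder()))?;

        // `is_branch_protection_enabled` is the new `TargetIsa` query added
        // above; Wasmtime uses it to decide whether executable pages should be
        // mapped with PROT_BTI.
        assert!(isa.is_branch_protection_enabled());
        Ok(isa)
    }
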