diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 64d41d4147644..d947733e49853 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -104,6 +104,8 @@ class AArch64AsmPrinter : public AsmPrinter {
   void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
 
+  void LowerHardenedBRJumpTable(const MachineInstr &MI);
+
   void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI);
 
   void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
@@ -1310,6 +1312,139 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
           .addImm(Size == 4 ? 0 : 2));
 }
 
+void AArch64AsmPrinter::LowerHardenedBRJumpTable(const MachineInstr &MI) {
+  unsigned InstsEmitted = 0;
+
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  assert(MJTI && "Can't lower jump-table dispatch without JTI");
+
+  const std::vector<MachineJumpTableEntry> &JTs = MJTI->getJumpTables();
+  assert(!JTs.empty() && "Invalid JT index for jump-table dispatch");
+
+  // Emit:
+  //     mov x17, #<size of table>     ; depending on table size, with MOVKs
+  //     cmp x16, x17                  ; or #imm if table size fits in 12-bit
+  //     csel x16, x16, xzr, ls        ; check for index overflow
+  //
+  //     adrp x17, Ltable@PAGE         ; materialize table address
+  //     add x17, Ltable@PAGEOFF
+  //     ldrsw x16, [x17, x16, lsl #2] ; load table entry
+  //
+  //   Lanchor:
+  //     adr x17, Lanchor              ; compute target address
+  //     add x16, x17, x16
+  //     br x16                        ; branch to target
+
+  MachineOperand JTOp = MI.getOperand(0);
+
+  unsigned JTI = JTOp.getIndex();
+  assert(!AArch64FI->getJumpTableEntryPCRelSymbol(JTI) &&
+         "unsupported compressed jump table");
+
+  const uint64_t NumTableEntries = JTs[JTI].MBBs.size();
+
+  // cmp only supports a 12-bit immediate. If we need more, materialize the
+  // immediate, using x17 as a scratch register.
+  uint64_t MaxTableEntry = NumTableEntries - 1;
+  if (isUInt<12>(MaxTableEntry)) {
+    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXri)
+                                     .addReg(AArch64::XZR)
+                                     .addReg(AArch64::X16)
+                                     .addImm(MaxTableEntry)
+                                     .addImm(0));
+    ++InstsEmitted;
+  } else {
+    EmitToStreamer(*OutStreamer,
+                   MCInstBuilder(AArch64::MOVZXi)
+                       .addReg(AArch64::X17)
+                       .addImm(static_cast<uint16_t>(MaxTableEntry))
+                       .addImm(0));
+    ++InstsEmitted;
+    // It's sad that we have to manually materialize instructions, but we can't
+    // trivially reuse the main pseudo expansion logic.
+    // A MOVK sequence is easy enough to generate and handles the general case.
+    for (int Offset = 16; Offset < 64; Offset += 16) {
+      if ((MaxTableEntry >> Offset) == 0)
+        break;
+      EmitToStreamer(*OutStreamer,
+                     MCInstBuilder(AArch64::MOVKXi)
+                         .addReg(AArch64::X17)
+                         .addReg(AArch64::X17)
+                         .addImm(static_cast<uint16_t>(MaxTableEntry >> Offset))
+                         .addImm(Offset));
+      ++InstsEmitted;
+    }
+    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXrs)
+                                     .addReg(AArch64::XZR)
+                                     .addReg(AArch64::X16)
+                                     .addReg(AArch64::X17)
+                                     .addImm(0));
+    ++InstsEmitted;
+  }
+
+  // This picks entry #0 on failure.
+  // We might want to trap instead.
+  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::CSELXr)
+                                   .addReg(AArch64::X16)
+                                   .addReg(AArch64::X16)
+                                   .addReg(AArch64::XZR)
+                                   .addImm(AArch64CC::LS));
+  ++InstsEmitted;
+
+  // Prepare the @PAGE/@PAGEOFF low/high operands.
+  MachineOperand JTMOHi(JTOp), JTMOLo(JTOp);
+  MCOperand JTMCHi, JTMCLo;
+
+  JTMOHi.setTargetFlags(AArch64II::MO_PAGE);
+  JTMOLo.setTargetFlags(AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+  MCInstLowering.lowerOperand(JTMOHi, JTMCHi);
+  MCInstLowering.lowerOperand(JTMOLo, JTMCLo);
+
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(AArch64::ADRP).addReg(AArch64::X17).addOperand(JTMCHi));
+  ++InstsEmitted;
+
+  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXri)
+                                   .addReg(AArch64::X17)
+                                   .addReg(AArch64::X17)
+                                   .addOperand(JTMCLo)
+                                   .addImm(0));
+  ++InstsEmitted;
+
+  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
+                                   .addReg(AArch64::X16)
+                                   .addReg(AArch64::X17)
+                                   .addReg(AArch64::X16)
+                                   .addImm(0)
+                                   .addImm(1));
+  ++InstsEmitted;
+
+  MCSymbol *AdrLabel = MF->getContext().createTempSymbol();
+  const auto *AdrLabelE = MCSymbolRefExpr::create(AdrLabel, MF->getContext());
+  AArch64FI->setJumpTableEntryInfo(JTI, 4, AdrLabel);
+
+  OutStreamer->emitLabel(AdrLabel);
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(AArch64::ADR).addReg(AArch64::X17).addExpr(AdrLabelE));
+  ++InstsEmitted;
+
+  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+                                   .addReg(AArch64::X16)
+                                   .addReg(AArch64::X17)
+                                   .addReg(AArch64::X16)
+                                   .addImm(0));
+  ++InstsEmitted;
+
+  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::BR).addReg(AArch64::X16));
+  ++InstsEmitted;
+
+  (void)InstsEmitted;
+  assert(STI->getInstrInfo()->getInstSizeInBytes(MI) >= InstsEmitted * 4);
+}
+
 void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer,
                                   const llvm::MachineInstr &MI) {
   unsigned Opcode = MI.getOpcode();
@@ -2177,6 +2312,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
     LowerJumpTableDest(*OutStreamer, *MI);
     return;
 
+  case AArch64::BR_JumpTable:
+    LowerHardenedBRJumpTable(*MI);
+    return;
+
   case AArch64::FMOVH0:
   case AArch64::FMOVS0:
   case AArch64::FMOVD0:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e0c3cc5eddb82..073a071c6358a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10678,6 +10678,30 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
   auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
   AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
 
+  // With aarch64-jump-table-hardening, we only expand the jump table dispatch
+  // sequence later, to guarantee the integrity of the intermediate values.
+  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
+          "aarch64-jump-table-hardening")) {
+    CodeModel::Model CM = getTargetMachine().getCodeModel();
+    if (Subtarget->isTargetMachO()) {
+      if (CM != CodeModel::Small && CM != CodeModel::Large)
+        report_fatal_error("Unsupported code-model for hardened jump-table");
+    } else {
+      // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
+      assert(Subtarget->isTargetELF() &&
+             "jump table hardening only supported on MachO/ELF");
+      if (CM != CodeModel::Small)
+        report_fatal_error("Unsupported code-model for hardened jump-table");
+    }
+
+    SDValue X16Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X16,
+                                       Entry, SDValue());
+    SDNode *B = DAG.getMachineNode(AArch64::BR_JumpTable, DL, MVT::Other,
+                                   DAG.getTargetJumpTable(JTI, MVT::i32),
+                                   X16Copy.getValue(0), X16Copy.getValue(1));
+    return SDValue(B, 0);
+  }
+
   SDNode *Dest =
       DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
                          Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1e06d5fdc7562..64777099ca31b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1143,6 +1143,33 @@ def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                      Sched<[]>;
 }
 
+// A hardened but more expensive version of jump-table dispatch.
+// This combines the target address computation (otherwise done using the
+// JumpTableDest pseudos above) with the branch itself (otherwise done using
+// a plain BR) in a single non-attackable sequence.
+//
+// We take the final entry index as an operand to allow isel freedom. This does
+// mean that the index can be attacker-controlled. To address that, we also do
+// limited checking of the offset, mainly ensuring it still points within the
+// jump-table array. When it doesn't, this branches to the first entry.
+// We might want to trap instead.
+//
+// This is intended for use in conjunction with ptrauth for other code pointers,
+// to avoid signing jump-table entries and turning them into pointers.
+//
+// Entry index is passed in x16. Clobbers x16/x17/nzcv.
+let isNotDuplicable = 1 in
+def BR_JumpTable : Pseudo<(outs), (ins i32imm:$jti), []>, Sched<[]> {
+  let isBranch = 1;
+  let isTerminator = 1;
+  let isIndirectBranch = 1;
+  let isBarrier = 1;
+  let isNotDuplicable = 1;
+  let Defs = [X16, X17, NZCV];
+  let Uses = [X16];
+  let Size = 44; // 28 fixed + 16 variable, for table size materialization
+}
+
 // Space-consuming pseudo to aid testing of placement and reachability
 // algorithms. Immediate operand is the number of bytes this "instruction"
 // occupies; register operands can be used to enforce dependency and constrain
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 9e0860934f777..d5655bc1847e6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3597,10 +3597,33 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
   unsigned JTI = I.getOperand(1).getIndex();
   Register Index = I.getOperand(2).getReg();
 
+  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
+
+  // With aarch64-jump-table-hardening, we only expand the jump table dispatch
+  // sequence later, to guarantee the integrity of the intermediate values.
+  if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
+    CodeModel::Model CM = TM.getCodeModel();
+    if (STI.isTargetMachO()) {
+      if (CM != CodeModel::Small && CM != CodeModel::Large)
+        report_fatal_error("Unsupported code-model for hardened jump-table");
+    } else {
+      // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
+      assert(STI.isTargetELF() &&
+             "jump table hardening only supported on MachO/ELF");
+      if (CM != CodeModel::Small)
+        report_fatal_error("Unsupported code-model for hardened jump-table");
+    }
+
+    MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
+    MIB.buildInstr(AArch64::BR_JumpTable)
+        .addJumpTableIndex(I.getOperand(1).getIndex());
+    I.eraseFromParent();
+    return true;
+  }
+
   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
 
-  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
   auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                       {TargetReg, ScratchReg}, {JTAddr, Index})
                            .addJumpTableIndex(JTI);
diff --git a/llvm/test/CodeGen/AArch64/hardened-br-jump-table.ll b/llvm/test/CodeGen/AArch64/hardened-br-jump-table.ll
new file mode 100644
index 0000000000000..f20ea4db7baeb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/hardened-br-jump-table.ll
@@ -0,0 +1,133 @@
+; RUN: rm -rf %t && split-file %s %t
+
+;--- err1.ll
+
+; RUN: not --crash llc %t/err1.ll -mtriple=aarch64-elf \
+; RUN:   -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
+; RUN:   -code-model=large \
+; RUN:   -o - -verify-machineinstrs 2>&1 | FileCheck %s --check-prefix=ERR1
+
+; RUN: not --crash llc %t/err1.ll -mtriple=aarch64-elf \
+; RUN:   -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
+; RUN:   -global-isel -global-isel-abort=1 \
+; RUN:   -code-model=large \
+; RUN:   -o - -verify-machineinstrs 2>&1 | FileCheck %s --check-prefix=ERR1
+
+; ERR1: LLVM ERROR: Unsupported code-model for hardened jump-table
+define i32 @test_jumptable(i32 %in) "aarch64-jump-table-hardening" {
+
+  switch i32 %in, label %def [
+    i32 0, label %lbl1
+    i32 1, label %lbl2
+  ]
+
+def:
+  ret i32 0
+
+lbl1:
+  ret i32 1
+
+lbl2:
+  ret i32 2
+}
+
+;--- test.ll
+
+; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
+; RUN:   -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
+; RUN:   -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO
+
+; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
+; RUN:   -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
+; RUN:   -global-isel -global-isel-abort=1 \
+; RUN:   -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO
+
+; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
+; RUN:   -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
+; RUN:   -code-model=large \
+; RUN:   -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO
+
+; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
+; RUN:   -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
+; RUN:   -global-isel -global-isel-abort=1 \
+; RUN:   -code-model=large \
+; RUN:   -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO
+
+; RUN: llc %t/test.ll -mtriple=aarch64-elf \
+; RUN:   -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
+; RUN:   -o - -verify-machineinstrs | FileCheck %s --check-prefix=ELF
+
+; RUN: llc %t/test.ll -mtriple=aarch64-elf \
+; RUN:   -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
+; RUN:   -global-isel -global-isel-abort=1 \
+; RUN:   -o - -verify-machineinstrs | FileCheck %s --check-prefix=ELF
+
+; MACHO-LABEL: test_jumptable:
+; MACHO: mov w16, w0
+; MACHO: cmp x16, #5
+; MACHO: csel x16, x16, xzr, ls
+; MACHO-NEXT: adrp x17, LJTI0_0@PAGE
+; MACHO-NEXT: add x17, x17, LJTI0_0@PAGEOFF
+; MACHO-NEXT: ldrsw x16, [x17, x16, lsl #2]
+; MACHO-NEXT: Ltmp0:
+; MACHO-NEXT: adr x17, Ltmp0
+; MACHO-NEXT: add x16, x17, x16
+; MACHO-NEXT: br x16
+
+; ELF-LABEL: test_jumptable:
+; ELF: mov w16, w0
+; ELF: cmp x16, #5
+; ELF: csel x16, x16, xzr, ls
+; ELF-NEXT: adrp x17, .LJTI0_0
+; ELF-NEXT: add x17, x17, :lo12:.LJTI0_0
+; ELF-NEXT: ldrsw x16, [x17, x16, lsl #2]
+; ELF-NEXT: .Ltmp0:
+; ELF-NEXT: adr x17, .Ltmp0
+; ELF-NEXT: add x16, x17, x16
+; ELF-NEXT: br x16
+
+define i32 @test_jumptable(i32 %in) "aarch64-jump-table-hardening" {
+
+  switch i32 %in, label %def [
+    i32 0, label %lbl1
+    i32 1, label %lbl2
+    i32 2, label %lbl3
+    i32 4, label %lbl4
+    i32 5, label %lbl5
+  ]
+
+def:
+  ret i32 0
+
+lbl1:
+  ret i32 1
+
+lbl2:
+  ret i32 2
+
+lbl3:
+  ret i32 4
+
+lbl4:
+  ret i32 8
+
+lbl5:
+  ret i32 10
+
+}
+
+; MACHO-LABEL: LJTI0_0:
+; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
+; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
+; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
+; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
+; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
+; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
+
+; ELF-LABEL: .LJTI0_0:
+; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
+; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
+; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
+; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
+; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
+; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
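
Note (illustration, not part of the patch): the tests above only exercise the small-table path, where the bound fits in the 12-bit cmp immediate. For a hypothetical table whose maximum entry index is 0x12345, the else branch of LowerHardenedBRJumpTable would first materialize the bound into x17, so the ELF-flavored output would look roughly like this (register flow from the patch; label names illustrative):

    mov   w16, w0                    ; index copied into x16 by isel
    mov   x17, #0x2345               ; movz: low 16 bits of the max index
    movk  x17, #0x1, lsl #16         ; next 16-bit chunk; more movks only if needed
    cmp   x16, x17                   ; subs xzr, x16, x17
    csel  x16, x16, xzr, ls          ; out-of-range index clamps to entry #0
    adrp  x17, .LJTI0_0
    add   x17, x17, :lo12:.LJTI0_0
    ldrsw x16, [x17, x16, lsl #2]    ; load 32-bit relative table entry
.Ltmp0:
    adr   x17, .Ltmp0                ; anchor for the relative entries
    add   x16, x17, x16
    br    x16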
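On the Size = 44 accounting in the BR_JumpTable pseudo (my count, derived from the AsmPrinter lowering above): the tail after the bounds check is csel + adrp + add + ldrsw + adr + add + br, i.e. 7 instructions = 28 fixed bytes. The check itself is a single cmp (4 bytes) when the bound fits in 12 bits, or movz + movk(s) + cmp otherwise; the 16 variable bytes thus cover movz plus up to two movks plus the cmp, enough for any table with fewer than 2^48 entries. The getInstSizeInBytes(MI) >= InstsEmitted * 4 assert keeps the declared pseudo size honest against the sequence actually emitted.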