diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index ae04891e791f9..a6b6196b8796b 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1096,7 +1096,7 @@ class MCPlusBuilder { /// targets. Return true on success, and the address in Target. virtual bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const { - return Analysis->evaluateBranch(Inst, Addr, Size, Target); + return Analysis->findTargetAddress(Inst, Addr, Size, Target); } /// Return true if one of the operands of the \p Inst instruction uses diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h index 3cfc879b92ef5..4b08be437013d 100644 --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -19,6 +19,7 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include #include @@ -180,11 +181,12 @@ class LLVM_ABI MCInstrAnalysis { return false; } - /// Given a branch instruction try to get the address the branch - /// targets. Return true on success, and the address in Target. - virtual bool - evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const; + /// Given an instruction that accesses memory or + /// that branches to another address, try to get the address it targets. + /// Return true on success, and the address in \p Target. + virtual bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI = nullptr) const; /// Given an instruction tries to get the address of a memory operand. Returns /// the address on success. 
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp index cea905d092e0b..0300ad3003cf7 100644 --- a/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/llvm/lib/MC/MCInstrAnalysis.cpp @@ -24,9 +24,9 @@ bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI, return false; } -bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/, - uint64_t /*Size*/, - uint64_t & /*Target*/) const { +bool MCInstrAnalysis::findTargetAddress(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target, + const MCSubtargetInfo *STI) const { return false; } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 5c8f57664a2cc..e08dcbac4e759 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -394,8 +394,9 @@ class AArch64MCInstrAnalysis : public MCInstrAnalysis { public: AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { // Search for a PC-relative argument. // This will handle instructions like bcc (where the first argument is the // condition code) and cbz (where it is a register). 
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index d66725d3a6c4b..884c60cc45a19 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -137,8 +137,9 @@ class AMDGPUMCInstrAnalysis : public MCInstrAnalysis { explicit AMDGPUMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { if (Inst.getNumOperands() == 0 || !Inst.getOperand(0).isImm() || Info->get(Inst.getOpcode()).operands()[0].OperandType != MCOI::OPERAND_PCREL) diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index e84aaaad3750d..4cd72395b6933 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -407,8 +407,9 @@ class ARMMCInstrAnalysis : public MCInstrAnalysis { return MCInstrAnalysis::isConditionalBranch(Inst); } - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { const MCInstrDesc &Desc = Info->get(Inst.getOpcode()); // Find the PC-relative immediate operand in the instruction. 
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index 5f44dd9583aff..3425ef7cb0bf4 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -77,8 +77,9 @@ class BPFMCInstrAnalysis : public MCInstrAnalysis { explicit BPFMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { // The target is the 3rd operand of cond inst and the 1st of uncond inst. int32_t Imm; if (isConditionalBranch(Inst)) { diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp index faaeeb8ed4dca..082eeff1c71af 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp @@ -111,8 +111,9 @@ class CSKYMCInstrAnalysis : public MCInstrAnalysis { explicit CSKYMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { if (isConditionalBranch(Inst) || isUnconditionalBranch(Inst)) { int64_t Imm; Imm = Inst.getOperand(Inst.getNumOperands() - 1).getImm(); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index bfea50e2d6dc0..ffd5e1b66f2bd 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -730,8 +730,9 @@ class HexagonMCInstrAnalysis : 
public MCInstrAnalysis { return MCInstrAnalysis::isConditionalBranch(Inst); } - bool evaluateBranch(MCInst const &Inst, uint64_t Addr, - uint64_t Size, uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { if (!(isCall(Inst) || isUnconditionalBranch(Inst) || isConditionalBranch(Inst))) return false; diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index 687386c6962be..d1dccdbdabc13 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -94,8 +94,9 @@ class LanaiMCInstrAnalysis : public MCInstrAnalysis { explicit LanaiMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { if (Inst.getNumOperands() == 0) return false; if (!isConditionalBranch(Inst) && !isUnconditionalBranch(Inst) && diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index e5bd1c91edec9..dbfc8f4711ee3 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -171,8 +171,9 @@ class LoongArchMCInstrAnalysis : public MCInstrAnalysis { } } - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { unsigned NumOps = Inst.getNumOperands(); if ((isBranch(Inst) && !isIndirectBranch(Inst)) || 
Inst.getOpcode() == LoongArch::BL) { diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 2cc634154bffd..4eeae453bff01 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -226,8 +226,9 @@ class MipsMCInstrAnalysis : public MCInstrAnalysis { public: MipsMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { unsigned NumOps = Inst.getNumOperands(); if (NumOps == 0) return false; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index d856c3f055abd..8201ead14ca05 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -454,8 +454,9 @@ class PPCMCInstrAnalysis : public MCInstrAnalysis { explicit PPCMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { unsigned NumOps = Inst.getNumOperands(); if (NumOps == 0 || Info->get(Inst.getOpcode()).operands()[NumOps - 1].OperandType != diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index d917ef4129791..d0d54d950b6c8 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/ErrorHandling.h" #include 
"llvm/Support/MathExtras.h" #include +#include #define GET_INSTRINFO_MC_DESC #define ENABLE_INSTR_PREDICATE_VERIFIER @@ -184,6 +185,17 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { } switch (Inst.getOpcode()) { + case RISCV::C_LUI: + case RISCV::LUI: { + setGPRState(Inst.getOperand(0).getReg(), + SignExtend64<32>(Inst.getOperand(1).getImm() << 12)); + break; + } + case RISCV::AUIPC: { + setGPRState(Inst.getOperand(0).getReg(), + Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12)); + break; + } default: { // Clear the state of all defined registers for instructions that we don't // explicitly support. @@ -195,15 +207,12 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { } break; } - case RISCV::AUIPC: - setGPRState(Inst.getOperand(0).getReg(), - Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12)); - break; } } - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override { + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override { if (isConditionalBranch(Inst)) { int64_t Imm; if (Size == 2) @@ -215,6 +224,8 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { } switch (Inst.getOpcode()) { + default: + return false; case RISCV::C_J: case RISCV::C_JAL: case RISCV::QC_E_J: @@ -231,8 +242,85 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { } return false; } + case RISCV::C_ADDI: + case RISCV::ADDI: { + if (!STI) + return false; + unsigned int ArchRegWidth = + STI->getTargetTriple().getArchPointerBitWidth(); + MCRegister Reg = Inst.getOperand(1).getReg(); + auto TargetRegState = getGPRState(Reg); + if (TargetRegState && Reg != RISCV::X0) { + Target = *TargetRegState + Inst.getOperand(2).getImm(); + Target &= maskTrailingOnes(ArchRegWidth); + return true; + } + break; + } + case RISCV::C_ADDIW: + case RISCV::ADDIW: { + MCRegister Reg = Inst.getOperand(1).getReg(); + auto TargetRegState = 
getGPRState(Reg); + if (TargetRegState && Reg != RISCV::X0) { + Target = *TargetRegState + Inst.getOperand(2).getImm(); + Target = SignExtend64<32>(Target); + return true; + } + break; + } + case RISCV::LB: + case RISCV::LH: + case RISCV::LD: + case RISCV::LW: + case RISCV::LBU: + case RISCV::LHU: + case RISCV::LWU: + case RISCV::SB: + case RISCV::SH: + case RISCV::SW: + case RISCV::SD: + case RISCV::FLH: + case RISCV::FLW: + case RISCV::FLD: + case RISCV::FSH: + case RISCV::FSW: + case RISCV::FSD: + case RISCV::C_LD: + case RISCV::C_SD: + case RISCV::C_FLD: + case RISCV::C_FSD: + case RISCV::C_SW: + case RISCV::C_LW: + case RISCV::C_FSW: + case RISCV::C_FLW: + case RISCV::C_LBU: + case RISCV::C_LH: + case RISCV::C_LHU: + case RISCV::C_SB: + case RISCV::C_SH: + case RISCV::C_LWSP: + case RISCV::C_SWSP: + case RISCV::C_LDSP: + case RISCV::C_SDSP: + case RISCV::C_FLWSP: + case RISCV::C_FSWSP: + case RISCV::C_FLDSP: + case RISCV::C_FSDSP: + case RISCV::C_LD_RV32: + case RISCV::C_SD_RV32: + case RISCV::C_SDSP_RV32: + case RISCV::LD_RV32: + case RISCV::C_LDSP_RV32: + case RISCV::SD_RV32: { + MCRegister Reg = Inst.getOperand(1).getReg(); + auto TargetRegState = getGPRState(Reg); + if (TargetRegState) { + Target = *TargetRegState + Inst.getOperand(2).getImm(); + return true; + } + break; + } } - return false; } @@ -389,12 +477,11 @@ LLVMInitializeRISCVTargetMC() { TargetRegistry::RegisterELFStreamer(*T, createRISCVELFStreamer); TargetRegistry::RegisterObjectTargetStreamer( *T, createRISCVObjectTargetStreamer); - TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis); - // Register the asm target streamer. TargetRegistry::RegisterAsmTargetStreamer(*T, createRISCVAsmTargetStreamer); // Register the null target streamer. 
TargetRegistry::RegisterNullTargetStreamer(*T, createRISCVNullTargetStreamer); + TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis); } } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp index 564636959f00f..5aa5db826dcb4 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -441,7 +441,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, // binary object. if (SymbolizeOperands && MIA) { uint64_t Target; - if (MIA->evaluateBranch(*MI, 0, 0, Target)) + if (MIA->findTargetAddress(*MI, 0, 0, Target)) return; if (MIA->evaluateMemoryOperandAddress(*MI, /*STI=*/nullptr, 0, 0)) return; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp index ff27005f52ea8..8496bf81ede68 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -383,7 +383,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, // binary object. 
if (SymbolizeOperands && MIA) { uint64_t Target; - if (MIA->evaluateBranch(*MI, 0, 0, Target)) + if (MIA->findTargetAddress(*MI, 0, 0, Target)) return; if (MIA->evaluateMemoryOperandAddress(*MI, /*STI=*/nullptr, 0, 0)) return; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index cc7bcd678cb31..d71cbd8f7ad78 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -515,8 +515,9 @@ class X86MCInstrAnalysis : public MCInstrAnalysis { findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, const MCSubtargetInfo &STI) const override; - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, - uint64_t &Target) const override; + bool findTargetAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target, + const MCSubtargetInfo *STI) const override; std::optional evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr, uint64_t Size) const override; @@ -641,8 +642,9 @@ X86MCInstrAnalysis::findPltEntries(uint64_t PltSectionVA, } } -bool X86MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size, uint64_t &Target) const { +bool X86MCInstrAnalysis::findTargetAddress( + const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target, + const MCSubtargetInfo *STI) const { if (Inst.getNumOperands() == 0 || Info->get(Inst.getOpcode()).operands()[0].OperandType != MCOI::OPERAND_PCREL) diff --git a/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg b/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg new file mode 100644 index 0000000000000..9dd5a0eedea08 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg @@ -0,0 +1,2 @@ +if "RISCV" not in config.targets_to_build: + config.unsupported = True diff --git a/llvm/test/tools/llvm-objdump/RISCV/riscv-disassembly-address-resolution.s
b/llvm/test/tools/llvm-objdump/RISCV/riscv-disassembly-address-resolution.s new file mode 100644 index 0000000000000..fe29bb537e496 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/RISCV/riscv-disassembly-address-resolution.s @@ -0,0 +1,110 @@ +# RUN: llvm-mc -riscv-add-build-attributes -triple=riscv64 -filetype=obj -mattr=+d,+c,+zcb %s -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +# CHECK: 0000000000000000 <_start>: +# CHECK-NEXT: 0: 00010517 auipc a0, 0x10 +# CHECK-NEXT: 4: 01450513 addi a0, a0, 0x14 <target> +# CHECK-NEXT: 8: 00010517 auipc a0, 0x10 +# CHECK-NEXT: c: 0531 addi a0, a0, 0xc <target> +# CHECK-NEXT: e: 6541 lui a0, 0x10 +# CHECK-NEXT: 10: 0145059b addiw a1, a0, 0x14 <target> +# CHECK-NEXT: 14: 6541 lui a0, 0x10 +# CHECK-NEXT: 16: 2551 addiw a0, a0, 0x14 <target> +# CHECK-NEXT: 18: 00110537 lui a0, 0x110 +# CHECK-NEXT: 1c: c90c sw a1, 0x10(a0) <far_target> +# CHECK-NEXT: 1e: 00110537 lui a0, 0x110 +# CHECK-NEXT: 22: 4908 lw a0, 0x10(a0) <far_target> +# CHECK-NEXT: 24: 6541 lui a0, 0x10 +# CHECK-NEXT: 26: 6585 lui a1, 0x1 +# CHECK-NEXT: 28: 0306 slli t1, t1, 0x1 +# CHECK-NEXT: 2a: 0551 addi a0, a0, 0x14 <target> +# CHECK-NEXT: 2c: 0505 addi a0, a0, 0x1 +# CHECK-NEXT: 2e: 00002427 fsw ft0, 0x8(zero) <_start+0x8> +# CHECK-NEXT: 32: 00100017 auipc zero, 0x100 +# CHECK-NEXT: 36: 00002427 fsw ft0, 0x8(zero) <_start+0x8> +# CHECK-NEXT: 3a: 00110097 auipc ra, 0x110 +# CHECK-NEXT: 3e: fda080e7 jalr -0x26(ra) <func> +# CHECK-NEXT: 42: 01000517 auipc a0, 0x1000 +# CHECK-NEXT: 46: 00110517 auipc a0, 0x110 +# CHECK-NEXT: 4a: fca50513 addi a0, a0, -0x36 <far_target> + + +## The core of the feature being added was address resolution for instruction +## sequences where a register is populated by immediate values via two +## separate instructions. First by an instruction that provides the upper bits +## (auipc, lui, etc) followed by another instruction for the lower bits (addi, +## jalr, ld, etc.). + +.global _start +.text + +_start: + ## Test blocks 1-3 each focus on a certain starting instruction in a sequence.
+ ## Starting instructions are the ones that provide the upper bits. The other + ## instruction in the sequence is the one that provides the lower bits. The + ## second instruction is arbitrarily chosen to increase code coverage. + + ## Test block #1. + lla a0, target + auipc a0, 0x10 + c.addi a0, 0xc + + ## Test block #2. + c.lui a0, 0x10 + addiw a1, a0, 0x14 + c.lui a0, 0x10 + c.addiw a0, 0x14 + + ## Test block #3. + lui a0, 0x110 + sw a1, 0x10(a0) + lui a0, 0x110 + c.lw a0, 0x10(a0) + + ## Test block 4 tests instruction interleaving. Essentially the code's + ## ability to keep track of a valid sequence even if multiple other unrelated + ## instructions separate the two. In effect, the resolution must occur + ## alongside the instruction marked below with the upper bits provided by the + ## first instruction in the test. The instructions marked to be unrelated + ## operate on unrelated registers and should not affect the instruction + ## sequence formed around them. The last instruction in the test operates on the same + ## register as the sequence but should NOT have an address resolution since + ## the sequence terminated in the previous instruction. + lui a0, 0x10 ## Part of sequence. Provides upper bits + lui a1, 0x1 ## Unrelated instruction. + slli t1, t1, 0x1 ## Unrelated instruction. + addi a0, a0, 0x14 ## End of sequence. Provides lower bits. Resolution here + addi a0, a0, 0x1 ## Verify register tracking terminates. NO resolution here + + ## Test 5 checks that address resolution works for instructions that make + ## sense to have address resolution occur without an instruction providing + ## the upper bits. Such instructions include loads/stores relative to the + ## zero register and short pc-relative jumps. + fsw f0, 0x8(x0) + + ## Test 6 checks instructions providing upper bits do not change the tracked + ## value of zero register.
+ auipc x0, 0x100 + fsw f0, 0x8(x0) + + ## Test 7 ensures that the newly added functionality is compatible with + ## code that already worked for branch instructions. + call func + + ## Test 8 checks that subsequent upper-bit operations on the same register + ## correctly update the tracked register value to the value written by the + ## latest instruction. Resolution must occur based on the updated upper-bit + ## value. + auipc a0, 0x1000 ## Initial upper bit value + lla a0, far_target ## Pseudo instruction provides AUIPC. Resolution occurs + ## based on value written by this instruction + +## These are the labels that the instructions above are expected to resolve to. +.skip 0xffc6 +target: + .word 1 +.skip 0xffff8 +far_target: + .word 2 +func: + ret diff --git a/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp b/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp index 25ad985c52584..cdeee37d195f7 100644 --- a/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp +++ b/llvm/tools/llvm-cfi-verify/lib/FileAnalysis.cpp @@ -180,7 +180,7 @@ bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const { if (!InstrDesc.isCall()) return false; uint64_t Target; - if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, + if (!MIA->findTargetAddress(InstrMeta.Instruction, InstrMeta.VMAddress, InstrMeta.InstructionSize, Target)) return false; return TrapOnFailFunctionAddresses.contains(Target); @@ -215,7 +215,7 @@ FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { return nullptr; uint64_t Target; - if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, + if (!MIA->findTargetAddress(InstrMeta.Instruction, InstrMeta.VMAddress, InstrMeta.InstructionSize, Target)) return nullptr; @@ -508,7 +508,7 @@ void FileAnalysis::parseSectionContents(ArrayRef SectionBytes, continue; uint64_t Target; - if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { + if (MIA->findTargetAddress(Instruction, VMAddress,
InstructionSize, Target)) { // If the target can be evaluated, it's not indirect. StaticBranchTargetings[Target].push_back(VMAddress); continue; diff --git a/llvm/tools/llvm-cfi-verify/lib/GraphBuilder.cpp b/llvm/tools/llvm-cfi-verify/lib/GraphBuilder.cpp index b5181043a06ed..51154db9c6ef6 100644 --- a/llvm/tools/llvm-cfi-verify/lib/GraphBuilder.cpp +++ b/llvm/tools/llvm-cfi-verify/lib/GraphBuilder.cpp @@ -137,7 +137,7 @@ void GraphBuilder::buildFlowsToUndefined(const FileAnalysis &Analysis, } else if (BranchNode.Fallthrough && !BranchNode.Target) { // We already know the fallthrough, evaluate the target. uint64_t Target; - if (!Analysis.getMCInstrAnalysis()->evaluateBranch( + if (!Analysis.getMCInstrAnalysis()->findTargetAddress( BranchInstrMeta.Instruction, BranchInstrMeta.VMAddress, BranchInstrMeta.InstructionSize, Target)) { errs() << "Failed to get branch target for conditional branch at address " @@ -254,7 +254,7 @@ void GraphBuilder::buildFlowGraphImpl(const FileAnalysis &Analysis, // Evaluate the branch target to ascertain whether this XRef is the result // of a fallthrough or the target of a branch. 
uint64_t BranchTarget; - if (!Analysis.getMCInstrAnalysis()->evaluateBranch( + if (!Analysis.getMCInstrAnalysis()->findTargetAddress( ParentMeta.Instruction, ParentMeta.VMAddress, ParentMeta.InstructionSize, BranchTarget)) { errs() << "Failed to evaluate branch target for instruction at address " diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 815759dfec309..59ecdf73b5579 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1589,8 +1589,8 @@ collectLocalBranchTargets(ArrayRef Bytes, MCInstrAnalysis *MIA, if (MIA) { if (Disassembled) { uint64_t Target; - bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target); - if (TargetKnown && (Target >= Start && Target < End) && + bool BranchTargetKnown = MIA->findTargetAddress(Inst, Index, Size, Target); + if (BranchTargetKnown && (Target >= Start && Target < End) && !Targets.count(Target)) { // On PowerPC and AIX, a function call is encoded as a branch to 0. // On other PowerPC platforms (ELF), a function call is encoded as @@ -2402,9 +2402,9 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, if (Disassembled && DT->InstrAnalysis) { llvm::raw_ostream *TargetOS = &FOS; uint64_t Target; - bool PrintTarget = DT->InstrAnalysis->evaluateBranch( - Inst, SectionAddr + Index, Size, Target); - + bool PrintTarget = DT->InstrAnalysis->findTargetAddress( + Inst, SectionAddr + Index, Size, Target, + DT->SubtargetInfo.get()); if (!PrintTarget) { if (std::optional MaybeTarget = DT->InstrAnalysis->evaluateMemoryOperandAddress( @@ -2477,7 +2477,7 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, break; } - // Branch targets are printed just after the instructions. + // Instruction targets are printed just after the instructions. // Print the labels corresponding to the target if there's any. 
bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target); bool LabelAvailable = AllLabels.count(Target); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 31cac4d5c7721..6f7fceabe9e64 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -570,7 +570,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, // Record potential call targets for tail frame inference later-on. if (InferMissingFrames && FRange) { uint64_t Target = 0; - MIA->evaluateBranch(Inst, Address, Size, Target); + MIA->findTargetAddress(Inst, Address, Size, Target); if (MCDesc.isCall()) { // Indirect call targets are unknown at this point. Recording the // unknown target (zero) for further LBR-based refinement. diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp index 38893cf974a10..70cd3fa6419a2 100644 --- a/llvm/tools/sancov/sancov.cpp +++ b/llvm/tools/sancov/sancov.cpp @@ -771,7 +771,7 @@ static void getObjectCoveragePoints(const object::ObjectFile &O, uint64_t CovPoint = getPreviousInstructionPc(Addr + Size, TheTriple); uint64_t Target; if (MIA->isCall(Inst) && - MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) && + MIA->findTargetAddress(Inst, SectionAddr + Index, Size, Target) && SanCovAddrs.find(Target) != SanCovAddrs.end()) Addrs->insert(CovPoint); MIA->updateState(Inst, Addr); diff --git a/llvm/unittests/MC/RISCV/CMakeLists.txt b/llvm/unittests/MC/RISCV/CMakeLists.txt new file mode 100644 index 0000000000000..9279fcf6693b4 --- /dev/null +++ b/llvm/unittests/MC/RISCV/CMakeLists.txt @@ -0,0 +1,8 @@ +set(LLVM_LINK_COMPONENTS + MC + Support +) + +add_llvm_unittest(RISCVMCMCTests + RISCVMCInstAnalysis.cpp +) diff --git a/llvm/unittests/MC/RISCV/RISCVMCInstAnalysis.cpp b/llvm/unittests/MC/RISCV/RISCVMCInstAnalysis.cpp new file mode 100644 index 0000000000000..9f2392de36a92 --- /dev/null +++ 
b/llvm/unittests/MC/RISCV/RISCVMCInstAnalysis.cpp
@@ -0,0 +1,153 @@
+//===- RISCVMCInstAnalysis.cpp - Unit tests for RISCV MCInstrAnalysis ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+#include <memory>
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+/// MC-layer objects for a riscv32 target, built once and shared by the tests.
+/// Converts to false when the RISC-V target is not available in this build.
+struct TestContext {
+  const char *TripleName = "riscv32-unknown-elf";
+  std::unique_ptr<MCRegisterInfo> MRI;
+  std::unique_ptr<MCAsmInfo> MAI;
+  std::unique_ptr<MCInstrInfo> MII;
+  std::unique_ptr<MCInstrAnalysis> MIA;
+  std::unique_ptr<MCContext> Ctx;
+  std::unique_ptr<MCSubtargetInfo> STI;
+
+  TestContext() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllDisassemblers();
+
+    std::string Error;
+    const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+    if (!TheTarget)
+      return;
+
+    MRI.reset(TheTarget->createMCRegInfo(TripleName));
+    MII.reset(TheTarget->createMCInstrInfo());
+    MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
+    MCTargetOptions MCOptions;
+    MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+    Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(),
+                                      /*MSTI=*/nullptr);
+    const char *MCPU = "generic";
+    std::string Features = ""; // No extensions, just RV32I
+    STI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features));
+  }
+
+  operator bool() { return Ctx.get() && MIA.get() && STI.get(); }
+  operator MCContext &() { return *Ctx; }
+  MCInstrAnalysis &getInstrAnalysis() { return *MIA; }
+};
+
+TestContext &getTestContext() {
+  static TestContext Ctx;
+  return Ctx;
+}
+
+} // end anonymous namespace
+
+// Helper to create an MCInst with register and immediate operands
+static MCInst makeInst(unsigned Opcode, std::initializer_list<MCOperand> Ops) {
+  MCInst Inst;
+  Inst.setOpcode(Opcode);
+  for (const auto &Op : Ops)
+    Inst.addOperand(Op);
+  return Inst;
+}
+
+TEST(RISCVMCInstrAnalysis, FindTargetAddressWithRegisterState) {
+  if (!getTestContext())
+    GTEST_SKIP();
+  auto &MIA = getTestContext().getInstrAnalysis();
+  // ADDI resolution masks to XLEN, which is read from the subtarget.
+  const MCSubtargetInfo *STI = getTestContext().STI.get();
+
+  // Set up variables.
+  uint64_t Addr = 0x2, Target;
+  uint32_t mask = ~(0);      // All bits set to 1.
+  uint64_t lowerImm = 0xfff; // Lower 12 bits set to 1.
+  bool found;
+
+  // ------------ Test 1 --------------
+  // Verifies accuracy of the result when ADDI (and related instructions, see
+  // switch in RISCVMCTargetDesc.cpp) is involved in an instruction sequence.
+  // ADDI only retains the bottom XLEN bits, making it dependent on the target.
+  // This masking is especially apparent in 32-bit targets as overflow must
+  // correctly be discarded.
+  uint64_t upperImm1 = 0 | mask;
+  upperImm1 >>= 12; // Bottom 20 bits set to 1.
+
+  // NOTE(review): the opcode literals below are assumed to equal the generated
+  // RISCV::AUIPC / RISCV::ADDI / RISCV::ADDIW enum values -- TODO confirm.
+  // 1. AUIPC x5, 0xFFFFF.
+  MCInst auipc = makeInst(
+      /*AUIPC=*/23, {MCOperand::createReg(5), MCOperand::createImm(upperImm1)});
+  // 2. ADDI x5, x5, 0xFFF.
+  MCInst addi =
+      makeInst(/*ADDI=*/13, {MCOperand::createReg(5), MCOperand::createReg(5),
+                             MCOperand::createImm(lowerImm)});
+
+  MIA.updateState(auipc, Addr);
+  found = MIA.findTargetAddress(addi, Addr, 4, Target, STI);
+  EXPECT_TRUE(found);
+
+  uint64_t expected1 =
+      ((upperImm1 << 12) + lowerImm + Addr) & maskTrailingOnes<uint64_t>(32);
+  EXPECT_EQ(Target, expected1);
+  MIA.resetState();
+
+  // ------------------- Test 2 -----------------
+  // Verifies the correctness when ADDIW is involved in a sequence. ADDIW
+  // performs a sign extension up to 64 bits based on the 32nd bit after the
+  // result has been computed.
+  mask >>= 1; // Bottom 31 bits set to 1.
+
+  uint64_t upper_bits2 = 0 | mask;
+  upper_bits2 >>= 12; // Bottom 19 bits set to 1.
+
+  // 1. AUIPC x6, 0x7FFFF.
+  MCInst auipc2 = makeInst(/*AUIPC=*/23, {MCOperand::createReg(6),
+                                          MCOperand::createImm(upper_bits2)});
+  // 2. ADDIW x6, x6, 0xFFF.
+  MCInst addiw =
+      makeInst(/*ADDIW=*/15, {MCOperand::createReg(6), MCOperand::createReg(6),
+                              MCOperand::createImm(lowerImm)});
+
+  MIA.updateState(auipc2, Addr);
+  found = MIA.findTargetAddress(addiw, Addr, 4, Target, STI);
+  EXPECT_TRUE(found);
+
+  uint64_t expected2 = SignExtend64((upper_bits2 << 12) + lowerImm + Addr, 32);
+  EXPECT_EQ(Target, expected2);
+  MIA.resetState();
+
+  // All other instructions simply add the immediate to the value loaded in the
+  // corresponding register by the previous instruction in the sequence. This
+  // correctness check is implicitly performed in the tests above. For more
+  // tests related to this feature, check
+  // llvm/test/tools/llvm-objdump/RISCV/riscv-disassembly-address-resolution.s
+}