Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Use absolute relocations when compiling for AMDPAL and Mesa3D #67791

Merged
merged 1 commit into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/BinaryFormat/ELF.h"
Expand All @@ -26,6 +29,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
Expand Down Expand Up @@ -2762,7 +2766,63 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
if (PtrTy.getSizeInBits() == 32)
B.buildExtract(DstReg, PCReg, 0);
return true;
}
}

// Emit an ABS32_LO / ABS32_HI relocation stub.
tsymalla marked this conversation as resolved.
Show resolved Hide resolved
// Materializes the address of GV into DstReg with S_MOV_B32 instructions whose
// global-address operand carries the MO_ABS32_LO (and, when PtrTy is not
// 32 bits wide, MO_ABS32_HI) target flag.
//
// For a 32-bit PtrTy a single S_MOV_B32 is emitted. Otherwise PtrTy must be
// 64 bits wide: both halves are emitted and combined with a merge-values
// instruction. DstReg is only defined directly when it has no register class
// assigned; in every other case the value is built in a fresh virtual register
// and cast into DstReg afterwards.
void AMDGPULegalizerInfo::buildAbsGlobalAddress(
    Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
    MachineRegisterInfo &MRI) const {
  const LLT S32 = LLT::scalar(32);
  const bool NeedsHiHalf = PtrTy.getSizeInBits() != 32;
  // None of the registers created below alias DstReg, so this answer stays
  // valid for the whole function.
  const bool DstHasRegClass = MRI.getRegClassOrNull(DstReg) != nullptr;

  // Emits one S_MOV_B32 that defines HalfReg with GV tagged by RelocFlag.
  auto EmitHalfMov = [&](Register HalfReg, unsigned RelocFlag) {
    B.buildInstr(AMDGPU::S_MOV_B32)
        .addDef(HalfReg)
        .addGlobalAddress(GV, 0, RelocFlag);
  };

  // Low half: write straight into DstReg only when it is the complete result
  // and DstReg carries no register class yet.
  Register LoReg = DstReg;
  if (NeedsHiHalf || DstHasRegClass)
    LoReg = MRI.createGenericVirtualRegister(S32);

  if (!MRI.getRegClassOrNull(LoReg))
    MRI.setRegClass(LoReg, &AMDGPU::SReg_32RegClass);

  EmitHalfMov(LoReg, SIInstrInfo::MO_ABS32_LO);

  if (!NeedsHiHalf) {
    // A temporary was used for the low half; cast it into the real
    // destination.
    if (LoReg != DstReg)
      B.buildCast(DstReg, LoReg);
    return;
  }

  assert(PtrTy.getSizeInBits() == 64 &&
         "Must provide a 64-bit pointer type!");

  // High half always lives in its own fresh 32-bit scalar register.
  Register HiReg = MRI.createGenericVirtualRegister(S32);
  MRI.setRegClass(HiReg, &AMDGPU::SReg_32RegClass);

  EmitHalfMov(HiReg, SIInstrInfo::MO_ABS32_HI);

  // Merge directly into DstReg unless it already has a register class, in
  // which case a 64-bit temporary receives the merged value.
  Register FullReg = DstReg;
  if (DstHasRegClass)
    FullReg = MRI.createGenericVirtualRegister(LLT::scalar(64));

  if (!MRI.getRegClassOrNull(FullReg))
    MRI.setRegClass(FullReg, &AMDGPU::SReg_64RegClass);

  B.buildMergeValues(FullReg, {LoReg, HiReg});

  // A temporary was used for the merged value; cast it into the real
  // destination.
  if (FullReg != DstReg)
    B.buildCast(DstReg, FullReg);
}

bool AMDGPULegalizerInfo::legalizeGlobalValue(
MachineInstr &MI, MachineRegisterInfo &MRI,
Expand Down Expand Up @@ -2828,6 +2888,12 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
return true;
}

if (ST.isAmdPalOS() || ST.isMesa3DOS()) {
buildAbsGlobalAddress(DstReg, Ty, B, GV, MRI);
MI.eraseFromParent();
return true;
}

const SITargetLowering *TLI = ST.getTargetLowering();

if (TLI->shouldEmitFixup(GV)) {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
const GlobalValue *GV, int64_t Offset,
unsigned GAFlags = SIInstrInfo::MO_NONE) const;

void buildAbsGlobalAddress(Register DstReg, LLT PtrTy, MachineIRBuilder &B,
const GlobalValue *GV,
MachineRegisterInfo &MRI) const;

bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AMDGPU_REL32_HI;
case MCSymbolRefExpr::VK_AMDGPU_REL64:
return ELF::R_AMDGPU_REL64;
case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO:
return ELF::R_AMDGPU_ABS32_LO;
case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI:
return ELF::R_AMDGPU_ABS32_HI;
}

MCFixupKind Kind = Fixup.getKind();
Expand Down
18 changes: 17 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5709,6 +5709,9 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
}

bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS())
return false;

// FIXME: Either avoid relying on address space here or change the default
// address space for functions to avoid the explicit check.
return (GV->getValueType()->isFunctionTy() ||
Expand Down Expand Up @@ -6726,9 +6729,22 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
}

if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS()) {
SDValue AddrLo = DAG.getTargetGlobalAddress(
GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO);
AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0};

SDValue AddrHi = DAG.getTargetGlobalAddress(
GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_HI);
AddrHi = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrHi), 0};

return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddrLo, AddrHi);
}

if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
else if (shouldEmitPCReloc(GV))

if (shouldEmitPCReloc(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
SIInstrInfo::MO_REL32);

Expand Down
70 changes: 69 additions & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN-PAL %s

@external_constant = external addrspace(4) constant i32, align 4
@external_constant32 = external addrspace(6) constant i32, align 4
Expand All @@ -14,6 +14,7 @@


define ptr addrspace(4) @external_constant_got() {

; GCN-LABEL: name: external_constant_got
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc
Expand All @@ -22,10 +23,19 @@ define ptr addrspace(4) @external_constant_got() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
;
; GCN-PAL-LABEL: name: external_constant_got
; GCN-PAL: bb.1 (%ir-block.0):
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_constant
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(4) @external_constant
}

define ptr addrspace(1) @external_global_got() {

; GCN-LABEL: name: external_global_got
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc
Expand All @@ -34,10 +44,19 @@ define ptr addrspace(1) @external_global_got() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
;
; GCN-PAL-LABEL: name: external_global_got
; GCN-PAL: bb.1 (%ir-block.0):
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_global
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_global
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(1) @external_global
}

define ptr addrspace(999) @external_other_got() {

; GCN-LABEL: name: external_other_got
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc
Expand All @@ -46,59 +65,108 @@ define ptr addrspace(999) @external_other_got() {
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
;
; GCN-PAL-LABEL: name: external_other_got
; GCN-PAL: bb.1 (%ir-block.0):
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_other
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_other
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(999) @external_other
}

define ptr addrspace(4) @internal_constant_pcrel() {

; GCN-LABEL: name: internal_constant_pcrel
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
;
; GCN-PAL-LABEL: name: internal_constant_pcrel
; GCN-PAL: bb.1 (%ir-block.0):
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_constant
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(4) @internal_constant
}

define ptr addrspace(1) @internal_global_pcrel() {

; GCN-LABEL: name: internal_global_pcrel
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1)
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
;
; GCN-PAL-LABEL: name: internal_global_pcrel
; GCN-PAL: bb.1 (%ir-block.0):
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_global
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_global
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(1) @internal_global
}

define ptr addrspace(999) @internal_other_pcrel() {

; GCN-LABEL: name: internal_other_pcrel
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999)
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
;
; GCN-PAL-LABEL: name: internal_other_pcrel
; GCN-PAL: bb.1 (%ir-block.0):
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_other
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_other
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
ret ptr addrspace(999) @internal_other
}

define ptr addrspace(6) @external_constant32_got() {

; GCN-LABEL: name: external_constant32_got
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4)
; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0
; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6)
; GCN-NEXT: SI_RETURN implicit $vgpr0
;
; GCN-PAL-LABEL: name: external_constant32_got
; GCN-PAL: bb.1 (%ir-block.0):
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(p6) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant32
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](p6)
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0
ret ptr addrspace(6) @external_constant32
}

define ptr addrspace(6) @internal_constant32_pcrel() {

; GCN-LABEL: name: internal_constant32_pcrel
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc
; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6)
; GCN-NEXT: SI_RETURN implicit $vgpr0
;
; GCN-PAL-LABEL: name: internal_constant32_pcrel
; GCN-PAL: bb.1 (%ir-block.0):
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(p6) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant32
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](p6)
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0
ret ptr addrspace(6) @internal_constant32
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

; ELF: Relocations [
; ELF-NEXT: Section (3) .rel.text {
; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b{{$}}
; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32_LO doff_0_0_b{{$}}

define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
%rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
Expand Down
Loading
Loading