Skip to content

Commit 3dbf02a

Browse files
committed
[AMDGPU] Use absolute relocations when compiling for AMDPAL and Mesa3D
The primary ISA-independent justification for using PC-relative addressing is that it makes code position-independent and therefore allows sharing of .text pages between processes. When .text pages are not shared, we can use absolute relocations instead, which may avoid the bubble caused by using s_getpc_b64.
1 parent bb38f26 commit 3dbf02a

17 files changed

+1382
-1720
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
#include "AMDGPUGlobalISelUtils.h"
1818
#include "AMDGPUInstrInfo.h"
1919
#include "AMDGPUTargetMachine.h"
20+
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21+
#include "SIInstrInfo.h"
2022
#include "SIMachineFunctionInfo.h"
23+
#include "SIRegisterInfo.h"
2124
#include "Utils/AMDGPUBaseInfo.h"
2225
#include "llvm/ADT/ScopeExit.h"
2326
#include "llvm/BinaryFormat/ELF.h"
@@ -26,6 +29,7 @@
2629
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
2730
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2831
#include "llvm/CodeGen/GlobalISel/Utils.h"
32+
#include "llvm/CodeGen/TargetOpcodes.h"
2933
#include "llvm/IR/DiagnosticInfo.h"
3034
#include "llvm/IR/IntrinsicsAMDGPU.h"
3135
#include "llvm/IR/IntrinsicsR600.h"
@@ -2762,7 +2766,63 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
27622766
if (PtrTy.getSizeInBits() == 32)
27632767
B.buildExtract(DstReg, PCReg, 0);
27642768
return true;
2765-
}
2769+
}
2770+
2771+
// Emit an ABS32_LO / ABS32_HI relocation stub.
//
// Materializes the address of GV into DstReg with S_MOV_B32 instructions whose
// global-address operands carry the MO_ABS32_LO / MO_ABS32_HI target flags.
//
// DstReg - register that receives the final address.
// PtrTy  - type of the pointer; a 32-bit type gets only the low half, any
//          other size is asserted to be 64 bits and gets both halves.
// B      - builder used to emit the instructions.
// GV     - global whose address is referenced (always with offset 0 here).
// MRI    - used to create temporary virtual registers and set their classes.
2772+
void AMDGPULegalizerInfo::buildAbsGlobalAddress(
2773+
Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
2774+
MachineRegisterInfo &MRI) const {
2775+
bool RequiresHighHalf = PtrTy.getSizeInBits() != 32;
2776+
2777+
LLT S32 = LLT::scalar(32);
2778+
2779+
// Use the destination directly if and only if we store only the lower
2780+
// address part and the destination has no register class set yet.
2781+
Register AddrLo = !RequiresHighHalf && !MRI.getRegClassOrNull(DstReg)
2782+
? DstReg
2783+
: MRI.createGenericVirtualRegister(S32);
2784+
2785+
// NOTE(review): AddrLo is either a class-less DstReg or a freshly created
// generic virtual register, so this check presumably always passes - confirm.
if (!MRI.getRegClassOrNull(AddrLo))
2786+
MRI.setRegClass(AddrLo, &AMDGPU::SReg_32RegClass);
2787+
2788+
// Write the lower half.
2789+
B.buildInstr(AMDGPU::S_MOV_B32)
2790+
.addDef(AddrLo)
2791+
.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
2792+
2793+
// If required, write the upper half as well.
2794+
if (RequiresHighHalf) {
2795+
assert(PtrTy.getSizeInBits() == 64 &&
2796+
"Must provide a 64-bit pointer type!");
2797+
2798+
Register AddrHi = MRI.createGenericVirtualRegister(S32);
2799+
MRI.setRegClass(AddrHi, &AMDGPU::SReg_32RegClass);
2800+
2801+
B.buildInstr(AMDGPU::S_MOV_B32)
2802+
.addDef(AddrHi)
2803+
.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_HI);
2804+
2805+
// Use the destination directly if and only if it has no register class
2806+
// set yet; otherwise merge into a temporary 64-bit register.
2807+
Register AddrDst = !MRI.getRegClassOrNull(DstReg)
2808+
? DstReg
2809+
: MRI.createGenericVirtualRegister(LLT::scalar(64));
2810+
2811+
if (!MRI.getRegClassOrNull(AddrDst))
2812+
MRI.setRegClass(AddrDst, &AMDGPU::SReg_64RegClass);
2813+
2814+
// Combine the two 32-bit halves (low first, then high) into AddrDst.
B.buildMergeValues(AddrDst, {AddrLo, AddrHi});
2815+
2816+
// If we created a new register for the destination, cast the result into
2817+
// the final output.
2818+
if (AddrDst != DstReg)
2819+
B.buildCast(DstReg, AddrDst);
2820+
} else if (AddrLo != DstReg) {
2821+
// 32-bit case: if we created a new register for the low half, cast the
2822+
// result into the final output.
2823+
B.buildCast(DstReg, AddrLo);
2824+
}
2825+
}
27662826

27672827
bool AMDGPULegalizerInfo::legalizeGlobalValue(
27682828
MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -2828,6 +2888,12 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
28282888
return true;
28292889
}
28302890

2891+
if (ST.isAmdPalOS() || ST.isMesa3DOS()) {
2892+
buildAbsGlobalAddress(DstReg, Ty, B, GV, MRI);
2893+
MI.eraseFromParent();
2894+
return true;
2895+
}
2896+
28312897
const SITargetLowering *TLI = ST.getTargetLowering();
28322898

28332899
if (TLI->shouldEmitFixup(GV)) {

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
6868
const GlobalValue *GV, int64_t Offset,
6969
unsigned GAFlags = SIInstrInfo::MO_NONE) const;
7070

71+
void buildAbsGlobalAddress(Register DstReg, LLT PtrTy, MachineIRBuilder &B,
72+
const GlobalValue *GV,
73+
MachineRegisterInfo &MRI) const;
74+
7175
bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
7276
MachineIRBuilder &B) const;
7377
bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
6363
return ELF::R_AMDGPU_REL32_HI;
6464
case MCSymbolRefExpr::VK_AMDGPU_REL64:
6565
return ELF::R_AMDGPU_REL64;
66+
case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO:
67+
return ELF::R_AMDGPU_ABS32_LO;
68+
case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI:
69+
return ELF::R_AMDGPU_ABS32_HI;
6670
}
6771

6872
MCFixupKind Kind = Fixup.getKind();

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5709,6 +5709,9 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
57095709
}
57105710

57115711
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
5712+
if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS())
5713+
return false;
5714+
57125715
// FIXME: Either avoid relying on address space here or change the default
57135716
// address space for functions to avoid the explicit check.
57145717
return (GV->getValueType()->isFunctionTy() ||
@@ -6726,9 +6729,22 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
67266729
return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
67276730
}
67286731

6732+
if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS()) {
6733+
SDValue AddrLo = DAG.getTargetGlobalAddress(
6734+
GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO);
6735+
AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0};
6736+
6737+
SDValue AddrHi = DAG.getTargetGlobalAddress(
6738+
GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_HI);
6739+
AddrHi = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrHi), 0};
6740+
6741+
return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddrLo, AddrHi);
6742+
}
6743+
67296744
if (shouldEmitFixup(GV))
67306745
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
6731-
else if (shouldEmitPCReloc(GV))
6746+
6747+
if (shouldEmitPCReloc(GV))
67326748
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
67336749
SIInstrInfo::MO_REL32);
67346750

llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s
3-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s
3+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN-PAL %s
44

55
@external_constant = external addrspace(4) constant i32, align 4
66
@external_constant32 = external addrspace(6) constant i32, align 4
@@ -14,6 +14,7 @@
1414

1515

1616
define ptr addrspace(4) @external_constant_got() {
17+
1718
; GCN-LABEL: name: external_constant_got
1819
; GCN: bb.1 (%ir-block.0):
1920
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc
@@ -22,10 +23,19 @@ define ptr addrspace(4) @external_constant_got() {
2223
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
2324
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
2425
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
26+
;
27+
; GCN-PAL-LABEL: name: external_constant_got
28+
; GCN-PAL: bb.1 (%ir-block.0):
29+
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant
30+
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_constant
31+
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
32+
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
33+
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
2534
ret ptr addrspace(4) @external_constant
2635
}
2736

2837
define ptr addrspace(1) @external_global_got() {
38+
2939
; GCN-LABEL: name: external_global_got
3040
; GCN: bb.1 (%ir-block.0):
3141
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc
@@ -34,10 +44,19 @@ define ptr addrspace(1) @external_global_got() {
3444
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
3545
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
3646
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
47+
;
48+
; GCN-PAL-LABEL: name: external_global_got
49+
; GCN-PAL: bb.1 (%ir-block.0):
50+
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_global
51+
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_global
52+
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
53+
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
54+
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
3755
ret ptr addrspace(1) @external_global
3856
}
3957

4058
define ptr addrspace(999) @external_other_got() {
59+
4160
; GCN-LABEL: name: external_other_got
4261
; GCN: bb.1 (%ir-block.0):
4362
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc
@@ -46,59 +65,108 @@ define ptr addrspace(999) @external_other_got() {
4665
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
4766
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
4867
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
68+
;
69+
; GCN-PAL-LABEL: name: external_other_got
70+
; GCN-PAL: bb.1 (%ir-block.0):
71+
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_other
72+
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_other
73+
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
74+
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
75+
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
4976
ret ptr addrspace(999) @external_other
5077
}
5178

5279
define ptr addrspace(4) @internal_constant_pcrel() {
80+
5381
; GCN-LABEL: name: internal_constant_pcrel
5482
; GCN: bb.1 (%ir-block.0):
5583
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc
5684
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
5785
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
5886
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
5987
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
88+
;
89+
; GCN-PAL-LABEL: name: internal_constant_pcrel
90+
; GCN-PAL: bb.1 (%ir-block.0):
91+
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant
92+
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_constant
93+
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
94+
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
95+
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
6096
ret ptr addrspace(4) @internal_constant
6197
}
6298

6399
define ptr addrspace(1) @internal_global_pcrel() {
100+
64101
; GCN-LABEL: name: internal_global_pcrel
65102
; GCN: bb.1 (%ir-block.0):
66103
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc
67104
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1)
68105
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
69106
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
70107
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
108+
;
109+
; GCN-PAL-LABEL: name: internal_global_pcrel
110+
; GCN-PAL: bb.1 (%ir-block.0):
111+
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_global
112+
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_global
113+
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
114+
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
115+
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
71116
ret ptr addrspace(1) @internal_global
72117
}
73118

74119
define ptr addrspace(999) @internal_other_pcrel() {
120+
75121
; GCN-LABEL: name: internal_other_pcrel
76122
; GCN: bb.1 (%ir-block.0):
77123
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc
78124
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999)
79125
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
80126
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
81127
; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
128+
;
129+
; GCN-PAL-LABEL: name: internal_other_pcrel
130+
; GCN-PAL: bb.1 (%ir-block.0):
131+
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_other
132+
; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_other
133+
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32)
134+
; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32)
135+
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
82136
ret ptr addrspace(999) @internal_other
83137
}
84138

85139
define ptr addrspace(6) @external_constant32_got() {
140+
86141
; GCN-LABEL: name: external_constant32_got
87142
; GCN: bb.1 (%ir-block.0):
88143
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc
89144
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4)
90145
; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0
91146
; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6)
92147
; GCN-NEXT: SI_RETURN implicit $vgpr0
148+
;
149+
; GCN-PAL-LABEL: name: external_constant32_got
150+
; GCN-PAL: bb.1 (%ir-block.0):
151+
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(p6) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant32
152+
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](p6)
153+
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0
93154
ret ptr addrspace(6) @external_constant32
94155
}
95156

96157
define ptr addrspace(6) @internal_constant32_pcrel() {
158+
97159
; GCN-LABEL: name: internal_constant32_pcrel
98160
; GCN: bb.1 (%ir-block.0):
99161
; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc
100162
; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
101163
; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6)
102164
; GCN-NEXT: SI_RETURN implicit $vgpr0
165+
;
166+
; GCN-PAL-LABEL: name: internal_constant32_pcrel
167+
; GCN-PAL: bb.1 (%ir-block.0):
168+
; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(p6) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant32
169+
; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](p6)
170+
; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0
103171
ret ptr addrspace(6) @internal_constant32
104172
}

llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
; ELF: Relocations [
1414
; ELF-NEXT: Section (3) .rel.text {
15-
; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b{{$}}
15+
; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32_LO doff_0_0_b{{$}}
1616

1717
define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
1818
%rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)

0 commit comments

Comments
 (0)