diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index db226a3029001..4797e5a7a61d4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -17,7 +17,10 @@ #include "AMDGPUGlobalISelUtils.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/BinaryFormat/ELF.h" @@ -26,6 +29,7 @@ #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" @@ -2762,7 +2766,63 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy, if (PtrTy.getSizeInBits() == 32) B.buildExtract(DstReg, PCReg, 0); return true; - } +} + +// Emit an ABS32_LO / ABS32_HI relocation stub. +void AMDGPULegalizerInfo::buildAbsGlobalAddress( + Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV, + MachineRegisterInfo &MRI) const { + bool RequiresHighHalf = PtrTy.getSizeInBits() != 32; + + LLT S32 = LLT::scalar(32); + + // Use the destination directly, if and only if we store the lower address + // part only and we don't have a register class being set. + Register AddrLo = !RequiresHighHalf && !MRI.getRegClassOrNull(DstReg) + ? DstReg + : MRI.createGenericVirtualRegister(S32); + + if (!MRI.getRegClassOrNull(AddrLo)) + MRI.setRegClass(AddrLo, &AMDGPU::SReg_32RegClass); + + // Write the lower half. + B.buildInstr(AMDGPU::S_MOV_B32) + .addDef(AddrLo) + .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO); + + // If required, write the upper half as well. 
+ if (RequiresHighHalf) { + assert(PtrTy.getSizeInBits() == 64 && + "Must provide a 64-bit pointer type!"); + + Register AddrHi = MRI.createGenericVirtualRegister(S32); + MRI.setRegClass(AddrHi, &AMDGPU::SReg_32RegClass); + + B.buildInstr(AMDGPU::S_MOV_B32) + .addDef(AddrHi) + .addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_HI); + + // Use the destination directly, if and only if we don't have a register + // class being set. + Register AddrDst = !MRI.getRegClassOrNull(DstReg) + ? DstReg + : MRI.createGenericVirtualRegister(LLT::scalar(64)); + + if (!MRI.getRegClassOrNull(AddrDst)) + MRI.setRegClass(AddrDst, &AMDGPU::SReg_64RegClass); + + B.buildMergeValues(AddrDst, {AddrLo, AddrHi}); + + // If we created a new register for the destination, cast the result into + // the final output. + if (AddrDst != DstReg) + B.buildCast(DstReg, AddrDst); + } else if (AddrLo != DstReg) { + // If we created a new register for the destination, cast the result into + // the final output. + B.buildCast(DstReg, AddrLo); + } +} bool AMDGPULegalizerInfo::legalizeGlobalValue( MachineInstr &MI, MachineRegisterInfo &MRI, @@ -2828,6 +2888,12 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( return true; } + if (ST.isAmdPalOS() || ST.isMesa3DOS()) { + buildAbsGlobalAddress(DstReg, Ty, B, GV, MRI); + MI.eraseFromParent(); + return true; + } + const SITargetLowering *TLI = ST.getTargetLowering(); if (TLI->shouldEmitFixup(GV)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index ab7fe92d6a720..b90fb76a4ccda 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -68,6 +68,10 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { const GlobalValue *GV, int64_t Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const; + void buildAbsGlobalAddress(Register DstReg, LLT PtrTy, MachineIRBuilder &B, + const GlobalValue *GV, + MachineRegisterInfo &MRI) const; + bool 
legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 3f188478ca8bc..58eed81e07556 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -63,6 +63,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AMDGPU_REL32_HI; case MCSymbolRefExpr::VK_AMDGPU_REL64: return ELF::R_AMDGPU_REL64; + case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO: + return ELF::R_AMDGPU_ABS32_LO; + case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI: + return ELF::R_AMDGPU_ABS32_HI; } MCFixupKind Kind = Fixup.getKind(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f170428b38c49..fd31d95eb82b1 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5709,6 +5709,9 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { } bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const { + if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS()) + return false; + // FIXME: Either avoid relying on address space here or change the default // address space for functions to avoid the explicit check. 
return (GV->getValueType()->isFunctionTy() || @@ -6726,9 +6729,22 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA); } + if (Subtarget->isAmdPalOS() || Subtarget->isMesa3DOS()) { + SDValue AddrLo = DAG.getTargetGlobalAddress( + GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_LO); + AddrLo = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrLo), 0}; + + SDValue AddrHi = DAG.getTargetGlobalAddress( + GV, DL, MVT::i32, GSD->getOffset(), SIInstrInfo::MO_ABS32_HI); + AddrHi = {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, AddrHi), 0}; + + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddrLo, AddrHi); + } + if (shouldEmitFixup(GV)) return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT); - else if (shouldEmitPCReloc(GV)) + + if (shouldEmitPCReloc(GV)) return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT, SIInstrInfo::MO_REL32); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index b60dd6dea7f79..b98b366b01a46 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN-PAL %s @external_constant = external addrspace(4) constant i32, align 4 @external_constant32 = external addrspace(6) constant i32, align 4 @@ -14,6 +14,7 @@ define ptr addrspace(4) @external_constant_got() { + ; GCN-LABEL: name: external_constant_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: 
[[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc @@ -22,10 +23,19 @@ define ptr addrspace(4) @external_constant_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; GCN-PAL-LABEL: name: external_constant_got + ; GCN-PAL: bb.1 (%ir-block.0): + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_constant + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(4) @external_constant } define ptr addrspace(1) @external_global_got() { + ; GCN-LABEL: name: external_global_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc @@ -34,10 +44,19 @@ define ptr addrspace(1) @external_global_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; GCN-PAL-LABEL: name: external_global_got + ; GCN-PAL: bb.1 (%ir-block.0): + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_global + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_global + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(1) @external_global } define ptr addrspace(999) 
@external_other_got() { + ; GCN-LABEL: name: external_other_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc @@ -46,10 +65,19 @@ define ptr addrspace(999) @external_other_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; GCN-PAL-LABEL: name: external_other_got + ; GCN-PAL: bb.1 (%ir-block.0): + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_other + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @external_other + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(999) @external_other } define ptr addrspace(4) @internal_constant_pcrel() { + ; GCN-LABEL: name: internal_constant_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc @@ -57,10 +85,19 @@ define ptr addrspace(4) @internal_constant_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; GCN-PAL-LABEL: name: internal_constant_pcrel + ; GCN-PAL: bb.1 (%ir-block.0): + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_constant + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: 
SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(4) @internal_constant } define ptr addrspace(1) @internal_global_pcrel() { + ; GCN-LABEL: name: internal_global_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc @@ -68,10 +105,19 @@ define ptr addrspace(1) @internal_global_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; GCN-PAL-LABEL: name: internal_global_pcrel + ; GCN-PAL: bb.1 (%ir-block.0): + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_global + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_global + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](s32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(1) @internal_global } define ptr addrspace(999) @internal_other_pcrel() { + ; GCN-LABEL: name: internal_other_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc @@ -79,10 +125,19 @@ define ptr addrspace(999) @internal_other_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; GCN-PAL-LABEL: name: internal_other_pcrel + ; GCN-PAL: bb.1 (%ir-block.0): + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_other + ; GCN-PAL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32(s32) = S_MOV_B32 target-flags(amdgpu-abs32-hi) @internal_other + ; GCN-PAL-NEXT: $vgpr0 = COPY 
[[S_MOV_B32_]](s32) + ; GCN-PAL-NEXT: $vgpr1 = COPY [[S_MOV_B32_1]](s32) + ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret ptr addrspace(999) @internal_other } define ptr addrspace(6) @external_constant32_got() { + ; GCN-LABEL: name: external_constant32_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc @@ -90,15 +145,28 @@ define ptr addrspace(6) @external_constant32_got() { ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GCN-PAL-LABEL: name: external_constant32_got + ; GCN-PAL: bb.1 (%ir-block.0): + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(p6) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @external_constant32 + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](p6) + ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0 ret ptr addrspace(6) @external_constant32 } define ptr addrspace(6) @internal_constant32_pcrel() { + ; GCN-LABEL: name: internal_constant32_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GCN-PAL-LABEL: name: internal_constant32_pcrel + ; GCN-PAL: bb.1 (%ir-block.0): + ; GCN-PAL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(p6) = S_MOV_B32 target-flags(amdgpu-abs32-lo) @internal_constant32 + ; GCN-PAL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]](p6) + ; GCN-PAL-NEXT: SI_RETURN implicit $vgpr0 ret ptr addrspace(6) @internal_constant32 } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll 
b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll index 2feeb83e6f146..c5dbfb0f219bd 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll @@ -12,7 +12,7 @@ ; ELF: Relocations [ ; ELF-NEXT: Section (3) .rel.text { -; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b{{$}} +; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32_LO doff_0_0_b{{$}} define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 { %rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1) diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll index 9ec9414d91171..9d94f8e6ca227 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1335,9 +1335,9 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac ; CI-NEXT: s_mov_b32 s40, s0 ; CI-NEXT: s_load_dwordx4 s[40:43], s[40:41], 0x0 ; CI-NEXT: s_mov_b32 s14, s10 -; CI-NEXT: s_mov_b32 s12, s8 -; CI-NEXT: s_mov_b32 s13, s9 ; CI-NEXT: v_lshlrev_b32_e32 v3, 2, v0 +; CI-NEXT: s_mov_b32 m0, -1 +; CI-NEXT: s_mov_b32 s12, s8 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_add_u32 s40, s40, s11 ; CI-NEXT: s_mov_b64 s[10:11], s[6:7] @@ -1345,27 +1345,24 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac ; CI-NEXT: s_load_dword s6, s[4:5], 0x2 ; CI-NEXT: s_addc_u32 s41, s41, 0 ; CI-NEXT: s_add_u32 s8, s4, 12 -; CI-NEXT: s_addc_u32 s9, s5, 0 -; CI-NEXT: s_getpc_b64 s[4:5] -; CI-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4 -; CI-NEXT: s_addc_u32 s5, s5, void_func_void@gotpcrel32@hi+12 +; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; CI-NEXT: s_mov_b32 s13, s9 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v40, vcc, s6, v3 -; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 ; CI-NEXT: ds_read_b32 v41, v40 -; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; CI-NEXT: s_addc_u32 s9, s5, 0 ; CI-NEXT: v_lshlrev_b32_e32 v2, 
20, v2 ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: s_mov_b64 s[4:5], s[0:1] ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_mov_b64 s[0:1], s[40:41] +; CI-NEXT: s_mov_b32 s17, void_func_void@abs32@hi +; CI-NEXT: s_mov_b32 s16, void_func_void@abs32@lo ; CI-NEXT: v_or_b32_e32 v31, v0, v2 ; CI-NEXT: s_mov_b64 s[2:3], s[42:43] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_mov_b32 s39, 0xf000 ; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CI-NEXT: ds_read_b32 v0, v40 offset:4 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1384,28 +1381,25 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_add_u32 s8, s4, 12 ; GFX9-NEXT: s_addc_u32 s9, s5, 0 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void@gotpcrel32@hi+12 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_lshl_add_u32 v41, v0, 2, s6 -; GFX9-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 ; GFX9-NEXT: ds_read_b32 v42, v41 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b32 s17, void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s16, void_func_void@abs32@lo ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-NEXT: ds_read_b32 v0, v41 offset:4 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) diff --git 
a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll index 7c9d01db9c2c0..9ab8be0485edd 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll @@ -33,21 +33,18 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-NEXT: v_writelane_b32 v40, s21, 17 ; SDAG-NEXT: v_writelane_b32 v40, s22, 18 ; SDAG-NEXT: v_writelane_b32 v40, s23, 19 -; SDAG-NEXT: s_addk_i32 s32, 0x400 ; SDAG-NEXT: v_writelane_b32 v40, s24, 20 ; SDAG-NEXT: v_writelane_b32 v40, s25, 21 -; SDAG-NEXT: s_getpc_b64 s[34:35] -; SDAG-NEXT: s_add_u32 s34, s34, extern_c_func@gotpcrel32@lo+4 -; SDAG-NEXT: s_addc_u32 s35, s35, extern_c_func@gotpcrel32@hi+12 ; SDAG-NEXT: v_writelane_b32 v40, s26, 22 -; SDAG-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; SDAG-NEXT: v_writelane_b32 v40, s27, 23 ; SDAG-NEXT: v_writelane_b32 v40, s28, 24 ; SDAG-NEXT: v_writelane_b32 v40, s29, 25 ; SDAG-NEXT: v_writelane_b32 v40, s30, 26 +; SDAG-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi +; SDAG-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; SDAG-NEXT: s_mov_b64 s[8:9], 0 +; SDAG-NEXT: s_addk_i32 s32, 0x400 ; SDAG-NEXT: v_writelane_b32 v40, s31, 27 -; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: s_swappc_b64 s[30:31], s[34:35] ; SDAG-NEXT: v_readlane_b32 s31, v40, 27 ; SDAG-NEXT: v_readlane_b32 s30, v40, 26 @@ -113,21 +110,18 @@ define amdgpu_gfx void @gfx_func() { ; GISEL-NEXT: v_writelane_b32 v40, s21, 17 ; GISEL-NEXT: v_writelane_b32 v40, s22, 18 ; GISEL-NEXT: v_writelane_b32 v40, s23, 19 -; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s24, 20 ; GISEL-NEXT: v_writelane_b32 v40, s25, 21 -; GISEL-NEXT: s_getpc_b64 s[34:35] -; GISEL-NEXT: s_add_u32 s34, s34, extern_c_func@gotpcrel32@lo+4 -; GISEL-NEXT: s_addc_u32 s35, s35, extern_c_func@gotpcrel32@hi+12 ; GISEL-NEXT: v_writelane_b32 v40, s26, 22 -; GISEL-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GISEL-NEXT: v_writelane_b32 v40, s27, 23 ; GISEL-NEXT: 
v_writelane_b32 v40, s28, 24 ; GISEL-NEXT: v_writelane_b32 v40, s29, 25 ; GISEL-NEXT: v_writelane_b32 v40, s30, 26 +; GISEL-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo +; GISEL-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s31, 27 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GISEL-NEXT: v_readlane_b32 s31, v40, 27 ; GISEL-NEXT: v_readlane_b32 s30, v40, 26 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index c016342314032..f827a78125b77 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -119,10 +119,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 @@ -148,9 +147,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 
0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -178,9 +176,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -209,9 +206,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -245,10 +241,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12 ; 
GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -276,9 +271,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 @@ -308,9 +302,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 @@ -340,9 +333,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_i1_signext@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 @@ -378,10 +370,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -409,9 +400,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 @@ -441,9 +431,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, 
external_void_func_i1_zeroext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 @@ -473,9 +462,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 @@ -507,13 +495,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -537,10 +524,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; 
GFX10-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -566,10 +552,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -596,10 +581,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -630,12 +614,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) 
#0 { ; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -660,10 +643,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -690,10 +672,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11-NEXT: global_load_i8 v0, v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, 
external_void_func_i8_signext@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -721,10 +702,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -756,12 +736,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -786,10 +765,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: 
v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -816,10 +794,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -847,10 +824,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -880,13 +856,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -910,10 +885,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -939,10 +913,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 
s0, s0, external_void_func_i16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -969,10 +942,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1003,12 +975,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1033,10 +1004,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: 
s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -1063,10 +1033,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1094,10 +1063,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1129,12 +1097,11 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1159,10 +1126,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -1189,10 +1155,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX11-NEXT: global_load_u16 v0, v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, 
external_void_func_i16_zeroext@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1220,10 +1185,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1253,13 +1217,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 42 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1283,10 +1246,9 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -1312,10 +1274,9 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1342,10 +1303,9 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1374,14 +1334,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i64@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1406,11 +1365,10 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1435,11 +1393,10 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; 
GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1466,11 +1423,10 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -1501,12 +1457,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; 
GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1531,10 +1486,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -1562,10 +1516,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -1594,10 +1547,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo ; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -1628,16 +1580,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1665,10 +1616,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], 
s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1695,10 +1645,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1728,10 +1677,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -1762,14 +1710,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; 
GFX9-NEXT: v_mov_b32_e32 v5, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1796,12 +1743,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 2 -; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1827,11 +1773,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: 
s_swappc_b64 s[30:31], s[0:1] @@ -1861,12 +1806,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -1900,16 +1844,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_mov_b32_e32 v6, 3 ; GFX9-NEXT: v_mov_b32_e32 v7, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1940,10 +1883,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-NEXT: 
global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v7, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -1971,11 +1913,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off -; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2008,10 +1949,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v4i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -2041,13 +1981,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -2071,10 +2010,9 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -2100,10 +2038,9 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi +; 
GFX11-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2130,10 +2067,9 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2162,13 +2098,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 
s30, v40, 0 @@ -2192,10 +2127,9 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -2221,10 +2155,9 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2251,10 +2184,9 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -2283,14 +2215,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -2315,11 +2246,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -2344,11 +2274,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 
-; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2375,11 +2304,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -2407,15 +2335,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 +; GFX9-NEXT: s_addk_i32 
s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -2441,11 +2368,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -2471,11 +2397,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2503,11 +2428,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; 
GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -2535,17 +2459,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5f32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -2574,10 +2497,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v3, -1.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 +; GFX10-NEXT: 
s_mov_b32 s35, external_void_func_v5f32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -2605,10 +2527,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2639,10 +2560,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, -1.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -2670,14 +2590,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_f64_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -2702,11 +2621,10 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -2731,11 +2649,10 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4 -; GFX11-NEXT: 
s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2762,11 +2679,10 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -2794,16 +2710,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_v2f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -2831,10 +2746,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -2861,10 +2775,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -2894,10 +2807,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v2f64@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -2925,18 +2837,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -2966,10 +2877,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, 
external_void_func_v3f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -2997,10 +2907,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -3032,10 +2941,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -3066,12 +2974,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_v2i8@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i8@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 8, v0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -3098,10 +3005,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i8@rel32@hi+12 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3131,10 +3037,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i8@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i8@rel32@hi+12 ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3165,10 +3070,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i8@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i8@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3204,12 +3108,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3237,10 +3140,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8@rel32@hi+12 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: 
v_writelane_b32 v40, s31, 1 @@ -3271,10 +3173,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i8@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8@rel32@hi+12 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3305,10 +3206,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i8@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3345,12 +3245,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, 
s34, external_void_func_v4i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -3379,10 +3278,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i8@rel32@hi+12 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3414,10 +3312,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i8@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i8@rel32@hi+12 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3449,10 +3346,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 
s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i8@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i8@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3490,12 +3386,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -3526,10 +3421,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i8@rel32@hi+12 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3563,10 +3457,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: 
s_mov_b32 s1, external_void_func_v5i8@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i8@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i8@rel32@hi+12 ; GFX11-NEXT: global_load_b64 v[5:6], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3600,10 +3493,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i8@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i8@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3643,12 +3535,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 
@@ -3682,10 +3573,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i8@rel32@hi+12 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -3722,10 +3612,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i8@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i8@rel32@hi+12 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -3761,10 +3650,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i8@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i8@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], 
off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -3810,12 +3698,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -3880,14 +3767,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i8@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[4:5], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0 @@ -3952,13 +3838,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 16 ; GFX11-NEXT: v_mov_b32_e32 v5, 0 ; GFX11-NEXT: 
v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i8@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i8@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo ; GFX11-NEXT: global_load_b128 v[16:19], v[4:5], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_lshrrev_b32_e32 v35, 8, v0 @@ -4020,14 +3905,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i8@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i8@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[4:5], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0 @@ -4100,12 +3984,11 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX9-NEXT: v_mov_b32_e32 v42, 0 ; GFX9-NEXT: global_load_ubyte v0, v[41:42], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_i8_ret@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_ret@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_ret@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_byte v[41:42], v0, off ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4136,10 +4019,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_ret@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_ret@rel32@hi+12 ; GFX10-NEXT: global_load_ubyte v0, v[41:42], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4174,10 +4056,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX11-NEXT: v_mov_b32_e32 v41, 0 ; GFX11-NEXT: v_mov_b32_e32 v42, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_ret@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_ret@rel32@hi+12 ; GFX11-NEXT: global_load_u8 v0, v[41:42], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4211,10 +4092,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-SCRATCH-NEXT: 
v_mov_b32_e32 v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_ret@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_ret@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[41:42], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4256,12 +4136,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX9-NEXT: v_mov_b32_e32 v42, 0 ; GFX9-NEXT: global_load_ushort v0, v[41:42], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i8_ret@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i8_ret@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 8, v0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -4296,10 +4175,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i8_ret@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, 
external_void_func_v2i8_ret@rel32@hi+12 ; GFX10-NEXT: global_load_ushort v0, v[41:42], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4338,10 +4216,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-NEXT: v_mov_b32_e32 v41, 0 ; GFX11-NEXT: v_mov_b32_e32 v42, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i8_ret@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i8_ret@rel32@hi+12 ; GFX11-NEXT: global_load_u16 v0, v[41:42], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4381,10 +4258,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i8_ret@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i8_ret@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[41:42], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4430,12 +4306,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX9-NEXT: v_mov_b32_e32 v42, 0 ; GFX9-NEXT: global_load_dword v0, v[41:42], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_v3i8_ret@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i8_ret@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8_ret@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4474,10 +4349,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i8_ret@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8_ret@rel32@hi+12 ; GFX10-NEXT: global_load_dword v0, v[41:42], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4520,10 +4394,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX11-NEXT: v_mov_b32_e32 v41, 0 ; GFX11-NEXT: v_mov_b32_e32 v42, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i8_ret@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8_ret@rel32@hi+12 ; GFX11-NEXT: global_load_b32 v0, v[41:42], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4568,10 +4441,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 ; 
GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i8_ret@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8_ret@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[41:42], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4621,12 +4493,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX9-NEXT: v_mov_b32_e32 v42, 0 ; GFX9-NEXT: global_load_dword v0, v[41:42], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i8_ret@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i8_ret@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -4666,10 +4537,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i8_ret@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, 
external_void_func_v4i8_ret@rel32@hi+12 ; GFX10-NEXT: global_load_dword v0, v[41:42], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4713,10 +4583,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-NEXT: v_mov_b32_e32 v41, 0 ; GFX11-NEXT: v_mov_b32_e32 v42, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i8_ret@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i8_ret@rel32@hi+12 ; GFX11-NEXT: global_load_b32 v0, v[41:42], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4766,10 +4635,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i8_ret@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i8_ret@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[41:42], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -4820,12 +4688,11 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX9-NEXT: v_mov_b32_e32 v42, 0 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[41:42], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_v5i8_ret@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i8_ret@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i8_ret@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -4870,10 +4737,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i8_ret@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i8_ret@rel32@hi+12 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[41:42], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -4922,10 +4788,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-NEXT: v_mov_b32_e32 v41, 0 ; GFX11-NEXT: v_mov_b32_e32 v42, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i8_ret@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i8_ret@rel32@hi+12 ; GFX11-NEXT: global_load_b64 v[5:6], v[41:42], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -4980,10 +4845,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 
v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i8_ret@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i8_ret@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[41:42], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -5039,12 +4903,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX9-NEXT: v_mov_b32_e32 v42, 0 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[41:42], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i8_ret@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i8_ret@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5094,10 +4957,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i8_ret@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, 
external_void_func_v8i8_ret@rel32@hi+12 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[41:42], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -5151,10 +5013,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-NEXT: v_mov_b32_e32 v41, 0 ; GFX11-NEXT: v_mov_b32_e32 v42, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i8_ret@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i8_ret@rel32@hi+12 ; GFX11-NEXT: global_load_b64 v[0:1], v[41:42], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -5216,10 +5077,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i8_ret@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i8_ret@rel32@hi+12 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[41:42], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -5285,12 +5145,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[41:42], off ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[43:44], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: 
s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i8_ret@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8_ret@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -5406,14 +5265,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-NEXT: v_mov_b32_e32 v44, 0 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i8_ret@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i8_ret@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[41:42], off ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[43:44], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0 @@ -5530,13 +5388,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-NEXT: v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v43, 16 ; GFX11-NEXT: v_mov_b32_e32 v44, 0 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_add_i32 s32, s32, 32 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-NEXT: global_load_b128 v[0:3], v[41:42], off -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i8_ret@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8_ret@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s0, 
external_void_func_v3i8_ret@abs32@lo ; GFX11-NEXT: global_load_b128 v[16:19], v[43:44], off ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_lshrrev_b32_e32 v35, 8, v0 @@ -5682,14 +5539,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v44, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i8_ret@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i8_ret@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[41:42], off ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[43:44], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0 @@ -5807,12 +5663,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 ; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -5836,10 +5691,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -5865,10 +5719,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -5895,10 +5748,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, 
external_void_func_v2i16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -5929,12 +5781,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -5958,10 +5809,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -5987,10 +5837,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: 
s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6017,10 +5866,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6051,12 +5899,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -6080,10 +5927,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -6109,10 +5955,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6139,10 +5984,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 
s1, s1, external_void_func_v3f16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6172,14 +6016,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -6204,11 +6047,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 3 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -6233,11 +6075,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; 
GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6264,11 +6105,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -6296,14 +6136,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 
v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -6328,11 +6167,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -6358,11 +6196,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6389,11 +6226,10 @@ 
define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -6422,12 +6258,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -6451,10 +6286,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, 
external_void_func_v4i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -6480,10 +6314,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6510,10 +6343,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6543,14 +6375,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte 
Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -6575,11 +6406,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -6605,11 +6435,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, 
external_void_func_v4i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6636,11 +6465,10 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -6669,12 +6497,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -6698,10 +6525,9 
@@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -6727,10 +6553,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6757,10 +6582,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6791,12 +6615,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -6820,10 +6643,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -6849,10 +6671,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX11-NEXT: s_add_i32 
s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6879,10 +6700,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6912,14 +6732,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ 
-6944,11 +6763,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -6973,11 +6791,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7004,11 +6821,10 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v2i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7036,15 +6852,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -7070,11 +6885,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-NEXT: v_mov_b32_e32 v2, 5 -; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: 
v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -7100,11 +6914,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_mov_b32_e32 v2, 5 -; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7132,11 +6945,10 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7164,16 +6976,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, 
s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_mov_b32_e32 v3, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -7201,10 +7012,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-NEXT: v_mov_b32_e32 v3, 6 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -7231,10 +7041,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, 
external_void_func_v3i32_i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7264,10 +7073,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7296,12 +7104,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -7325,10 +7132,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: 
s_mov_b32 s35, external_void_func_v4i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -7354,10 +7160,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7384,10 +7189,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7417,16 +7221,15 @@ define amdgpu_gfx void 
@test_call_external_void_func_v4i32_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -7454,10 +7257,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -7484,10 +7286,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, 
s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7517,10 +7318,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7548,17 +7348,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_mov_b32_e32 v4, 5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; 
GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -7587,10 +7386,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 5 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -7618,10 +7416,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v4, 5 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7652,10 +7449,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4 -; 
GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7685,15 +7481,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -7724,9 +7519,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] ; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -7758,9 +7552,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX11-NEXT: global_load_b128 v[0:3], v4, 
s[0:1] ; GFX11-NEXT: global_load_b128 v[4:7], v4, s[0:1] offset:16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -7793,9 +7586,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1] ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -7825,8 +7617,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -7835,10 +7628,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v5, 6 ; GFX9-NEXT: 
v_mov_b32_e32 v6, 7 ; GFX9-NEXT: v_mov_b32_e32 v7, 8 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -7870,11 +7661,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v5, 6 ; GFX10-NEXT: v_mov_b32_e32 v6, 7 ; GFX10-NEXT: v_mov_b32_e32 v7, 8 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 @@ -7902,13 +7692,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; 
GFX11-NEXT: v_readlane_b32 s0, v40, 2 @@ -7939,11 +7728,10 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 6 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 7 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 8 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 @@ -7972,17 +7760,16 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v16, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -8015,9 +7802,8 @@ define amdgpu_gfx void 
@test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -8051,9 +7837,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX11-NEXT: global_load_b128 v[8:11], v12, s[0:1] offset:32 ; GFX11-NEXT: global_load_b128 v[12:15], v12, s[0:1] offset:48 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -8088,9 +7873,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -8122,8 +7906,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 @@ -8134,10 +7918,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -8174,9 +7957,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 
s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -8214,9 +7996,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -8255,9 +8036,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -8302,10 +8082,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 
s34, s34, external_void_func_v32i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(8) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -8345,9 +8124,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(8) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 @@ -8388,9 +8166,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(8) ; GFX11-NEXT: scratch_store_b32 off, v32, s32 @@ -8431,9 +8208,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; 
GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8) ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32 @@ -8466,17 +8242,16 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 +; GFX9-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_i32_func_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 42 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v42, v1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_dword v[41:42], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -8507,12 +8282,11 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_mov_b32_e32 v41, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 -; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi ; GFX10-NEXT: v_writelane_b32 v40, 
s30, 0 +; GFX10-NEXT: s_mov_b32 s34, external_i32_func_i32@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v42, v1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_dword v[41:42], v0, off @@ -8547,10 +8321,9 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX11-NEXT: v_dual_mov_b32 v42, v1 :: v_dual_mov_b32 v41, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: global_store_b32 v[41:42], v0, off dlc @@ -8583,12 +8356,11 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, v1 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_i32_func_i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_i32_func_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: global_store_dword v[41:42], v0, off @@ -8624,15 +8396,14 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] ; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -8663,9 +8434,8 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] ; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -8697,9 +8467,8 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11-NEXT: global_load_u8 v0, v1, s[0:1] ; GFX11-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 ; 
GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -8732,9 +8501,8 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v2, s[0:1] ; GFX10-SCRATCH-NEXT: global_load_dword v1, v2, s[0:1] offset:4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -8767,14 +8535,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, 
s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -8799,14 +8566,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 @@ -8831,15 +8597,14 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_store_b8 off, v0, s33 ; 
GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-NEXT: v_mov_b32_e32 v0, s33 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -8866,14 +8631,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s33 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 @@ -8911,14 +8675,13 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: 
v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 @@ -8950,15 +8713,14 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 +; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -8993,15 +8755,14 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 ; GFX11-NEXT: s_add_i32 s32, s32, 32 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, 
external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX11-NEXT: s_add_i32 s2, s33, 8 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s33 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1 @@ -9034,17 +8795,16 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s33, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s33 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: s_clause 0x1 @@ -9095,14 +8855,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i8@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i8@abs32@lo ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -9152,9 +8911,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i8@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i8@abs32@lo ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -9203,9 +8961,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1] -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i8@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i8@abs32@lo ; 
GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -9253,9 +9010,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i8@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i8@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i8@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -9337,10 +9093,9 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s61, 29 ; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_writelane_b32 v40, s62, 30 +; GFX9-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi +; GFX9-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s63, 31 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(2) @@ -9401,9 +9156,8 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi +; GFX10-NEXT: s_mov_b32 s4, 
byval_align16_f64_arg@abs32@lo ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) @@ -9494,9 +9248,8 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX11-NEXT: scratch_load_b32 v31, off, s33 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 32 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, byval_align16_f64_arg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, byval_align16_f64_arg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_writelane_b32 v40, s34, 2 ; GFX11-NEXT: v_writelane_b32 v40, s35, 3 @@ -9585,9 +9338,8 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, byval_align16_f64_arg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, byval_align16_f64_arg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 @@ -9682,10 +9434,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_i1_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 @@ -9711,9 +9462,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 @@ -9741,9 +9491,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 @@ -9772,9 +9521,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 @@ -9805,13 +9553,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 @@ -9835,10 +9582,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -9866,10 +9612,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded 
Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_movk_i32 s4, 0x7b ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -9898,10 +9643,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -9934,13 +9678,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 
s35, s35, external_void_func_i16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 @@ -9964,10 +9707,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -9995,10 +9737,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_movk_i32 s4, 0x7b ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -10027,10 +9768,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 
s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -10063,13 +9803,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 42 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 @@ -10093,10 +9832,9 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 42 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -10124,10 +9862,9 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 
{ ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 42 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -10156,10 +9893,9 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -10193,14 +9929,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_mov_b32 s5, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] 
-; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -10225,10 +9960,9 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -10259,10 +9993,9 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_movk_i32 s4, 0x7b ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -10294,10 +10027,9 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi +; 
GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -10338,12 +10070,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 @@ -10377,9 +10108,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -10415,9 
+10145,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 4 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -10454,9 +10183,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -10494,16 +10222,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 +; GFX9-NEXT: 
s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 @@ -10530,10 +10257,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -10570,10 +10296,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -10611,10 +10336,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -10663,14 +10387,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6 @@ -10706,9 +10429,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 
s34, external_void_func_v3i64_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 @@ -10750,9 +10472,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 ; GFX11-NEXT: s_mov_b32 s8, 1 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 @@ -10795,9 +10516,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 @@ -10849,16 +10569,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, 
external_void_func_v4i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 ; GFX9-NEXT: s_mov_b32 s10, 3 ; GFX9-NEXT: s_mov_b32 s11, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 @@ -10896,9 +10615,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 @@ -10946,9 +10664,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 ; GFX11-NEXT: s_mov_b32 s8, 1 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 @@ -10997,9 +10714,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 @@ -11047,13 +10763,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x4400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 @@ -11077,10 +10792,9 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4 -; GFX10-NEXT: 
s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -11108,10 +10822,9 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_movk_i32 s4, 0x4400 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -11140,10 +10853,9 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -11176,13 +10888,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 
s35, external_void_func_f32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 4.0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 @@ -11206,10 +10917,9 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 4.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 @@ -11237,10 +10947,9 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 4.0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 @@ -11269,10 +10978,9 @@ define amdgpu_gfx void 
@test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 @@ -11306,14 +11014,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -11338,10 +11045,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo ; GFX10-NEXT: 
s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11372,10 +11078,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1.0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11407,10 +11112,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11448,15 +11152,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: 
v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 4.0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3 @@ -11482,10 +11185,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11519,10 +11221,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4 -; 
GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1.0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11557,10 +11258,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11603,17 +11303,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 4.0 ; GFX9-NEXT: s_mov_b32 s7, -1.0 ; GFX9-NEXT: s_mov_b32 s8, 0.5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5 @@ -11641,10 +11340,9 @@ define 
amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11684,10 +11382,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1.0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11728,10 +11425,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v5f32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11777,14 +11473,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -11809,10 +11504,9 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11843,10 +11537,9 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 
+; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -11878,10 +11571,9 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -11920,16 +11612,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4 -; 
GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 @@ -11956,10 +11647,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -11996,10 +11686,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -12037,10 +11726,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v2f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -12087,18 +11775,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 ; GFX9-NEXT: s_mov_b32 s8, 0 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6 @@ -12127,10 +11814,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, 
external_void_func_v3f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -12173,10 +11859,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -12220,10 +11905,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -12272,12 +11956,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_v2i16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 @@ -12304,9 +11987,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -12335,9 +12017,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12367,9 +12048,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12403,12 +12083,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -12437,9 +12116,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, 
external_void_func_v3i16_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -12470,9 +12148,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12504,9 +12181,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12541,12 +12217,11 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi +; GFX9-NEXT: 
s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -12575,9 +12250,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -12608,9 +12282,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12642,9 +12315,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s5, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12678,14 +12350,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -12710,10 +12381,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-NEXT: 
s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -12744,10 +12414,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0x20001 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -12779,10 +12448,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -12819,14 +12487,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 
+; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX9-NEXT: s_movk_i32 s5, 0x4400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -12851,10 +12518,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -12885,10 +12551,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0x40003c00 ; 
GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -12920,10 +12585,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -12961,12 +12625,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -12995,9 +12658,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, 
external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -13028,9 +12690,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13062,9 +12723,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13098,14 +12758,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: 
v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 0x40003 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -13130,10 +12789,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -13164,10 +12822,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX11-NEXT: 
s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 0x20001 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -13199,10 +12856,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -13239,12 +12895,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 @@ -13271,9 +12926,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_load_dword 
s4, s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -13302,9 +12956,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13334,9 +12987,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13370,12 +13022,11 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 
{ ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -13404,9 +13055,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -13437,9 +13087,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo ; GFX11-NEXT: 
v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13471,9 +13120,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13507,14 +13155,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 @@ -13539,10 +13186,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 +; 
GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -13573,10 +13219,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -13608,10 +13253,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -13649,15 
+13293,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3 @@ -13683,10 +13326,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -13720,10 +13362,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo ; 
GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 3 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -13758,10 +13399,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -13803,16 +13443,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 ; GFX9-NEXT: s_mov_b32 s7, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], 
s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 @@ -13839,10 +13478,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -13879,10 +13517,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 3 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -13920,10 +13557,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; 
GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -13969,12 +13605,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 @@ -14007,9 +13642,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -14044,9 +13678,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX11-NEXT: 
v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 4 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -14082,9 +13715,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -14122,16 +13754,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] 
-; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 @@ -14158,10 +13789,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -14198,10 +13828,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -14239,10 +13868,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, 
external_void_func_v4i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -14288,17 +13916,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: s_mov_b32 s8, 5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5 @@ -14326,10 +13953,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4 -; 
GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -14369,10 +13995,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -14413,10 +14038,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -14471,12 +14095,11 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; 
GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 @@ -14519,9 +14142,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s11, 7 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -14566,9 +14188,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s11, 7 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b256 s[4:11], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 8 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -14614,9 +14235,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -14663,8 +14283,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 @@ -14673,10 +14294,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s9, 6 ; GFX9-NEXT: s_mov_b32 s10, 7 ; GFX9-NEXT: s_mov_b32 s11, 8 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 @@ -14707,10 +14326,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 +; 
GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -14759,10 +14377,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -14812,10 +14429,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -14887,12 
+14503,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 16 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32_inreg@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 17 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 17 ; GFX9-NEXT: v_readlane_b32 s30, v40, 16 @@ -14951,9 +14566,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 16 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -15014,9 +14628,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s19, 15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, 
external_void_func_v16i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 17 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -15078,9 +14691,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -15171,6 +14783,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s20, s36 ; GFX9-NEXT: s_mov_b32 s21, s37 ; GFX9-NEXT: s_mov_b32 s22, s38 @@ -15183,9 +14797,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s29, s45 ; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; 
GFX9-NEXT: v_readlane_b32 s30, v40, 26 @@ -15256,9 +14867,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s20, 16 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18 @@ -15366,9 +14976,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s20, 16 ; GFX11-NEXT: v_writelane_b32 v40, s21, 17 ; GFX11-NEXT: v_writelane_b32 v40, s22, 18 @@ -15469,9 +15078,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v32i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 @@ -15606,6 +15214,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s20, s36 ; GFX9-NEXT: s_mov_b32 s21, s37 ; GFX9-NEXT: s_mov_b32 s22, s38 @@ -15618,9 +15228,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s29, s45 ; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26 @@ -15694,9 +15301,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: ; meta instruction ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s20, 16 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18 @@ -15784,6 +15390,8 @@ 
define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 28 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 s3, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -15805,10 +15413,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12 -; GFX11-NEXT: s_add_i32 s3, s32, 16 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s20, 16 ; GFX11-NEXT: v_writelane_b32 v40, s21, 17 ; GFX11-NEXT: v_writelane_b32 v40, s22, 18 @@ -15891,6 +15497,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -15914,10 +15521,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: ; meta instruction ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, 
external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 @@ -16011,10 +15616,9 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(1) @@ -16045,9 +15649,8 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -16078,9 +15681,8 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 ; GFX11-NEXT: 
v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -16109,9 +15711,8 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) @@ -16153,6 +15754,8 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_12xv3i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_12xv3i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -16186,9 +15789,6 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_mov_b32_e32 v31, 11 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 ; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -16253,10 +15853,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v29, 9 ; GFX10-NEXT: v_mov_b32_e32 v30, 10 ; GFX10-NEXT: v_mov_b32_e32 v31, 11 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_12xv3i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_12xv3i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 @@ -16300,12 +15899,11 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX11-NEXT: v_dual_mov_b32 v26, 8 :: v_dual_mov_b32 v27, 9 ; GFX11-NEXT: v_dual_mov_b32 v28, 9 :: v_dual_mov_b32 v29, 9 ; GFX11-NEXT: v_dual_mov_b32 v30, 10 :: v_dual_mov_b32 v31, 11 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_12xv3i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_12xv3i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 @@ -16366,10 +15964,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 9 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 10 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 11 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_12xv3i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_12xv3i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 @@ -16427,6 +16024,8 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_8xv5i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_8xv5i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -16460,9 +16059,6 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_mov_b32_e32 v31, 7 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -16535,10 +16131,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v29, 5 ; GFX10-NEXT: v_mov_b32_e32 v30, 6 ; GFX10-NEXT: v_mov_b32_e32 v31, 7 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_8xv5i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_8xv5i32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: 
v_readlane_b32 s30, v40, 0 @@ -16586,12 +16181,11 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX11-NEXT: v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v27, 5 ; GFX11-NEXT: v_dual_mov_b32 v28, 5 :: v_dual_mov_b32 v29, 5 ; GFX11-NEXT: v_dual_mov_b32 v30, 6 :: v_dual_mov_b32 v31, 7 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_8xv5i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_8xv5i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 @@ -16658,10 +16252,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 6 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 7 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_8xv5i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_8xv5i32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 @@ -16715,6 +16308,8 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_8xv5f32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_8xv5f32@abs32@lo ; GFX9-NEXT: 
v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -16748,9 +16343,6 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -16823,10 +16415,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_8xv5f32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_8xv5f32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 @@ -16880,12 +16471,11 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; GFX11-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX11-NEXT: v_mov_b32_e32 v31, 0x40e00000 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_8xv5f32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_8xv5f32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 
s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 @@ -16952,10 +16542,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 0x40e00000 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_8xv5f32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_8xv5f32@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index 09c57543ecd29..ad55d49a1a96d 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -17,12 +17,11 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND @@ -52,10 +51,9 @@ define amdgpu_gfx void 
@test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -87,10 +85,9 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_getpc_b64 s[4:5] -; GFX11-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 +; GFX11-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -213,16 +210,15 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s31 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s31, s4 ; GFX9-NEXT: ;;#ASMSTART @@ -250,10 +246,9 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 @@ -288,10 +283,9 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 @@ -331,17 +325,16 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v31 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_mov_b32_e32 v31, v41 ; GFX9-NEXT: ;;#ASMSTART @@ -369,16 +362,15 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v41, v31 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_mov_b32_e32 v31, v41 @@ -407,18 +399,18 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 
s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v31 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_mov_b32_e32 v41, v31 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_mov_b32_e32 v31, v41 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use v31 @@ -452,16 +444,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s33 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: ;;#ASMSTART @@ -489,10 +480,9 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, 
external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND @@ -527,10 +517,9 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s33 ; GFX11-NEXT: ;;#ASMEND @@ -572,16 +561,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s34 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, s34 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s34, s4 ; GFX9-NEXT: ;;#ASMSTART @@ -609,18 +597,17 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: 
v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[36:37] -; GFX10-NEXT: s_add_u32 s36, s36, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s37, s37, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, s34 +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s34, s4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s34 @@ -647,10 +634,9 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s34 ; GFX11-NEXT: ;;#ASMEND @@ -691,16 +677,15 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v41, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v41, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v41, s31, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v40 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v40 @@ -727,15 +712,14 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v41, s30, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v41, s30, 0 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART @@ -763,15 +747,14 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT: v_writelane_b32 v41, s30, 0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v40 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_getpc_b64 s[0:1] -; 
GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v41, s30, 0 ; GFX11-NEXT: v_writelane_b32 v41, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: ;;#ASMSTART @@ -920,12 +903,11 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -948,10 +930,9 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -976,10 +957,9 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 
exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, void_func_void_clobber_s33@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, void_func_void_clobber_s33@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1008,12 +988,11 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -1036,10 +1015,9 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, 
s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -1064,10 +1042,9 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, void_func_void_clobber_s34@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, void_func_void_clobber_s34@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -1097,16 +1074,15 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s40 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 @@ -1133,10 +1109,9 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 
s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND @@ -1170,10 +1145,9 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s40 ; GFX11-NEXT: ;;#ASMEND @@ -1214,8 +1188,10 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART @@ -1226,9 +1202,6 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: ; def v32 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v32 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 @@ -1259,20 +1232,19 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: v_mov_b32_e32 v41, v32 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -1305,20 +1277,19 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s40 ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: s_mov_b32 s4, s40 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v32 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, s40 ; GFX11-NEXT: v_mov_b32_e32 v41, v32 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: 
v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index 8dd73c5ab32fb..0d54da3128a61 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -28,14 +28,11 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, return_i1@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_i1@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 @@ -56,14 +53,11 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, return_i1@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_i1@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; 
GFX10-NEXT: v_readlane_b32 s30, v1, 0 @@ -84,14 +78,11 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, return_i1@gotpcrel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, return_i1@gotpcrel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, return_i1@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_i1@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v1, 1 @@ -133,14 +124,11 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, return_i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_i16@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 @@ -161,14 +149,11 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; 
GFX10-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, return_i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_i16@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 @@ -189,14 +174,11 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, return_i16@gotpcrel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, return_i16@gotpcrel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, return_i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_i16@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v1, 1 @@ -238,14 +220,11 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, 
return_2xi16@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 @@ -266,14 +245,11 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 @@ -294,14 +270,11 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, return_2xi16@gotpcrel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, return_2xi16@gotpcrel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, return_2xi16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_2xi16@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v1, 1 @@ -351,14 +324,11 @@ define amdgpu_gfx void 
@call_3xi16() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: v_readlane_b32 s30, v2, 0 @@ -379,14 +349,11 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v2, s30, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v2, s31, 1 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: v_readlane_b32 s30, v2, 0 @@ -407,14 +374,11 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, 
return_3xi16@gotpcrel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, return_3xi16@gotpcrel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v2, s30, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, return_3xi16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_3xi16@abs32@lo +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v2, s31, 1 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v2, 1 @@ -710,12 +674,10 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_addk_i32 s32, 0x2400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_100xi32@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_100xi32@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v100, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo +; GFX9-NEXT: s_addk_i32 s32, 0x2400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill @@ -749,7 +711,6 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX9-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v95, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v100, s31, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_dword v95, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -802,12 +763,10 @@ 
define amdgpu_gfx void @call_100xi32() #0 { ; GFX10-NEXT: buffer_store_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_addk_i32 s32, 0x1200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_100xi32@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_100xi32@gotpcrel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v100, s30, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo +; GFX10-NEXT: s_addk_i32 s32, 0x1200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill @@ -841,7 +800,6 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX10-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v95, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v100, s31, 1 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x1f ; GFX10-NEXT: buffer_load_dword v95, off, s[0:3], s33 @@ -895,12 +853,10 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v100, s33 offset:128 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_addk_i32 s32, 0x90 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, return_100xi32@gotpcrel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, return_100xi32@gotpcrel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v100, s30, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, return_100xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_100xi32@abs32@lo +; GFX11-NEXT: s_addk_i32 
s32, 0x90 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:124 ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:120 @@ -935,7 +891,6 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v94, s33 offset:4 ; GFX11-NEXT: scratch_store_b32 off, v95, s33 ; GFX11-NEXT: v_writelane_b32 v100, s31, 1 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_load_b32 v95, off, s33 @@ -2306,15 +2261,12 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, return_512xi32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_512xi32@abs32@lo ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: v_readlane_b32 s30, v2, 0 @@ -2336,15 +2288,12 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v2, s30, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: 
v_lshrrev_b32_e64 v0, 5, s33 +; GFX10-NEXT: s_mov_b32 s35, return_512xi32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_512xi32@abs32@lo +; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 ; GFX10-NEXT: v_writelane_b32 v2, s31, 1 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: v_readlane_b32 s30, v2, 0 @@ -2367,15 +2316,12 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v5, s33 offset:2048 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_addk_i32 s32, 0x1800 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, return_512xi32@gotpcrel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, return_512xi32@gotpcrel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v5, s30, 0 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, s33 +; GFX11-NEXT: s_mov_b32 s1, return_512xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_512xi32@abs32@lo +; GFX11-NEXT: s_addk_i32 s32, 0x1800 ; GFX11-NEXT: v_writelane_b32 v5, s31, 1 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v5, 1 @@ -2848,10 +2794,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_add_i32 s32, s32, 0x28000 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_72xi32@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_72xi32@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill @@ -2912,6 +2854,8 @@ define amdgpu_gfx void @call_72xi32() 
#1 { ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:160 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: v_writelane_b32 v33, s30, 0 +; GFX9-NEXT: s_mov_b32 s35, return_72xi32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_72xi32@abs32@lo ; GFX9-NEXT: v_add_u32_e32 v0, 0x200, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2945,7 +2889,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: v_mov_b32_e32 v30, 0 ; GFX9-NEXT: v_mov_b32_e32 v31, 0 ; GFX9-NEXT: v_writelane_b32 v33, s31, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:636 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:640 @@ -3122,12 +3065,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_add_i32 s32, s32, 0x14000 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_72xi32@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_72xi32@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: s_add_i32 s32, s32, 0x14000 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill @@ -3143,6 +3082,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v63, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 @@ -3185,13 +3126,11 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:156 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:160 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x200, v0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x200, v0 ; GFX10-NEXT: v_mov_b32_e32 v6, 0 ; GFX10-NEXT: v_mov_b32_e32 v7, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, 0 @@ -3218,8 +3157,9 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: v_mov_b32_e32 v29, 0 ; GFX10-NEXT: v_mov_b32_e32 v30, 0 ; GFX10-NEXT: v_mov_b32_e32 v31, 0 +; GFX10-NEXT: s_mov_b32 s35, return_72xi32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_72xi32@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x28 ; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:636 @@ -3443,16 +3383,12 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_add_i32 s1, s32, 48 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 -; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_add_u32 s0, s0, return_72xi32@gotpcrel32@lo+4 -; GFX11-NEXT: s_addc_u32 s1, s1, return_72xi32@gotpcrel32@hi+12 -; GFX11-NEXT: s_add_i32 s2, s32, 32 -; GFX11-NEXT: s_load_b64 s[46:47], s[0:1], 0x0 -; GFX11-NEXT: s_add_i32 s3, s32, 16 +; GFX11-NEXT: s_add_i32 s0, s32, 32 +; GFX11-NEXT: s_add_i32 s1, s32, 16 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 ; GFX11-NEXT: s_add_i32 s0, s33, 
0x200 ; GFX11-NEXT: v_writelane_b32 v32, s30, 0 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s3 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0 @@ -3469,8 +3405,9 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0 ; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0 ; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0 +; GFX11-NEXT: s_mov_b32 s47, return_72xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s46, return_72xi32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v32, s31, 1 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[46:47] ; GFX11-NEXT: s_clause 0xb ; GFX11-NEXT: scratch_load_b128 v[33:36], off, s33 offset:624 diff --git a/llvm/test/CodeGen/AMDGPU/global-constant.ll b/llvm/test/CodeGen/AMDGPU/global-constant.ll index 2bd2888aaa5ef..336f012ec80e4 100644 --- a/llvm/test/CodeGen/AMDGPU/global-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/global-constant.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-PAL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-DEFAULT %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-MESA %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | 
FileCheck -check-prefixes=GCN,GCN-DEFAULT %s ; RUN: llc -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=R600 %s @private1 = private unnamed_addr addrspace(4) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] @@ -9,14 +9,24 @@ @available_externally = available_externally addrspace(4) global [256 x i32] zeroinitializer ; GCN-LABEL: {{^}}private_test: -; GCN: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]] ; Non-R600 OSes use relocations. -; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4 -; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+12 -; GCN: s_getpc_b64 s[[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]] -; GCN: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4 -; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+12 +; GCN-DEFAULT: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]] +; GCN-DEFAULT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4 +; GCN-DEFAULT: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+12 +; GCN-DEFAULT: s_getpc_b64 s[[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]] +; GCN-DEFAULT: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4 +; GCN-DEFAULT: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+12 + +; MESA uses absolute relocations. +; GCN-MESA: s_add_u32 s2, s4, private1@abs32@lo +; GCN-MESA: s_addc_u32 s3, s5, private1@abs32@hi + +; PAL uses absolute relocations. 
+; GCN-PAL: s_add_u32 s2, s4, private1@abs32@lo +; GCN-PAL: s_addc_u32 s3, s5, private1@abs32@hi +; GCN-PAL: s_add_u32 s4, s4, private2@abs32@lo +; GCN-PAL: s_addc_u32 s5, s5, private2@abs32@hi ; R600-LABEL: private_test define amdgpu_kernel void @private_test(i32 %index, ptr addrspace(1) %out) { @@ -30,10 +40,17 @@ define amdgpu_kernel void @private_test(i32 %index, ptr addrspace(1) %out) { } ; GCN-LABEL: {{^}}available_externally_test: -; GCN: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]] -; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4 -; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+12 +; GCN-DEFAULT: s_getpc_b64 s[[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]] +; GCN-DEFAULT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4 +; GCN-DEFAULT: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+12 + +; GCN-MESA: s_mov_b32 s1, available_externally@abs32@hi+4 +; GCN-MESA: s_mov_b32 s0, available_externally@abs32@lo+4 + ; R600-LABEL: available_externally_test + +; GCN-PAL: s_mov_b32 s3, available_externally@abs32@hi+4 +; GCN-PAL: s_mov_b32 s2, available_externally@abs32@lo+4 define amdgpu_kernel void @available_externally_test(ptr addrspace(1) %out) { %ptr = getelementptr [256 x i32], ptr addrspace(4) @available_externally, i32 0, i32 1 %val = load i32, ptr addrspace(4) %ptr diff --git a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll index 4bd3a2f964e72..60dbd1c082a24 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll @@ -6,8 +6,8 @@ ; ELF: Relocations [ ; ELF-NEXT: Section (3) .rel.text { -; ELF-NEXT: 0x{{[0-9a-f]*}} R_AMDGPU_ABS32 lds.external -; ELF-NEXT: 0x{{[0-9a-f]*}} R_AMDGPU_ABS32 lds.defined +; ELF-NEXT: 0x{{[0-9a-f]*}} R_AMDGPU_ABS32_LO lds.external +; ELF-NEXT: 0x{{[0-9a-f]*}} R_AMDGPU_ABS32_LO lds.defined ; ELF-NEXT: } ; ELF-NEXT: ] diff --git 
a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll index 4cdb267d682a8..c732ff7094255 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll @@ -40,18 +40,15 @@ define amdgpu_cs void @caller() { ; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9] ; GFX9-SDAG-NEXT: s_mov_b32 s8, s0 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9-SDAG-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9-SDAG-NEXT: s_mov_b32 s4, callee@abs32@lo ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-SDAG-NEXT: s_add_u32 s8, s8, s0 ; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1] -; GFX9-SDAG-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9] ; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] -; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-SDAG-NEXT: s_endpgm ; @@ -60,18 +57,15 @@ define amdgpu_cs void @caller() { ; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9] ; GFX9-GISEL-NEXT: s_mov_b32 s8, s0 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9-GISEL-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9-GISEL-NEXT: s_mov_b32 s5, callee@abs32@hi ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_add_u32 s8, s8, s0 ; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1] -; GFX9-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 -; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 -; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9] ; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] -; GFX9-GISEL-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-GISEL-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll index f3fb0425b57a5..36b5e2a00f6d4 100644 --- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll +++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll @@ -202,7 +202,7 @@ define void @func_stacksave_nonentry_block(i1 %cond) { ; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4 -; WAVE32-O0-NEXT: ; implicit-def: $vgpr1 +; WAVE32-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; WAVE32-O0-NEXT: v_mov_b32_e32 v1, v0 ; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1 ; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload @@ -244,7 +244,7 @@ define void @func_stacksave_nonentry_block(i1 %cond) { ; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5] -; WAVE64-O0-NEXT: ; implicit-def: $vgpr1 +; WAVE64-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; WAVE64-O0-NEXT: v_mov_b32_e32 v1, v0 ; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 ; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload @@ -705,13 +705,12 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-OPT-NEXT: s_mov_b32 s0, s32 ; WAVE32-OPT-NEXT: v_mov_b32_e32 v0, 42 ; WAVE32-OPT-NEXT: v_mov_b32_e32 v1, 17 +; WAVE32-OPT-NEXT: s_mov_b32 s5, stack_passed_argument@abs32@hi +; WAVE32-OPT-NEXT: s_mov_b32 s4, stack_passed_argument@abs32@lo ; WAVE32-OPT-NEXT: s_waitcnt lgkmcnt(0) ; WAVE32-OPT-NEXT: s_bitset0_b32 s11, 21 ; WAVE32-OPT-NEXT: s_add_u32 s8, s8, s1 ; WAVE32-OPT-NEXT: s_addc_u32 s9, s9, 0 -; WAVE32-OPT-NEXT: 
s_getpc_b64 s[4:5] -; WAVE32-OPT-NEXT: s_add_u32 s4, s4, stack_passed_argument@rel32@lo+4 -; WAVE32-OPT-NEXT: s_addc_u32 s5, s5, stack_passed_argument@rel32@hi+12 ; WAVE32-OPT-NEXT: s_lshr_b32 s6, s0, 5 ; WAVE32-OPT-NEXT: s_mov_b64 s[0:1], s[8:9] ; WAVE32-OPT-NEXT: s_mov_b64 s[2:3], s[10:11] @@ -733,12 +732,11 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE64-OPT-NEXT: s_mov_b32 s0, s32 ; WAVE64-OPT-NEXT: v_mov_b32_e32 v0, 42 ; WAVE64-OPT-NEXT: v_mov_b32_e32 v1, 17 +; WAVE64-OPT-NEXT: s_mov_b32 s5, stack_passed_argument@abs32@hi +; WAVE64-OPT-NEXT: s_mov_b32 s4, stack_passed_argument@abs32@lo ; WAVE64-OPT-NEXT: s_waitcnt lgkmcnt(0) ; WAVE64-OPT-NEXT: s_add_u32 s8, s8, s1 ; WAVE64-OPT-NEXT: s_addc_u32 s9, s9, 0 -; WAVE64-OPT-NEXT: s_getpc_b64 s[4:5] -; WAVE64-OPT-NEXT: s_add_u32 s4, s4, stack_passed_argument@rel32@lo+4 -; WAVE64-OPT-NEXT: s_addc_u32 s5, s5, stack_passed_argument@rel32@hi+12 ; WAVE64-OPT-NEXT: s_lshr_b32 s6, s0, 6 ; WAVE64-OPT-NEXT: s_mov_b64 s[0:1], s[8:9] ; WAVE64-OPT-NEXT: s_mov_b64 s[2:3], s[10:11] @@ -761,7 +759,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-O0-NEXT: s_bitset0_b32 s23, 21 ; WAVE32-O0-NEXT: s_add_u32 s20, s20, s11 ; WAVE32-O0-NEXT: s_addc_u32 s21, s21, 0 -; WAVE32-O0-NEXT: ; implicit-def: $vgpr3 +; WAVE32-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane ; WAVE32-O0-NEXT: s_mov_b32 s14, s10 ; WAVE32-O0-NEXT: s_mov_b32 s13, s9 ; WAVE32-O0-NEXT: s_mov_b32 s12, s8 @@ -784,9 +782,10 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-O0-NEXT: s_mov_b32 s15, s32 ; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 17 ; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], s15 offset:4 -; WAVE32-O0-NEXT: s_getpc_b64 s[16:17] -; WAVE32-O0-NEXT: s_add_u32 s16, s16, stack_passed_argument@rel32@lo+4 -; WAVE32-O0-NEXT: s_addc_u32 s17, s17, stack_passed_argument@rel32@hi+12 +; WAVE32-O0-NEXT: s_mov_b32 s15, 
stack_passed_argument@abs32@hi +; WAVE32-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo +; WAVE32-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17 +; WAVE32-O0-NEXT: s_mov_b32 s17, s15 ; WAVE32-O0-NEXT: s_mov_b32 s15, 20 ; WAVE32-O0-NEXT: v_lshlrev_b32_e64 v2, s15, v2 ; WAVE32-O0-NEXT: s_mov_b32 s15, 10 @@ -878,7 +877,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE64-O0-NEXT: s_waitcnt lgkmcnt(0) ; WAVE64-O0-NEXT: s_add_u32 s24, s24, s11 ; WAVE64-O0-NEXT: s_addc_u32 s25, s25, 0 -; WAVE64-O0-NEXT: ; implicit-def: $vgpr3 +; WAVE64-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane ; WAVE64-O0-NEXT: s_mov_b32 s14, s10 ; WAVE64-O0-NEXT: s_mov_b32 s13, s9 ; WAVE64-O0-NEXT: s_mov_b32 s12, s8 @@ -901,9 +900,10 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE64-O0-NEXT: s_mov_b32 s15, s32 ; WAVE64-O0-NEXT: v_mov_b32_e32 v3, 17 ; WAVE64-O0-NEXT: buffer_store_dword v3, off, s[24:27], s15 offset:4 -; WAVE64-O0-NEXT: s_getpc_b64 s[16:17] -; WAVE64-O0-NEXT: s_add_u32 s16, s16, stack_passed_argument@rel32@lo+4 -; WAVE64-O0-NEXT: s_addc_u32 s17, s17, stack_passed_argument@rel32@hi+12 +; WAVE64-O0-NEXT: s_mov_b32 s15, stack_passed_argument@abs32@hi +; WAVE64-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo +; WAVE64-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17 +; WAVE64-O0-NEXT: s_mov_b32 s17, s15 ; WAVE64-O0-NEXT: s_mov_b32 s15, 20 ; WAVE64-O0-NEXT: v_lshlrev_b32_e64 v2, s15, v2 ; WAVE64-O0-NEXT: s_mov_b32 s15, 10 @@ -1007,10 +1007,9 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-OPT-NEXT: v_mov_b32_e32 v0, 42 ; WAVE32-OPT-NEXT: v_mov_b32_e32 v1, 17 ; WAVE32-OPT-NEXT: s_addk_i32 s32, 0x1200 -; WAVE32-OPT-NEXT: s_getpc_b64 s[4:5] -; WAVE32-OPT-NEXT: s_add_u32 s4, s4, stack_passed_argument@rel32@lo+4 -; WAVE32-OPT-NEXT: s_addc_u32 s5, s5, stack_passed_argument@rel32@hi+12 +; WAVE32-OPT-NEXT: s_mov_b32 s5, 
stack_passed_argument@abs32@hi ; WAVE32-OPT-NEXT: s_mov_b32 s6, s32 +; WAVE32-OPT-NEXT: s_mov_b32 s4, stack_passed_argument@abs32@lo ; WAVE32-OPT-NEXT: v_writelane_b32 v31, s31, 1 ; WAVE32-OPT-NEXT: s_lshr_b32 s7, s6, 5 ; WAVE32-OPT-NEXT: buffer_store_dword v0, off, s[0:3], s33 @@ -1043,10 +1042,9 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-OPT-NEXT: v_mov_b32_e32 v0, 42 ; WAVE64-OPT-NEXT: v_mov_b32_e32 v1, 17 ; WAVE64-OPT-NEXT: s_addk_i32 s32, 0x2400 -; WAVE64-OPT-NEXT: s_getpc_b64 s[4:5] -; WAVE64-OPT-NEXT: s_add_u32 s4, s4, stack_passed_argument@rel32@lo+4 -; WAVE64-OPT-NEXT: s_addc_u32 s5, s5, stack_passed_argument@rel32@hi+12 +; WAVE64-OPT-NEXT: s_mov_b32 s5, stack_passed_argument@abs32@hi ; WAVE64-OPT-NEXT: s_mov_b32 s6, s32 +; WAVE64-OPT-NEXT: s_mov_b32 s4, stack_passed_argument@abs32@lo ; WAVE64-OPT-NEXT: v_writelane_b32 v31, s31, 1 ; WAVE64-OPT-NEXT: s_lshr_b32 s7, s6, 6 ; WAVE64-OPT-NEXT: buffer_store_dword v0, off, s[0:3], s33 @@ -1077,7 +1075,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s16 ; WAVE32-O0-NEXT: s_add_i32 s32, s32, 0x1200 -; WAVE32-O0-NEXT: ; implicit-def: $vgpr0 +; WAVE32-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; WAVE32-O0-NEXT: v_writelane_b32 v32, s30, 0 ; WAVE32-O0-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE32-O0-NEXT: s_mov_b32 s16, s32 @@ -1095,9 +1093,10 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-NEXT: s_mov_b32 s16, s32 ; WAVE32-O0-NEXT: v_mov_b32_e32 v0, 17 ; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s16 offset:4 -; WAVE32-O0-NEXT: s_getpc_b64 s[16:17] -; WAVE32-O0-NEXT: s_add_u32 s16, s16, stack_passed_argument@rel32@lo+4 -; WAVE32-O0-NEXT: s_addc_u32 s17, s17, stack_passed_argument@rel32@hi+12 +; WAVE32-O0-NEXT: s_mov_b32 s18, stack_passed_argument@abs32@hi +; 
WAVE32-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo +; WAVE32-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17 +; WAVE32-O0-NEXT: s_mov_b32 s17, s18 ; WAVE32-O0-NEXT: s_mov_b64 s[0:1], s[20:21] ; WAVE32-O0-NEXT: s_mov_b64 s[2:3], s[22:23] ; WAVE32-O0-NEXT: ; implicit-def: $sgpr18 @@ -1195,7 +1194,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill ; WAVE64-O0-NEXT: s_mov_b64 exec, s[16:17] ; WAVE64-O0-NEXT: s_add_i32 s32, s32, 0x2400 -; WAVE64-O0-NEXT: ; implicit-def: $vgpr0 +; WAVE64-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; WAVE64-O0-NEXT: v_writelane_b32 v32, s30, 0 ; WAVE64-O0-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE64-O0-NEXT: s_mov_b32 s16, s32 @@ -1213,9 +1212,10 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-O0-NEXT: s_mov_b32 s16, s32 ; WAVE64-O0-NEXT: v_mov_b32_e32 v0, 17 ; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s16 offset:4 -; WAVE64-O0-NEXT: s_getpc_b64 s[16:17] -; WAVE64-O0-NEXT: s_add_u32 s16, s16, stack_passed_argument@rel32@lo+4 -; WAVE64-O0-NEXT: s_addc_u32 s17, s17, stack_passed_argument@rel32@hi+12 +; WAVE64-O0-NEXT: s_mov_b32 s18, stack_passed_argument@abs32@hi +; WAVE64-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo +; WAVE64-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17 +; WAVE64-O0-NEXT: s_mov_b32 s17, s18 ; WAVE64-O0-NEXT: s_mov_b64 s[0:1], s[20:21] ; WAVE64-O0-NEXT: s_mov_b64 s[2:3], s[22:23] ; WAVE64-O0-NEXT: ; implicit-def: $sgpr18 diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll index 756182dc243bb..88c1fd9f66b33 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc 
-mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN -enable-var-scope %s -; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-SELDAG -enable-var-scope %s +; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-GISEL -enable-var-scope %s ; Callee with VGPR arguments @@ -15,15 +15,23 @@ define hidden amdgpu_gfx float @callee(float %v.arg0, float %v.arg1) { } define amdgpu_gfx float @caller(float %arg0) { -; GCN-LABEL: caller: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0 -; GCN-NEXT: v_mov_b32_e32 v1, 2.0 -; GCN-NEXT: s_getpc_b64 s[36:37] -; GCN-NEXT: s_add_u32 s36, s36, callee@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s37, s37, callee@rel32@hi+12 -; GCN-NEXT: s_setpc_b64 s[36:37] +; GCN-SELDAG-LABEL: caller: +; GCN-SELDAG: ; %bb.0: +; GCN-SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-SELDAG-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GCN-SELDAG-NEXT: s_mov_b32 s37, callee@abs32@hi +; GCN-SELDAG-NEXT: s_mov_b32 s36, callee@abs32@lo +; GCN-SELDAG-NEXT: v_mov_b32_e32 v1, 2.0 +; GCN-SELDAG-NEXT: s_setpc_b64 s[36:37] +; +; GCN-GISEL-LABEL: caller: +; GCN-GISEL: ; %bb.0: +; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0 +; GCN-GISEL-NEXT: s_mov_b32 s36, callee@abs32@lo +; GCN-GISEL-NEXT: s_mov_b32 s37, callee@abs32@hi +; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 2.0 +; GCN-GISEL-NEXT: s_setpc_b64 s[36:37] %add = fadd float %arg0, 1.0 %call = tail call amdgpu_gfx float @callee(float %add, float 2.0) ret float %call diff --git a/llvm/test/MC/AMDGPU/elf-lds.s b/llvm/test/MC/AMDGPU/elf-lds.s index b2b4ad6120f1b..f1081f06b513b 100644 --- 
a/llvm/test/MC/AMDGPU/elf-lds.s +++ b/llvm/test/MC/AMDGPU/elf-lds.s @@ -34,8 +34,8 @@ test_kernel: // CHECK: Relocations [ // CHECK: Section (3) .rel.text { -// CHECK-NEXT: 0x4 R_AMDGPU_ABS32 lds0 -// CHECK-NEXT: 0x1C R_AMDGPU_ABS32 lds4 +// CHECK-NEXT: 0x4 R_AMDGPU_ABS32_LO lds0 +// CHECK-NEXT: 0x1C R_AMDGPU_ABS32_LO lds4 // CHECK-NEXT: } // CHECK: ]