Skip to content

Conversation

@mssefat
Copy link
Contributor

@mssefat mssefat commented Dec 12, 2025

This patch adds register bank legalization support for buffer load byte and short operations in the AMDGPU GlobalISel pipeline.

This is a re-land of #167798. I have fixed the failing test /CodeGen/AMDGPU/GlobalISel/buffer-load-byte-short.ll

@llvmbot
Copy link
Member

llvmbot commented Dec 12, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Syadus Sefat (mssefat)

Changes

I have fixed the failing test /CodeGen/AMDGPU/GlobalISel/buffer-load-byte-short.ll


Full diff: https://github.com/llvm/llvm-project/pull/172065.diff

6 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+6)
  • (added) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-byte-short.ll (+253)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll (+1-1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index d01afee331025..ac24fcf465997 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -889,6 +889,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
       .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
 
+  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
+                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
+                   StandardB)
+      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
+      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
+
   addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
       .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-byte-short.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-byte-short.ll
new file mode 100644
index 0000000000000..5915d87e3168d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-load-byte-short.ll
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -new-reg-bank-select -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
+
+define amdgpu_ps void @test_buffer_load_u8_uniform(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset, i32 inreg %addend, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_u8_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, 0
+; GFX12-NEXT:    s_mov_b32 s10, s7
+; GFX12-NEXT:    s_mov_b32 s11, s8
+; GFX12-NEXT:    buffer_load_u8 v0, v0, s[0:3], s5 offen
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_add_co_i32 s0, s0, s6
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    global_store_b32 v1, v0, s[10:11]
+; GFX12-NEXT:    s_endpgm
+  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
+  %zext = zext i8 %val to i32
+  %result = add i32 %zext, %addend
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_i8_uniform(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset, i32 inreg %addend, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_i8_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, 0
+; GFX12-NEXT:    s_mov_b32 s10, s7
+; GFX12-NEXT:    s_mov_b32 s11, s8
+; GFX12-NEXT:    buffer_load_i8 v0, v0, s[0:3], s5 offen
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_add_co_i32 s0, s0, s6
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    global_store_b32 v1, v0, s[10:11]
+; GFX12-NEXT:    s_endpgm
+  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
+  %sext = sext i8 %val to i32
+  %result = add i32 %sext, %addend
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_u16_uniform(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset, i32 inreg %addend, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_u16_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[0:3], s4
+; GFX12-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_add_co_i32 s0, s0, s5
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    global_store_b32 v1, v0, s[6:7]
+; GFX12-NEXT:    s_endpgm
+  %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
+  %zext = zext i16 %val to i32
+  %result = add i32 %zext, %addend
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_i16_uniform(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset, i32 inreg %addend, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_i16_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, 0
+; GFX12-NEXT:    s_mov_b32 s10, s7
+; GFX12-NEXT:    s_mov_b32 s11, s8
+; GFX12-NEXT:    buffer_load_i16 v0, v0, s[0:3], null offen
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_add_co_i32 s0, s0, s6
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    global_store_b32 v1, v0, s[10:11]
+; GFX12-NEXT:    s_endpgm
+  %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+  %sext = sext i16 %val to i32
+  %result = add i32 %sext, %addend
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_u8_divergent(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_u8_divergent:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    buffer_load_u8 v0, v0, s[0:3], s4 offen
+; GFX12-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-NEXT:    s_mov_b32 s8, s5
+; GFX12-NEXT:    s_mov_b32 s9, s6
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v1, v0, s[8:9]
+; GFX12-NEXT:    s_endpgm
+  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
+  %zext = zext i8 %val to i32
+  store i32 %zext, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_i8_divergent(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_i8_divergent:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, 0
+; GFX12-NEXT:    buffer_load_i8 v0, v0, s[0:3], s5 offen
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v1, v0, s[6:7]
+; GFX12-NEXT:    s_endpgm
+  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
+  %sext = sext i8 %val to i32
+  store i32 %sext, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_u16_divergent(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_u16_divergent:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    buffer_load_u16 v0, off, s[0:3], s4
+; GFX12-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-NEXT:    s_mov_b32 s8, s5
+; GFX12-NEXT:    s_mov_b32 s9, s6
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v1, v0, s[8:9]
+; GFX12-NEXT:    s_endpgm
+  %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
+  %zext = zext i16 %val to i32
+  store i32 %zext, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_i16_divergent(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_i16_divergent:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    buffer_load_i16 v0, v0, s[0:3], null offen
+; GFX12-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-NEXT:    s_mov_b32 s4, s5
+; GFX12-NEXT:    s_mov_b32 s5, s6
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v1, v0, s[4:5]
+; GFX12-NEXT:    s_endpgm
+  %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+  %sext = sext i16 %val to i32
+  store i32 %sext, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_u8_waterfall_rsrc(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_u8_waterfall_rsrc:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_mov_b32 s8, s1
+; GFX12-NEXT:    s_mov_b32 s9, s2
+; GFX12-NEXT:    s_mov_b32 s2, exec_lo
+; GFX12-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX12-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX12-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX12-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    v_cmp_eq_u64_e64 s1, s[6:7], v[2:3]
+; GFX12-NEXT:    s_and_b32 s1, vcc_lo, s1
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    s_and_saveexec_b32 s1, s1
+; GFX12-NEXT:    buffer_load_u8 v1, v4, s[4:7], s0 offen
+; GFX12-NEXT:    ; implicit-def: $vgpr0
+; GFX12-NEXT:    ; implicit-def: $vgpr4
+; GFX12-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s1
+; GFX12-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX12-NEXT:  ; %bb.2:
+; GFX12-NEXT:    s_mov_b32 exec_lo, s2
+; GFX12-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v0, v1, s[8:9]
+; GFX12-NEXT:    s_endpgm
+  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
+  %zext = zext i8 %val to i32
+  store i32 %zext, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_i8_waterfall_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 %soffset, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_i8_waterfall_soffset:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_mov_b32 s6, exec_lo
+; GFX12-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT:    v_readfirstlane_b32 s8, v1
+; GFX12-NEXT:    s_mov_b32 s7, exec_lo
+; GFX12-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-NEXT:    v_cmpx_eq_u32_e64 s8, v1
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_load_i8 v2, v0, s[0:3], s8 offen
+; GFX12-NEXT:    ; implicit-def: $vgpr1
+; GFX12-NEXT:    ; implicit-def: $vgpr0
+; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s7
+; GFX12-NEXT:    s_cbranch_execnz .LBB9_1
+; GFX12-NEXT:  ; %bb.2:
+; GFX12-NEXT:    s_mov_b32 exec_lo, s6
+; GFX12-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v0, v2, s[4:5]
+; GFX12-NEXT:    s_endpgm
+  %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
+  %sext = sext i8 %val to i32
+  store i32 %sext, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @test_buffer_load_u16_waterfall_both(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, ptr addrspace(1) inreg %out) {
+; GFX12-LABEL: test_buffer_load_u16_waterfall_both:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_mov_b32 s8, exec_lo
+; GFX12-NEXT:  .LBB10_1: ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_readfirstlane_b32 s5, v1
+; GFX12-NEXT:    v_readfirstlane_b32 s6, v2
+; GFX12-NEXT:    v_readfirstlane_b32 s7, v3
+; GFX12-NEXT:    v_readfirstlane_b32 s9, v5
+; GFX12-NEXT:    s_wait_alu depctr_va_sdst(0)
+; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1]
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-NEXT:    v_cmp_eq_u64_e64 s2, s[6:7], v[2:3]
+; GFX12-NEXT:    v_cmp_eq_u32_e64 s3, s9, v5
+; GFX12-NEXT:    s_and_b32 s2, vcc_lo, s2
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT:    s_and_b32 s2, s2, s3
+; GFX12-NEXT:    s_and_saveexec_b32 s2, s2
+; GFX12-NEXT:    buffer_load_u16 v1, v4, s[4:7], s9 offen
+; GFX12-NEXT:    ; implicit-def: $vgpr0
+; GFX12-NEXT:    ; implicit-def: $vgpr5
+; GFX12-NEXT:    ; implicit-def: $vgpr4
+; GFX12-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s2
+; GFX12-NEXT:    s_cbranch_execnz .LBB10_1
+; GFX12-NEXT:  ; %bb.2:
+; GFX12-NEXT:    s_mov_b32 exec_lo, s8
+; GFX12-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX12-NEXT:    s_endpgm
+  %val = call i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
+  %zext = zext i16 %val to i32
+  store i32 %zext, ptr addrspace(1) %out
+  ret void
+}
+
+declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32 immarg) #0
+declare i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32>, i32, i32, i32 immarg) #0
+attributes #0 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
index 7003bb1a09eae..6fb4037ef7278 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX8 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX12,GFX1200 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX12,GFX1250 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX12,GFX1200 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GFX12,GFX1250 %s
 ; FIXME: Test with SI when argument lowering not broken for f16
 
 ; Natural mapping
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll
index 89c3a41eda29c..53206b61be5d6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck %s
 ; FIXME: Test with SI when argument lowering not broken for f16
 
 ; Natural mapping
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
index dbef90f6c9ff9..73b549d0ed3e0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX1200 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX1250 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX1250 %s
 
 ; Natural mapping
 define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
index 4ae456d47ea4f..61d839d291983 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -o - %s | FileCheck %s
 
 ; Natural mapping
 define amdgpu_ps float @struct_ptr_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {

Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you fix the description to have the original commit message

@mssefat
Copy link
Contributor Author

mssefat commented Dec 12, 2025

Yeah, I updated the description.

@mssefat mssefat merged commit f3c1645 into llvm:main Dec 12, 2025
13 checks passed
anonymouspc pushed a commit to anonymouspc/llvm that referenced this pull request Dec 15, 2025
…r_load byte and short (llvm#172065)

This patch adds register bank legalization support for buffer load byte
and short operations in the AMDGPU GlobalISel pipeline.

This is a re-land of llvm#167798. I have fixed the failing test
/CodeGen/AMDGPU/GlobalISel/buffer-load-byte-short.ll
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants