Skip to content

Commit 558ea41

Browse files
authored
[AMDGPU] Reapply 'Sign extend simm16 in setreg intrinsic' (#78492)
We currently force users to use a negative constant in the intrinsic call. Changing it to zext would break existing programs, so just sign-extend the argument.
1 parent f3a4de3 commit 558ea41

File tree

4 files changed

+71
-8
lines changed

4 files changed

+71
-8
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -787,12 +787,9 @@ class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
787787
return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
788788
}]>;
789789

790-
def SIMM16bit : ImmLeaf <i32,
791-
[{return isInt<16>(Imm);}]
792-
>;
793-
794-
def UIMM16bit : ImmLeaf <i32,
795-
[{return isUInt<16>(Imm);}]
790+
def SIMM16bit : TImmLeaf <i32,
791+
[{return isInt<16>(Imm) || isUInt<16>(Imm);}],
792+
as_i16timm
796793
>;
797794

798795
def i64imm_32bit : ImmLeaf<i64, [{

llvm/lib/Target/AMDGPU/SIModeRegister.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
285285
1;
286286
unsigned Offset =
287287
(Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
288-
unsigned Mask = ((1 << Width) - 1) << Offset;
288+
unsigned Mask = maskTrailingOnes<unsigned>(Width) << Offset;
289289

290290
// If an InsertionPoint is set we will insert a setreg there.
291291
if (InsertionPoint) {

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1124,7 +1124,7 @@ class S_SETREG_B32_Pseudo <list<dag> pattern=[]> : SOPK_Pseudo <
11241124
pattern>;
11251125

11261126
def S_SETREG_B32 : S_SETREG_B32_Pseudo <
1127-
[(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
1127+
[(int_amdgcn_s_setreg (i32 SIMM16bit:$simm16), i32:$sdst)]> {
11281128
// Use custom inserter to optimize some cases to
11291129
// S_DENORM_MODE/S_ROUND_MODE/S_SETREG_B32_mode.
11301130
let usesCustomInserter = 1;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,6 +1433,72 @@ define amdgpu_kernel void @test_setreg_set_4_bits_straddles_round_and_denorm() {
14331433
ret void
14341434
}
14351435

1436+
define amdgpu_ps void @test_63489(i32 inreg %var.mode) {
1437+
; GFX6-LABEL: test_63489:
1438+
; GFX6: ; %bb.0:
1439+
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
1440+
; GFX6-NEXT: ;;#ASMSTART
1441+
; GFX6-NEXT: ;;#ASMEND
1442+
; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1443+
;
1444+
; GFX789-LABEL: test_63489:
1445+
; GFX789: ; %bb.0:
1446+
; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
1447+
; GFX789-NEXT: ;;#ASMSTART
1448+
; GFX789-NEXT: ;;#ASMEND
1449+
; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1450+
;
1451+
; GFX10-LABEL: test_63489:
1452+
; GFX10: ; %bb.0:
1453+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
1454+
; GFX10-NEXT: ;;#ASMSTART
1455+
; GFX10-NEXT: ;;#ASMEND
1456+
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1457+
;
1458+
; GFX11-LABEL: test_63489:
1459+
; GFX11: ; %bb.0:
1460+
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
1461+
; GFX11-NEXT: ;;#ASMSTART
1462+
; GFX11-NEXT: ;;#ASMEND
1463+
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1464+
call void @llvm.amdgcn.s.setreg(i32 63489, i32 %var.mode)
1465+
call void asm sideeffect "", ""()
1466+
ret void
1467+
}
1468+
1469+
define amdgpu_ps void @test_minus_2047(i32 inreg %var.mode) {
1470+
; GFX6-LABEL: test_minus_2047:
1471+
; GFX6: ; %bb.0:
1472+
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
1473+
; GFX6-NEXT: ;;#ASMSTART
1474+
; GFX6-NEXT: ;;#ASMEND
1475+
; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1476+
;
1477+
; GFX789-LABEL: test_minus_2047:
1478+
; GFX789: ; %bb.0:
1479+
; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
1480+
; GFX789-NEXT: ;;#ASMSTART
1481+
; GFX789-NEXT: ;;#ASMEND
1482+
; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1483+
;
1484+
; GFX10-LABEL: test_minus_2047:
1485+
; GFX10: ; %bb.0:
1486+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
1487+
; GFX10-NEXT: ;;#ASMSTART
1488+
; GFX10-NEXT: ;;#ASMEND
1489+
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1490+
;
1491+
; GFX11-LABEL: test_minus_2047:
1492+
; GFX11: ; %bb.0:
1493+
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
1494+
; GFX11-NEXT: ;;#ASMSTART
1495+
; GFX11-NEXT: ;;#ASMEND
1496+
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1497+
call void @llvm.amdgcn.s.setreg(i32 -2047, i32 %var.mode)
1498+
call void asm sideeffect "", ""()
1499+
ret void
1500+
}
1501+
14361502
; FIXME: Broken for DAG
14371503
; define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
14381504
; call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)

0 commit comments

Comments
 (0)