Skip to content

Commit 239053c

Browse files
committed
Legalize mul i1
1 parent 37474c3 commit 239053c

File tree

3 files changed

+61
-112
lines changed

3 files changed

+61
-112
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -769,6 +769,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
769769
// extract of relevant bits.
770770
setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
771771

772+
setOperationAction(ISD::MUL, MVT::i1, Promote);
773+
772774
setTargetDAGCombine({ISD::ADD,
773775
ISD::UADDO_CARRY,
774776
ISD::SUB,

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2345,11 +2345,6 @@ def : GCNPat <
23452345
(S_AND_B64 $src0, $src1)
23462346
>;
23472347

2348-
def : GCNPat <
2349-
(i1 (mul i1:$src0, i1:$src1)),
2350-
(S_AND_B64 $src0, $src1)
2351-
>;
2352-
23532348
def : GCNPat <
23542349
(i1 (or i1:$src0, i1:$src1)),
23552350
(S_OR_B64 $src0, $src1)
@@ -2389,11 +2384,6 @@ def : GCNPat <
23892384
(S_AND_B32 $src0, $src1)
23902385
>;
23912386

2392-
def : GCNPat <
2393-
(i1 (mul i1:$src0, i1:$src1)),
2394-
(S_AND_B32 $src0, $src1)
2395-
>;
2396-
23972387
def : GCNPat <
23982388
(i1 (or i1:$src0, i1:$src1)),
23992389
(S_OR_B32 $src0, $src1)

llvm/test/CodeGen/AMDGPU/mul.ll

Lines changed: 59 additions & 102 deletions
Original file line number | Diff line number | Diff line change
@@ -1062,52 +1062,43 @@ entry:
10621062
define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8 x i32], i1 %b) nounwind {
10631063
; SI-LABEL: s_mul_i1:
10641064
; SI: ; %bb.0: ; %entry
1065-
; SI-NEXT: s_load_dword s2, s[0:1], 0x13
1066-
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
1067-
; SI-NEXT: s_load_dword s3, s[0:1], 0x1c
1068-
; SI-NEXT: s_mov_b32 s7, 0xf000
1069-
; SI-NEXT: s_mov_b32 s6, -1
1065+
; SI-NEXT: s_load_dword s4, s[0:1], 0x13
1066+
; SI-NEXT: s_load_dword s5, s[0:1], 0x1c
1067+
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1068+
; SI-NEXT: s_mov_b32 s3, 0xf000
1069+
; SI-NEXT: s_mov_b32 s2, -1
10701070
; SI-NEXT: s_waitcnt lgkmcnt(0)
1071-
; SI-NEXT: s_bitcmp1_b32 s2, 0
1072-
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
1073-
; SI-NEXT: s_bitcmp1_b32 s3, 0
1074-
; SI-NEXT: s_cselect_b64 s[2:3], -1, 0
1075-
; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1076-
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1077-
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
1071+
; SI-NEXT: s_mul_i32 s4, s4, s5
1072+
; SI-NEXT: s_and_b32 s4, s4, 1
1073+
; SI-NEXT: v_mov_b32_e32 v0, s4
1074+
; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
10781075
; SI-NEXT: s_endpgm
10791076
;
10801077
; VI-LABEL: s_mul_i1:
10811078
; VI: ; %bb.0: ; %entry
1082-
; VI-NEXT: s_load_dword s2, s[0:1], 0x4c
1083-
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
1084-
; VI-NEXT: s_load_dword s3, s[0:1], 0x70
1085-
; VI-NEXT: s_mov_b32 s7, 0xf000
1086-
; VI-NEXT: s_mov_b32 s6, -1
1079+
; VI-NEXT: s_load_dword s4, s[0:1], 0x70
1080+
; VI-NEXT: s_load_dword s5, s[0:1], 0x4c
1081+
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1082+
; VI-NEXT: s_mov_b32 s3, 0xf000
1083+
; VI-NEXT: s_mov_b32 s2, -1
10871084
; VI-NEXT: s_waitcnt lgkmcnt(0)
1088-
; VI-NEXT: s_bitcmp1_b32 s2, 0
1089-
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
1090-
; VI-NEXT: s_bitcmp1_b32 s3, 0
1091-
; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
1092-
; VI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1093-
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1094-
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
1085+
; VI-NEXT: v_mov_b32_e32 v0, s4
1086+
; VI-NEXT: v_mul_lo_u16_e32 v0, s5, v0
1087+
; VI-NEXT: v_and_b32_e32 v0, 1, v0
1088+
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
10951089
; VI-NEXT: s_endpgm
10961090
;
10971091
; GFX9-LABEL: s_mul_i1:
10981092
; GFX9: ; %bb.0: ; %entry
1099-
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x4c
1093+
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x70
1094+
; GFX9-NEXT: s_load_dword s3, s[0:1], 0x4c
11001095
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
1101-
; GFX9-NEXT: s_load_dword s3, s[0:1], 0x70
11021096
; GFX9-NEXT: s_mov_b32 s7, 0xf000
11031097
; GFX9-NEXT: s_mov_b32 s6, -1
11041098
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1105-
; GFX9-NEXT: s_bitcmp1_b32 s2, 0
1106-
; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
1107-
; GFX9-NEXT: s_bitcmp1_b32 s3, 0
1108-
; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0
1109-
; GFX9-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1110-
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1099+
; GFX9-NEXT: v_mov_b32_e32 v0, s2
1100+
; GFX9-NEXT: v_mul_lo_u16_e32 v0, s3, v0
1101+
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
11111102
; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0
11121103
; GFX9-NEXT: s_endpgm
11131104
;
@@ -1120,12 +1111,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
11201111
; GFX10-NEXT: s_mov_b32 s7, 0x31016000
11211112
; GFX10-NEXT: s_mov_b32 s6, -1
11221113
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1123-
; GFX10-NEXT: s_bitcmp1_b32 s2, 0
1124-
; GFX10-NEXT: s_cselect_b32 s0, -1, 0
1125-
; GFX10-NEXT: s_bitcmp1_b32 s3, 0
1126-
; GFX10-NEXT: s_cselect_b32 s1, -1, 0
1127-
; GFX10-NEXT: s_and_b32 s0, s0, s1
1128-
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1114+
; GFX10-NEXT: v_mul_lo_u16 v0, s2, s3
1115+
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
11291116
; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0
11301117
; GFX10-NEXT: s_endpgm
11311118
;
@@ -1136,15 +1123,11 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
11361123
; GFX11-NEXT: s_load_b32 s3, s[0:1], 0x70
11371124
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
11381125
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1139-
; GFX11-NEXT: s_bitcmp1_b32 s2, 0
1140-
; GFX11-NEXT: s_cselect_b32 s2, -1, 0
1141-
; GFX11-NEXT: s_bitcmp1_b32 s3, 0
1142-
; GFX11-NEXT: s_cselect_b32 s3, -1, 0
1143-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1144-
; GFX11-NEXT: s_and_b32 s2, s2, s3
1126+
; GFX11-NEXT: v_mul_lo_u16 v0, s2, s3
11451127
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
1146-
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
11471128
; GFX11-NEXT: s_mov_b32 s2, -1
1129+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1130+
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
11481131
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
11491132
; GFX11-NEXT: s_nop 0
11501133
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -1198,14 +1181,9 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
11981181
; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4
11991182
; SI-NEXT: s_mov_b32 s4, s0
12001183
; SI-NEXT: s_mov_b32 s5, s1
1201-
; SI-NEXT: s_waitcnt vmcnt(1)
1202-
; SI-NEXT: v_and_b32_e32 v0, 1, v0
12031184
; SI-NEXT: s_waitcnt vmcnt(0)
1204-
; SI-NEXT: v_and_b32_e32 v1, 1, v1
1205-
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
1206-
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v1
1207-
; SI-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
1208-
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1185+
; SI-NEXT: v_mul_lo_u32 v0, v0, v1
1186+
; SI-NEXT: v_and_b32_e32 v0, 1, v0
12091187
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
12101188
; SI-NEXT: s_endpgm
12111189
;
@@ -1223,14 +1201,9 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
12231201
; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4
12241202
; VI-NEXT: s_mov_b32 s4, s0
12251203
; VI-NEXT: s_mov_b32 s5, s1
1226-
; VI-NEXT: s_waitcnt vmcnt(1)
1227-
; VI-NEXT: v_and_b32_e32 v0, 1, v0
12281204
; VI-NEXT: s_waitcnt vmcnt(0)
1229-
; VI-NEXT: v_and_b32_e32 v1, 1, v1
1230-
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
1231-
; VI-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v1
1232-
; VI-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
1233-
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1205+
; VI-NEXT: v_mul_lo_u16_e32 v0, v0, v1
1206+
; VI-NEXT: v_and_b32_e32 v0, 1, v0
12341207
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
12351208
; VI-NEXT: s_endpgm
12361209
;
@@ -1248,69 +1221,53 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
12481221
; GFX9-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4
12491222
; GFX9-NEXT: s_mov_b32 s4, s0
12501223
; GFX9-NEXT: s_mov_b32 s5, s1
1251-
; GFX9-NEXT: s_waitcnt vmcnt(1)
1252-
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
12531224
; GFX9-NEXT: s_waitcnt vmcnt(0)
1254-
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
1255-
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
1256-
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v1
1257-
; GFX9-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
1258-
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
1225+
; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
1226+
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
12591227
; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0
12601228
; GFX9-NEXT: s_endpgm
12611229
;
12621230
; GFX10-LABEL: v_mul_i1:
12631231
; GFX10: ; %bb.0: ; %entry
1264-
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
1265-
; GFX10-NEXT: s_mov_b32 s2, -1
1266-
; GFX10-NEXT: s_mov_b32 s3, 0x31016000
1267-
; GFX10-NEXT: s_mov_b32 s10, s2
1268-
; GFX10-NEXT: s_mov_b32 s11, s3
1232+
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1233+
; GFX10-NEXT: s_mov_b32 s6, -1
1234+
; GFX10-NEXT: s_mov_b32 s7, 0x31016000
1235+
; GFX10-NEXT: s_mov_b32 s10, s6
1236+
; GFX10-NEXT: s_mov_b32 s11, s7
12691237
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1270-
; GFX10-NEXT: s_mov_b32 s8, s6
1271-
; GFX10-NEXT: s_mov_b32 s9, s7
1238+
; GFX10-NEXT: s_mov_b32 s8, s2
1239+
; GFX10-NEXT: s_mov_b32 s9, s3
12721240
; GFX10-NEXT: s_clause 0x1
12731241
; GFX10-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
12741242
; GFX10-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:4
1275-
; GFX10-NEXT: s_mov_b32 s1, s5
1276-
; GFX10-NEXT: s_waitcnt vmcnt(1)
1277-
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
1243+
; GFX10-NEXT: s_mov_b32 s4, s0
1244+
; GFX10-NEXT: s_mov_b32 s5, s1
12781245
; GFX10-NEXT: s_waitcnt vmcnt(0)
1279-
; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
1280-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
1281-
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v1
1282-
; GFX10-NEXT: s_and_b32 s0, vcc_lo, s0
1283-
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1284-
; GFX10-NEXT: s_mov_b32 s0, s4
1285-
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], 0
1246+
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
1247+
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
1248+
; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0
12861249
; GFX10-NEXT: s_endpgm
12871250
;
12881251
; GFX11-LABEL: v_mul_i1:
12891252
; GFX11: ; %bb.0: ; %entry
1290-
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
1291-
; GFX11-NEXT: s_mov_b32 s2, -1
1292-
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
1293-
; GFX11-NEXT: s_mov_b32 s10, s2
1294-
; GFX11-NEXT: s_mov_b32 s11, s3
1253+
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1254+
; GFX11-NEXT: s_mov_b32 s6, -1
1255+
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
1256+
; GFX11-NEXT: s_mov_b32 s10, s6
1257+
; GFX11-NEXT: s_mov_b32 s11, s7
12951258
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1296-
; GFX11-NEXT: s_mov_b32 s8, s6
1297-
; GFX11-NEXT: s_mov_b32 s9, s7
1259+
; GFX11-NEXT: s_mov_b32 s8, s2
1260+
; GFX11-NEXT: s_mov_b32 s9, s3
12981261
; GFX11-NEXT: s_clause 0x1
12991262
; GFX11-NEXT: buffer_load_u8 v0, off, s[8:11], 0
13001263
; GFX11-NEXT: buffer_load_u8 v1, off, s[8:11], 0 offset:4
1301-
; GFX11-NEXT: s_mov_b32 s1, s5
1302-
; GFX11-NEXT: s_waitcnt vmcnt(1)
1303-
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
1264+
; GFX11-NEXT: s_mov_b32 s4, s0
1265+
; GFX11-NEXT: s_mov_b32 s5, s1
13041266
; GFX11-NEXT: s_waitcnt vmcnt(0)
1305-
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
1306-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1307-
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
1308-
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v1
1309-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1310-
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
1311-
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
1312-
; GFX11-NEXT: s_mov_b32 s0, s4
1313-
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
1267+
; GFX11-NEXT: v_mul_lo_u16 v0, v0, v1
1268+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1269+
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
1270+
; GFX11-NEXT: buffer_store_b8 v0, off, s[4:7], 0
13141271
; GFX11-NEXT: s_nop 0
13151272
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
13161273
; GFX11-NEXT: s_endpgm

0 commit comments

Comments
 (0)