@@ -2098,6 +2098,113 @@ bb11: ; preds = %bb10, %bb2
20982098 br label %bb1
20992099}
21002100
; Test function crash_lshlrevb16_not_reg_op.
; NOTE(review): judging by the name, this is a regression test for a crash hit
; when a source operand of v_lshlrev_b16 is not a register - confirm against
; the change that introduced it.
; In every expected output below, v_lshlrev_b16 is emitted with two immediate
; sources (8 and 1). The GFX89, GFX9 and GFX10 expectation groups then combine
; the result using v_or_b32_sdwa with src0_sel:BYTE_0, whereas the NOSDWA group
; masks with 0xff via v_and_b32 and uses a plain v_or_b32.
; NOTE(review): the check lines look machine-generated; presumably they should
; be regenerated rather than edited by hand - confirm.
2101+ define void @crash_lshlrevb16_not_reg_op () {
2102+ ; NOSDWA-LABEL: crash_lshlrevb16_not_reg_op:
2103+ ; NOSDWA: ; %bb.0: ; %bb0
2104+ ; NOSDWA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2105+ ; NOSDWA-NEXT: s_mov_b64 s[4:5], 0
2106+ ; NOSDWA-NEXT: s_and_b64 vcc, exec, -1
2107+ ; NOSDWA-NEXT: v_lshlrev_b16_e64 v3, 8, 1
2108+ ; NOSDWA-NEXT: .LBB22_1: ; %bb1
2109+ ; NOSDWA-NEXT: ; =>This Inner Loop Header: Depth=1
2110+ ; NOSDWA-NEXT: v_mov_b32_e32 v0, s4
2111+ ; NOSDWA-NEXT: v_mov_b32_e32 v2, 0xff
2112+ ; NOSDWA-NEXT: s_lshl_b32 s6, s4, 3
2113+ ; NOSDWA-NEXT: v_mov_b32_e32 v1, s5
2114+ ; NOSDWA-NEXT: s_mov_b64 s[4:5], 1
2115+ ; NOSDWA-NEXT: v_and_b32_e32 v2, s4, v2
2116+ ; NOSDWA-NEXT: v_or_b32_e32 v2, v2, v3
2117+ ; NOSDWA-NEXT: v_lshrrev_b16_e32 v2, s6, v2
2118+ ; NOSDWA-NEXT: flat_store_byte v[0:1], v2
2119+ ; NOSDWA-NEXT: s_mov_b64 vcc, vcc
2120+ ; NOSDWA-NEXT: s_cbranch_vccnz .LBB22_1
2121+ ; NOSDWA-NEXT: ; %bb.2: ; %DummyReturnBlock
2122+ ; NOSDWA-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2123+ ; NOSDWA-NEXT: s_setpc_b64 s[30:31]
2124+ ;
2125+ ; GFX89-LABEL: crash_lshlrevb16_not_reg_op:
2126+ ; GFX89: ; %bb.0: ; %bb0
2127+ ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2128+ ; GFX89-NEXT: s_mov_b64 s[4:5], 0
2129+ ; GFX89-NEXT: s_and_b64 vcc, exec, -1
2130+ ; GFX89-NEXT: v_lshlrev_b16_e64 v0, 8, 1
2131+ ; GFX89-NEXT: .LBB22_1: ; %bb1
2132+ ; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
2133+ ; GFX89-NEXT: v_mov_b32_e32 v3, s4
2134+ ; GFX89-NEXT: s_lshl_b32 s6, s4, 3
2135+ ; GFX89-NEXT: v_mov_b32_e32 v1, s4
2136+ ; GFX89-NEXT: v_or_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2137+ ; GFX89-NEXT: v_mov_b32_e32 v2, s5
2138+ ; GFX89-NEXT: s_mov_b64 s[4:5], 1
2139+ ; GFX89-NEXT: v_lshrrev_b16_e32 v3, s6, v3
2140+ ; GFX89-NEXT: flat_store_byte v[1:2], v3
2141+ ; GFX89-NEXT: s_mov_b64 vcc, vcc
2142+ ; GFX89-NEXT: s_cbranch_vccnz .LBB22_1
2143+ ; GFX89-NEXT: ; %bb.2: ; %DummyReturnBlock
2144+ ; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2145+ ; GFX89-NEXT: s_setpc_b64 s[30:31]
2146+ ;
2147+ ; GFX9-LABEL: crash_lshlrevb16_not_reg_op:
2148+ ; GFX9: ; %bb.0: ; %bb0
2149+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150+ ; GFX9-NEXT: s_mov_b64 s[4:5], 0
2151+ ; GFX9-NEXT: v_lshlrev_b16_e64 v0, 8, 1
2152+ ; GFX9-NEXT: s_and_b64 vcc, exec, -1
2153+ ; GFX9-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2154+ ; GFX9-NEXT: .LBB22_1: ; %bb1
2155+ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
2156+ ; GFX9-NEXT: s_lshl_b32 s6, s4, 3
2157+ ; GFX9-NEXT: v_mov_b32_e32 v1, s4
2158+ ; GFX9-NEXT: v_mov_b32_e32 v2, s5
2159+ ; GFX9-NEXT: s_mov_b64 s[4:5], 1
2160+ ; GFX9-NEXT: v_lshrrev_b16_e32 v3, s6, v0
2161+ ; GFX9-NEXT: flat_store_byte v[1:2], v3
2162+ ; GFX9-NEXT: s_mov_b64 vcc, vcc
2163+ ; GFX9-NEXT: s_cbranch_vccnz .LBB22_1
2164+ ; GFX9-NEXT: ; %bb.2: ; %DummyReturnBlock
2165+ ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2166+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
2167+ ;
2168+ ; GFX10-LABEL: crash_lshlrevb16_not_reg_op:
2169+ ; GFX10: ; %bb.0: ; %bb0
2170+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2171+ ; GFX10-NEXT: v_lshlrev_b16 v0, 8, 1
2172+ ; GFX10-NEXT: s_mov_b32 vcc_lo, exec_lo
2173+ ; GFX10-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2174+ ; GFX10-NEXT: s_mov_b64 s[4:5], 0
2175+ ; GFX10-NEXT: .LBB22_1: ; %bb1
2176+ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
2177+ ; GFX10-NEXT: s_lshl_b32 s6, s4, 3
2178+ ; GFX10-NEXT: v_mov_b32_e32 v1, s4
2179+ ; GFX10-NEXT: v_mov_b32_e32 v2, s5
2180+ ; GFX10-NEXT: v_lshrrev_b16 v3, s6, v0
2181+ ; GFX10-NEXT: s_mov_b64 s[4:5], 1
2182+ ; GFX10-NEXT: flat_store_byte v[1:2], v3
2183+ ; GFX10-NEXT: s_cbranch_vccnz .LBB22_1
2184+ ; GFX10-NEXT: ; %bb.2: ; %DummyReturnBlock
2185+ ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2186+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
; %1 is a two-byte stack slot in addrspace(5); %2 addresses its second byte
; (element index 1).
2187+ %1 = alloca [2 x i8 ], align 1 , addrspace (5 )
2188+ %2 = getelementptr [2 x i8 ], ptr addrspace (5 ) %1 , i32 0 , i32 1
2189+ br label %bb0
2190+
; bb0 writes the constant 1 into the second byte of the slot, then enters the
; loop.
2191+ bb0:
2192+ store i8 1 , ptr addrspace (5 ) %2 , align 1
2193+ br label %bb1
2194+
; bb1 is a self-loop. %3 is 0 on entry from bb0 and 1 on every trip around the
; back edge. Its truncated value indexes the stack slot, and the loaded byte
; is stored to the address formed as null + %3.
2195+ bb1:
2196+ %3 = phi i64 [ 1 , %bb1 ], [ 0 , %bb0 ]
2197+ %4 = trunc i64 %3 to i32
2198+ %5 = getelementptr i8 , ptr addrspace (5 ) %1 , i32 %4
2199+ %6 = load i8 , ptr addrspace (5 ) %5 , align 1
2200+ %7 = getelementptr i8 , ptr null , i64 %3
2201+ store i8 %6 , ptr %7 , align 1
; The branch condition is the constant false, so control always returns to
; bb1 and the bb2 edge is never taken.
2202+ br i1 false , label %bb2 , label %bb1
2203+
; bb2 only jumps back to bb0; its sole predecessor edge is the never-taken
; branch above.
2204+ bb2:
2205+ br label %bb0
2206+ }
2207+
21012208declare i32 @llvm.amdgcn.workitem.id.x ()
21022209
21032210attributes #0 = { "denormal-fp-math" ="preserve-sign,preserve-sign" }
0 commit comments