@@ -1184,14 +1184,15 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
11841184
11851185; CHECK-LABEL: test_fabs(
11861186; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fabs_param_0];
1187- ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1188- ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1189- ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1190- ; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
1191- ; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
1192- ; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
1193- ; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
1194- ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1187+ ; CHECK-NOF16: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1188+ ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
1189+ ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
1190+ ; CHECK-NOF16-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
1191+ ; CHECK-NOF16-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
1192+ ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
1193+ ; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
1194+ ; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1195+ ; CHECK-F16: and.b32 [[R:%r[0-9]+]], [[A]], 2147450879;
11951196; CHECK: st.param.b32 [func_retval0+0], [[R]];
11961197; CHECK: ret;
11971198define <2 x half > @test_fabs (<2 x half > %a ) #0 {
@@ -1244,15 +1245,18 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
12441245; CHECK-LABEL: test_copysign(
12451246; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0];
12461247; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1];
1247- ; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1248- ; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1249- ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1250- ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1251- ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1252- ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1253- ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1254- ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1255- ; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1248+ ; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1249+ ; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1250+ ; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1251+ ; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1252+ ; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1253+ ; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1254+ ; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1255+ ; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1256+ ; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1257+ ; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880;
1258+ ; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879;
1259+ ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R1]], [[R0]]
12561260; CHECK: st.param.b32 [func_retval0+0], [[R]];
12571261; CHECK: ret;
12581262define <2 x half > @test_copysign (<2 x half > %a , <2 x half > %b ) #0 {
@@ -1263,18 +1267,24 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
12631267; CHECK-LABEL: test_copysign_f32(
12641268; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f32_param_0];
12651269; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1];
1266- ; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1267- ; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
1268- ; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
1269- ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1270- ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1271- ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
1272- ; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
1273- ; CHECK-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
1274- ; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
1275- ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1276- ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1277- ; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1270+ ; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1271+ ; CHECK-NOF16-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]];
1272+ ; CHECK-NOF16-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]];
1273+ ; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1274+ ; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1275+ ; CHECK-NOF16-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648;
1276+ ; CHECK-NOF16-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648;
1277+ ; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; }
1278+ ; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; }
1279+ ; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1280+ ; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1281+ ; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1282+ ; CHECK-F16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[B1]];
1283+ ; CHECK-F16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[B0]];
1284+ ; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]};
1285+ ; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880;
1286+ ; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879;
1287+ ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]]
12781288; CHECK: st.param.b32 [func_retval0+0], [[R]];
12791289; CHECK: ret;
12801290define <2 x half > @test_copysign_f32 (<2 x half > %a , <2 x float > %b ) #0 {
@@ -1286,20 +1296,26 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
12861296; CHECK-LABEL: test_copysign_f64(
12871297; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f64_param_0];
12881298; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1];
1289- ; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1290- ; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
1291- ; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
1292- ; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1293- ; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1294- ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
1295- ; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
1296- ; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
1297- ; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
1298- ; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
1299- ; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
1300- ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1301- ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1302- ; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1299+ ; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1300+ ; CHECK-NOF16-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]];
1301+ ; CHECK-NOF16-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]];
1302+ ; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767;
1303+ ; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767;
1304+ ; CHECK-NOF16-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808;
1305+ ; CHECK-NOF16-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808;
1306+ ; CHECK-NOF16-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48;
1307+ ; CHECK-NOF16-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48;
1308+ ; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]];
1309+ ; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]];
1310+ ; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]];
1311+ ; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]];
1312+ ; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
1313+ ; CHECK-F16-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[B1]];
1314+ ; CHECK-F16-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[B0]];
1315+ ; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]};
1316+ ; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880;
1317+ ; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879;
1318+ ; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]];
13031319; CHECK: st.param.b32 [func_retval0+0], [[R]];
13041320; CHECK: ret;
13051321define <2 x half > @test_copysign_f64 (<2 x half > %a , <2 x double > %b ) #0 {
@@ -1311,16 +1327,22 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
13111327; CHECK-LABEL: test_copysign_extended(
13121328; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_extended_param_0];
13131329; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_extended_param_1];
1314- ; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1315- ; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1316- ; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1317- ; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1318- ; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1319- ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1320- ; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1321- ; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1322- ; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]];
1323- ; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]];
1330+ ; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
1331+ ; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
1332+ ; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767;
1333+ ; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767;
1334+ ; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768;
1335+ ; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768;
1336+ ; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]];
1337+ ; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]];
1338+ ; CHECK-NOF16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]];
1339+ ; CHECK-NOF16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]];
1340+ ; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880;
1341+ ; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879;
1342+ ; CHECK-F16-DAG: or.b32 [[R2:%r[0-9]+]], [[R1]], [[R0]]
1343+ ; CHECK-F16-DAG: mov.b32 {[[R3:%rs[0-9]+]], [[R4:%rs[0-9]+]]}, [[R2]]
1344+ ; CHECK-F16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R3]]
1345+ ; CHECK-F16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R4]]
13241346; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]};
13251347; CHECK: ret;
13261348define <2 x float > @test_copysign_extended (<2 x half > %a , <2 x half > %b ) #0 {
0 commit comments