@@ -932,9 +932,9 @@ define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
932932define <8 x i16 > @umull_and_v8i16 (<8 x i8 > %src1 , <8 x i16 > %src2 ) {
933933; CHECK-LABEL: umull_and_v8i16:
934934; CHECK: // %bb.0: // %entry
935- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
936935; CHECK-NEXT: bic v1.8h, #255, lsl #8
937- ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
936+ ; CHECK-NEXT: xtn v1.8b, v1.8h
937+ ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
938938; CHECK-NEXT: ret
939939entry:
940940 %in1 = zext <8 x i8 > %src1 to <8 x i16 >
@@ -946,9 +946,9 @@ entry:
946946define <8 x i16 > @umull_and_v8i16_c (<8 x i8 > %src1 , <8 x i16 > %src2 ) {
947947; CHECK-LABEL: umull_and_v8i16_c:
948948; CHECK: // %bb.0: // %entry
949- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
950949; CHECK-NEXT: bic v1.8h, #255, lsl #8
951- ; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
950+ ; CHECK-NEXT: xtn v1.8b, v1.8h
951+ ; CHECK-NEXT: umull v0.8h, v1.8b, v0.8b
952952; CHECK-NEXT: ret
953953entry:
954954 %in1 = zext <8 x i8 > %src1 to <8 x i16 >
@@ -989,9 +989,9 @@ define <8 x i16> @umull_smaller_v8i16(<8 x i4> %src1, <8 x i16> %src2) {
989989; CHECK: // %bb.0: // %entry
990990; CHECK-NEXT: movi v2.8b, #15
991991; CHECK-NEXT: bic v1.8h, #255, lsl #8
992+ ; CHECK-NEXT: xtn v1.8b, v1.8h
992993; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
993- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
994- ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
994+ ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
995995; CHECK-NEXT: ret
996996entry:
997997 %in1 = zext <8 x i4 > %src1 to <8 x i16 >
@@ -1004,9 +1004,9 @@ define <4 x i32> @umull_and_v4i32(<4 x i16> %src1, <4 x i32> %src2) {
10041004; CHECK-LABEL: umull_and_v4i32:
10051005; CHECK: // %bb.0: // %entry
10061006; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
1007- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
10081007; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1009- ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
1008+ ; CHECK-NEXT: xtn v1.4h, v1.4s
1009+ ; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
10101010; CHECK-NEXT: ret
10111011entry:
10121012 %in1 = zext <4 x i16 > %src1 to <4 x i32 >
@@ -1019,12 +1019,13 @@ define <8 x i32> @umull_and_v8i32(<8 x i16> %src1, <8 x i32> %src2) {
10191019; CHECK-LABEL: umull_and_v8i32:
10201020; CHECK: // %bb.0: // %entry
10211021; CHECK-NEXT: movi v3.2d, #0x0000ff000000ff
1022- ; CHECK-NEXT: ushll v4.4s, v0.4h, #0
1023- ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1024- ; CHECK-NEXT: and v5.16b, v1.16b, v3.16b
1025- ; CHECK-NEXT: and v1.16b, v2.16b, v3.16b
1026- ; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s
1027- ; CHECK-NEXT: mul v0.4s, v4.4s, v5.4s
1022+ ; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
1023+ ; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
1024+ ; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
1025+ ; CHECK-NEXT: xtn v1.4h, v1.4s
1026+ ; CHECK-NEXT: xtn v2.4h, v2.4s
1027+ ; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
1028+ ; CHECK-NEXT: umull v1.4s, v4.4h, v2.4h
10281029; CHECK-NEXT: ret
10291030entry:
10301031 %in1 = zext <8 x i16 > %src1 to <8 x i32 >
@@ -1037,11 +1038,11 @@ define <8 x i32> @umull_and_v8i32_dup(<8 x i16> %src1, i32 %src2) {
10371038; CHECK-LABEL: umull_and_v8i32_dup:
10381039; CHECK: // %bb.0: // %entry
10391040; CHECK-NEXT: and w8, w0, #0xff
1040- ; CHECK-NEXT: ushll v1.4s, v0.4h, #0
1041- ; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
1041+ ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
10421042; CHECK-NEXT: dup v2.4s, w8
1043- ; CHECK-NEXT: mul v0.4s, v1.4s, v2.4s
1044- ; CHECK-NEXT: mul v1.4s, v3.4s, v2.4s
1043+ ; CHECK-NEXT: xtn v2.4h, v2.4s
1044+ ; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
1045+ ; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
10451046; CHECK-NEXT: ret
10461047entry:
10471048 %in1 = zext <8 x i16 > %src1 to <8 x i32 >
@@ -1056,16 +1057,9 @@ define <2 x i64> @umull_and_v2i64(<2 x i32> %src1, <2 x i64> %src2) {
10561057; CHECK-LABEL: umull_and_v2i64:
10571058; CHECK: // %bb.0: // %entry
10581059; CHECK-NEXT: movi v2.2d, #0x000000000000ff
1059- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
1060- ; CHECK-NEXT: fmov x10, d0
10611060; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1062- ; CHECK-NEXT: fmov x9, d1
1063- ; CHECK-NEXT: mov x8, v1.d[1]
1064- ; CHECK-NEXT: mov x11, v0.d[1]
1065- ; CHECK-NEXT: mul x9, x10, x9
1066- ; CHECK-NEXT: mul x8, x11, x8
1067- ; CHECK-NEXT: fmov d0, x9
1068- ; CHECK-NEXT: mov v0.d[1], x8
1061+ ; CHECK-NEXT: xtn v1.2s, v1.2d
1062+ ; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
10691063; CHECK-NEXT: ret
10701064entry:
10711065 %in1 = zext <2 x i32 > %src1 to <2 x i64 >
@@ -1078,26 +1072,13 @@ define <4 x i64> @umull_and_v4i64(<4 x i32> %src1, <4 x i64> %src2) {
10781072; CHECK-LABEL: umull_and_v4i64:
10791073; CHECK: // %bb.0: // %entry
10801074; CHECK-NEXT: movi v3.2d, #0x000000000000ff
1081- ; CHECK-NEXT: ushll v4.2d, v0.2s, #0
1082- ; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
1083- ; CHECK-NEXT: fmov x14, d4
1075+ ; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
10841076; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
1085- ; CHECK-NEXT: fmov x11, d0
1086- ; CHECK-NEXT: mov x9, v0.d[1]
1087- ; CHECK-NEXT: and v0.16b, v1.16b, v3.16b
1088- ; CHECK-NEXT: fmov x10, d2
1089- ; CHECK-NEXT: fmov x13, d0
1090- ; CHECK-NEXT: mov x8, v2.d[1]
1091- ; CHECK-NEXT: mov x12, v0.d[1]
1092- ; CHECK-NEXT: mul x10, x11, x10
1093- ; CHECK-NEXT: mov x15, v4.d[1]
1094- ; CHECK-NEXT: mul x11, x14, x13
1095- ; CHECK-NEXT: mul x8, x9, x8
1096- ; CHECK-NEXT: fmov d1, x10
1097- ; CHECK-NEXT: mul x9, x15, x12
1098- ; CHECK-NEXT: fmov d0, x11
1099- ; CHECK-NEXT: mov v1.d[1], x8
1100- ; CHECK-NEXT: mov v0.d[1], x9
1077+ ; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
1078+ ; CHECK-NEXT: xtn v1.2s, v1.2d
1079+ ; CHECK-NEXT: xtn v2.2s, v2.2d
1080+ ; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
1081+ ; CHECK-NEXT: umull v1.2d, v4.2s, v2.2s
11011082; CHECK-NEXT: ret
11021083entry:
11031084 %in1 = zext <4 x i32 > %src1 to <4 x i64 >
@@ -1109,21 +1090,12 @@ entry:
11091090define <4 x i64 > @umull_and_v4i64_dup (<4 x i32 > %src1 , i64 %src2 ) {
11101091; CHECK-LABEL: umull_and_v4i64_dup:
11111092; CHECK: // %bb.0: // %entry
1112- ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
11131093; CHECK-NEXT: and x8, x0, #0xff
1114- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
1115- ; CHECK-NEXT: fmov x9, d1
1116- ; CHECK-NEXT: fmov x11, d0
1117- ; CHECK-NEXT: mov x10, v1.d[1]
1118- ; CHECK-NEXT: mov x12, v0.d[1]
1119- ; CHECK-NEXT: mul x9, x9, x8
1120- ; CHECK-NEXT: mul x11, x11, x8
1121- ; CHECK-NEXT: mul x10, x10, x8
1122- ; CHECK-NEXT: mul x8, x12, x8
1123- ; CHECK-NEXT: fmov d1, x9
1124- ; CHECK-NEXT: fmov d0, x11
1125- ; CHECK-NEXT: mov v1.d[1], x10
1126- ; CHECK-NEXT: mov v0.d[1], x8
1094+ ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1095+ ; CHECK-NEXT: dup v2.2d, x8
1096+ ; CHECK-NEXT: xtn v2.2s, v2.2d
1097+ ; CHECK-NEXT: umull v0.2d, v0.2s, v2.2s
1098+ ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
11271099; CHECK-NEXT: ret
11281100entry:
11291101 %in1 = zext <4 x i32 > %src1 to <4 x i64 >
0 commit comments