@@ -932,9 +932,9 @@ define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
932
932
define <8 x i16 > @umull_and_v8i16 (<8 x i8 > %src1 , <8 x i16 > %src2 ) {
933
933
; CHECK-LABEL: umull_and_v8i16:
934
934
; CHECK: // %bb.0: // %entry
935
- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
936
935
; CHECK-NEXT: bic v1.8h, #255, lsl #8
937
- ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
936
+ ; CHECK-NEXT: xtn v1.8b, v1.8h
937
+ ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
938
938
; CHECK-NEXT: ret
939
939
entry:
940
940
%in1 = zext <8 x i8 > %src1 to <8 x i16 >
@@ -946,9 +946,9 @@ entry:
946
946
define <8 x i16 > @umull_and_v8i16_c (<8 x i8 > %src1 , <8 x i16 > %src2 ) {
947
947
; CHECK-LABEL: umull_and_v8i16_c:
948
948
; CHECK: // %bb.0: // %entry
949
- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
950
949
; CHECK-NEXT: bic v1.8h, #255, lsl #8
951
- ; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
950
+ ; CHECK-NEXT: xtn v1.8b, v1.8h
951
+ ; CHECK-NEXT: umull v0.8h, v1.8b, v0.8b
952
952
; CHECK-NEXT: ret
953
953
entry:
954
954
%in1 = zext <8 x i8 > %src1 to <8 x i16 >
@@ -989,9 +989,9 @@ define <8 x i16> @umull_smaller_v8i16(<8 x i4> %src1, <8 x i16> %src2) {
989
989
; CHECK: // %bb.0: // %entry
990
990
; CHECK-NEXT: movi v2.8b, #15
991
991
; CHECK-NEXT: bic v1.8h, #255, lsl #8
992
+ ; CHECK-NEXT: xtn v1.8b, v1.8h
992
993
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
993
- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
994
- ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
994
+ ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
995
995
; CHECK-NEXT: ret
996
996
entry:
997
997
%in1 = zext <8 x i4 > %src1 to <8 x i16 >
@@ -1004,9 +1004,9 @@ define <4 x i32> @umull_and_v4i32(<4 x i16> %src1, <4 x i32> %src2) {
1004
1004
; CHECK-LABEL: umull_and_v4i32:
1005
1005
; CHECK: // %bb.0: // %entry
1006
1006
; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
1007
- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1008
1007
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1009
- ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
1008
+ ; CHECK-NEXT: xtn v1.4h, v1.4s
1009
+ ; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
1010
1010
; CHECK-NEXT: ret
1011
1011
entry:
1012
1012
%in1 = zext <4 x i16 > %src1 to <4 x i32 >
@@ -1019,12 +1019,13 @@ define <8 x i32> @umull_and_v8i32(<8 x i16> %src1, <8 x i32> %src2) {
1019
1019
; CHECK-LABEL: umull_and_v8i32:
1020
1020
; CHECK: // %bb.0: // %entry
1021
1021
; CHECK-NEXT: movi v3.2d, #0x0000ff000000ff
1022
- ; CHECK-NEXT: ushll v4.4s, v0.4h, #0
1023
- ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1024
- ; CHECK-NEXT: and v5.16b, v1.16b, v3.16b
1025
- ; CHECK-NEXT: and v1.16b, v2.16b, v3.16b
1026
- ; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s
1027
- ; CHECK-NEXT: mul v0.4s, v4.4s, v5.4s
1022
+ ; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
1023
+ ; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
1024
+ ; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
1025
+ ; CHECK-NEXT: xtn v1.4h, v1.4s
1026
+ ; CHECK-NEXT: xtn v2.4h, v2.4s
1027
+ ; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
1028
+ ; CHECK-NEXT: umull v1.4s, v4.4h, v2.4h
1028
1029
; CHECK-NEXT: ret
1029
1030
entry:
1030
1031
%in1 = zext <8 x i16 > %src1 to <8 x i32 >
@@ -1037,11 +1038,11 @@ define <8 x i32> @umull_and_v8i32_dup(<8 x i16> %src1, i32 %src2) {
1037
1038
; CHECK-LABEL: umull_and_v8i32_dup:
1038
1039
; CHECK: // %bb.0: // %entry
1039
1040
; CHECK-NEXT: and w8, w0, #0xff
1040
- ; CHECK-NEXT: ushll v1.4s, v0.4h, #0
1041
- ; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
1041
+ ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1042
1042
; CHECK-NEXT: dup v2.4s, w8
1043
- ; CHECK-NEXT: mul v0.4s, v1.4s, v2.4s
1044
- ; CHECK-NEXT: mul v1.4s, v3.4s, v2.4s
1043
+ ; CHECK-NEXT: xtn v2.4h, v2.4s
1044
+ ; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
1045
+ ; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
1045
1046
; CHECK-NEXT: ret
1046
1047
entry:
1047
1048
%in1 = zext <8 x i16 > %src1 to <8 x i32 >
@@ -1056,16 +1057,9 @@ define <2 x i64> @umull_and_v2i64(<2 x i32> %src1, <2 x i64> %src2) {
1056
1057
; CHECK-LABEL: umull_and_v2i64:
1057
1058
; CHECK: // %bb.0: // %entry
1058
1059
; CHECK-NEXT: movi v2.2d, #0x000000000000ff
1059
- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
1060
- ; CHECK-NEXT: fmov x10, d0
1061
1060
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1062
- ; CHECK-NEXT: fmov x9, d1
1063
- ; CHECK-NEXT: mov x8, v1.d[1]
1064
- ; CHECK-NEXT: mov x11, v0.d[1]
1065
- ; CHECK-NEXT: mul x9, x10, x9
1066
- ; CHECK-NEXT: mul x8, x11, x8
1067
- ; CHECK-NEXT: fmov d0, x9
1068
- ; CHECK-NEXT: mov v0.d[1], x8
1061
+ ; CHECK-NEXT: xtn v1.2s, v1.2d
1062
+ ; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
1069
1063
; CHECK-NEXT: ret
1070
1064
entry:
1071
1065
%in1 = zext <2 x i32 > %src1 to <2 x i64 >
@@ -1078,26 +1072,13 @@ define <4 x i64> @umull_and_v4i64(<4 x i32> %src1, <4 x i64> %src2) {
1078
1072
; CHECK-LABEL: umull_and_v4i64:
1079
1073
; CHECK: // %bb.0: // %entry
1080
1074
; CHECK-NEXT: movi v3.2d, #0x000000000000ff
1081
- ; CHECK-NEXT: ushll v4.2d, v0.2s, #0
1082
- ; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
1083
- ; CHECK-NEXT: fmov x14, d4
1075
+ ; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
1084
1076
; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
1085
- ; CHECK-NEXT: fmov x11, d0
1086
- ; CHECK-NEXT: mov x9, v0.d[1]
1087
- ; CHECK-NEXT: and v0.16b, v1.16b, v3.16b
1088
- ; CHECK-NEXT: fmov x10, d2
1089
- ; CHECK-NEXT: fmov x13, d0
1090
- ; CHECK-NEXT: mov x8, v2.d[1]
1091
- ; CHECK-NEXT: mov x12, v0.d[1]
1092
- ; CHECK-NEXT: mul x10, x11, x10
1093
- ; CHECK-NEXT: mov x15, v4.d[1]
1094
- ; CHECK-NEXT: mul x11, x14, x13
1095
- ; CHECK-NEXT: mul x8, x9, x8
1096
- ; CHECK-NEXT: fmov d1, x10
1097
- ; CHECK-NEXT: mul x9, x15, x12
1098
- ; CHECK-NEXT: fmov d0, x11
1099
- ; CHECK-NEXT: mov v1.d[1], x8
1100
- ; CHECK-NEXT: mov v0.d[1], x9
1077
+ ; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
1078
+ ; CHECK-NEXT: xtn v1.2s, v1.2d
1079
+ ; CHECK-NEXT: xtn v2.2s, v2.2d
1080
+ ; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
1081
+ ; CHECK-NEXT: umull v1.2d, v4.2s, v2.2s
1101
1082
; CHECK-NEXT: ret
1102
1083
entry:
1103
1084
%in1 = zext <4 x i32 > %src1 to <4 x i64 >
@@ -1109,21 +1090,12 @@ entry:
1109
1090
define <4 x i64 > @umull_and_v4i64_dup (<4 x i32 > %src1 , i64 %src2 ) {
1110
1091
; CHECK-LABEL: umull_and_v4i64_dup:
1111
1092
; CHECK: // %bb.0: // %entry
1112
- ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
1113
1093
; CHECK-NEXT: and x8, x0, #0xff
1114
- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
1115
- ; CHECK-NEXT: fmov x9, d1
1116
- ; CHECK-NEXT: fmov x11, d0
1117
- ; CHECK-NEXT: mov x10, v1.d[1]
1118
- ; CHECK-NEXT: mov x12, v0.d[1]
1119
- ; CHECK-NEXT: mul x9, x9, x8
1120
- ; CHECK-NEXT: mul x11, x11, x8
1121
- ; CHECK-NEXT: mul x10, x10, x8
1122
- ; CHECK-NEXT: mul x8, x12, x8
1123
- ; CHECK-NEXT: fmov d1, x9
1124
- ; CHECK-NEXT: fmov d0, x11
1125
- ; CHECK-NEXT: mov v1.d[1], x10
1126
- ; CHECK-NEXT: mov v0.d[1], x8
1094
+ ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1095
+ ; CHECK-NEXT: dup v2.2d, x8
1096
+ ; CHECK-NEXT: xtn v2.2s, v2.2d
1097
+ ; CHECK-NEXT: umull v0.2d, v0.2s, v2.2s
1098
+ ; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
1127
1099
; CHECK-NEXT: ret
1128
1100
entry:
1129
1101
%in1 = zext <4 x i32 > %src1 to <4 x i64 >
0 commit comments