@@ -90,10 +90,11 @@ define <4 x i64> @v2i64_i16_04812(<16 x i16> %a) {
9090define <4 x i64 > @v2i64_i16_15913 (<16 x i16 > %a ) {
9191; CHECK-LABEL: v2i64_i16_15913:
9292; CHECK: // %bb.0:
93- ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
94- ; CHECK-NEXT: ushr v0.4s, v0.4s, #16
95- ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
96- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
93+ ; CHECK-NEXT: movi v2.2d, #0x0000000000ffff
94+ ; CHECK-NEXT: ushr v0.2d, v0.2d, #16
95+ ; CHECK-NEXT: ushr v1.2d, v1.2d, #16
96+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
97+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
9798; CHECK-NEXT: ret
9899 %s1 = shufflevector <16 x i16 > %a , <16 x i16 > undef , <4 x i32 > <i32 1 , i32 5 , i32 9 , i32 13 >
99100 %z1 = zext <4 x i16 > %s1 to <4 x i64 >
@@ -117,10 +118,8 @@ define <4 x i64> @v2i64_i16_261014(<16 x i16> %a) {
117118define <4 x i64 > @v2i64_i16_371115 (<16 x i16 > %a ) {
118119; CHECK-LABEL: v2i64_i16_371115:
119120; CHECK: // %bb.0:
120- ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
121- ; CHECK-NEXT: ushr v0.4s, v0.4s, #16
122- ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
123- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
121+ ; CHECK-NEXT: ushr v0.2d, v0.2d, #48
122+ ; CHECK-NEXT: ushr v1.2d, v1.2d, #48
124123; CHECK-NEXT: ret
125124 %s1 = shufflevector <16 x i16 > %a , <16 x i16 > undef , <4 x i32 > <i32 3 , i32 7 , i32 11 , i32 15 >
126125 %z1 = zext <4 x i16 > %s1 to <4 x i64 >
@@ -142,8 +141,7 @@ define <4 x i32> @v4i32_0246(<8 x i16> %a, <8 x i16> %b) {
142141define <4 x i32 > @v4i32_1357 (<8 x i16 > %a , <8 x i16 > %b ) {
143142; CHECK-LABEL: v4i32_1357:
144143; CHECK: // %bb.0:
145- ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v0.8h
146- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
144+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #16
147145; CHECK-NEXT: ret
148146 %c = shufflevector <8 x i16 > %a , <8 x i16 > %b , <4 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 >
149147 %d = zext <4 x i16 > %c to <4 x i32 >
@@ -210,8 +208,7 @@ define <8 x i16> @v8i16_0246(<16 x i8> %a, <16 x i8> %b) {
210208define <8 x i16 > @v8i16_1357 (<16 x i8 > %a , <16 x i8 > %b ) {
211209; CHECK-LABEL: v8i16_1357:
212210; CHECK: // %bb.0:
213- ; CHECK-NEXT: uzp2 v0.16b, v0.16b, v0.16b
214- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
211+ ; CHECK-NEXT: ushr v0.8h, v0.8h, #8
215212; CHECK-NEXT: ret
216213 %c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 , i32 9 , i32 11 , i32 13 , i32 15 >
217214 %d = zext <8 x i8 > %c to <8 x i16 >
@@ -278,8 +275,7 @@ define <8 x i32> @v8i32_0246(<16 x i8> %a, <16 x i8> %b) {
278275define <8 x i32 > @v8i32_1357 (<16 x i8 > %a , <16 x i8 > %b ) {
279276; CHECK-LABEL: v8i32_1357:
280277; CHECK: // %bb.0:
281- ; CHECK-NEXT: uzp2 v0.16b, v0.16b, v0.16b
282- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
278+ ; CHECK-NEXT: ushr v0.8h, v0.8h, #8
283279; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
284280; CHECK-NEXT: ushll v0.4s, v0.4h, #0
285281; CHECK-NEXT: ret
@@ -291,10 +287,9 @@ define <8 x i32> @v8i32_1357(<16 x i8> %a, <16 x i8> %b) {
291287define <8 x i32 > @v8i32_04812 (<16 x i8 > %a , <16 x i8 > %b ) {
292288; CHECK-LABEL: v8i32_04812:
293289; CHECK: // %bb.0:
294- ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
295- ; CHECK-NEXT: bic v0.8h, #255, lsl #8
296- ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
297- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
290+ ; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
291+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
292+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
298293; CHECK-NEXT: ret
299294 %c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 0 , i32 4 , i32 8 , i32 12 , i32 16 , i32 20 , i32 24 , i32 28 >
300295 %d = zext <8 x i8 > %c to <8 x i32 >
@@ -304,10 +299,11 @@ define <8 x i32> @v8i32_04812(<16 x i8> %a, <16 x i8> %b) {
304299define <8 x i32 > @v8i32_15913 (<16 x i8 > %a , <16 x i8 > %b ) {
305300; CHECK-LABEL: v8i32_15913:
306301; CHECK: // %bb.0:
307- ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
308- ; CHECK-NEXT: ushr v0.8h, v0.8h, #8
309- ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
310- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
302+ ; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
303+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #8
304+ ; CHECK-NEXT: ushr v1.4s, v1.4s, #8
305+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
306+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
311307; CHECK-NEXT: ret
312308 %c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 1 , i32 5 , i32 9 , i32 13 , i32 17 , i32 21 , i32 25 , i32 29 >
313309 %d = zext <8 x i8 > %c to <8 x i32 >
@@ -317,10 +313,10 @@ define <8 x i32> @v8i32_15913(<16 x i8> %a, <16 x i8> %b) {
317313define <8 x i32 > @v8i32_261014 (<16 x i8 > %a , <16 x i8 > %b ) {
318314; CHECK-LABEL: v8i32_261014:
319315; CHECK: // %bb.0:
320- ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
321- ; CHECK-NEXT: bic v0.8h, #255, lsl #8
322- ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
323- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
316+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #16
317+ ; CHECK-NEXT: ushr v1.4s, v1.4s, #16
318+ ; CHECK-NEXT: bic v0.4s, #255, lsl #8
319+ ; CHECK-NEXT: bic v1.4s, #255, lsl #8
324320; CHECK-NEXT: ret
325321 %c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 2 , i32 6 , i32 10 , i32 14 , i32 18 , i32 22 , i32 26 , i32 30 >
326322 %d = zext <8 x i8 > %c to <8 x i32 >
@@ -330,10 +326,8 @@ define <8 x i32> @v8i32_261014(<16 x i8> %a, <16 x i8> %b) {
330326define <8 x i32 > @v8i32_371115 (<16 x i8 > %a , <16 x i8 > %b ) {
331327; CHECK-LABEL: v8i32_371115:
332328; CHECK: // %bb.0:
333- ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
334- ; CHECK-NEXT: ushr v0.8h, v0.8h, #8
335- ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
336- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
329+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #24
330+ ; CHECK-NEXT: ushr v1.4s, v1.4s, #24
337331; CHECK-NEXT: ret
338332 %c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 3 , i32 7 , i32 11 , i32 15 , i32 19 , i32 23 , i32 27 , i32 31 >
339333 %d = zext <8 x i8 > %c to <8 x i32 >
@@ -407,77 +401,59 @@ define <8 x i64> @zext_load_add(ptr %p) {
407401define <8 x double > @uitofp_fadd (<32 x i16 > %l ) {
408402; CHECK-LABEL: uitofp_fadd:
409403; CHECK: // %bb.0:
410- ; CHECK-NEXT: uzp1 v5.4s, v0.4s, v3.4s
411- ; CHECK-NEXT: uzp1 v6.4s, v0.4s, v1.4s
412- ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
413- ; CHECK-NEXT: movi d4, #0x00ffff0000ffff
414- ; CHECK-NEXT: uzp1 v7.4s, v2.4s, v3.4s
415- ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
416- ; CHECK-NEXT: ext v16.16b, v6.16b, v6.16b, #8
417- ; CHECK-NEXT: ext v5.16b, v5.16b, v5.16b, #8
418- ; CHECK-NEXT: uzp2 v1.4s, v0.4s, v3.4s
419- ; CHECK-NEXT: and v17.8b, v6.8b, v4.8b
420- ; CHECK-NEXT: and v18.8b, v7.8b, v4.8b
421- ; CHECK-NEXT: ushr v6.2s, v6.2s, #16
422- ; CHECK-NEXT: ushr v7.2s, v7.2s, #16
423- ; CHECK-NEXT: and v21.8b, v0.8b, v4.8b
424- ; CHECK-NEXT: and v22.8b, v2.8b, v4.8b
425- ; CHECK-NEXT: ushr v2.2s, v2.2s, #16
426- ; CHECK-NEXT: and v19.8b, v16.8b, v4.8b
427- ; CHECK-NEXT: and v20.8b, v5.8b, v4.8b
428- ; CHECK-NEXT: ushll v3.2d, v17.2s, #0
429- ; CHECK-NEXT: ushll v17.2d, v18.2s, #0
430- ; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
431- ; CHECK-NEXT: ushr v16.2s, v16.2s, #16
432- ; CHECK-NEXT: ushr v5.2s, v5.2s, #16
433- ; CHECK-NEXT: ushll v6.2d, v6.2s, #0
434- ; CHECK-NEXT: ushll v7.2d, v7.2s, #0
435- ; CHECK-NEXT: ushll v18.2d, v19.2s, #0
436- ; CHECK-NEXT: ushll v19.2d, v20.2s, #0
437- ; CHECK-NEXT: ext v20.16b, v0.16b, v0.16b, #8
438- ; CHECK-NEXT: ushr v0.2s, v0.2s, #16
439- ; CHECK-NEXT: ushll v16.2d, v16.2s, #0
440- ; CHECK-NEXT: ushll v21.2d, v21.2s, #0
441- ; CHECK-NEXT: ushll v5.2d, v5.2s, #0
442- ; CHECK-NEXT: ushll v22.2d, v22.2s, #0
443- ; CHECK-NEXT: ushll v2.2d, v2.2s, #0
444- ; CHECK-NEXT: ucvtf v3.2d, v3.2d
445- ; CHECK-NEXT: ucvtf v17.2d, v17.2d
446- ; CHECK-NEXT: ucvtf v6.2d, v6.2d
447- ; CHECK-NEXT: and v23.8b, v20.8b, v4.8b
448- ; CHECK-NEXT: and v4.8b, v1.8b, v4.8b
449- ; CHECK-NEXT: ushr v20.2s, v20.2s, #16
450- ; CHECK-NEXT: ushr v1.2s, v1.2s, #16
451- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
452- ; CHECK-NEXT: ucvtf v7.2d, v7.2d
404+ ; CHECK-NEXT: movi v4.2d, #0x0000000000ffff
405+ ; CHECK-NEXT: ushr v5.2d, v0.2d, #16
406+ ; CHECK-NEXT: ushr v6.2d, v1.2d, #16
407+ ; CHECK-NEXT: ushr v7.2d, v2.2d, #16
408+ ; CHECK-NEXT: ushr v17.2d, v3.2d, #16
409+ ; CHECK-NEXT: ushr v20.2d, v0.2d, #32
410+ ; CHECK-NEXT: ushr v22.2d, v1.2d, #32
411+ ; CHECK-NEXT: ushr v23.2d, v2.2d, #32
412+ ; CHECK-NEXT: ushr v24.2d, v3.2d, #32
413+ ; CHECK-NEXT: and v16.16b, v0.16b, v4.16b
414+ ; CHECK-NEXT: and v18.16b, v1.16b, v4.16b
415+ ; CHECK-NEXT: and v19.16b, v2.16b, v4.16b
416+ ; CHECK-NEXT: and v21.16b, v3.16b, v4.16b
417+ ; CHECK-NEXT: and v5.16b, v5.16b, v4.16b
418+ ; CHECK-NEXT: and v6.16b, v6.16b, v4.16b
419+ ; CHECK-NEXT: and v7.16b, v7.16b, v4.16b
420+ ; CHECK-NEXT: and v17.16b, v17.16b, v4.16b
421+ ; CHECK-NEXT: and v20.16b, v20.16b, v4.16b
422+ ; CHECK-NEXT: and v22.16b, v22.16b, v4.16b
423+ ; CHECK-NEXT: and v23.16b, v23.16b, v4.16b
424+ ; CHECK-NEXT: and v4.16b, v24.16b, v4.16b
425+ ; CHECK-NEXT: ushr v0.2d, v0.2d, #48
426+ ; CHECK-NEXT: ushr v1.2d, v1.2d, #48
427+ ; CHECK-NEXT: ushr v2.2d, v2.2d, #48
428+ ; CHECK-NEXT: ushr v3.2d, v3.2d, #48
429+ ; CHECK-NEXT: ucvtf v16.2d, v16.2d
453430; CHECK-NEXT: ucvtf v18.2d, v18.2d
454431; CHECK-NEXT: ucvtf v19.2d, v19.2d
455- ; CHECK-NEXT: ucvtf v16.2d, v16.2d
456- ; CHECK-NEXT: ushll v23.2d, v23.2s, #0
457- ; CHECK-NEXT: ushll v4.2d, v4.2s, #0
458- ; CHECK-NEXT: ushll v20.2d, v20.2s, #0
459- ; CHECK-NEXT: ushll v1.2d, v1.2s, #0
460- ; CHECK-NEXT: ucvtf v5.2d, v5.2d
461432; CHECK-NEXT: ucvtf v21.2d, v21.2d
433+ ; CHECK-NEXT: ucvtf v5.2d, v5.2d
434+ ; CHECK-NEXT: ucvtf v6.2d, v6.2d
435+ ; CHECK-NEXT: ucvtf v7.2d, v7.2d
436+ ; CHECK-NEXT: ucvtf v17.2d, v17.2d
437+ ; CHECK-NEXT: ucvtf v20.2d, v20.2d
462438; CHECK-NEXT: ucvtf v22.2d, v22.2d
463- ; CHECK-NEXT: ucvtf v0.2d, v0.2d
464- ; CHECK-NEXT: ucvtf v2.2d, v2.2d
465439; CHECK-NEXT: ucvtf v23.2d, v23.2d
466440; CHECK-NEXT: ucvtf v4.2d, v4.2d
467- ; CHECK-NEXT: ucvtf v20.2d, v20.2d
441+ ; CHECK-NEXT: ucvtf v0.2d, v0.2d
468442; CHECK-NEXT: ucvtf v1.2d, v1.2d
469- ; CHECK-NEXT: fadd v16.2d, v18.2d, v16.2d
470- ; CHECK-NEXT: fadd v7.2d, v17.2d, v7.2d
471- ; CHECK-NEXT: fadd v5.2d, v19.2d, v5.2d
472- ; CHECK-NEXT: fadd v3.2d, v3.2d, v6.2d
473- ; CHECK-NEXT: fadd v0.2d, v21.2d, v0.2d
474- ; CHECK-NEXT: fadd v2.2d, v22.2d, v2.2d
475- ; CHECK-NEXT: fadd v4.2d, v4.2d, v1.2d
476- ; CHECK-NEXT: fadd v1.2d, v23.2d, v20.2d
477- ; CHECK-NEXT: fadd v0.2d, v3.2d, v0.2d
443+ ; CHECK-NEXT: ucvtf v2.2d, v2.2d
444+ ; CHECK-NEXT: ucvtf v3.2d, v3.2d
445+ ; CHECK-NEXT: fadd v5.2d, v16.2d, v5.2d
446+ ; CHECK-NEXT: fadd v17.2d, v21.2d, v17.2d
447+ ; CHECK-NEXT: fadd v7.2d, v19.2d, v7.2d
448+ ; CHECK-NEXT: fadd v6.2d, v18.2d, v6.2d
449+ ; CHECK-NEXT: fadd v0.2d, v20.2d, v0.2d
450+ ; CHECK-NEXT: fadd v1.2d, v22.2d, v1.2d
451+ ; CHECK-NEXT: fadd v3.2d, v4.2d, v3.2d
452+ ; CHECK-NEXT: fadd v2.2d, v23.2d, v2.2d
453+ ; CHECK-NEXT: fadd v0.2d, v5.2d, v0.2d
454+ ; CHECK-NEXT: fadd v1.2d, v6.2d, v1.2d
478455; CHECK-NEXT: fadd v2.2d, v7.2d, v2.2d
479- ; CHECK-NEXT: fadd v1.2d, v16.2d, v1.2d
480- ; CHECK-NEXT: fadd v3.2d, v5.2d, v4.2d
456+ ; CHECK-NEXT: fadd v3.2d, v17.2d, v3.2d
481457; CHECK-NEXT: ret
482458 %s1 = shufflevector <32 x i16 > %l , <32 x i16 > undef , <8 x i32 > <i32 0 , i32 4 , i32 8 , i32 12 , i32 16 , i32 20 , i32 24 , i32 28 >
483459 %z1 = uitofp <8 x i16 > %s1 to <8 x double >
0 commit comments