|
5 | 5 | define <16 x i8> @load_v3i8(ptr %src) {
|
6 | 6 | ; CHECK-LABEL: load_v3i8:
|
7 | 7 | ; CHECK: ; %bb.0:
|
8 |
| -; CHECK-NEXT: sub sp, sp, #16 |
9 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
10 |
| -; CHECK-NEXT: ldrh w8, [x0] |
11 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
12 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
13 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
14 |
| -; CHECK-NEXT: umov.h w8, v0[0] |
15 |
| -; CHECK-NEXT: umov.h w9, v0[1] |
| 8 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 9 | +; CHECK-NEXT: ldrh w9, [x0] |
| 10 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
16 | 11 | ; CHECK-NEXT: fmov s0, w8
|
17 |
| -; CHECK-NEXT: add x8, x0, #2 |
18 |
| -; CHECK-NEXT: mov.b v0[1], w9 |
19 |
| -; CHECK-NEXT: ld1.b { v0 }[2], [x8] |
20 |
| -; CHECK-NEXT: add sp, sp, #16 |
21 | 12 | ; CHECK-NEXT: ret
|
22 | 13 | ;
|
23 | 14 | ; BE-LABEL: load_v3i8:
|
@@ -47,19 +38,14 @@ define <16 x i8> @load_v3i8(ptr %src) {
|
47 | 38 | define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
|
48 | 39 | ; CHECK-LABEL: load_v3i8_to_4xi32:
|
49 | 40 | ; CHECK: ; %bb.0:
|
50 |
| -; CHECK-NEXT: sub sp, sp, #16 |
51 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
52 |
| -; CHECK-NEXT: ldrh w8, [x0] |
| 41 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 42 | +; CHECK-NEXT: ldrh w9, [x0] |
53 | 43 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
54 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
55 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
56 |
| -; CHECK-NEXT: ldrsb w8, [x0, #2] |
57 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
58 |
| -; CHECK-NEXT: mov.h v0[1], v0[1] |
59 |
| -; CHECK-NEXT: mov.h v0[2], w8 |
| 44 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 45 | +; CHECK-NEXT: fmov s0, w8 |
| 46 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
60 | 47 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
61 | 48 | ; CHECK-NEXT: and.16b v0, v0, v1
|
62 |
| -; CHECK-NEXT: add sp, sp, #16 |
63 | 49 | ; CHECK-NEXT: ret
|
64 | 50 | ;
|
65 | 51 | ; BE-LABEL: load_v3i8_to_4xi32:
|
@@ -90,19 +76,14 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
|
90 | 76 | define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) {
|
91 | 77 | ; CHECK-LABEL: load_v3i8_to_4xi32_align_2:
|
92 | 78 | ; CHECK: ; %bb.0:
|
93 |
| -; CHECK-NEXT: sub sp, sp, #16 |
94 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
95 |
| -; CHECK-NEXT: ldrh w8, [x0] |
| 79 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 80 | +; CHECK-NEXT: ldrh w9, [x0] |
96 | 81 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
97 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
98 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
99 |
| -; CHECK-NEXT: ldrsb w8, [x0, #2] |
100 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
101 |
| -; CHECK-NEXT: mov.h v0[1], v0[1] |
102 |
| -; CHECK-NEXT: mov.h v0[2], w8 |
| 82 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 83 | +; CHECK-NEXT: fmov s0, w8 |
| 84 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
103 | 85 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
104 | 86 | ; CHECK-NEXT: and.16b v0, v0, v1
|
105 |
| -; CHECK-NEXT: add sp, sp, #16 |
106 | 87 | ; CHECK-NEXT: ret
|
107 | 88 | ;
|
108 | 89 | ; BE-LABEL: load_v3i8_to_4xi32_align_2:
|
@@ -160,19 +141,14 @@ define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) {
|
160 | 141 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
|
161 | 142 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
|
162 | 143 | ; CHECK: ; %bb.0:
|
163 |
| -; CHECK-NEXT: sub sp, sp, #16 |
164 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
165 |
| -; CHECK-NEXT: ldurh w8, [x0, #1] |
| 144 | +; CHECK-NEXT: ldrb w8, [x0, #3] |
| 145 | +; CHECK-NEXT: ldurh w9, [x0, #1] |
166 | 146 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
167 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
168 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
169 |
| -; CHECK-NEXT: ldrsb w8, [x0, #3] |
170 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
171 |
| -; CHECK-NEXT: mov.h v0[1], v0[1] |
172 |
| -; CHECK-NEXT: mov.h v0[2], w8 |
| 147 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 148 | +; CHECK-NEXT: fmov s0, w8 |
| 149 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
173 | 150 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
174 | 151 | ; CHECK-NEXT: and.16b v0, v0, v1
|
175 |
| -; CHECK-NEXT: add sp, sp, #16 |
176 | 152 | ; CHECK-NEXT: ret
|
177 | 153 | ;
|
178 | 154 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:
|
@@ -204,19 +180,14 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
|
204 | 180 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
|
205 | 181 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
|
206 | 182 | ; CHECK: ; %bb.0:
|
207 |
| -; CHECK-NEXT: sub sp, sp, #16 |
208 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
209 |
| -; CHECK-NEXT: ldurh w8, [x0, #3] |
| 183 | +; CHECK-NEXT: ldrb w8, [x0, #5] |
| 184 | +; CHECK-NEXT: ldurh w9, [x0, #3] |
210 | 185 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
211 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
212 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
213 |
| -; CHECK-NEXT: ldrsb w8, [x0, #5] |
214 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
215 |
| -; CHECK-NEXT: mov.h v0[1], v0[1] |
216 |
| -; CHECK-NEXT: mov.h v0[2], w8 |
| 186 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 187 | +; CHECK-NEXT: fmov s0, w8 |
| 188 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
217 | 189 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
218 | 190 | ; CHECK-NEXT: and.16b v0, v0, v1
|
219 |
| -; CHECK-NEXT: add sp, sp, #16 |
220 | 191 | ; CHECK-NEXT: ret
|
221 | 192 | ;
|
222 | 193 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:
|
@@ -348,18 +319,14 @@ define <3 x i32> @load_v3i32(ptr %src) {
|
348 | 319 | define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {
|
349 | 320 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32:
|
350 | 321 | ; CHECK: ; %bb.0:
|
351 |
| -; CHECK-NEXT: sub sp, sp, #16 |
352 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
353 |
| -; CHECK-NEXT: ldrh w8, [x0] |
| 322 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 323 | +; CHECK-NEXT: ldrh w9, [x0] |
354 | 324 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
355 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
356 |
| -; CHECK-NEXT: add x8, x0, #2 |
357 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
358 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
359 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 325 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 326 | +; CHECK-NEXT: fmov s0, w8 |
| 327 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
360 | 328 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
361 | 329 | ; CHECK-NEXT: and.16b v0, v0, v1
|
362 |
| -; CHECK-NEXT: add sp, sp, #16 |
363 | 330 | ; CHECK-NEXT: ret
|
364 | 331 | ;
|
365 | 332 | ; BE-LABEL: load_v3i8_zext_to_3xi32:
|
@@ -388,18 +355,14 @@ define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {
|
388 | 355 | define <3 x i32> @load_v3i8_sext_to_3xi32(ptr %src) {
|
389 | 356 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32:
|
390 | 357 | ; CHECK: ; %bb.0:
|
391 |
| -; CHECK-NEXT: sub sp, sp, #16 |
392 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
393 |
| -; CHECK-NEXT: ldrh w8, [x0] |
394 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
395 |
| -; CHECK-NEXT: add x8, x0, #2 |
396 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
397 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
398 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 358 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 359 | +; CHECK-NEXT: ldrh w9, [x0] |
| 360 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 361 | +; CHECK-NEXT: fmov s0, w8 |
| 362 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
399 | 363 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
400 | 364 | ; CHECK-NEXT: shl.4s v0, v0, #24
|
401 | 365 | ; CHECK-NEXT: sshr.4s v0, v0, #24
|
402 |
| -; CHECK-NEXT: add sp, sp, #16 |
403 | 366 | ; CHECK-NEXT: ret
|
404 | 367 | ;
|
405 | 368 | ; BE-LABEL: load_v3i8_sext_to_3xi32:
|
@@ -513,19 +476,15 @@ entry:
|
513 | 476 | define void @load_ext_to_64bits(ptr %src, ptr %dst) {
|
514 | 477 | ; CHECK-LABEL: load_ext_to_64bits:
|
515 | 478 | ; CHECK: ; %bb.0: ; %entry
|
516 |
| -; CHECK-NEXT: sub sp, sp, #16 |
517 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
518 |
| -; CHECK-NEXT: ldrh w8, [x0] |
519 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
520 |
| -; CHECK-NEXT: add x8, x0, #2 |
521 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
522 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
523 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 479 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 480 | +; CHECK-NEXT: ldrh w9, [x0] |
| 481 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 482 | +; CHECK-NEXT: fmov s0, w8 |
524 | 483 | ; CHECK-NEXT: add x8, x1, #4
|
| 484 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
525 | 485 | ; CHECK-NEXT: bic.4h v0, #255, lsl #8
|
526 | 486 | ; CHECK-NEXT: st1.h { v0 }[2], [x8]
|
527 | 487 | ; CHECK-NEXT: str s0, [x1]
|
528 |
| -; CHECK-NEXT: add sp, sp, #16 |
529 | 488 | ; CHECK-NEXT: ret
|
530 | 489 | ;
|
531 | 490 | ; BE-LABEL: load_ext_to_64bits:
|
@@ -614,24 +573,20 @@ entry:
|
614 | 573 | define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
|
615 | 574 | ; CHECK-LABEL: load_ext_add_to_64bits:
|
616 | 575 | ; CHECK: ; %bb.0: ; %entry
|
617 |
| -; CHECK-NEXT: sub sp, sp, #16 |
618 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
619 |
| -; CHECK-NEXT: ldrh w9, [x0] |
| 576 | +; CHECK-NEXT: ldrb w9, [x0, #2] |
| 577 | +; CHECK-NEXT: ldrh w10, [x0] |
620 | 578 | ; CHECK-NEXT: Lloh2:
|
621 | 579 | ; CHECK-NEXT: adrp x8, lCPI15_0@PAGE
|
622 | 580 | ; CHECK-NEXT: Lloh3:
|
623 | 581 | ; CHECK-NEXT: ldr d1, [x8, lCPI15_0@PAGEOFF]
|
624 | 582 | ; CHECK-NEXT: add x8, x1, #4
|
625 |
| -; CHECK-NEXT: strh w9, [sp, #12] |
626 |
| -; CHECK-NEXT: add x9, x0, #2 |
627 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
628 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
629 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 583 | +; CHECK-NEXT: orr w9, w10, w9, lsl #16 |
| 584 | +; CHECK-NEXT: fmov s0, w9 |
| 585 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
630 | 586 | ; CHECK-NEXT: bic.4h v0, #255, lsl #8
|
631 | 587 | ; CHECK-NEXT: add.4h v0, v0, v1
|
632 | 588 | ; CHECK-NEXT: st1.h { v0 }[2], [x8]
|
633 | 589 | ; CHECK-NEXT: str s0, [x1]
|
634 |
| -; CHECK-NEXT: add sp, sp, #16 |
635 | 590 | ; CHECK-NEXT: ret
|
636 | 591 | ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
|
637 | 592 | ;
|
@@ -880,24 +835,21 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
|
880 | 835 | define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {
|
881 | 836 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
|
882 | 837 | ; CHECK: ; %bb.0:
|
883 |
| -; CHECK-NEXT: sub sp, sp, #16 |
884 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
885 |
| -; CHECK-NEXT: ldrh w9, [x0] |
| 838 | +; CHECK-NEXT: ldrb w9, [x0, #2] |
| 839 | +; CHECK-NEXT: ldrh w10, [x0] |
886 | 840 | ; CHECK-NEXT: Lloh4:
|
887 | 841 | ; CHECK-NEXT: adrp x8, lCPI22_0@PAGE
|
888 | 842 | ; CHECK-NEXT: Lloh5:
|
889 | 843 | ; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF]
|
890 |
| -; CHECK-NEXT: add x8, x0, #1 |
891 |
| -; CHECK-NEXT: strh w9, [sp, #12] |
892 |
| -; CHECK-NEXT: add x9, x0, #2 |
893 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
894 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
895 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 844 | +; CHECK-NEXT: add x8, x0, #2 |
| 845 | +; CHECK-NEXT: orr w9, w10, w9, lsl #16 |
| 846 | +; CHECK-NEXT: fmov s0, w9 |
| 847 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
896 | 848 | ; CHECK-NEXT: uaddw.4s v0, v1, v0
|
897 |
| -; CHECK-NEXT: st1.b { v0 }[4], [x8] |
898 |
| -; CHECK-NEXT: st1.b { v0 }[8], [x9] |
| 849 | +; CHECK-NEXT: st1.b { v0 }[8], [x8] |
| 850 | +; CHECK-NEXT: add x8, x0, #1 |
899 | 851 | ; CHECK-NEXT: st1.b { v0 }[0], [x0]
|
900 |
| -; CHECK-NEXT: add sp, sp, #16 |
| 852 | +; CHECK-NEXT: st1.b { v0 }[4], [x8] |
901 | 853 | ; CHECK-NEXT: ret
|
902 | 854 | ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
|
903 | 855 | ;
|
@@ -936,24 +888,21 @@ define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {
|
936 | 888 | define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) {
|
937 | 889 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
|
938 | 890 | ; CHECK: ; %bb.0:
|
939 |
| -; CHECK-NEXT: sub sp, sp, #16 |
940 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
941 |
| -; CHECK-NEXT: ldrh w9, [x0] |
| 891 | +; CHECK-NEXT: ldrb w9, [x0, #2] |
| 892 | +; CHECK-NEXT: ldrh w10, [x0] |
942 | 893 | ; CHECK-NEXT: Lloh6:
|
943 | 894 | ; CHECK-NEXT: adrp x8, lCPI23_0@PAGE
|
944 | 895 | ; CHECK-NEXT: Lloh7:
|
945 | 896 | ; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF]
|
946 |
| -; CHECK-NEXT: add x8, x0, #1 |
947 |
| -; CHECK-NEXT: strh w9, [sp, #12] |
948 |
| -; CHECK-NEXT: add x9, x0, #2 |
949 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
950 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
951 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 897 | +; CHECK-NEXT: add x8, x0, #2 |
| 898 | +; CHECK-NEXT: orr w9, w10, w9, lsl #16 |
| 899 | +; CHECK-NEXT: fmov s0, w9 |
| 900 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
952 | 901 | ; CHECK-NEXT: uaddw.4s v0, v1, v0
|
953 |
| -; CHECK-NEXT: st1.b { v0 }[4], [x8] |
954 |
| -; CHECK-NEXT: st1.b { v0 }[8], [x9] |
| 902 | +; CHECK-NEXT: st1.b { v0 }[8], [x8] |
| 903 | +; CHECK-NEXT: add x8, x0, #1 |
955 | 904 | ; CHECK-NEXT: st1.b { v0 }[0], [x0]
|
956 |
| -; CHECK-NEXT: add sp, sp, #16 |
| 905 | +; CHECK-NEXT: st1.b { v0 }[4], [x8] |
957 | 906 | ; CHECK-NEXT: ret
|
958 | 907 | ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
|
959 | 908 | ;
|
|
0 commit comments