diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 2d0ca6e6d0d3f..daa1435b3c984 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3186,11 +3186,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return MIB->getIterator(); }; - // SVE objects are always restored in reverse order. - for (const RegPairInfo &RPI : reverse(RegPairs)) - if (RPI.isScalable()) - EmitMI(RPI); - if (homogeneousPrologEpilog(MF, &MBB)) { auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog)) .setMIFlag(MachineInstr::FrameDestroy); @@ -3201,11 +3196,19 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } + // For performance reasons restore SVE register in increasing order + auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; }; + auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); + auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR); + std::reverse(PPRBegin, PPREnd.base()); + auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; }; + auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR); + auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR); + std::reverse(ZPRBegin, ZPREnd.base()); + if (ReverseCSRRestoreSeq) { MachineBasicBlock::iterator First = MBB.end(); for (const RegPairInfo &RPI : reverse(RegPairs)) { - if (RPI.isScalable()) - continue; MachineBasicBlock::iterator It = EmitMI(RPI); if (First == MBB.end()) First = It; @@ -3214,8 +3217,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( MBB.splice(MBBI, &MBB, First); } else { for (const RegPairInfo &RPI : RegPairs) { - if (RPI.isScalable()) - continue; (void)EmitMI(RPI); } } diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir index 3dba21d59b408..aed3145073619 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir @@ -19,8 +19,8 @@ ; CHECK-NEXT: // implicit-def: $p4 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG - ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload + ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir index 213d7919e4a72..f7920e595e44b 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -772,9 +772,9 @@ body: | # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0 +# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0 # CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 -# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 +# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8 @@ -873,14 +873,14 @@ body: | # CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4 -# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5 -# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14 -# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15 # CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2 # CHECK: $z22 = frame-destroy LDR_ZXI $sp, 3 # CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 # CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17 +# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4 +# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5 +# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14 +# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 18 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8 @@ -1037,14 +1037,14 @@ body: | # CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]] # CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18 +# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2 +# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3 +# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 +# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17 # CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4 # CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5 # CHECK: $p5 = frame-destroy LDR_PXI $sp, 14 # CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 15 -# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 -# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3 -# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16 -# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z9 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z10 @@ -1198,10 +1198,10 @@ body: | # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 7 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 -# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6 -# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7 # CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 1 # CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 +# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 6 +# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8 diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll index 296f2be9cfee5..6d2abf7e18419 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll @@ -226,30 +226,30 @@ define @streaming_compatible_with_scalable_vectors( @streaming_compatible_with_predicate_vectors( @smstart_clobber_sve( %x) nounwind { ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload @@ -267,30 +267,30 @@ define @smstart_clobber_sve_duplicate( %x) ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll index b7119fc082567..ea7808d73093e 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll @@ -129,7 +129,6 @@ define @ld1_x2_i8_z0_z8( %unused, @ld1_x2_i8_z0_z8( %unused, @ld1_x2_i8_z0_z8_scalar( %unused, @ld1_x2_i8_z0_z8_scalar( %unused, @ld1_x2_i16_z0_z8( %unused, @ld1_x2_i16_z0_z8( %unused, @ld1_x2_i16_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z0, [sp] ; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #2 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -611,6 +610,7 @@ define @ld1_x2_i16_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -751,7 +751,6 @@ define @ld1_x2_i32_z0_z8( %unused, @ld1_x2_i32_z0_z8( %unused, @ld1_x2_i32_z0_z8_scalar( %unused, < ; CONTIGUOUS-NEXT: ldr z0, [sp] ; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #2 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -922,6 +921,7 @@ define @ld1_x2_i32_z0_z8_scalar( %unused, < ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1062,7 +1062,6 @@ define @ld1_x2_i64_z0_z8( %unused, @ld1_x2_i64_z0_z8( %unused, @ld1_x2_i64_z0_z8_scalar( %unused, < ; CONTIGUOUS-NEXT: ldr z0, [sp] ; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #2 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1233,6 +1232,7 @@ define @ld1_x2_i64_z0_z8_scalar( %unused, < ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1380,7 +1380,6 @@ define @ld1_x4_i8_z0_z4_z8_z12( %unused, @ld1_x4_i8_z0_z4_z8_z12( %unused, @ld1_x4_i8_z0_z4_z8_z12_scalar( %unu ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1560,6 +1559,7 @@ define @ld1_x4_i8_z0_z4_z8_z12_scalar( %unu ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1711,7 +1711,6 @@ define @ld1_x4_i16_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1726,6 +1725,7 @@ define @ld1_x4_i16_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1877,7 +1877,6 @@ define @ld1_x4_i16_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1892,6 +1891,7 @@ define @ld1_x4_i16_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -2043,7 +2043,6 @@ define @ld1_x4_i32_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -2058,6 +2057,7 @@ define @ld1_x4_i32_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -2209,7 +2209,6 @@ define @ld1_x4_i32_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -2224,6 +2223,7 @@ define @ld1_x4_i32_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -2375,7 +2375,6 @@ define @ld1_x4_i64_z0_z4_z8_z12( %unused, < ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -2390,6 +2389,7 @@ define @ld1_x4_i64_z0_z4_z8_z12( %unused, < ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -2541,7 +2541,6 @@ define @ld1_x4_i64_z0_z4_z8_z12_scalar( %un ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -2556,6 +2555,7 @@ define @ld1_x4_i64_z0_z4_z8_z12_scalar( %un ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll index 1fb251a4f628e..7e2d28fbf7982 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll @@ -82,7 +82,6 @@ define @ldnt1_x2_i8_z0_z8( %unused, @ldnt1_x2_i8_z0_z8( %unused, @ldnt1_x2_i8_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z0, [sp] ; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #2 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -206,6 +205,7 @@ define @ldnt1_x2_i8_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -299,7 +299,6 @@ define @ldnt1_x2_i16_z0_z8( %unused, @ldnt1_x2_i16_z0_z8( %unused, @ldnt1_x2_i16_z0_z8_scalar( %unused ; CONTIGUOUS-NEXT: ldr z0, [sp] ; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #2 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -423,6 +422,7 @@ define @ldnt1_x2_i16_z0_z8_scalar( %unused ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -516,7 +516,6 @@ define @ldnt1_x2_i32_z0_z8( %unused, @ldnt1_x2_i32_z0_z8( %unused, @ldnt1_x2_i32_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z0, [sp] ; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #2 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -640,6 +639,7 @@ define @ldnt1_x2_i32_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -733,7 +733,6 @@ define @ldnt1_x2_i64_z0_z8( %unused, @ldnt1_x2_i64_z0_z8( %unused, @ldnt1_x2_i64_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z0, [sp] ; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #2 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -857,6 +856,7 @@ define @ldnt1_x2_i64_z0_z8_scalar( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #16 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -955,7 +955,6 @@ define @ldnt1_x4_i8_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -970,6 +969,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1071,7 +1071,6 @@ define @ldnt1_x4_i8_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1086,6 +1085,7 @@ define @ldnt1_x4_i8_z0_z4_z8_z12_scalar( %u ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1188,7 +1188,6 @@ define @ldnt1_x4_i16_z0_z4_z8_z12( %unused ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1203,6 +1202,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12( %unused ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1304,7 +1304,6 @@ define @ldnt1_x4_i16_z0_z4_z8_z12_scalar( ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1319,6 +1318,7 @@ define @ldnt1_x4_i16_z0_z4_z8_z12_scalar( ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1421,7 +1421,6 @@ define @ldnt1_x4_i32_z0_z4_z8_z12( %unused ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1436,6 +1435,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12( %unused ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1537,7 +1537,6 @@ define @ldnt1_x4_i32_z0_z4_z8_z12_scalar( ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1552,6 +1551,7 @@ define @ldnt1_x4_i32_z0_z4_z8_z12_scalar( ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1654,7 +1654,6 @@ define @ldnt1_x4_i64_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1669,6 +1668,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12( %unused, ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret @@ -1770,7 +1770,6 @@ define @ldnt1_x4_i64_z0_z4_z8_z12_scalar( % ; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl] ; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl] ; CONTIGUOUS-NEXT: addvl sp, sp, #4 -; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -1785,6 +1784,7 @@ define @ldnt1_x4_i64_z0_z4_z8_z12_scalar( % ; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload ; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CONTIGUOUS-NEXT: addvl sp, sp, #15 ; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CONTIGUOUS-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll index 03a9220ebfddc..06946d1be8067 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-sve.ll +++ b/llvm/test/CodeGen/AArch64/stack-probing-sve.ll @@ -380,7 +380,6 @@ define void @sve_16v_1p_csr( %a) #0 { ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP -; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload @@ -397,6 +396,7 @@ define void @sve_16v_1p_csr( %a) #0 { ; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #17 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 @@ -697,10 +697,10 @@ define void @sve_unprobed_area( %a, i32 %n) #0 { ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll index 47e49b84aaaff..d227538043fce 100644 --- a/llvm/test/CodeGen/AArch64/sve-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll @@ -66,30 +66,30 @@ define void @foo( %dst, i1 %cond) { ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: bl bar ; CHECK-NEXT: addvl sp, x29, #-18 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll index 9851583b950eb..3965af6a9066d 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -567,30 +567,30 @@ define @sve_caller_non_sve_callee_high_range( @sve_ret_caller_non_sve_callee_high_range() { ; CHECK-NEXT: fmov s7, #7.00000000 ; CHECK-NEXT: bl non_sve_callee_high_range ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-tailcall.ll b/llvm/test/CodeGen/AArch64/sve-tailcall.ll index f32c80d392b63..4ddf007768fd2 100644 --- a/llvm/test/CodeGen/AArch64/sve-tailcall.ll +++ b/llvm/test/CodeGen/AArch64/sve-tailcall.ll @@ -83,30 +83,30 @@ define i32 @sve_caller_non_sve_callee( %arg) nounwind { ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: bl non_sve_callee -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload @@ -158,30 +158,30 @@ define i32 @sve_caller_non_sve_callee_fastcc( %arg) nounwind { ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: bl non_sve_callee -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll index f3c4d217e6fca..822be14faaeb1 100644 --- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -63,18 +63,6 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -91,6 +79,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 @@ -112,18 +112,6 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -140,6 +128,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: .cfi_restore z8 @@ -215,18 +215,6 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 ; GISEL-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG -; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -243,6 +231,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #18 ; GISEL-NEXT: .cfi_def_cfa wsp, 16 ; GISEL-NEXT: .cfi_restore z8 @@ -264,18 +264,6 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 ; GISEL-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG -; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -292,6 +280,18 @@ define @invoke_callee_may_throw_sve( %v) uw ; GISEL-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; GISEL-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #18 ; GISEL-NEXT: .cfi_def_cfa wsp, 16 ; GISEL-NEXT: .cfi_restore z8