diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index e7db9547f03b6..e8e61b73f9e0c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -364,6 +364,8 @@ class MachineIRBuilder {
     State.Observer = &Observer;
   }

+  GISelChangeObserver *getObserver() { return State.Observer; }
+
   void stopObservingChanges() { State.Observer = nullptr; }

   bool isObservingChanges() const { return State.Observer != nullptr; }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 56e564638cdca..51c52aad35949 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -20,7 +20,9 @@
 //===----------------------------------------------------------------------===//

 #include "AArch64TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -439,6 +441,22 @@ class AArch64PostLegalizerCombiner : public MachineFunctionPass {
 private:
   bool IsOptNone;
   AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
+
+
+  struct StoreInfo {
+    GStore *St = nullptr;
+    // The G_PTR_ADD that's used by the store. We keep this to cache the
+    // MachineInstr def.
+    GPtrAdd *Ptr = nullptr;
+    // The signed offset to the Ptr instruction.
+    int64_t Offset = 0;
+    LLT StoredType;
+  };
+  bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
+                               CSEMIRBuilder &MIB);
+
+  bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
+                                          CSEMIRBuilder &MIB);
 };
 } // end anonymous namespace

@@ -492,7 +510,191 @@ bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
                      F.hasMinSize());
   AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
                                         RuleConfig, ST, MDT, LI);
-  return Impl.combineMachineInstrs();
+  bool Changed = Impl.combineMachineInstrs();
+
+  auto MIB = CSEMIRBuilder(MF);
+  MIB.setCSEInfo(CSEInfo);
+  Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
+  return Changed;
+}
+
+bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
+    SmallVectorImpl<StoreInfo> &Stores, CSEMIRBuilder &MIB) {
+  if (Stores.size() <= 2)
+    return false;
+
+  // Profitability checks:
+  int64_t BaseOffset = Stores[0].Offset;
+  unsigned NumPairsExpected = Stores.size() / 2;
+  unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
+  // Size savings will depend on whether we can fold the offset as an
+  // immediate of an ADD.
+  auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
+  if (!TLI.isLegalAddImmediate(BaseOffset))
+    TotalInstsExpected++;
+  int SavingsExpected = Stores.size() - TotalInstsExpected;
+  if (SavingsExpected <= 0)
+    return false;
+
+  auto &MRI = MIB.getMF().getRegInfo();
+
+  // We have a series of consecutive stores. Factor out the common base
+  // pointer and rewrite the offsets.
+  Register NewBase = Stores[0].Ptr->getReg(0);
+  for (auto &SInfo : Stores) {
+    // Compute a new pointer with the new base ptr and adjusted offset.
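+    // For example, a store whose address was "G_PTR_ADD %base, 4144" with
+    // BaseOffset == 4128 now stores through "G_PTR_ADD NewBase, 16", an
+    // offset small enough for STP to encode.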
+    MIB.setInstrAndDebugLoc(*SInfo.St);
+    auto NewOff =
+        MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
+    auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
+                                  NewBase, NewOff);
+    if (MIB.getObserver())
+      MIB.getObserver()->changingInstr(*SInfo.St);
+    SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
+    if (MIB.getObserver())
+      MIB.getObserver()->changedInstr(*SInfo.St);
+  }
+  LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
+                    << " stores into a base pointer and offsets.\n");
+  return true;
+}
+
+static cl::opt<bool>
+    EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
+                              cl::init(true), cl::Hidden,
+                              cl::desc("Enable consecutive memop optimization "
+                                       "in AArch64PostLegalizerCombiner"));
+
+bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
+    MachineFunction &MF, CSEMIRBuilder &MIB) {
+  // This combine needs to run after all reassociations/folds on pointer
+  // addressing have been done, specifically those that combine two G_PTR_ADDs
+  // with constant offsets into a single G_PTR_ADD with a combined offset.
+  // The goal of this optimization is to undo that combine in the case where
+  // doing so has prevented the formation of pair stores due to illegal
+  // addressing modes of STP. The reason we do it here is that it's much
+  // easier to undo the transformation for a series of consecutive mem ops
+  // than it is to detect when doing it would be a bad idea while looking at
+  // a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
+  //
+  // An example:
+  //   G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
+  //   %off1:_(s64) = G_CONSTANT i64 4128
+  //   %p1:_(p0) = G_PTR_ADD %base:_, %off1:_(s64)
+  //   G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
+  //   %off2:_(s64) = G_CONSTANT i64 4144
+  //   %p2:_(p0) = G_PTR_ADD %base:_, %off2:_(s64)
+  //   G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
+  //   %off3:_(s64) = G_CONSTANT i64 4160
+  //   %p3:_(p0) = G_PTR_ADD %base:_, %off3:_(s64)
+  //   G_STORE %11:_(<2 x s64>), %p3:_(p0) :: (store (<2 x s64>), align 1)
+  bool Changed = false;
+  auto &MRI = MF.getRegInfo();
+
+  if (!EnableConsecutiveMemOpOpt)
+    return Changed;
+
+  SmallVector<StoreInfo> Stores;
+  // If we see a load, then we keep track of any values defined by it.
+  // In the following example, STP formation will fail anyway because
+  // the latter store is using a load result that appears after the
+  // prior store. In this situation, factoring out the offset would
+  // increase code size for no benefit.
+  //   G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
+  //   %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
+  //   G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
+  SmallVector<Register> LoadValsSinceLastStore;
+
+  auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
+    // Check if this store is consecutive to the last one.
+    if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
+        (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
+         New.Offset) ||
+        Last.StoredType != New.StoredType)
+      return false;
+
+    // Check if this store is using a load result that appears after the
+    // last store. If so, bail out.
+    if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
+          return New.St->getValueReg() == LoadVal;
+        }))
+      return false;
+
+    // Check if the current offset would be too large for STP.
+    // If not, then STP formation should be able to handle it, so we don't
+    // need to do anything.
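+    // STP addresses with a signed 7-bit immediate scaled by the access size,
+    // so the largest positive offset it can encode is 63 * size-in-bytes:
+    // 252, 504 and 1008 bytes for 32, 64 and 128 bit stores respectively.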
+    int64_t MaxLegalOffset;
+    switch (New.StoredType.getSizeInBits()) {
+    case 32:
+      MaxLegalOffset = 252;
+      break;
+    case 64:
+      MaxLegalOffset = 504;
+      break;
+    case 128:
+      MaxLegalOffset = 1008;
+      break;
+    default:
+      llvm_unreachable("Unexpected stored type size");
+    }
+    if (New.Offset < MaxLegalOffset)
+      return false;
+
+    // If factoring it out still wouldn't help then don't bother.
+    return New.Offset - Stores[0].Offset <= MaxLegalOffset;
+  };
+
+  auto resetState = [&]() {
+    Stores.clear();
+    LoadValsSinceLastStore.clear();
+  };
+
+  for (auto &MBB : MF) {
+    // We're looking inside a single BB at a time since the memset pattern
+    // should only be in a single block.
+    resetState();
+    for (auto &MI : MBB) {
+      if (auto *St = dyn_cast<GStore>(&MI)) {
+        Register PtrBaseReg;
+        APInt Offset;
+        LLT StoredValTy = MRI.getType(St->getValueReg());
+        unsigned ValSize = StoredValTy.getSizeInBits();
+        if (ValSize < 32 || ValSize != St->getMMO().getSizeInBits())
+          continue;
+
+        Register PtrReg = St->getPointerReg();
+        if (mi_match(
+                PtrReg, MRI,
+                m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
+          GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
+          StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};
+
+          if (Stores.empty()) {
+            Stores.push_back(New);
+            continue;
+          }
+
+          // Check if this store is a valid continuation of the sequence.
+          auto &Last = Stores.back();
+          if (storeIsValid(Last, New)) {
+            Stores.push_back(New);
+            LoadValsSinceLastStore.clear(); // Reset the load value tracking.
+          } else {
+            // The store isn't valid to consider for the prior sequence,
+            // so try to optimize what we have so far and start a new sequence.
+            Changed |= tryOptimizeConsecStores(Stores, MIB);
+            resetState();
+            Stores.push_back(New);
+          }
+        }
+      } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
+        LoadValsSinceLastStore.push_back(Ld->getDstReg());
+      }
+    }
+    Changed |= tryOptimizeConsecStores(Stores, MIB);
+    resetState();
+  }
+
+  return Changed;
 }

 char AArch64PostLegalizerCombiner::ID = 0;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll b/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll
new file mode 100644
index 0000000000000..6aaefff1f7240
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll
@@ -0,0 +1,353 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel -aarch64-postlegalizer-consecutive-memops=0 < %s | FileCheck %s --check-prefix=CHECK-NO-SPLIT
+; RUN: llc -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel < %s | FileCheck %s --check-prefix=CHECK-SPLIT
+
+define void @basic_split(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: basic_split:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8040]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: basic_split:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: mov w8, #8000 ; =0x1f40
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #32]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 1000
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 1001
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 1002
+  store i64 0, ptr %addr3
+  %addr4 = getelementptr i64, ptr %p, i64 1003
+  store i64 0, ptr %addr4
+  %addr5 = getelementptr i64, ptr %p, i64 1004
+  store i64 0, ptr %addr5
+  %addr6 = getelementptr i64, ptr %p, i64 1005
+  store i64 0, ptr %addr6
+  ret void
+}
+
+define void @basic_multi_use_ptr(ptr %p, ptr %p2) {
+; CHECK-NO-SPLIT-LABEL: basic_multi_use_ptr:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: mov w8, #8008 ; =0x1f48
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: add x8, x0, x8
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: str x8, [x1]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: basic_multi_use_ptr:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: mov w8, #8008 ; =0x1f48
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-SPLIT-NEXT: str x8, [x1]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 1000
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 1001
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 1002
+  store i64 0, ptr %addr3
+  %addr4 = getelementptr i64, ptr %p, i64 1003
+  store i64 0, ptr %addr4
+  ; multiuse of %addr2
+  store ptr %addr2, ptr %p2
+  ret void
+}
+
+define void @not_consecutive(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: not_consecutive:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: not_consecutive:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 1000
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 1001
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 1003
+  store i64 0, ptr %addr3
+  %addr4 = getelementptr i64, ptr %p, i64 1004
+  store i64 0, ptr %addr4
+  ret void
+}
+
+define void @early_store_is_invalid_but_split_rest(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: early_store_is_invalid_but_split_rest:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8080]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8040]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8048]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8056]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: early_store_is_invalid_but_split_rest:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: mov w8, #8016 ; =0x1f50
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8080]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #32]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 1000
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 1010
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 1002
+  store i64 0, ptr %addr3
+  %addr4 = getelementptr i64, ptr %p, i64 1003
+  store i64 0, ptr %addr4
+  %addr5 = getelementptr i64, ptr %p, i64 1004
+  store i64 0, ptr %addr5
+  %addr6 = getelementptr i64, ptr %p, i64 1005
+  store i64 0, ptr %addr6
+  %addr7 = getelementptr i64, ptr %p, i64 1006
+  store i64 0, ptr %addr7
+  %addr8 = getelementptr i64, ptr %p, i64 1007
+  store i64 0, ptr %addr8
+  ret void
+}
+
+define void @vector(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: vector:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16000]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16016]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16032]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16048]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16064]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16080]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16096]
+; CHECK-NO-SPLIT-NEXT: str q0, [x0, #16112]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: vector:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: movi.2d v0, #0000000000000000
+; CHECK-SPLIT-NEXT: mov w8, #16000 ; =0x3e80
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: stp q0, q0, [x8]
+; CHECK-SPLIT-NEXT: stp q0, q0, [x8, #32]
+; CHECK-SPLIT-NEXT: stp q0, q0, [x8, #64]
+; CHECK-SPLIT-NEXT: stp q0, q0, [x8, #96]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr <2 x i64>, ptr %p, i64 1000
+  store <2 x i64> <i64 0, i64 0>, ptr %bigoffset
+  %addr2 = getelementptr <2 x i64>, ptr %p, i64 1001
+  store <2 x i64> <i64 0, i64 0>, ptr %addr2
+  %addr3 = getelementptr <2 x i64>, ptr %p, i64 1002
+  store <2 x i64> <i64 0, i64 0>, ptr %addr3
+  %addr4 = getelementptr <2 x i64>, ptr %p, i64 1003
+  store <2 x i64> <i64 0, i64 0>, ptr %addr4
+  %addr5 = getelementptr <2 x i64>, ptr %p, i64 1004
+  store <2 x i64> <i64 0, i64 0>, ptr %addr5
+  %addr6 = getelementptr <2 x i64>, ptr %p, i64 1005
+  store <2 x i64> <i64 0, i64 0>, ptr %addr6
+  %addr7 = getelementptr <2 x i64>, ptr %p, i64 1006
+  store <2 x i64> <i64 0, i64 0>, ptr %addr7
+  %addr8 = getelementptr <2 x i64>, ptr %p, i64 1007
+  store <2 x i64> <i64 0, i64 0>, ptr %addr8
+  ret void
+}
+
+define void @can_already_form_stp(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: can_already_form_stp:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: stp xzr, xzr, [x0, #80]
+; CHECK-NO-SPLIT-NEXT: stp xzr, xzr, [x0, #96]
+; CHECK-NO-SPLIT-NEXT: stp xzr, xzr, [x0, #112]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: can_already_form_stp:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x0, #80]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x0, #96]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x0, #112]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 10
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 11
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 12
+  store i64 0, ptr %addr3
+  %addr4 = getelementptr i64, ptr %p, i64 13
+  store i64 0, ptr %addr4
+  %addr5 = getelementptr i64, ptr %p, i64 14
+  store i64 0, ptr %addr5
+  %addr6 = getelementptr i64, ptr %p, i64 15
+  store i64 0, ptr %addr6
+  ret void
+}
+
+define void @use_of_load_in_between(ptr %p, ptr %ldptr, ptr %ldptr2) {
+; CHECK-NO-SPLIT-LABEL: use_of_load_in_between:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: ldr x8, [x1]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str x8, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: ldr x8, [x2]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8040]
+; CHECK-NO-SPLIT-NEXT: str x8, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: use_of_load_in_between:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: ldr x8, [x1]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-SPLIT-NEXT: str x8, [x0, #8008]
+; CHECK-SPLIT-NEXT: ldr x8, [x2]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8032]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8040]
+; CHECK-SPLIT-NEXT: str x8, [x0, #8024]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 1000
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 1001
+  %ld = load i64, ptr %ldptr
+  store i64 %ld, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 1002
+  store i64 0, ptr %addr3
+  %addr4 = getelementptr i64, ptr %p, i64 1003
+  %ld2 = load i64, ptr %ldptr2
+  store i64 %ld2, ptr %addr4
+  %addr5 = getelementptr i64, ptr %p, i64 1004
+  store i64 0, ptr %addr5
+  %addr6 = getelementptr i64, ptr %p, i64 1005
+  store i64 0, ptr %addr6
+  ret void
+}
+
+define void @offset_legal_for_add_imm(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: offset_legal_for_add_imm:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3200]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3208]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3216]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: offset_legal_for_add_imm:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: add x8, x0, #3200
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: str xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 400
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 401
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 402
+  store i64 0, ptr %addr3
+  ret void
+}
+
+define void @offset_illegal_for_add_imm(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: offset_illegal_for_add_imm:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: offset_illegal_for_add_imm:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 1000
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 1001
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 1002
+  store i64 0, ptr %addr3
+  ret void
+}
+
+define void @offset_legal_for_add_imm_4_stores(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: offset_legal_for_add_imm_4_stores:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3200]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3208]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3216]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #3224]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: offset_legal_for_add_imm_4_stores:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: add x8, x0, #3200
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 400
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 401
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 402
+  store i64 0, ptr %addr3
+  %addr4 = getelementptr i64, ptr %p, i64 403
+  store i64 0, ptr %addr4
+  ret void
+}
+
+define void @offset_illegal_for_add_imm_4_stores(ptr %p) {
+; CHECK-NO-SPLIT-LABEL: offset_illegal_for_add_imm_4_stores:
+; CHECK-NO-SPLIT: ; %bb.0:
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8000]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8008]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8016]
+; CHECK-NO-SPLIT-NEXT: str xzr, [x0, #8024]
+; CHECK-NO-SPLIT-NEXT: ret
+;
+; CHECK-SPLIT-LABEL: offset_illegal_for_add_imm_4_stores:
+; CHECK-SPLIT: ; %bb.0:
+; CHECK-SPLIT-NEXT: mov w8, #8000 ; =0x1f40
+; CHECK-SPLIT-NEXT: add x8, x0, x8
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8]
+; CHECK-SPLIT-NEXT: stp xzr, xzr, [x8, #16]
+; CHECK-SPLIT-NEXT: ret
+  %bigoffset = getelementptr i64, ptr %p, i64 1000
+  store i64 0, ptr %bigoffset
+  %addr2 = getelementptr i64, ptr %p, i64 1001
+  store i64 0, ptr %addr2
+  %addr3 = getelementptr i64, ptr %p, i64 1002
+  store i64 0, ptr %addr3
+  %addr4 = getelementptr i64, ptr %p, i64 1003
+  store i64 0, ptr %addr4
+  ret void
+}