From f5e855534faafb6ca48a17576ac54d3b3603af6f Mon Sep 17 00:00:00 2001
From: Graham Hunter
Date: Tue, 18 Jun 2024 09:44:06 +0000
Subject: [PATCH] Add isFixed, getZero, getScalableFixedMin/Max convenience
 methods

---
 llvm/include/llvm/Support/TypeSize.h          |   3 +
 .../Transforms/Scalar/LoopStrengthReduce.cpp  | 259 +++++++++---------
 2 files changed, 140 insertions(+), 122 deletions(-)

diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h
index c6779e258be7cb..bae833ecca7d49 100644
--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@@ -170,6 +170,9 @@ template <typename LeafTy, typename ValueTy> class FixedOrScalableQuantity {
   /// Returns whether the quantity is scaled by a runtime quantity (vscale).
   constexpr bool isScalable() const { return Scalable; }
 
+  /// Returns true if the quantity is not scaled by vscale.
+  constexpr bool isFixed() const { return !Scalable; }
+
   /// A return value of true indicates we know at compile time that the number
   /// of elements (vscale * Min) is definitely even. However, returning false
   /// does not guarantee that the total number of elements is odd.
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 6fd1685a8fa76e..810c6b68032fae 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -265,16 +265,29 @@ class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
       : FixedOrScalableQuantity(V) {}
 
 public:
-  constexpr Immediate() : FixedOrScalableQuantity() {}
+  constexpr Immediate() = delete;
 
   static constexpr Immediate getFixed(ScalarTy MinVal) {
-    return Immediate(MinVal, false);
+    return {MinVal, false};
   }
   static constexpr Immediate getScalable(ScalarTy MinVal) {
-    return Immediate(MinVal, true);
+    return {MinVal, true};
   }
   static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
-    return Immediate(MinVal, Scalable);
+    return {MinVal, Scalable};
+  }
+  static constexpr Immediate getZero() { return {0, false}; }
+  static constexpr Immediate getFixedMin() {
+    return {std::numeric_limits<ScalarTy>::min(), false};
+  }
+  static constexpr Immediate getFixedMax() {
+    return {std::numeric_limits<ScalarTy>::max(), false};
+  }
+  static constexpr Immediate getScalableMin() {
+    return {std::numeric_limits<ScalarTy>::min(), true};
+  }
+  static constexpr Immediate getScalableMax() {
+    return {std::numeric_limits<ScalarTy>::max(), true};
   }
 
   constexpr bool isLessThanZero() const { return Quantity < 0; }
@@ -292,6 +305,47 @@ class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
   constexpr bool isMax() const {
     return Quantity == std::numeric_limits<ScalarTy>::max();
   }
+
+  // Arithmetic 'operators' that cast to unsigned types first.
+  constexpr Immediate addUnsigned(const Immediate &RHS) const {
+    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
+    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
+    return {Value, Scalable || RHS.isScalable()};
+  }
+
+  constexpr Immediate subUnsigned(const Immediate &RHS) const {
+    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
+    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
+    return {Value, Scalable || RHS.isScalable()};
+  }
+
+  // Scale the quantity by a constant without caring about runtime scalability.
+  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
+    ScalarTy Value = (uint64_t)Quantity * RHS;
+    return {Value, Scalable};
+  }
+
+  // Helpers for generating SCEVs with vscale terms where needed.
+  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {
+    const SCEV *S = SE.getConstant(Ty, Quantity);
+    if (Scalable)
+      S = SE.getMulExpr(S, SE.getVScale(S->getType()));
+    return S;
+  }
+
+  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
+    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);
+    if (Scalable)
+      NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));
+    return NegS;
+  }
+
+  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
+    const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));
+    if (Scalable)
+      SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
+    return SU;
+  }
 };
 
 // This is needed for the Compare type of std::map when Immediate is used
@@ -431,7 +485,7 @@ struct Formula {
   GlobalValue *BaseGV = nullptr;
 
   /// Base offset for complex addressing.
-  Immediate BaseOffset;
+  Immediate BaseOffset = Immediate::getZero();
 
   /// Whether any complex addressing has a base register.
   bool HasBaseReg = false;
@@ -462,7 +516,7 @@ struct Formula {
   /// An additional constant offset which added near the use. This requires a
   /// temporary register, but the offset itself can live in an add immediate
   /// field rather than a register.
-  Immediate UnfoldedOffset;
+  Immediate UnfoldedOffset = Immediate::getZero();
 
   Formula() = default;
@@ -899,7 +953,7 @@ static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
           S = SE.getConstant(M->getType(), 0);
           return Immediate::getScalable(C->getValue()->getSExtValue());
         }
-  return Immediate();
+  return Immediate::getZero();
 }
 
 /// If S involves the addition of a GlobalValue address, return that symbol, and
@@ -1214,7 +1268,7 @@ struct LSRFixup {
   /// A constant offset to be added to the LSRUse expression. This allows
   /// multiple fixups to share the same LSRUse with different offsets, for
   /// example in an unrolled loop.
-  Immediate Offset;
+  Immediate Offset = Immediate::getZero();
 
   LSRFixup() = default;
@@ -1277,10 +1331,8 @@ class LSRUse {
   SmallVector<LSRFixup, 8> Fixups;
 
   /// Keep track of the min and max offsets of the fixups.
-  Immediate MinOffset =
-      Immediate::getFixed(std::numeric_limits<int64_t>::max());
-  Immediate MaxOffset =
-      Immediate::getFixed(std::numeric_limits<int64_t>::min());
+  Immediate MinOffset = Immediate::getFixedMax();
+  Immediate MaxOffset = Immediate::getFixedMin();
 
   /// This records whether all of the fixups using this LSRUse are outside of
   /// the loop, in which case some special-case heuristics may be used.
@@ -1338,8 +1390,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
                                  GlobalValue *BaseGV, Immediate BaseOffset,
                                  bool HasBaseReg, int64_t Scale,
-                                 Instruction *Fixup = nullptr,
-                                 int64_t ScalableOffset = 0);
+                                 Instruction *Fixup = nullptr);
 
 static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
   if (isa<SCEVConstant>(Reg) || isa<SCEVUnknown>(Reg))
@@ -1391,7 +1442,7 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
 
       // If the step size matches the base offset, we could use pre-indexed
       // addressing.
-      if (AMK == TTI::AMK_PreIndexed && !F.BaseOffset.isScalable()) {
+      if (AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed()) {
        if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
          if (Step->getAPInt() == F.BaseOffset.getFixedValue())
            LoopCost = 0;
@@ -1491,25 +1542,25 @@ void Cost::RateFormula(const Formula &F,
 
   // Tally up the non-zero immediates.
   for (const LSRFixup &Fixup : LU.Fixups) {
-    // FIXME: We probably want to noticeably increase the cost if the
-    // two offsets differ in scalability?
-    bool Scalable = Fixup.Offset.isScalable() || F.BaseOffset.isScalable();
-    int64_t O = Fixup.Offset.getKnownMinValue();
-    Immediate Offset = Immediate::get(
-        (uint64_t)(O) + F.BaseOffset.getKnownMinValue(), Scalable);
-    if (F.BaseGV)
-      C.ImmCost += 64; // Handle symbolic values conservatively.
-                       // TODO: This should probably be the pointer size.
-    else if (Offset.isNonZero())
-      C.ImmCost +=
-          APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
-
-    // Check with target if this offset with this instruction is
-    // specifically not supported.
-    if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
-        !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
-                              Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
-      C.NumBaseAdds++;
+    if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
+      Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
+      if (F.BaseGV)
+        C.ImmCost += 64; // Handle symbolic values conservatively.
+                         // TODO: This should probably be the pointer size.
+      else if (Offset.isNonZero())
+        C.ImmCost +=
+            APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
+
+      // Check with target if this offset with this instruction is
+      // specifically not supported.
+      if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
+          !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+                                Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
+        C.NumBaseAdds++;
+    } else {
+      // Incompatible immediate type, increase cost to avoid using
+      C.ImmCost += 2048;
+    }
   }
 
   // If we don't count instruction cost exit here.
@@ -1763,8 +1814,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
                                  GlobalValue *BaseGV, Immediate BaseOffset,
                                  bool HasBaseReg, int64_t Scale,
-                                 Instruction *Fixup /* = nullptr */,
-                                 int64_t ScalableOffset) {
+                                 Instruction *Fixup /* = nullptr */) {
   switch (Kind) {
   case LSRUse::Address: {
     int64_t FixedOffset =
@@ -1778,7 +1828,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
   case LSRUse::ICmpZero:
     // There's not even a target hook for querying whether it would be legal to
     // fold a GV into an ICmp.
-    if (BaseGV || ScalableOffset != 0)
+    if (BaseGV)
      return false;
 
     // ICmp only has two operands; don't allow more than two non-trivial parts.
@@ -1961,7 +2011,7 @@ static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
 static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                              LSRUse::KindType Kind, MemAccessTy AccessTy,
                              GlobalValue *BaseGV, Immediate BaseOffset,
-                             bool HasBaseReg, int64_t ScalableOffset = 0) {
+                             bool HasBaseReg) {
   // Fast-path: zero is always foldable.
   if (BaseOffset.isZero() && !BaseGV)
     return true;
 
@@ -1987,7 +2037,7 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
     Scale = 0;
 
   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
-                              HasBaseReg, Scale, nullptr, ScalableOffset);
+                              HasBaseReg, Scale);
 }
 
 static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
@@ -3304,14 +3354,13 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
 static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
                              Value *Operand, const TargetTransformInfo &TTI) {
   const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
-  int64_t IncOffset = 0;
-  int64_t ScalableOffset = 0;
+  Immediate IncOffset = Immediate::getZero();
   if (IncConst) {
     if (IncConst && IncConst->getAPInt().getSignificantBits() > 64)
       return false;
-    IncOffset = IncConst->getValue()->getSExtValue();
+    IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());
   } else {
-    // Look for mul(vscale, constant), to detect ScalableOffset.
+    // Look for mul(vscale, constant), to detect a scalable offset.
     auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
     if (!IncVScale || IncVScale->getNumOperands() != 2 ||
         !isa<SCEVVScale>(IncVScale->getOperand(1)))
       return false;
     auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
     if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
       return false;
-    ScalableOffset = Scale->getValue()->getSExtValue();
+    IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
   }
 
   if (!isAddressUse(TTI, UserInst, Operand))
     return false;
 
   MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
-                        Immediate::getFixed(IncOffset), /*HasBaseReg=*/false,
-                        ScalableOffset))
+                        IncOffset, /*HasBaseReg=*/false))
     return false;
 
   return true;
@@ -3911,6 +3959,9 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
       continue;
 
     Formula F = Base;
+    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
+      continue;
+
     // Add the remaining pieces of the add back into the new formula.
     const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
     if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
@@ -4026,7 +4077,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
 
   // If we have an unfolded offset, generate a formula combining it with the
   // registers collected.
-  if (NewBase.UnfoldedOffset) {
+  if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
     assert(CombinedIntegerType && "Missing a type for the unfolded offset");
     Ops.push_back(SE.getConstant(CombinedIntegerType,
                                  NewBase.UnfoldedOffset.getFixedValue(), true));
@@ -4074,21 +4125,13 @@ void LSRInstance::GenerateConstantOffsetsImpl(
 
   auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
     Formula F = Base;
-    if (Base.BaseOffset.isScalable() != Offset.isScalable() &&
-        Base.BaseOffset.isNonZero() && Offset.isNonZero())
-      return;
-    bool Scalable = Base.BaseOffset.isScalable() || Offset.isScalable();
-    F.BaseOffset = Immediate::get((uint64_t)Base.BaseOffset.getKnownMinValue() -
-                                      Offset.getKnownMinValue(),
-                                  Scalable);
+    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
+      return;
+    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);
 
     if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
       // Add the offset to the base register.
-      const SCEV *NewOffset =
-          SE.getConstant(G->getType(), Offset.getKnownMinValue());
-      if (Scalable)
-        NewOffset =
-            SE.getMulExpr(NewOffset, SE.getVScale(NewOffset->getType()));
+      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
       const SCEV *NewG = SE.getAddExpr(NewOffset, G);
       // If it cancelled out, drop the base register, otherwise update it.
       if (NewG->isZero()) {
@@ -4126,7 +4169,7 @@ void LSRInstance::GenerateConstantOffsetsImpl(
                      StepInt.getSExtValue() : StepInt.getZExtValue();
 
   for (Immediate Offset : Worklist) {
-    if (!Offset.isScalable()) {
+    if (Offset.isFixed()) {
       Offset = Immediate::getFixed(Offset.getFixedValue() - Step);
       GenerateOffset(G, Offset);
     }
@@ -4139,12 +4182,10 @@ void LSRInstance::GenerateConstantOffsetsImpl(
 
   Immediate Imm = ExtractImmediate(G, SE);
   if (G->isZero() || Imm.isZero() ||
-      Base.BaseOffset.isScalable() != Imm.isScalable())
+      !Base.BaseOffset.isCompatibleImmediate(Imm))
     return;
   Formula F = Base;
-  F.BaseOffset = Immediate::get((uint64_t)F.BaseOffset.getKnownMinValue() +
-                                    Imm.getKnownMinValue(),
-                                Imm.isScalable());
+  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
     return;
   if (IsScaledReg) {
@@ -4205,8 +4246,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
     // Check that the multiplication doesn't overflow.
     if (Base.BaseOffset.isMin() && Factor == -1)
      continue;
-    Immediate NewBaseOffset =
-        Immediate::getFixed((uint64_t)Base.BaseOffset.getFixedValue() * Factor);
+    // Not supporting scalable immediates.
+    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
+      continue;
+    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
     assert(Factor != 0 && "Zero factor not expected!");
     if (NewBaseOffset.getFixedValue() / Factor !=
         Base.BaseOffset.getFixedValue())
       continue;
@@ -4220,7 +4263,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
     Immediate Offset = LU.MinOffset;
     if (Offset.isMin() && Factor == -1)
       continue;
-    Offset = Immediate::getFixed((uint64_t)Offset.getFixedValue() * Factor);
+    Offset = Offset.mulUnsigned(Factor);
     if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
       continue;
     // If the offset will be truncated at this use, check that it is in bounds.
@@ -4236,9 +4279,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
       continue;
 
     // Compensate for the use having MinOffset built into it.
-    F.BaseOffset = Immediate::getFixed((uint64_t)F.BaseOffset.getFixedValue() +
-                                       Offset.getFixedValue() -
-                                       LU.MinOffset.getFixedValue());
+    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);
 
     const SCEV *FactorS = SE.getConstant(IntTy, Factor);
 
@@ -4260,8 +4301,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
     if (F.UnfoldedOffset.isNonZero()) {
       if (F.UnfoldedOffset.isMin() && Factor == -1)
         continue;
-      F.UnfoldedOffset = Immediate::getFixed(
-          (uint64_t)F.UnfoldedOffset.getFixedValue() * Factor);
+      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
       if (F.UnfoldedOffset.getFixedValue() / Factor !=
           Base.UnfoldedOffset.getFixedValue())
         continue;
@@ -4517,8 +4557,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
       // other orig regs.
       Immediate First = Imms.begin()->first;
       Immediate Last = std::prev(Imms.end())->first;
-      if (First.isScalable() != Last.isScalable() && First.isNonZero() &&
-          Last.isNonZero()) {
+      if (!First.isCompatibleImmediate(Last)) {
         LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
                           << "\n");
         continue;
@@ -4539,15 +4578,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
                                Imms.lower_bound(Immediate::get(Avg, Scalable))};
       for (const auto &M : OtherImms) {
         if (M == J || M == JE) continue;
-        if (JImm.isScalable() != M->first.isScalable() && JImm.isNonZero() &&
-            M->first.isNonZero())
+        if (!JImm.isCompatibleImmediate(M->first))
           continue;
 
         // Compute the difference between the two.
-        bool Scalable = JImm.isScalable() || M->first.isScalable();
-        Immediate Imm = Immediate::get((uint64_t)JImm.getKnownMinValue() -
-                                           M->first.getKnownMinValue(),
-                                       Scalable);
+        Immediate Imm = JImm.subUnsigned(M->first);
         for (unsigned LUIdx : UsedByIndices.set_bits())
           // Make a memo of this use, offset, and register tuple.
           if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
@@ -4569,10 +4604,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
    const SCEV *OrigReg = WI.OrigReg;
 
    Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
-    const SCEV *NegImmS =
-        SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm.getKnownMinValue()));
-    if (Imm.isScalable())
-      NegImmS = SE.getMulExpr(NegImmS, SE.getVScale(NegImmS->getType()));
+    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
 
    // TODO: Use a more targeted data structure.
@@ -4585,19 +4617,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
      F.unscale();
      // Use the immediate in the scaled register.
      if (F.ScaledReg == OrigReg) {
-        if (F.BaseOffset.isScalable() != Imm.isScalable() &&
-            F.BaseOffset.isNonZero() && Imm.isNonZero())
-          continue;
-        bool Scalable = F.BaseOffset.isScalable() || Imm.isScalable();
-        Immediate Offset =
-            Immediate::get((uint64_t)F.BaseOffset.getKnownMinValue() +
-                               Imm.getKnownMinValue() * (uint64_t)F.Scale,
-                           Scalable);
+        if (!F.BaseOffset.isCompatibleImmediate(Imm))
+          continue;
+        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        // Don't create 50 + reg(-50).
-        const SCEV *S = SE.getSCEV(
-            ConstantInt::get(IntTy, -(uint64_t)Offset.getKnownMinValue()));
-        if (Scalable)
-          S = SE.getMulExpr(S, SE.getVScale(S->getType()));
+        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
          continue;
        Formula NewF = F;
@@ -4610,12 +4634,18 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
      // If the new scale is a constant in a register, and adding the constant
      // value to the immediate would produce a value closer to zero than the
      // immediate itself, then the formula isn't worthwhile.
-      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
+      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
+        // FIXME: Do we need to do something for scalable immediates here?
+        //        A scalable SCEV won't be constant, but we might still have
+        //        something in the offset? Bail out for now to be safe.
+        if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
+          continue;
        if (C->getValue()->isNegative() !=
                (NewF.BaseOffset.isLessThanZero()) &&
            (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
                .ule(std::abs(NewF.BaseOffset.getFixedValue())))
          continue;
+      }
 
      // OK, looks good.
      NewF.canonicalize(*this->L);
@@ -4631,21 +4661,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
            !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
            !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
          continue;
-        bool Scalable = NewF.BaseOffset.isScalable() || Imm.isScalable() ||
-                        NewF.UnfoldedOffset.isScalable();
-        NewF.BaseOffset =
-            Immediate::get((uint64_t)NewF.BaseOffset.getKnownMinValue() +
-                               Imm.getKnownMinValue(),
-                           Scalable);
+        NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        NewF)) {
          if (AMK == TTI::AMK_PostIndexed &&
              mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
            continue;
-          Immediate NewUnfoldedOffset = Immediate::get(
-              (uint64_t)NewF.UnfoldedOffset.getKnownMinValue() +
-                  Imm.getKnownMinValue(),
-              Scalable);
+          Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
          if (!isLegalAddImmediate(TTI, NewUnfoldedOffset))
            continue;
          NewF = F;
@@ -4657,7 +4679,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
        // If the new formula has a constant in a register, and adding the
        // constant value to the immediate would produce a value closer to
        // zero than the immediate itself, then the formula isn't worthwhile.
        for (const SCEV *NewReg : NewF.BaseRegs)
-          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
+          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
+            if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
+              goto skip_formula;
            if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                    .abs()
                    .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                        .countr_zero() >=
                    (unsigned)llvm::countr_zero(
                        NewF.BaseOffset.getFixedValue()))
              goto skip_formula;
+          }
 
        // Ok, looks good.
        NewF.canonicalize(*this->L);
@@ -4849,6 +4874,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
+        if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
+          continue;
        // Look for a formula with a constant or GV in a register. If the use
        // also has a formula with that same value in an immediate field,
        // delete the one that uses a register.
@@ -5754,14 +5781,10 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
  // FIXME: Are we sure we won't get a mismatch here? Is there a way to bail
  // out at this point, or should we generate a SCEV adding together mixed
  // offsets?
-  assert((F.BaseOffset.isScalable() == LF.Offset.isScalable() ||
-          F.BaseOffset.isZero() || LF.Offset.isZero()) &&
+  assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
         "Expanding mismatched offsets\n");
-  bool Scalable = F.BaseOffset.isScalable() || LF.Offset.isScalable();
  // Expand the immediate portion.
-  Immediate Offset = Immediate::get((uint64_t)F.BaseOffset.getKnownMinValue() +
-                                        LF.Offset.getKnownMinValue(),
-                                    Scalable);
+  Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
  if (Offset.isNonZero()) {
    if (LU.Kind == LSRUse::ICmpZero) {
      // The other interesting way of "folding" with an ICmpZero is to use a
@@ -5776,23 +5799,15 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
    } else {
      // Just add the immediate values. These again are expected to be matched
      // as part of the address.
-      const SCEV *SU = SE.getUnknown(
-          ConstantInt::getSigned(IntTy, Offset.getKnownMinValue()));
-      if (Scalable)
-        SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
-      Ops.push_back(SU);
+      Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
    }
  }
 
  // Expand the unfolded offset portion.
Immediate UnfoldedOffset = F.UnfoldedOffset; if (UnfoldedOffset.isNonZero()) { - const SCEV *SU = SE.getUnknown( - ConstantInt::getSigned(IntTy, UnfoldedOffset.getKnownMinValue())); - if (UnfoldedOffset.isScalable()) - SU = SE.getMulExpr(SU, SE.getVScale(SU->getType())); // Just add the immediate values. - Ops.push_back(SU); + Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy)); } // Emit instructions summing all the operands.
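
Note (not part of the patch): a minimal sketch of how the new isFixed() helper
reads at a call site. ElementCount is one of the existing
FixedOrScalableQuantity-derived types in llvm/Support/TypeSize.h; the function
name `example` below is made up purely for illustration.

    #include "llvm/Support/TypeSize.h"
    #include <cassert>

    using namespace llvm;

    static void example() {
      ElementCount Fixed = ElementCount::getFixed(4);        // exactly 4 elements
      ElementCount Scalable = ElementCount::getScalable(4);  // vscale x 4 elements

      // isFixed() is just the negation of isScalable(), so call sites can ask
      // the positive question directly instead of writing !X.isScalable().
      assert(Fixed.isFixed() && !Fixed.isScalable());
      assert(Scalable.isScalable() && !Scalable.isFixed());
    }

The LSR-local Immediate class follows the same pattern: getZero() and
getFixedMin()/getFixedMax()/getScalableMin()/getScalableMax() replace
open-coded Immediate::getFixed(std::numeric_limits<int64_t>::max())-style
initializers, while addUnsigned()/subUnsigned()/mulUnsigned() wrap the
cast-to-unsigned arithmetic that was previously spelled out at each call site.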