@@ -2689,59 +2689,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
26892689 if (PtrVT == IntVT && isNullConstant(N0))
26902690 return N1;
26912691
2692- if (N0.getOpcode() != ISD::PTRADD ||
2693- reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2694- return SDValue();
2695-
2696- SDValue X = N0.getOperand(0);
2697- SDValue Y = N0.getOperand(1);
2698- SDValue Z = N1;
2699- bool N0OneUse = N0.hasOneUse();
2700- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2701- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2702-
2703- // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2704- // * y is a constant and (ptradd x, y) has one use; or
2705- // * y and z are both constants.
2706- if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2707- // If both additions in the original were NUW, the new ones are as well.
2708- SDNodeFlags Flags =
2709- (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2710- SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2711- AddToWorklist(Add.getNode());
2712- return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2692+ if (N0.getOpcode() == ISD::PTRADD &&
2693+ !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2694+ SDValue X = N0.getOperand(0);
2695+ SDValue Y = N0.getOperand(1);
2696+ SDValue Z = N1;
2697+ bool N0OneUse = N0.hasOneUse();
2698+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2699+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2700+
2701+ // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2702+ // * y is a constant and (ptradd x, y) has one use; or
2703+ // * y and z are both constants.
2704+ if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2705+ // If both additions in the original were NUW, the new ones are as well.
2706+ SDNodeFlags Flags =
2707+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2708+ SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2709+ AddToWorklist(Add.getNode());
2710+ return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2711+ }
2712+ }
2713+
2714+ // The following combines can turn in-bounds pointer arithmetic out of bounds.
2715+ // That is problematic for settings like AArch64's CPA, which checks that
2716+ // intermediate results of pointer arithmetic remain in bounds. The target
2717+ // therefore needs to opt-in to enable them.
2718+ if (!TLI.canTransformPtrArithOutOfBounds(
2719+ DAG.getMachineFunction().getFunction(), PtrVT))
2720+ return SDValue();
2721+
2722+ if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
2723+ // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2724+ // global address GA and constant c, such that c can be folded into GA.
2725+ SDValue GAValue = N0.getOperand(0);
2726+ if (const GlobalAddressSDNode *GA =
2727+ dyn_cast<GlobalAddressSDNode>(GAValue)) {
2728+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2729+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2730+ // If both additions in the original were NUW, reassociation preserves
2731+ // that.
2732+ SDNodeFlags Flags =
2733+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2734+ SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2735+ AddToWorklist(Inner.getNode());
2736+ return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2737+ }
2738+ }
27132739 }
27142740
2715- // TODO: There is another possible fold here that was proven useful.
2716- // It would be this:
2717- //
2718- // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2719- // * (ptradd x, y) has one use; and
2720- // * y is a constant; and
2721- // * z is not a constant.
2722- //
2723- // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2724- // opportunity to select more complex instructions such as SUBPT and
2725- // MSUBPT. However, a hypothetical corner case has been found that we could
2726- // not avoid. Consider this (pseudo-POSIX C):
2727- //
2728- // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2729- // char *p = mmap(LARGE_CONSTANT);
2730- // char *q = foo(p, -LARGE_CONSTANT);
2731- //
2732- // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2733- // further + z takes it back to the start of the mapping, so valid,
2734- // regardless of the address mmap gave back. However, if mmap gives you an
2735- // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2736- // borrow from the high bits (with the subsequent + z carrying back into
2737- // the high bits to give you a well-defined pointer) and thus trip
2738- // FEAT_CPA's pointer corruption checks.
2739- //
2740- // We leave this fold as an opportunity for future work, addressing the
2741- // corner case for FEAT_CPA, as well as reconciling the solution with the
2742- // more general application of pointer arithmetic in other future targets.
2743- // For now each architecture that wants this fold must implement it in the
2744- // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2741+ if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2742+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2743+ // y is not, and (add y, z) is used only once.
2744+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2745+ // z is not, and (add y, z) is used only once.
2746+ // The goal is to move constant offsets to the outermost ptradd, to create
2747+ // more opportunities to fold offsets into memory instructions.
2748+ // Together with the another combine above, this also implements
2749+ // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2750+ SDValue X = N0;
2751+ SDValue Y = N1.getOperand(0);
2752+ SDValue Z = N1.getOperand(1);
2753+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2754+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2755+
2756+ // If both additions in the original were NUW, reassociation preserves that.
2757+ SDNodeFlags ReassocFlags =
2758+ (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2759+
2760+ if (ZIsConstant != YIsConstant) {
2761+ if (YIsConstant)
2762+ std::swap(Y, Z);
2763+ SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2764+ AddToWorklist(Inner.getNode());
2765+ return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2766+ }
2767+ }
27452768
27462769 return SDValue();
27472770}
0 commit comments