-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[GlobalIsel] Combine ADDO #82927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GlobalIsel] Combine ADDO #82927
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1090,12 +1090,6 @@ def mulo_by_0: GICombineRule< | |
| [{ return Helper.matchMulOBy0(*${root}, ${matchinfo}); }]), | ||
| (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; | ||
|
|
||
| def addo_by_0: GICombineRule< | ||
| (defs root:$root, build_fn_matchinfo:$matchinfo), | ||
| (match (wip_match_opcode G_UADDO, G_SADDO):$root, | ||
| [{ return Helper.matchAddOBy0(*${root}, ${matchinfo}); }]), | ||
| (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; | ||
|
|
||
| // Transform (uadde x, y, 0) -> (uaddo x, y) | ||
| // (sadde x, y, 0) -> (saddo x, y) | ||
| // (usube x, y, 0) -> (usubo x, y) | ||
|
|
@@ -1291,6 +1285,12 @@ def match_ors : GICombineRule< | |
| [{ return Helper.matchOr(*${root}, ${matchinfo}); }]), | ||
| (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; | ||
|
|
||
| def match_addos : GICombineRule< | ||
| (defs root:$root, build_fn_matchinfo:$matchinfo), | ||
| (match (wip_match_opcode G_SADDO, G_UADDO):$root, | ||
| [{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]), | ||
| (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know about the current direction, but if we're going to tablegen as much as possible, then we would need separate rules for each of the transformations (constant folding / swapping constant to RHS / replacing with G_ADD etc.). |
||
| // Combines concat operations | ||
| def concat_matchinfo : GIDefMatchData<"SmallVector<Register>">; | ||
| def combine_concat_vector : GICombineRule< | ||
|
|
@@ -1326,7 +1326,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero, | |
|
|
||
| def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p, | ||
| overlapping_and, mulo_by_2, mulo_by_0, | ||
| addo_by_0, adde_to_addo, | ||
| adde_to_addo, | ||
| combine_minmax_nan]>; | ||
|
|
||
| def known_bits_simplifications : GICombineGroup<[ | ||
|
|
@@ -1374,7 +1374,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, | |
| and_or_disjoint_mask, fma_combines, fold_binop_into_select, | ||
| sub_add_reg, select_to_minmax, redundant_binop_in_equality, | ||
| fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, | ||
| combine_concat_vector, double_icmp_zero_and_or_combine]>; | ||
| combine_concat_vector, double_icmp_zero_and_or_combine, match_addos]>; | ||
|
|
||
| // A combine group used to for prelegalizer combiners at -O0. The combines in | ||
| // this group have been selected based on experiments to balance code size and | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -4936,24 +4936,6 @@ bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) { | |||||||
| return true; | ||||||||
| } | ||||||||
|
|
||||||||
| bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) { | ||||||||
| // (G_*ADDO x, 0) -> x + no carry out | ||||||||
| assert(MI.getOpcode() == TargetOpcode::G_UADDO || | ||||||||
| MI.getOpcode() == TargetOpcode::G_SADDO); | ||||||||
| if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0))) | ||||||||
| return false; | ||||||||
| Register Carry = MI.getOperand(1).getReg(); | ||||||||
| if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry))) | ||||||||
| return false; | ||||||||
| Register Dst = MI.getOperand(0).getReg(); | ||||||||
| Register LHS = MI.getOperand(2).getReg(); | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildCopy(Dst, LHS); | ||||||||
| B.buildConstant(Carry, 0); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
|
|
||||||||
| bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) { | ||||||||
| // (G_*ADDE x, y, 0) -> (G_*ADDO x, y) | ||||||||
| // (G_*SUBE x, y, 0) -> (G_*SUBO x, y) | ||||||||
|
|
@@ -6354,6 +6336,26 @@ CombinerHelper::getConstantOrConstantSplatVector(Register Src) { | |||||||
| return Value; | ||||||||
| } | ||||||||
|
|
||||||||
| // FIXME G_SPLAT_VECTOR | ||||||||
| bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const { | ||||||||
| auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); | ||||||||
| if (IConstant) | ||||||||
| return true; | ||||||||
|
|
||||||||
| GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); | ||||||||
| if (!BuildVector) | ||||||||
| return false; | ||||||||
|
|
||||||||
| unsigned NumSources = BuildVector->getNumSources(); | ||||||||
| for (unsigned I = 0; I < NumSources; ++I) { | ||||||||
| std::optional<ValueAndVReg> IConstant = | ||||||||
| getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI); | ||||||||
| if (!IConstant) | ||||||||
| return false; | ||||||||
| } | ||||||||
| return true; | ||||||||
| } | ||||||||
|
|
||||||||
| // TODO: use knownbits to determine zeros | ||||||||
| bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, | ||||||||
| BuildFnTy &MatchInfo) { | ||||||||
|
|
@@ -6928,3 +6930,178 @@ bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) { | |||||||
|
|
||||||||
| return false; | ||||||||
| } | ||||||||
|
|
||||||||
| bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) { | ||||||||
| GAddCarryOut *Add = cast<GAddCarryOut>(&MI); | ||||||||
|
||||||||
|
|
||||||||
| // Addo has no flags | ||||||||
| Register Dst = Add->getReg(0); | ||||||||
| Register Carry = Add->getReg(1); | ||||||||
| Register LHS = Add->getLHSReg(); | ||||||||
| Register RHS = Add->getRHSReg(); | ||||||||
| bool IsSigned = Add->isSigned(); | ||||||||
| LLT DstTy = MRI.getType(Dst); | ||||||||
| LLT CarryTy = MRI.getType(Carry); | ||||||||
|
|
||||||||
| // We want do fold the [u|s]addo. | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo "want to", but the comment seems unrelated to the code. Why would multiple uses of the top level instruction prevent combining?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the result
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes we can and we should. The combine will still be correct and beneficial. You can add tests for the multiple-use case. As a general rule, if you are matching a pattern, you only need one-use checks for the inner nodes in the pattern, not the outer node. The reason for the checks is to ensure that when you remove the outer node, the inner nodes also get removed. |
||||||||
| if (!MRI.hasOneNonDBGUse(Dst)) | ||||||||
| return false; | ||||||||
|
|
||||||||
| // Fold addo, if the carry is dead -> add, undef. | ||||||||
| if (MRI.use_nodbg_empty(Carry) && | ||||||||
| isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildAdd(Dst, LHS, RHS); | ||||||||
| B.buildUndef(Carry); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
|
|
||||||||
| // We want do fold the [u|s]addo. | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Likewise. |
||||||||
| if (!MRI.hasOneNonDBGUse(Carry)) | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is not obvious why multiple uses prevent folding / make it unprofitable. Could you clarify the comment?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure will do. If we have more than one use, then we are generating new instructions and keep the old ones. |
||||||||
| return false; | ||||||||
|
|
||||||||
| // Canonicalize constant to RHS. | ||||||||
| if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) { | ||||||||
| if (IsSigned) { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildSAddo(Dst, Carry, RHS, LHS); | ||||||||
| }; | ||||||||
| return true; | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: personally I think early return hurts symmetry here. I would prefer if/else followed by a single "return".
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The anti-symmetry is a result of another else-after-return violation. |
||||||||
| } | ||||||||
| // !IsSigned | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildUAddo(Dst, Carry, RHS, LHS); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
|
|
||||||||
| std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS); | ||||||||
| std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS); | ||||||||
|
||||||||
|
|
||||||||
| // Fold addo(c1, c2) -> c3, carry. | ||||||||
| if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) && | ||||||||
| isConstantLegalOrBeforeLegalizer(CarryTy)) { | ||||||||
| bool Overflow; | ||||||||
| APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow) | ||||||||
| : MaybeLHS->uadd_ov(*MaybeRHS, Overflow); | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildConstant(Dst, Result); | ||||||||
| B.buildConstant(Carry, Overflow); | ||||||||
|
Comment on lines
+6989
to
+6990
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the vector-typed case don't you need to build new splat constants here? The patch is missing vector-typed tests for all these folds.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AArch64 does not support vectorized overflow ops.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
You should still add the tests. Your combine-overflow.mir test runs pre-legalization so any MIR should be allowed there.
Ah! I didn't know that, thanks. |
||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
|
|
||||||||
| // Fold (addo x, 0) -> x, no borrow | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: "carry" not "borrow" |
||||||||
| if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildCopy(Dst, LHS); | ||||||||
| B.buildConstant(Carry, 0); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
|
||||||||
|
|
||||||||
| // Given 2 constant operands whose sum does not overflow: | ||||||||
| // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1 | ||||||||
| // saddo (X +nsw C0), C1 -> saddo X, C0 + C1 | ||||||||
| GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI); | ||||||||
| if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) && | ||||||||
| ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) || | ||||||||
| (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) { | ||||||||
|
Comment on lines
+7009
to
+7010
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
| std::optional<APInt> MaybeAddRHS = | ||||||||
| getConstantOrConstantSplatVector(AddLHS->getRHSReg()); | ||||||||
| if (MaybeAddRHS) { | ||||||||
| bool Overflow; | ||||||||
| APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow) | ||||||||
| : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow); | ||||||||
| if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) { | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think you need to check |
||||||||
| if (IsSigned) { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| auto ConstRHS = B.buildConstant(DstTy, NewC); | ||||||||
| B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
| // !IsSigned | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| auto ConstRHS = B.buildConstant(DstTy, NewC); | ||||||||
| B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
| } | ||||||||
| }; | ||||||||
|
|
||||||||
| // We try to combine addo to non-overflowing add. | ||||||||
| if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) || | ||||||||
| !isConstantLegalOrBeforeLegalizer(CarryTy)) | ||||||||
| return false; | ||||||||
|
|
||||||||
| // We try to combine uaddo to non-overflowing add. | ||||||||
| if (!IsSigned) { | ||||||||
| ConstantRange CRLHS = | ||||||||
| ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false); | ||||||||
| ConstantRange CRRHS = | ||||||||
| ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false); | ||||||||
|
|
||||||||
| switch (CRLHS.unsignedAddMayOverflow(CRRHS)) { | ||||||||
| case ConstantRange::OverflowResult::MayOverflow: | ||||||||
| return false; | ||||||||
| case ConstantRange::OverflowResult::NeverOverflows: { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap); | ||||||||
| B.buildConstant(Carry, 0); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
| case ConstantRange::OverflowResult::AlwaysOverflowsLow: | ||||||||
| case ConstantRange::OverflowResult::AlwaysOverflowsHigh: { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildAdd(Dst, LHS, RHS); | ||||||||
| B.buildConstant(Carry, 1); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
| } | ||||||||
| return false; | ||||||||
| } | ||||||||
|
|
||||||||
| // We try to combine saddo to non-overflowing add. | ||||||||
|
|
||||||||
| // If LHS and RHS each have at least two sign bits, then there is no signed | ||||||||
| // overflow. | ||||||||
| if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap); | ||||||||
| B.buildConstant(Carry, 0); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
jayfoad marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
|
|
||||||||
| ConstantRange CRLHS = | ||||||||
| ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true); | ||||||||
| ConstantRange CRRHS = | ||||||||
| ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true); | ||||||||
|
|
||||||||
| switch (CRLHS.signedAddMayOverflow(CRRHS)) { | ||||||||
| case ConstantRange::OverflowResult::MayOverflow: | ||||||||
| return false; | ||||||||
| case ConstantRange::OverflowResult::NeverOverflows: { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap); | ||||||||
| B.buildConstant(Carry, 0); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
| case ConstantRange::OverflowResult::AlwaysOverflowsLow: | ||||||||
| case ConstantRange::OverflowResult::AlwaysOverflowsHigh: { | ||||||||
| MatchInfo = [=](MachineIRBuilder &B) { | ||||||||
| B.buildAdd(Dst, LHS, RHS); | ||||||||
| B.buildConstant(Carry, 1); | ||||||||
| }; | ||||||||
| return true; | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| return false; | ||||||||
| } | ||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is an odd interference with the more general
GAddSubCarryOut. Do you really need this class?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The common pattern is to assert that only the expected opcode is in
MI. I usecast<GAddCarryOut>. I don't want unnoticed sub to come in.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GAddSubCarryOuthas aisSub()method for that purpose.