From bfa0ac0f372ef83ecbaff756db2d41bb14cfb03a Mon Sep 17 00:00:00 2001
From: Will Smith
Date: Sat, 11 Jun 2022 12:57:04 -0700
Subject: [PATCH] ARM64 - Always morph GT_MOD (#68885)

---
 src/coreclr/jit/lower.cpp                     |  3 +-
 src/coreclr/jit/lower.h                       |  2 +-
 src/coreclr/jit/lowerarmarch.cpp              | 30 +++------
 src/coreclr/jit/morph.cpp                     |  5 +-
 src/coreclr/jit/rationalize.cpp               | 63 +++++++++++++++++++
 src/coreclr/jit/rationalize.h                 |  4 ++
 .../JitBlue/Runtime_68136/Runtime_68136.cs    | 37 +++++++++++
 .../Runtime_68136/Runtime_68136.csproj        | 11 ++++
 8 files changed, 128 insertions(+), 27 deletions(-)
 create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_68136/Runtime_68136.cs
 create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_68136/Runtime_68136.csproj

diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 98bd11f0fd2d3..49dde8acd1fd4 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -5838,7 +5838,8 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
 #if defined(TARGET_ARM64)
     if (divMod->OperIs(GT_MOD) && divisor->IsIntegralConstPow2())
     {
-        return LowerModPow2(node);
+        LowerModPow2(node);
+        return node->gtNext;
     }
     assert(node->OperGet() != GT_MOD);
 #endif // TARGET_ARM64
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index 7e6acf6f03009..d54cc70347971 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -355,7 +355,7 @@ class Lowering final : public Phase
 #elif defined(TARGET_ARM64)
     bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
     void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
-    GenTree* LowerModPow2(GenTree* node);
+    void LowerModPow2(GenTree* node);
     GenTree* LowerAddForPossibleContainment(GenTreeOp* node);
 #endif // !TARGET_XARCH && !TARGET_ARM64
 #endif // FEATURE_HW_INTRINSICS
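The signature change above makes LowerModPow2 rewrite the GT_MOD node in place, so LowerConstIntDivOrMod no longer needs a returned replacement and simply resumes at node->gtNext. The lowering itself rests on the signed-mod identity that ARM64's conditional negate makes cheap: for a power of two c, a % c == (a < 0) ? -((-a) & (c - 1)) : (a & (c - 1)), which is what the GT_CSNEG_MI node in the next file encodes. A minimal standalone check of that identity, plain C++ rather than JIT code, with ModPow2 as an illustrative name:

// Standalone check of the identity LowerModPow2 relies on (illustrative
// only; ModPow2 is a hypothetical name, not the JIT helper itself).
#include <cassert>
#include <cstdint>
#include <cstdio>

static int64_t ModPow2(int64_t a, int64_t c) // c must be a positive power of two
{
    const uint64_t mask = static_cast<uint64_t>(c) - 1;
    // Negate on the unsigned bit pattern so even INT64_MIN stays defined.
    const uint64_t ua     = static_cast<uint64_t>(a);
    const int64_t  andPos = static_cast<int64_t>(ua & mask);       // a & (c-1)
    const int64_t  andNeg = static_cast<int64_t>((0 - ua) & mask); // (-a) & (c-1)
    return (a < 0) ? -andNeg : andPos; // the CSNEG-style select on the sign of a
}

int main()
{
    for (int64_t a = -1024; a <= 1024; a++)
        for (int64_t c : {1, 2, 4, 8, 256})
            assert(ModPow2(a, c) == a % c);
    puts("identity holds");
    return 0;
}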
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 5ee0c27767ee1..5b50f178e7c6c 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -677,24 +677,18 @@ void Lowering::LowerRotate(GenTree* tree)
 // TODO: We could do this optimization in morph but we do not have
 //       a conditional select op in HIR. At some point, we may
 //       introduce such an op.
-GenTree* Lowering::LowerModPow2(GenTree* node)
+void Lowering::LowerModPow2(GenTree* node)
 {
     assert(node->OperIs(GT_MOD));
-    GenTree* mod      = node;
-    GenTree* dividend = mod->gtGetOp1();
-    GenTree* divisor  = mod->gtGetOp2();
+    GenTreeOp* mod      = node->AsOp();
+    GenTree*   dividend = mod->gtGetOp1();
+    GenTree*   divisor  = mod->gtGetOp2();
 
     assert(divisor->IsIntegralConstPow2());
 
     const var_types type = mod->TypeGet();
     assert((type == TYP_INT) || (type == TYP_LONG));
 
-    LIR::Use use;
-    if (!BlockRange().TryGetUse(node, &use))
-    {
-        return nullptr;
-    }
-
     ssize_t cnsValue = static_cast<ssize_t>(divisor->AsIntConCommon()->IntegralValue()) - 1;
 
     BlockRange().Remove(divisor);
@@ -725,21 +719,15 @@ GenTree* Lowering::LowerModPow2(GenTree* node)
     BlockRange().InsertAfter(cns2, falseExpr);
     LowerNode(falseExpr);
 
-    GenTree* const cc = comp->gtNewOperNode(GT_CSNEG_MI, type, trueExpr, falseExpr);
-    cc->gtFlags |= GTF_USE_FLAGS;
+    mod->ChangeOper(GT_CSNEG_MI);
+    mod->gtOp1 = trueExpr;
+    mod->gtOp2 = falseExpr;
+    mod->gtFlags |= GTF_USE_FLAGS;
 
     JITDUMP("Lower: optimize X MOD POW2");
     DISPNODE(mod);
-    JITDUMP("to:\n");
-    DISPNODE(cc);
-
-    BlockRange().InsertBefore(mod, cc);
-    ContainCheckNode(cc);
-    BlockRange().Remove(mod);
-
-    use.ReplaceWith(cc);
-    return cc->gtNext;
+    ContainCheckNode(mod);
 }
 
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 29f10051e79f0..29ea92a874092 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -10544,10 +10544,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
 #ifdef TARGET_ARM64
             // ARM64 architecture manual suggests this transformation
             // for the mod operator.
-            // However, we do skip this optimization for ARM64 if the second operand
-            // is an integral constant power of 2 because there is an even better
-            // optimization in lowering that is specific for ARM64.
-            else if (!(tree->OperIs(GT_MOD) && op2->IsIntegralConstPow2()))
+            else
 #else
             // XARCH only applies this transformation if we know
             // that magic division will be used - which is determined
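With the pow2 carve-out deleted from fgMorphSmpOp, ARM64 now always morphs a % cns into a - (a / cns) * cns; when cns == 1 << shift the multiply strength-reduces to a left shift, producing the SUB(LSH(DIV)) shape that the new rationalizer pass below recognizes. A quick sanity check that the expansion preserves truncated-division semantics (plain C++, illustrative only, not JIT code):

// Check: a % cns == a - ((a / cns) << shift) whenever cns == 1 << shift.
// The shift runs on the unsigned bit pattern so the check stays
// well-defined in C++ when the quotient is negative.
#include <cassert>
#include <cstdint>

int main()
{
    for (int64_t a = -1024; a <= 1024; a++)
    {
        for (int shift = 0; shift <= 8; shift++)
        {
            const int64_t cns = int64_t(1) << shift;
            const int64_t lsh =
                static_cast<int64_t>(static_cast<uint64_t>(a / cns) << shift);
            assert(a % cns == a - lsh);
        }
    }
    return 0;
}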
diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp
index faa90cc61ad8b..260cb1a98f4cd 100644
--- a/src/coreclr/jit/rationalize.cpp
+++ b/src/coreclr/jit/rationalize.cpp
@@ -265,6 +265,62 @@ void Rationalizer::RewriteIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTree
+
+#ifdef TARGET_ARM64
+//------------------------------------------------------------------------
+// RewriteSubLshDiv: Possibly rewrite a SubLshDiv node into a Mod node.
+//
+// Transform: a - (a / cns) << shift  =>  a % cns
+// where cns is a signed integer constant that is a power of 2.
+// We do this transformation because Lowering has a specific optimization
+// for 'a % cns' that is not easily reduced by other means.
+//
+void Rationalizer::RewriteSubLshDiv(GenTree** use)
+{
+    if (!comp->opts.OptimizationEnabled())
+        return;
+
+    GenTree* const node = *use;
+
+    if (!node->OperIs(GT_SUB))
+        return;
+
+    GenTree* op1 = node->gtGetOp1();
+    GenTree* op2 = node->gtGetOp2();
+
+    if (!(node->TypeIs(TYP_INT, TYP_LONG) && op1->OperIs(GT_LCL_VAR)))
+        return;
+
+    if (!op2->OperIs(GT_LSH))
+        return;
+
+    GenTree* lsh   = op2;
+    GenTree* div   = lsh->gtGetOp1();
+    GenTree* shift = lsh->gtGetOp2();
+    if (div->OperIs(GT_DIV) && shift->IsIntegralConst())
+    {
+        GenTree* a   = div->gtGetOp1();
+        GenTree* cns = div->gtGetOp2();
+        if (a->OperIs(GT_LCL_VAR) && cns->IsIntegralConstPow2() &&
+            op1->AsLclVar()->GetLclNum() == a->AsLclVar()->GetLclNum())
+        {
+            size_t shiftValue = shift->AsIntConCommon()->IntegralValue();
+            size_t cnsValue   = cns->AsIntConCommon()->IntegralValue();
+            if ((cnsValue >> shiftValue) == 1)
+            {
+                node->ChangeOper(GT_MOD);
+                node->AsOp()->gtOp2 = cns;
+                BlockRange().Remove(lsh);
+                BlockRange().Remove(div);
+                BlockRange().Remove(a);
+                BlockRange().Remove(shift);
+            }
+        }
+    }
+}
+#endif
+
 #ifdef DEBUG
 
 void Rationalizer::ValidateStatement(Statement* stmt, BasicBlock* block)
@@ -775,6 +831,13 @@ PhaseStatus Rationalizer::DoPhase()
             m_rationalizer.RewriteIntrinsicAsUserCall(use, this->m_ancestors);
         }
 
+#ifdef TARGET_ARM64
+        if (node->OperIs(GT_SUB))
+        {
+            m_rationalizer.RewriteSubLshDiv(use);
+        }
+#endif
+
         return Compiler::WALK_CONTINUE;
     }
diff --git a/src/coreclr/jit/rationalize.h b/src/coreclr/jit/rationalize.h
index 47d355cbbd38f..329fbcbd1ac24 100644
--- a/src/coreclr/jit/rationalize.h
+++ b/src/coreclr/jit/rationalize.h
@@ -58,6 +58,10 @@ class Rationalizer final : public Phase
     void RewriteAssignment(LIR::Use& use);
     void RewriteAddress(LIR::Use& use);
 
+#ifdef TARGET_ARM64
+    void RewriteSubLshDiv(GenTree** use);
+#endif
+
     // Root visitor
     Compiler::fgWalkResult RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parents);
 };
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_68136/Runtime_68136.cs b/src/tests/JIT/Regression/JitBlue/Runtime_68136/Runtime_68136.cs
new file mode 100644
index 0000000000000..92a3f87e8b441
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_68136/Runtime_68136.cs
@@ -0,0 +1,37 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+public class Program
+{
+    public static IRuntime s_rt;
+    public static ulong s_1;
+    public static int Main()
+    {
+        try
+        {
+            var vr1 = (uint)((int)M2(ref s_1, 0) % (long)1);
+            M2(ref s_1, vr1);
+        }
+        catch (System.Exception)
+        {
+        }
+
+        return 100;
+    }
+
+    public static byte M2(ref ulong arg0, uint arg1)
+    {
+        s_rt.WriteLine(arg0);
+        return 0;
+    }
+}
+
+public interface IRuntime
+{
+    void WriteLine<T>(T value);
+}
+
+public class Runtime : IRuntime
+{
+    public void WriteLine<T>(T value) => System.Console.WriteLine(value);
+}
\ No newline at end of file
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_68136/Runtime_68136.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_68136/Runtime_68136.csproj
new file mode 100644
index 0000000000000..0365eac2ffab7
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_68136/Runtime_68136.csproj
@@ -0,0 +1,11 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+  </PropertyGroup>
+  <PropertyGroup>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
\ No newline at end of file
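For the record, the fold in RewriteSubLshDiv only fires when the divisor and the shift amount describe the same power of two, which is what the (cnsValue >> shiftValue) == 1 guard enforces; the Runtime_68136 test above exercises the degenerate '% (long)1' case. A minimal sketch of that guard (FoldsToMod is a hypothetical name, not JIT code):

// Stand-in for the guard in RewriteSubLshDiv (illustrative only): the
// SUB(LSH(DIV)) shape is folded back to MOD only when the divisor and
// the shift amount agree on the same power of two.
#include <cassert>
#include <cstddef>

static bool FoldsToMod(size_t cns, size_t shift)
{
    return (cns >> shift) == 1; // i.e. cns == size_t(1) << shift
}

int main()
{
    assert(FoldsToMod(8, 3));  // a - ((a / 8) << 3)  is a % 8
    assert(!FoldsToMod(8, 2)); // a - ((a / 8) << 2)  is not a % 8
    assert(FoldsToMod(1, 0));  // the degenerate a % 1 shape from the test
    return 0;
}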