From 5e1081f6a0c86489e7fda575735ef49ff2ca3efa Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Wed, 24 Jul 2024 13:51:51 +0200 Subject: [PATCH] Improve MinOpts JIT TP (#105250) --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 19 ++- src/coreclr/jit/gentree.cpp | 256 ++++++++++++++++++++++++------ src/coreclr/jit/importercalls.cpp | 8 +- src/coreclr/jit/morph.cpp | 10 +- src/coreclr/jit/vartype.h | 2 +- 6 files changed, 239 insertions(+), 58 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e11d3cd30bbc1..b091a2efbbec5 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5273,7 +5273,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl #ifdef DEBUG // Stash the current estimate of the function's size if necessary. - if (verbose) + if (verbose && opts.OptimizationEnabled()) { compSizeEstimate = 0; compCycleEstimate = 0; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ce8cfe9f57f0d..ee1dcdd0b9a20 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3655,6 +3655,7 @@ class Compiler bool gtMarkAddrMode(GenTree* addr, int* costEx, int* costSz, var_types type); unsigned gtSetEvalOrder(GenTree* tree); + unsigned gtSetEvalOrderMinOpts(GenTree* tree); bool gtMayHaveStoreInterference(GenTree* treeWithStores, GenTree* tree); bool gtTreeHasLocalRead(GenTree* tree, unsigned lclNum); @@ -9985,6 +9986,8 @@ class Compiler // Maximum number of locals before turning off the inlining #define MAX_LV_NUM_COUNT_FOR_INLINING 512 + bool canUseTier0Opts; + bool canUseAllOpts; bool compMinOpts; bool compMinOptsIsSet; #ifdef DEBUG @@ -10011,13 +10014,22 @@ class Compiler } #endif // !DEBUG + // TODO: we should convert these into a single OptimizationLevel + bool OptimizationDisabled() const { - return MinOpts() || compDbgCode; + assert(compMinOptsIsSet); + return !canUseAllOpts; } bool OptimizationEnabled() const { - return !OptimizationDisabled(); + assert(compMinOptsIsSet); + return canUseAllOpts; + } + bool Tier0OptimizationEnabled() const + { + assert(compMinOptsIsSet); + return canUseTier0Opts; } void SetMinOpts(bool val) @@ -10026,6 +10038,9 @@ class Compiler assert(!compMinOptsIsSet || (compMinOpts == val)); compMinOpts = val; compMinOptsIsSet = true; + + canUseTier0Opts = !compDbgCode && !jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); + canUseAllOpts = canUseTier0Opts && !val; } // true if the CLFLG_* for an optimization is set. diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 8ba61fe3a1121..88d087797a259 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3931,8 +3931,10 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) int costSz = 1; unsigned level = 0; + bool optsEnabled = opts.OptimizationEnabled(); + #if defined(FEATURE_HW_INTRINSICS) - if (multiOp->OperIs(GT_HWINTRINSIC)) + if (multiOp->OperIs(GT_HWINTRINSIC) && optsEnabled) { GenTreeHWIntrinsic* hwTree = multiOp->AsHWIntrinsic(); #if defined(TARGET_XARCH) @@ -4052,8 +4054,12 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) level += 1; } - costEx += (multiOp->Op(1)->GetCostEx() + multiOp->Op(2)->GetCostEx()); - costSz += (multiOp->Op(1)->GetCostSz() + multiOp->Op(2)->GetCostSz()); + if (optsEnabled) + { + // We don't need/have costs in MinOpts + costEx += (multiOp->Op(1)->GetCostEx() + multiOp->Op(2)->GetCostEx()); + costSz += (multiOp->Op(1)->GetCostSz() + multiOp->Op(2)->GetCostSz()); + } } else { @@ -4064,12 +4070,19 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) level = max(lvl, level + 1); - costEx += op->GetCostEx(); - costSz += op->GetCostSz(); + if (optsEnabled) + { + // We don't need/have costs in MinOpts + costEx += op->GetCostEx(); + costSz += op->GetCostSz(); + } } } - multiOp->SetCosts(costEx, costSz); + if (optsEnabled) + { + multiOp->SetCosts(costEx, costSz); + } return level; } #endif @@ -4823,6 +4836,44 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ return false; } +static void SetIndirectStoreEvalOrder(Compiler* comp, GenTreeIndir* store, bool* allowReversal) +{ + assert(store->OperIs(GT_STORE_BLK, GT_STOREIND)); + + GenTree* addr = store->Addr(); + GenTree* data = store->Data(); + *allowReversal = true; + + if (addr->IsInvariant()) + { + *allowReversal = false; + store->SetReverseOp(); + return; + } + + if ((addr->gtFlags & GTF_ALL_EFFECT) != 0) + { + return; + } + + // In case op2 assigns to a local var that is used in op1, we have to evaluate op1 first. + if (comp->gtMayHaveStoreInterference(data, addr)) + { + // TODO-ASG-Cleanup: move this guard to "gtCanSwapOrder". + *allowReversal = false; + return; + } + + // If op2 is simple then evaluate op1 first + if (data->OperIsLeaf()) + { + return; + } + + *allowReversal = false; + store->SetReverseOp(); +} + /***************************************************************************** * * Given a tree, figure out the order in which its sub-operands should be @@ -4848,6 +4899,11 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) { assert(tree); + if (opts.OptimizationDisabled()) + { + return gtSetEvalOrderMinOpts(tree); + } + #ifdef DEBUG /* Clear the GTF_DEBUG_NODE_MORPHED flag as well */ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; @@ -5838,33 +5894,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // TODO-ASG-Cleanup: this logic emulated the ASG case below. See how of much of it can be deleted. if (!optValnumCSE_phase || optCSE_canSwap(op1, op2)) { - if (op1->IsInvariant()) - { - allowReversal = false; - tree->SetReverseOp(); - break; - } - if ((op1->gtFlags & GTF_ALL_EFFECT) != 0) - { - break; - } - - // In case op2 assigns to a local var that is used in op1, we have to evaluate op1 first. - if (gtMayHaveStoreInterference(op2, op1)) - { - // TODO-ASG-Cleanup: move this guard to "gtCanSwapOrder". - allowReversal = false; - break; - } - - // If op2 is simple then evaluate op1 first - if (op2->OperIsLeaf()) - { - break; - } - - allowReversal = false; - tree->SetReverseOp(); + SetIndirectStoreEvalOrder(this, tree->AsIndir(), &allowReversal); } break; @@ -6212,6 +6242,149 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) #pragma warning(pop) #endif +//------------------------------------------------------------------------ +// gtSetEvalOrderMinOpts: A MinOpts specific version of gtSetEvalOrder. We don't +// need to set costs, but we're looking for opportunities to swap operands. +// +// Arguments: +// tree - The tree for which we are setting the evaluation order. +// +// Return Value: +// the Sethi 'complexity' estimate for this tree (the higher +// the number, the higher is the tree's resources requirement) +// +unsigned Compiler::gtSetEvalOrderMinOpts(GenTree* tree) +{ + assert(tree); + if (fgOrder == FGOrderLinear) + { + // We don't re-order operands in LIR anyway. + return 0; + } + + if (tree->OperIsLeaf()) + { + // Nothing to do for leaves, report as having Sethi 'complexity' of 0 + return 0; + } + + unsigned level = 1; + if (tree->OperIsSimple()) + { + GenTree* op1 = tree->AsOp()->gtOp1; + GenTree* op2 = tree->gtGetOp2IfPresent(); + + // Only GT_LEA may have a nullptr op1 and a non-nullptr op2 + if (tree->OperIs(GT_LEA) && (op1 == nullptr)) + { + std::swap(op1, op2); + } + + // Check for a nilary operator + if (op1 == nullptr) + { + // E.g. void GT_RETURN, GT_RETFIT + assert(op2 == nullptr); + return 0; + } + + if (op2 == nullptr) + { + gtSetEvalOrderMinOpts(op1); + return 1; + } + + level = gtSetEvalOrderMinOpts(op1); + unsigned levelOp2 = gtSetEvalOrderMinOpts(op2); + + bool allowSwap = true; + // TODO: Introduce a function to check whether we can swap the order of its operands or not. + switch (tree->OperGet()) + { + case GT_COMMA: + case GT_BOUNDS_CHECK: + case GT_INTRINSIC: + case GT_QMARK: + case GT_COLON: + // We're not going to swap operands in these + allowSwap = false; + break; + + case GT_STORE_BLK: + case GT_STOREIND: + SetIndirectStoreEvalOrder(this, tree->AsIndir(), &allowSwap); + break; + + default: + break; + } + + const bool shouldSwap = tree->IsReverseOp() ? level > levelOp2 : level < levelOp2; + if (shouldSwap && allowSwap) + { + // Can we swap the order by commuting the operands? + const bool canSwap = tree->IsReverseOp() ? gtCanSwapOrder(op2, op1) : gtCanSwapOrder(op1, op2); + if (canSwap) + { + if (tree->OperIsCmpCompare()) + { + genTreeOps oper = tree->OperGet(); + if (GenTree::SwapRelop(oper) != oper) + { + tree->SetOper(GenTree::SwapRelop(oper)); + } + std::swap(tree->AsOp()->gtOp1, tree->AsOp()->gtOp2); + } + else if (tree->OperIsCommutative()) + { + std::swap(tree->AsOp()->gtOp1, tree->AsOp()->gtOp2); + } + else + { + // Mark the operand's evaluation order to be swapped. + tree->gtFlags ^= GTF_REVERSE_OPS; + } + } + } + + // Swap the level counts + if (tree->IsReverseOp()) + { + std::swap(level, levelOp2); + } + + // Compute the sethi number for this binary operator + if (level < 1) + { + level = levelOp2; + } + else if (level == levelOp2) + { + level++; + } + } + else if (tree->IsCall()) + { + // We ignore late args - they don't bring any noticeable benefits + // according to asmdiffs/tpdiff + for (CallArg& arg : tree->AsCall()->gtArgs.EarlyArgs()) + { + gtSetEvalOrderMinOpts(arg.GetEarlyNode()); + } + level = 3; + } +#if defined(FEATURE_HW_INTRINSICS) + else if (tree->OperIsHWIntrinsic()) + { + return gtSetMultiOpOrder(tree->AsMultiOp()); + } +#endif // FEATURE_HW_INTRINSICS + + // NOTE: we skip many operators here in order to maintain a good trade-off between CQ and TP. + + return level; +} + //------------------------------------------------------------------------ // gtMayHaveStoreInterference: Check if two trees may interfere because of a // store in one of the trees. @@ -13340,10 +13513,7 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree) return tree; } - // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. - // To be fixed in https://github.com/dotnet/runtime/pull/77465 - const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); - if (!tier0opts) + if (!opts.Tier0OptimizationEnabled()) { return tree; } @@ -13406,7 +13576,7 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree) // special operator that can use only one constant // to fold - e.g. booleans - if (tier0opts && opts.OptimizationDisabled()) + if (opts.OptimizationDisabled()) { // Too heavy for tier0 return tree; @@ -15197,10 +15367,7 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2IfPresent(); - // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. - // To be fixed in https://github.com/dotnet/runtime/pull/77465 - const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); - if (!tier0opts) + if (!opts.Tier0OptimizationEnabled()) { return tree; } @@ -30267,10 +30434,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) { assert(tree->OperIsHWIntrinsic()); - // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. - // To be fixed in https://github.com/dotnet/runtime/pull/77465 - const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); - if (!tier0opts) + if (!opts.Tier0OptimizationEnabled()) { return tree; } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index e8de998ebed2c..566fdceb8fdf8 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1553,7 +1553,7 @@ GenTree* Compiler::impThrowIfNull(GenTreeCall* call) assert(call->gtArgs.CountUserArgs() == 2); assert(call->TypeIs(TYP_VOID)); - if (opts.compDbgCode || opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)) + if (!opts.Tier0OptimizationEnabled()) { // Don't fold it for debug code or forced MinOpts return call; @@ -3302,11 +3302,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, // Allow some lighweight intrinsics in Tier0 which can improve throughput // we're fine if intrinsic decides to not expand itself in this case unlike mustExpand. - // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. - // To be fixed in https://github.com/dotnet/runtime/pull/77465 - const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); - - if (!mustExpand && tier0opts) + if (!mustExpand && opts.Tier0OptimizationEnabled()) { switch (ni) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0d922459acdc4..0f0753598f786 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1103,7 +1103,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing. SetNeedsTemp(&arg); } - else + else if (comp->opts.OptimizationEnabled()) { // Finally, we call gtPrepareCost to measure the cost of evaluating this tree. comp->gtPrepareCost(argx); @@ -1476,7 +1476,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) assert(begTab == endTab); break; } - else + else if (comp->opts.OptimizationEnabled()) { if (!costsPrepared) { @@ -1492,6 +1492,12 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) expensiveArg = arg; } } + else + { + // We don't have cost information in MinOpts + expensiveArgIndex = curInx; + expensiveArg = arg; + } } } diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index 34668fd545b71..c0cfa87775dab 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -185,7 +185,7 @@ inline bool varTypeIsArithmetic(T vt) template inline bool varTypeIsGC(T vt) { - return ((varTypeClassification[TypeGet(vt)] & (VTF_GCR | VTF_BYR)) != 0); + return (TypeGet(vt) == TYP_REF) || (TypeGet(vt) == TYP_BYREF); } template