From 485fdb9260a8f5a07ee5e7a0b402c146857a2643 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 2 Mar 2025 00:16:35 +0100 Subject: [PATCH 01/12] WIP --- src/coreclr/jit/lower.cpp | 194 ++++++++++++++++++++++++++++------ src/coreclr/jit/lower.h | 1 + src/coreclr/jit/promotion.cpp | 8 +- 3 files changed, 167 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 71500b34a952a1..b6f69dabcdcaea 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -4752,20 +4752,129 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList) return; } - unsigned regIndex = 0; - for (GenTreeFieldList::Use& use : fieldList->Uses()) + LowerFieldListToFieldListOfRegisters(fieldList); +} + +void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) +{ + const ReturnTypeDesc& retDesc = comp->compRetTypeDesc; + unsigned numRegs = retDesc.GetReturnRegCount(); + + GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); + assert(fieldList->Uses().IsSorted()); + + for (unsigned i = 0; i < numRegs; i++) { - var_types regType = retDesc.GetReturnRegType(regIndex); - if (varTypeUsesIntReg(regType) != varTypeUsesIntReg(use.GetNode())) + unsigned regStart = retDesc.GetReturnFieldOffset(i); + var_types regType = genActualType(retDesc.GetReturnRegType(i)); + unsigned regEnd = regStart + genTypeSize(regType); + + GenTreeFieldList::Use* regEntry = use; + + assert(use != nullptr); + + GenTree* fieldListPrev = fieldList->gtPrev; + + do { - GenTree* bitcast = comp->gtNewOperNode(GT_BITCAST, regType, use.GetNode()); - BlockRange().InsertBefore(fieldList, bitcast); - use.SetNode(bitcast); - LowerNode(bitcast); + unsigned fieldStart = use->GetOffset(); + + assert(fieldStart >= regStart); + + if (fieldStart >= regEnd) + { + break; + } + + var_types fieldType = use->GetType(); + GenTree* value = use->GetNode(); + + unsigned insertOffset = fieldStart - regStart; + GenTreeFieldList::Use* nextUse = use->GetNext(); + + // First ensure the value does not have upper bits set that + // interfere with the next field. + if ((nextUse != nullptr) && (nextUse->GetOffset() < regEnd) && (fieldStart + genTypeSize(genActualType(fieldType)) > nextUse->GetOffset())) + { + assert(varTypeIsSmall(fieldType)); + // This value may interfere with the next field. Ensure that doesn't happen. + if (comp->fgCastNeeded(value, varTypeToUnsigned(fieldType))) + { + value = comp->gtNewCastNode(TYP_INT, value, true, varTypeToUnsigned(fieldType)); + BlockRange().InsertBefore(fieldList, value); + } + } + + // If this is a float -> int insertion, then we need the bitcast now. + if (varTypeUsesFloatReg(value) && varTypeUsesIntReg(regType)) + { + assert((genTypeSize(value) == 4) || (genTypeSize(value) == 8)); + var_types castType = genTypeSize(value) == 4 ? TYP_INT : TYP_LONG; + value = comp->gtNewBitCastNode(castType, value); + BlockRange().InsertBefore(fieldList, value); + } + + if (insertOffset + genTypeSize(fieldType) > genTypeSize(genActualType(value))) + { + value = comp->gtNewCastNode(TYP_LONG, value, true, TYP_LONG); + BlockRange().InsertBefore(fieldList, value); + } + + if (fieldStart != regStart) + { + GenTree* shiftAmount = comp->gtNewIconNode((ssize_t)insertOffset * BITS_PER_BYTE); + value = comp->gtNewOperNode(GT_LSH, genActualType(value), value, shiftAmount); + BlockRange().InsertBefore(fieldList, shiftAmount, value); + } + + if (regEntry != use) + { + GenTree* prevValue = regEntry->GetNode(); + if (genActualType(value) != genActualType(regEntry->GetNode())) + { + prevValue = comp->gtNewCastNode(TYP_LONG, prevValue, true, TYP_LONG); + BlockRange().InsertBefore(fieldList, prevValue); + regEntry->SetNode(prevValue); + } + + value = comp->gtNewOperNode(GT_OR, genActualType(value), prevValue, value); + BlockRange().InsertBefore(fieldList, value); + + // Remove this field from the FIELD_LIST. + regEntry->SetNext(use->GetNext()); + } + + regEntry->SetNode(value); + regEntry->SetType(genActualType(value)); + use = regEntry->GetNext(); + } while (use != nullptr); + + assert(regEntry != nullptr); + if (varTypeUsesIntReg(regEntry->GetNode()) != varTypeUsesIntReg(regType)) + { + GenTree* bitCast = comp->gtNewBitCastNode(regType, regEntry->GetNode()); + BlockRange().InsertBefore(fieldList, bitCast); + regEntry->SetNode(bitCast); } - regIndex++; + if (fieldListPrev->gtNext != fieldList) + { + GenTree* cur = fieldListPrev->gtNext; + GenTree* last = fieldList->gtPrev; + + while (true) + { + GenTree* next = LowerNode(cur); + if (cur == last) + break; + + cur = next; + assert(cur != nullptr); + } + } } + + assert(use == nullptr); } //---------------------------------------------------------------------------------------------- @@ -4777,47 +4886,68 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList) // fieldList - The FIELD_LIST node // // Returns: -// True if the fields of the FIELD_LIST map cleanly to the ABI returned -// registers. Insertions of bitcasts may still be required. +// True if the fields of the FIELD_LIST are all direct insertions into the +// return registers. // bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList) { JITDUMP("Checking if field list [%06u] is compatible with return ABI: ", Compiler::dspTreeID(fieldList)); const ReturnTypeDesc& retDesc = comp->compRetTypeDesc; unsigned numRetRegs = retDesc.GetReturnRegCount(); - unsigned regIndex = 0; - for (const GenTreeFieldList::Use& use : fieldList->Uses()) + GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); + for (unsigned i = 0; i < numRetRegs; i++) { - if (regIndex >= numRetRegs) - { - JITDUMP("it is not; too many fields\n"); - return false; - } + unsigned regStart = retDesc.GetReturnFieldOffset(i); + var_types regType = retDesc.GetReturnRegType(i); + unsigned regEnd = regStart + genTypeSize(regType); - unsigned offset = retDesc.GetReturnFieldOffset(regIndex); - if (offset != use.GetOffset()) + // TODO-CQ: Could just create a 0 for this. + if (use == nullptr) { - JITDUMP("it is not; field %u register starts at offset %u, but field starts at offset %u\n", regIndex, - offset, use.GetOffset()); + JITDUMP("it is not; register %u has no corresponding field\n", i); return false; } - var_types fieldType = genActualType(use.GetNode()); - var_types regType = genActualType(retDesc.GetReturnRegType(regIndex)); - if (genTypeSize(fieldType) != genTypeSize(regType)) + do { - JITDUMP("it is not; field %u register has type %s but field has type %s\n", regIndex, varTypeName(regType), - varTypeName(fieldType)); - return false; - } + unsigned fieldStart = use->GetOffset(); + + if (fieldStart < regStart) + { + // Not fully contained in a register. + // TODO-CQ: Could just remove these fields if they don't partially overlap with the next register. + JITDUMP("it is not; field [%06u] starts before register %u\n", Compiler::dspTreeID(use->GetNode()), i); + return false; + } + + if (fieldStart >= regEnd) + { + break; + } + + unsigned fieldEnd = fieldStart + genTypeSize(use->GetType()); + if (fieldEnd > regEnd) + { + JITDUMP("it is not; field [%06u] ends after register %u\n", Compiler::dspTreeID(use->GetNode()), i); + return false; + } + + // float -> float insertions are not yet supported + if (varTypeUsesFloatReg(use->GetNode()) && varTypeUsesFloatReg(regType) && (fieldStart != regStart)) + { + JITDUMP("it is not; field [%06u] requires an insertion into register %u\n", Compiler::dspTreeID(use->GetNode()), i); + return false; + } - regIndex++; + use = use->GetNext(); + } while (use != nullptr); } - if (regIndex != numRetRegs) + if (use != nullptr) { - JITDUMP("it is not; too few fields\n"); + // TODO-CQ: Could just remove these fields. + JITDUMP("it is not; field [%06u] corresponds to no register\n", Compiler::dspTreeID(use->GetNode())); return false; } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index e3025e9b9f75ed..9ddf4225864309 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -171,6 +171,7 @@ class Lowering final : public Phase void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret); void LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList); bool IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList); + void LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList); void LowerCallStruct(GenTreeCall* call); void LowerStoreSingleRegCallStruct(GenTreeBlk* store); #if !defined(WINDOWS_AMD64_ABI) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index e695a32a0c82b2..2140f3550358b9 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -2295,10 +2295,10 @@ bool ReplaceVisitor::ReplaceReturnedStructLocal(GenTreeOp* ret, GenTreeLclVarCom return false; } - if (!IsReturnProfitableAsFieldList(value)) - { - return false; - } + //if (!IsReturnProfitableAsFieldList(value)) + //{ + // return false; + //} StructDeaths deaths = m_liveness->GetDeathsForStructLocal(value); GenTreeFieldList* fieldList = m_compiler->gtNewFieldList(); From d07cd6db625a69ddd3f0bdabcdd92e1e3113f1f3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 22:02:27 +0100 Subject: [PATCH 02/12] Morph promoted locals to field list --- src/coreclr/jit/compiler.h | 4 ++-- src/coreclr/jit/gentree.h | 13 ++++++++++++ src/coreclr/jit/lclmorph.cpp | 32 ----------------------------- src/coreclr/jit/morph.cpp | 40 ++++++++++++------------------------ 4 files changed, 28 insertions(+), 61 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e8d9e66cd91363..d6ba71a9826b11 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6586,7 +6586,7 @@ class Compiler #endif // FEATURE_SIMD GenTree* fgMorphIndexAddr(GenTreeIndexAddr* tree); GenTree* fgMorphExpandCast(GenTreeCast* tree); - GenTreeFieldList* fgMorphLclArgToFieldList(GenTreeLclVarCommon* lcl); + GenTreeFieldList* fgMorphLclToFieldList(GenTreeLclVar* lcl); GenTreeCall* fgMorphArgs(GenTreeCall* call); void fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg); @@ -6661,7 +6661,7 @@ class Compiler GenTree* fgMorphCopyBlock(GenTree* tree); private: GenTree* fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optAssertionPropDone = nullptr); - void fgTryReplaceStructLocalWithField(GenTree* tree); + void fgTryReplaceStructLocalWithFields(GenTree** use); GenTree* fgMorphFinalizeIndir(GenTreeIndir* indir); GenTree* fgOptimizeCast(GenTreeCast* cast); GenTree* fgOptimizeCastOnStore(GenTree* store); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index e01d9705eb0d4a..6af651b6860882 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3095,6 +3095,19 @@ struct GenTreeOp : public GenTreeUnOp // then sets the flag GTF_DIV_BY_CNS_OPT and GTF_DONT_CSE on the constant void CheckDivideByConstOptimized(Compiler* comp); + GenTree*& ReturnValueRef() + { + assert(OperIs(GT_RETURN, GT_RETFILT, GT_SWIFT_ERROR_RET)); +#ifdef SWIFT_SUPPORT + if (OperIs(GT_SWIFT_ERROR_RET)) + { + return gtOp2; + } +#endif // SWIFT_SUPPORT + + return gtOp1; + } + GenTree* GetReturnValue() const { assert(OperIs(GT_RETURN, GT_RETFILT, GT_SWIFT_ERROR_RET)); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 4d1b2697215324..df824216d819d3 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1240,38 +1240,6 @@ class LocalAddressVisitor final : public GenTreeVisitor PopValue(); break; - case GT_RETURN: - if (TopValue(0).Node() != node) - { - assert(TopValue(1).Node() == node); - assert(TopValue(0).Node() == node->gtGetOp1()); - GenTreeUnOp* ret = node->AsUnOp(); - GenTree* retVal = ret->gtGetOp1(); - if (retVal->OperIs(GT_LCL_VAR)) - { - // TODO-1stClassStructs: this block is a temporary workaround to keep diffs small, - // having `doNotEnreg` affect block init and copy transformations that affect many methods. - // I have a change that introduces more precise and effective solution for that, but it would - // be merged separately. - GenTreeLclVar* lclVar = retVal->AsLclVar(); - unsigned lclNum = lclVar->GetLclNum(); - if (!m_compiler->compMethodReturnsMultiRegRetType() && - !m_compiler->lvaIsImplicitByRefLocal(lclVar->GetLclNum())) - { - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - if (varDsc->lvFieldCnt > 1) - { - m_compiler->lvaSetVarDoNotEnregister( - lclNum DEBUGARG(DoNotEnregisterReason::BlockOpRet)); - } - } - } - - EscapeValue(TopValue(0), node); - PopValue(); - } - break; - case GT_CALL: while (TopValue(0).Node() != node) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d0bdd858ef7f9d..aff5d7fe17fd16 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2279,7 +2279,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) // TODO-Arm-CQ: support decomposing "large" promoted structs into field lists. if (!isSplit) { - GenTreeFieldList* fieldList = fgMorphLclArgToFieldList(argNode->AsLclVar()); + GenTreeFieldList* fieldList = fgMorphLclToFieldList(argNode->AsLclVar()); // TODO-Cleanup: The containment/reg optionality for x86 is // conservative in the "no field list" case. #ifdef TARGET_X86 @@ -2321,7 +2321,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) if (argNode->OperIs(GT_LCL_VAR)) { - GenTreeLclVarCommon* lclNode = argNode->AsLclVarCommon(); + GenTreeLclVar* lclNode = argNode->AsLclVar(); unsigned lclNum = lclNode->GetLclNum(); LclVarDsc* varDsc = lvaGetDesc(lclNum); @@ -2365,7 +2365,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) if (fieldsMatch) { - newArg = fgMorphLclArgToFieldList(lclNode)->SoleFieldOrThis(); + newArg = fgMorphLclToFieldList(lclNode)->SoleFieldOrThis(); } } } @@ -2577,7 +2577,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) } //------------------------------------------------------------------------ -// fgMorphLclArgToFieldList: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields +// fgMorphLclToFieldList: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields // // Arguments: // lcl - The GT_LCL_VAR node we will transform @@ -2585,7 +2585,7 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) // Return value: // The new GT_FIELD_LIST that we have created. // -GenTreeFieldList* Compiler::fgMorphLclArgToFieldList(GenTreeLclVarCommon* lcl) +GenTreeFieldList* Compiler::fgMorphLclToFieldList(GenTreeLclVar* lcl) { LclVarDsc* varDsc = lvaGetDesc(lcl); assert(varDsc->lvPromoted); @@ -7458,7 +7458,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA case GT_RETURN: case GT_SWIFT_ERROR_RET: { - GenTree* retVal = tree->AsOp()->GetReturnValue(); + GenTree*& retVal = tree->AsOp()->ReturnValueRef(); // Apply some optimizations that change the type of the return. // These are not applicable when this is a merged return that will @@ -7470,7 +7470,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA retVal = fgMorphRetInd(tree->AsOp()); } - fgTryReplaceStructLocalWithField(retVal); + fgTryReplaceStructLocalWithFields(&retVal); } // normalize small integer return values @@ -7496,7 +7496,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA retVal->gtFlags |= (tree->gtFlags & GTF_COLON_COND); retVal = fgMorphTree(retVal); - tree->AsOp()->SetReturnValue(retVal); // Propagate side effect flags tree->SetAllEffectsFlags(retVal); @@ -8364,10 +8363,10 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA // prop done when morphing this operand changed the local. // Skip this for merged returns that will be changed to a store and // jump to the return BB. - GenTree* const retVal = tree->AsOp()->GetReturnValue(); + GenTree*& retVal = tree->AsOp()->ReturnValueRef(); if ((retVal != nullptr) && ((genReturnBB == nullptr) || (compCurBB == genReturnBB))) { - fgTryReplaceStructLocalWithField(retVal); + fgTryReplaceStructLocalWithFields(&retVal); } break; } @@ -8425,29 +8424,16 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA // Notes: // Currently only called when the tree parent is a GT_RETURN/GT_SWIFT_ERROR_RET. // -void Compiler::fgTryReplaceStructLocalWithField(GenTree* tree) +void Compiler::fgTryReplaceStructLocalWithFields(GenTree** use) { - if (!tree->OperIs(GT_LCL_VAR)) + if (!(*use)->OperIs(GT_LCL_VAR)) { return; } - GenTreeLclVar* lclVar = tree->AsLclVar(); - unsigned lclNum = lclVar->GetLclNum(); - LclVarDsc* const varDsc = lvaGetDesc(lclVar); - if (varDsc->CanBeReplacedWithItsField(this)) + if (lvaGetDesc((*use)->AsLclVar())->lvPromoted) { - // We can replace the struct with its only field and allow copy propagation to replace - // return value that was written as a field. - unsigned const fieldLclNum = varDsc->lvFieldLclStart; - LclVarDsc* const fieldDsc = lvaGetDesc(fieldLclNum); - - JITDUMP("Replacing an independently promoted local var V%02u with its only field " - "V%02u for " - "the return [%06u]\n", - lclVar->GetLclNum(), fieldLclNum, dspTreeID(tree)); - lclVar->SetLclNum(fieldLclNum); - lclVar->ChangeType(fieldDsc->lvType); + *use = fgMorphLclToFieldList((*use)->AsLclVar()); } } From 0872964410fe8b18113bda10d6acc7e7b3d64d31 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 22:07:51 +0100 Subject: [PATCH 03/12] JIT: Fix Lowering::LowerRange For example, this would break when `LowerNode` ended up deciding to remove the current node. --- src/coreclr/jit/lower.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 9ddf4225864309..c48fa8655d9005 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -51,15 +51,23 @@ class Lowering final : public Phase // LowerRange handles new code that is introduced by or after Lowering. void LowerRange(LIR::ReadOnlyRange& range) { - for (GenTree* newNode : range) - { - LowerNode(newNode); - } + LowerRange(range.FirstNode(), range.LastNode()); } void LowerRange(GenTree* firstNode, GenTree* lastNode) { - LIR::ReadOnlyRange range(firstNode, lastNode); - LowerRange(range); + GenTree* cur = firstNode; + + while (true) + { + GenTree* next = LowerNode(cur); + if (cur == lastNode) + { + break; + } + + cur = next; + assert(cur != nullptr); + } } // ContainCheckRange handles new code that is introduced by or after Lowering, From f8ce965e861165ea22e497018adc52a8b2530faa Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 22:47:56 +0100 Subject: [PATCH 04/12] Work on optimizing field lists --- src/coreclr/jit/lower.cpp | 89 ++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index b6f69dabcdcaea..27603543037653 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -4760,37 +4760,24 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) const ReturnTypeDesc& retDesc = comp->compRetTypeDesc; unsigned numRegs = retDesc.GetReturnRegCount(); - GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); assert(fieldList->Uses().IsSorted()); + GenTree* fieldListPrev = fieldList->gtPrev; + + // In the first pass we convert all fields into register compatible types + // that do not interfere with other fields. for (unsigned i = 0; i < numRegs; i++) { unsigned regStart = retDesc.GetReturnFieldOffset(i); var_types regType = genActualType(retDesc.GetReturnRegType(i)); unsigned regEnd = regStart + genTypeSize(regType); - GenTreeFieldList::Use* regEntry = use; - - assert(use != nullptr); - - GenTree* fieldListPrev = fieldList->gtPrev; - - do + for (GenTreeFieldList::Use& use : fieldList->Uses()) { - unsigned fieldStart = use->GetOffset(); - - assert(fieldStart >= regStart); - - if (fieldStart >= regEnd) - { - break; - } - - var_types fieldType = use->GetType(); - GenTree* value = use->GetNode(); - - unsigned insertOffset = fieldStart - regStart; - GenTreeFieldList::Use* nextUse = use->GetNext(); + unsigned fieldStart = use.GetOffset(); + var_types fieldType = use.GetType(); + GenTreeFieldList::Use* nextUse = use.GetNext(); + GenTree*& value = use.NodeRef(); // First ensure the value does not have upper bits set that // interfere with the next field. @@ -4813,6 +4800,49 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) value = comp->gtNewBitCastNode(castType, value); BlockRange().InsertBefore(fieldList, value); } + } + } + + // Lower the work we just did, which may fold some of the nodes we just + // inserted. + if (fieldListPrev->gtNext != fieldList) + { + LowerRange(fieldListPrev->gtNext, fieldList->gtPrev); + } + + // Now combine the fields into a field for each register. + GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); + for (unsigned i = 0; i < numRegs; i++) + { + unsigned regStart = retDesc.GetReturnFieldOffset(i); + var_types regType = genActualType(retDesc.GetReturnRegType(i)); + unsigned regEnd = regStart + genTypeSize(regType); + + GenTreeFieldList::Use* regEntry = use; + + assert(use != nullptr); + + fieldListPrev = fieldList->gtPrev; + + do + { + unsigned fieldStart = use->GetOffset(); + + assert(fieldStart >= regStart); + + if (fieldStart >= regEnd) + { + break; + } + + var_types fieldType = use->GetType(); + GenTree* value = use->GetNode(); + + GenTreeFieldList::Use* nextUse = use->GetNext(); + + // See if we can optimize this + + unsigned insertOffset = fieldStart - regStart; if (insertOffset + genTypeSize(fieldType) > genTypeSize(genActualType(value))) { @@ -4859,18 +4889,7 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) if (fieldListPrev->gtNext != fieldList) { - GenTree* cur = fieldListPrev->gtNext; - GenTree* last = fieldList->gtPrev; - - while (true) - { - GenTree* next = LowerNode(cur); - if (cur == last) - break; - - cur = next; - assert(cur != nullptr); - } + LowerRange(fieldListPrev->gtNext, fieldList->gtPrev); } } @@ -4933,7 +4952,7 @@ bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList) return false; } - // float -> float insertions are not yet supported + // TODO-CQ: float -> float insertions are not yet supported if (varTypeUsesFloatReg(use->GetNode()) && varTypeUsesFloatReg(regType) && (fieldStart != regStart)) { JITDUMP("it is not; field [%06u] requires an insertion into register %u\n", Compiler::dspTreeID(use->GetNode()), i); From 38ede1425e62b8ccfecca499af6fa277d3e836f3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 22:49:21 +0100 Subject: [PATCH 05/12] Revert "Work on optimizing field lists" This reverts commit f8ce965e861165ea22e497018adc52a8b2530faa. --- src/coreclr/jit/lower.cpp | 89 +++++++++++++++------------------------ 1 file changed, 35 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 27603543037653..b6f69dabcdcaea 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -4760,24 +4760,37 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) const ReturnTypeDesc& retDesc = comp->compRetTypeDesc; unsigned numRegs = retDesc.GetReturnRegCount(); + GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); assert(fieldList->Uses().IsSorted()); - GenTree* fieldListPrev = fieldList->gtPrev; - - // In the first pass we convert all fields into register compatible types - // that do not interfere with other fields. for (unsigned i = 0; i < numRegs; i++) { unsigned regStart = retDesc.GetReturnFieldOffset(i); var_types regType = genActualType(retDesc.GetReturnRegType(i)); unsigned regEnd = regStart + genTypeSize(regType); - for (GenTreeFieldList::Use& use : fieldList->Uses()) + GenTreeFieldList::Use* regEntry = use; + + assert(use != nullptr); + + GenTree* fieldListPrev = fieldList->gtPrev; + + do { - unsigned fieldStart = use.GetOffset(); - var_types fieldType = use.GetType(); - GenTreeFieldList::Use* nextUse = use.GetNext(); - GenTree*& value = use.NodeRef(); + unsigned fieldStart = use->GetOffset(); + + assert(fieldStart >= regStart); + + if (fieldStart >= regEnd) + { + break; + } + + var_types fieldType = use->GetType(); + GenTree* value = use->GetNode(); + + unsigned insertOffset = fieldStart - regStart; + GenTreeFieldList::Use* nextUse = use->GetNext(); // First ensure the value does not have upper bits set that // interfere with the next field. @@ -4800,49 +4813,6 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) value = comp->gtNewBitCastNode(castType, value); BlockRange().InsertBefore(fieldList, value); } - } - } - - // Lower the work we just did, which may fold some of the nodes we just - // inserted. - if (fieldListPrev->gtNext != fieldList) - { - LowerRange(fieldListPrev->gtNext, fieldList->gtPrev); - } - - // Now combine the fields into a field for each register. - GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); - for (unsigned i = 0; i < numRegs; i++) - { - unsigned regStart = retDesc.GetReturnFieldOffset(i); - var_types regType = genActualType(retDesc.GetReturnRegType(i)); - unsigned regEnd = regStart + genTypeSize(regType); - - GenTreeFieldList::Use* regEntry = use; - - assert(use != nullptr); - - fieldListPrev = fieldList->gtPrev; - - do - { - unsigned fieldStart = use->GetOffset(); - - assert(fieldStart >= regStart); - - if (fieldStart >= regEnd) - { - break; - } - - var_types fieldType = use->GetType(); - GenTree* value = use->GetNode(); - - GenTreeFieldList::Use* nextUse = use->GetNext(); - - // See if we can optimize this - - unsigned insertOffset = fieldStart - regStart; if (insertOffset + genTypeSize(fieldType) > genTypeSize(genActualType(value))) { @@ -4889,7 +4859,18 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) if (fieldListPrev->gtNext != fieldList) { - LowerRange(fieldListPrev->gtNext, fieldList->gtPrev); + GenTree* cur = fieldListPrev->gtNext; + GenTree* last = fieldList->gtPrev; + + while (true) + { + GenTree* next = LowerNode(cur); + if (cur == last) + break; + + cur = next; + assert(cur != nullptr); + } } } @@ -4952,7 +4933,7 @@ bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList) return false; } - // TODO-CQ: float -> float insertions are not yet supported + // float -> float insertions are not yet supported if (varTypeUsesFloatReg(use->GetNode()) && varTypeUsesFloatReg(regType) && (fieldStart != regStart)) { JITDUMP("it is not; field [%06u] requires an insertion into register %u\n", Compiler::dspTreeID(use->GetNode()), i); From 131812271bcea70ca2a66f967f57c24ceccbcf24 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 23:01:53 +0100 Subject: [PATCH 06/12] Fold casts of constants --- src/coreclr/jit/lower.cpp | 63 ++++++++++++++++++++++++++++----------- src/coreclr/jit/lower.h | 1 + 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index b6f69dabcdcaea..c55b140d8294b6 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -538,13 +538,14 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_CAST: { - GenTree* nextNode = LowerCast(node); -#if defined(TARGET_XARCH) - if (nextNode != nullptr) + if (!TryRemoveCast(node->AsCast())) { - return nextNode; + GenTree* nextNode = LowerCast(node); + if (nextNode != nullptr) + { + return nextNode; + } } -#endif // TARGET_XARCH } break; @@ -4859,18 +4860,7 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) if (fieldListPrev->gtNext != fieldList) { - GenTree* cur = fieldListPrev->gtNext; - GenTree* last = fieldList->gtPrev; - - while (true) - { - GenTree* next = LowerNode(cur); - if (cur == last) - break; - - cur = next; - assert(cur != nullptr); - } + LowerRange(fieldListPrev->gtNext, fieldList->gtPrev); } } @@ -8922,6 +8912,45 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret) #endif // FEATURE_MULTIREG_RET } +//------------------------------------------------------------------------ +// TryRemoveCast: +// Try to remove a cast node by changing its operand. +// +// Arguments: +// node - Cast node +// +// Returns: +// True if the cast was removed. +// +bool Lowering::TryRemoveCast(GenTreeCast* node) +{ + if (comp->opts.OptimizationDisabled()) + { + return false; + } + + if (node->gtOverflow()) + { + return false; + } + + GenTree* op = node->CastOp(); + if (!op->OperIsConst()) + { + return false; + } + + GenTree* folded = comp->gtFoldExprConst(node); + assert(folded == node); + if (folded->OperIs(GT_CAST)) + { + return false; + } + + op->SetUnusedValue(); + return true; +} + //------------------------------------------------------------------------ // TryRemoveBitCast: // Try to remove a bitcast node by changing its operand. diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index c48fa8655d9005..f802c2620356a5 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -396,6 +396,7 @@ class Lowering final : public Phase void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode); GenTree* LowerArrLength(GenTreeArrCommon* node); + bool TryRemoveCast(GenTreeCast* node); bool TryRemoveBitCast(GenTreeUnOp* node); #ifdef TARGET_XARCH From e2352032ed82c7d2a84474a68eb471c74459d1a0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 23:11:51 +0100 Subject: [PATCH 07/12] Clean up --- src/coreclr/jit/lower.cpp | 28 ++++++++++---------- src/coreclr/jit/morph.cpp | 4 +-- src/coreclr/jit/promotion.cpp | 49 +++-------------------------------- src/coreclr/jit/promotion.h | 1 - 4 files changed, 20 insertions(+), 62 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c55b140d8294b6..f0feb71b0f2811 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -4766,9 +4766,9 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) for (unsigned i = 0; i < numRegs; i++) { - unsigned regStart = retDesc.GetReturnFieldOffset(i); - var_types regType = genActualType(retDesc.GetReturnRegType(i)); - unsigned regEnd = regStart + genTypeSize(regType); + unsigned regStart = retDesc.GetReturnFieldOffset(i); + var_types regType = genActualType(retDesc.GetReturnRegType(i)); + unsigned regEnd = regStart + genTypeSize(regType); GenTreeFieldList::Use* regEntry = use; @@ -4788,14 +4788,15 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) } var_types fieldType = use->GetType(); - GenTree* value = use->GetNode(); + GenTree* value = use->GetNode(); - unsigned insertOffset = fieldStart - regStart; - GenTreeFieldList::Use* nextUse = use->GetNext(); + unsigned insertOffset = fieldStart - regStart; + GenTreeFieldList::Use* nextUse = use->GetNext(); // First ensure the value does not have upper bits set that // interfere with the next field. - if ((nextUse != nullptr) && (nextUse->GetOffset() < regEnd) && (fieldStart + genTypeSize(genActualType(fieldType)) > nextUse->GetOffset())) + if ((nextUse != nullptr) && (nextUse->GetOffset() < regEnd) && + (fieldStart + genTypeSize(genActualType(fieldType)) > nextUse->GetOffset())) { assert(varTypeIsSmall(fieldType)); // This value may interfere with the next field. Ensure that doesn't happen. @@ -4811,7 +4812,7 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) { assert((genTypeSize(value) == 4) || (genTypeSize(value) == 8)); var_types castType = genTypeSize(value) == 4 ? TYP_INT : TYP_LONG; - value = comp->gtNewBitCastNode(castType, value); + value = comp->gtNewBitCastNode(castType, value); BlockRange().InsertBefore(fieldList, value); } @@ -4824,7 +4825,7 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) if (fieldStart != regStart) { GenTree* shiftAmount = comp->gtNewIconNode((ssize_t)insertOffset * BITS_PER_BYTE); - value = comp->gtNewOperNode(GT_LSH, genActualType(value), value, shiftAmount); + value = comp->gtNewOperNode(GT_LSH, genActualType(value), value, shiftAmount); BlockRange().InsertBefore(fieldList, shiftAmount, value); } @@ -4888,9 +4889,9 @@ bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList) GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); for (unsigned i = 0; i < numRetRegs; i++) { - unsigned regStart = retDesc.GetReturnFieldOffset(i); - var_types regType = retDesc.GetReturnRegType(i); - unsigned regEnd = regStart + genTypeSize(regType); + unsigned regStart = retDesc.GetReturnFieldOffset(i); + var_types regType = retDesc.GetReturnRegType(i); + unsigned regEnd = regStart + genTypeSize(regType); // TODO-CQ: Could just create a 0 for this. if (use == nullptr) @@ -4926,7 +4927,8 @@ bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList) // float -> float insertions are not yet supported if (varTypeUsesFloatReg(use->GetNode()) && varTypeUsesFloatReg(regType) && (fieldStart != regStart)) { - JITDUMP("it is not; field [%06u] requires an insertion into register %u\n", Compiler::dspTreeID(use->GetNode()), i); + JITDUMP("it is not; field [%06u] requires an insertion into register %u\n", + Compiler::dspTreeID(use->GetNode()), i); return false; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index aff5d7fe17fd16..9d84378af78b7a 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2322,8 +2322,8 @@ bool Compiler::fgTryMorphStructArg(CallArg* arg) if (argNode->OperIs(GT_LCL_VAR)) { GenTreeLclVar* lclNode = argNode->AsLclVar(); - unsigned lclNum = lclNode->GetLclNum(); - LclVarDsc* varDsc = lvaGetDesc(lclNum); + unsigned lclNum = lclNode->GetLclNum(); + LclVarDsc* varDsc = lvaGetDesc(lclNum); if (!arg->AbiInfo.HasExactlyOneRegisterSegment()) { diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 2140f3550358b9..e8610268196d7f 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -2295,10 +2295,10 @@ bool ReplaceVisitor::ReplaceReturnedStructLocal(GenTreeOp* ret, GenTreeLclVarCom return false; } - //if (!IsReturnProfitableAsFieldList(value)) + // if (!IsReturnProfitableAsFieldList(value)) //{ - // return false; - //} + // return false; + // } StructDeaths deaths = m_liveness->GetDeathsForStructLocal(value); GenTreeFieldList* fieldList = m_compiler->gtNewFieldList(); @@ -2337,49 +2337,6 @@ bool ReplaceVisitor::ReplaceReturnedStructLocal(GenTreeOp* ret, GenTreeLclVarCom return true; } -//------------------------------------------------------------------------ -// IsReturnProfitableAsFieldList: -// Check if a returned local is expected to be profitable to turn into a -// FIELD_LIST. -// -// Parameters: -// value - The struct local -// -// Returns: -// True if so. -// -bool ReplaceVisitor::IsReturnProfitableAsFieldList(GenTreeLclVarCommon* value) -{ - // Currently the backend requires all fields to map cleanly to registers to - // efficiently return them. Otherwise they will be spilled, and we are - // better off decomposing the store here. - auto fieldMapsCleanly = [=](Replacement& rep) { - const ReturnTypeDesc& retDesc = m_compiler->compRetTypeDesc; - unsigned fieldOffset = rep.Offset - value->GetLclOffs(); - unsigned numRegs = retDesc.GetReturnRegCount(); - for (unsigned i = 0; i < numRegs; i++) - { - unsigned offset = retDesc.GetReturnFieldOffset(i); - var_types regType = retDesc.GetReturnRegType(i); - if ((fieldOffset == offset) && (genTypeSize(rep.AccessType) == genTypeSize(regType))) - { - return true; - } - } - - return false; - }; - - unsigned size = value->GetLayout(m_compiler)->GetSize(); - if (!VisitOverlappingReplacements(value->GetLclNum(), value->GetLclOffs(), size, fieldMapsCleanly)) - { - // Aborted early, so a field did not map - return false; - } - - return true; -} - //------------------------------------------------------------------------ // ReplaceCallArgWithFieldList: // Handle a call that may pass a struct local with replacements as the diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index 4fca7bdf9ca1fa..aad98bb3c1f29a 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -296,7 +296,6 @@ class ReplaceVisitor : public GenTreeVisitor bool ReplaceStructLocal(GenTree* user, GenTreeLclVarCommon* value); bool ReplaceReturnedStructLocal(GenTreeOp* ret, GenTreeLclVarCommon* value); - bool IsReturnProfitableAsFieldList(GenTreeLclVarCommon* value); bool ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVarCommon* callArg); bool CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* call, CallArg* callArg, GenTreeLclVarCommon* lcl); void ReadBackAfterCall(GenTreeCall* call, GenTree* user); From efd5e0a0814930d4f2d57fe55b0a239f22ba9462 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 23:13:45 +0100 Subject: [PATCH 08/12] Further clean up --- src/coreclr/jit/lower.cpp | 166 ++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 79 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f0feb71b0f2811..6eb70fb1e10b85 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -4756,6 +4756,93 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList) LowerFieldListToFieldListOfRegisters(fieldList); } +//---------------------------------------------------------------------------------------------- +// IsFieldListCompatibleWithReturn: +// Check if the fields of a FIELD_LIST are compatible with the registers +// being returned. +// +// Arguments: +// fieldList - The FIELD_LIST node +// +// Returns: +// True if the fields of the FIELD_LIST are all direct insertions into the +// return registers. +// +bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList) +{ + JITDUMP("Checking if field list [%06u] is compatible with return ABI: ", Compiler::dspTreeID(fieldList)); + const ReturnTypeDesc& retDesc = comp->compRetTypeDesc; + unsigned numRetRegs = retDesc.GetReturnRegCount(); + + GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); + for (unsigned i = 0; i < numRetRegs; i++) + { + unsigned regStart = retDesc.GetReturnFieldOffset(i); + var_types regType = retDesc.GetReturnRegType(i); + unsigned regEnd = regStart + genTypeSize(regType); + + // TODO-CQ: Could just create a 0 for this. + if (use == nullptr) + { + JITDUMP("it is not; register %u has no corresponding field\n", i); + return false; + } + + do + { + unsigned fieldStart = use->GetOffset(); + + if (fieldStart < regStart) + { + // Not fully contained in a register. + // TODO-CQ: Could just remove these fields if they don't partially overlap with the next register. + JITDUMP("it is not; field [%06u] starts before register %u\n", Compiler::dspTreeID(use->GetNode()), i); + return false; + } + + if (fieldStart >= regEnd) + { + break; + } + + unsigned fieldEnd = fieldStart + genTypeSize(use->GetType()); + if (fieldEnd > regEnd) + { + JITDUMP("it is not; field [%06u] ends after register %u\n", Compiler::dspTreeID(use->GetNode()), i); + return false; + } + + // float -> float insertions are not yet supported + if (varTypeUsesFloatReg(use->GetNode()) && varTypeUsesFloatReg(regType) && (fieldStart != regStart)) + { + JITDUMP("it is not; field [%06u] requires an insertion into register %u\n", + Compiler::dspTreeID(use->GetNode()), i); + return false; + } + + use = use->GetNext(); + } while (use != nullptr); + } + + if (use != nullptr) + { + // TODO-CQ: Could just remove these fields. + JITDUMP("it is not; field [%06u] corresponds to no register\n", Compiler::dspTreeID(use->GetNode())); + return false; + } + + JITDUMP("it is\n"); + return true; +} + +//---------------------------------------------------------------------------------------------- +// LowerFieldListToFieldListOfRegisters: +// Lower the specified field list into one that is compatible with the return +// registers. +// +// Arguments: +// fieldList - The field list +// void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) { const ReturnTypeDesc& retDesc = comp->compRetTypeDesc; @@ -4868,85 +4955,6 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList) assert(use == nullptr); } -//---------------------------------------------------------------------------------------------- -// IsFieldListCompatibleWithReturn: -// Check if the fields of a FIELD_LIST are compatible with the registers -// being returned. -// -// Arguments: -// fieldList - The FIELD_LIST node -// -// Returns: -// True if the fields of the FIELD_LIST are all direct insertions into the -// return registers. -// -bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList) -{ - JITDUMP("Checking if field list [%06u] is compatible with return ABI: ", Compiler::dspTreeID(fieldList)); - const ReturnTypeDesc& retDesc = comp->compRetTypeDesc; - unsigned numRetRegs = retDesc.GetReturnRegCount(); - - GenTreeFieldList::Use* use = fieldList->Uses().GetHead(); - for (unsigned i = 0; i < numRetRegs; i++) - { - unsigned regStart = retDesc.GetReturnFieldOffset(i); - var_types regType = retDesc.GetReturnRegType(i); - unsigned regEnd = regStart + genTypeSize(regType); - - // TODO-CQ: Could just create a 0 for this. - if (use == nullptr) - { - JITDUMP("it is not; register %u has no corresponding field\n", i); - return false; - } - - do - { - unsigned fieldStart = use->GetOffset(); - - if (fieldStart < regStart) - { - // Not fully contained in a register. - // TODO-CQ: Could just remove these fields if they don't partially overlap with the next register. - JITDUMP("it is not; field [%06u] starts before register %u\n", Compiler::dspTreeID(use->GetNode()), i); - return false; - } - - if (fieldStart >= regEnd) - { - break; - } - - unsigned fieldEnd = fieldStart + genTypeSize(use->GetType()); - if (fieldEnd > regEnd) - { - JITDUMP("it is not; field [%06u] ends after register %u\n", Compiler::dspTreeID(use->GetNode()), i); - return false; - } - - // float -> float insertions are not yet supported - if (varTypeUsesFloatReg(use->GetNode()) && varTypeUsesFloatReg(regType) && (fieldStart != regStart)) - { - JITDUMP("it is not; field [%06u] requires an insertion into register %u\n", - Compiler::dspTreeID(use->GetNode()), i); - return false; - } - - use = use->GetNext(); - } while (use != nullptr); - } - - if (use != nullptr) - { - // TODO-CQ: Could just remove these fields. - JITDUMP("it is not; field [%06u] corresponds to no register\n", Compiler::dspTreeID(use->GetNode())); - return false; - } - - JITDUMP("it is\n"); - return true; -} - //---------------------------------------------------------------------------------------------- // LowerStoreLocCommon: platform independent part of local var or field store lowering. // From eb11af9f62ba230f08cba211aab9c218cc85ed16 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 23:49:50 +0100 Subject: [PATCH 09/12] Remove some restrictions --- src/coreclr/jit/codegencommon.cpp | 1 - src/coreclr/jit/flowgraph.cpp | 5 ----- src/coreclr/jit/forwardsub.cpp | 24 +----------------------- src/coreclr/jit/importer.cpp | 12 ++---------- src/coreclr/jit/morph.cpp | 5 ----- 5 files changed, 3 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 7fb0d7982f93df..5e2a871f3b8eac 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -7390,7 +7390,6 @@ void CodeGen::genStructReturn(GenTree* treeNode) { GenTreeLclVar* lclNode = actualOp1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); - assert(varDsc->lvIsMultiRegRet); #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) var_types type = retTypeDesc.GetReturnRegType(0); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index e5eb99a3bd3bd2..39d5ea57478a8c 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2046,11 +2046,6 @@ class MergedReturns if (varTypeIsStruct(retLclType)) { comp->lvaSetStruct(retLclNum, comp->info.compMethodInfo->args.retTypeClass, false); - - if (comp->compMethodReturnsMultiRegRetType()) - { - retVarDsc->lvIsMultiRegRet = true; - } } else { diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 9d57fa3a4a6dff..649c4d4964a346 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -788,9 +788,7 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) dstVarDsc->lvIsMultiRegRet = true; } - // If a method returns a multi-reg type, only forward sub locals, - // and ensure the local and operand have the required markup. - // (see eg impFixupStructReturnType). + // If a method returns a multi-reg type only forward sub locals. // // TODO-Cleanup: this constraint only exists for multi-reg **struct** // returns, it does not exist for LONGs. However, enabling substitution @@ -818,26 +816,6 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) return false; } } - else if (varTypeIsStruct(fwdSubNode)) - { - GenTreeLclVar* const fwdSubNodeLocal = fwdSubNode->AsLclVar(); - unsigned const fwdLclNum = fwdSubNodeLocal->GetLclNum(); - - // These may later turn into indirections and the backend does not support - // those as sources of multi-reg returns. - // - if (lvaIsImplicitByRefLocal(fwdLclNum)) - { - JITDUMP(" parent is multi-reg return; fwd sub node is implicit byref\n"); - return false; - } - - LclVarDsc* const fwdVarDsc = lvaGetDesc(fwdLclNum); - - JITDUMP(" [marking V%02u as multi-reg-ret]", fwdLclNum); - fwdVarDsc->lvIsMultiRegRet = true; - fwdSubNodeLocal->gtFlags |= GTF_DONT_CSE; - } } // If the value is being roundtripped through a small-typed local then we diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 323951bf64ba91..8f477f1d34ca0e 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -4501,17 +4501,10 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op) if (compMethodReturnsMultiRegRetType() || op->IsMultiRegNode()) { - // We can use any local with multiple registers (it will be forced to memory on mismatch), - // except for implicit byrefs (they may turn into indirections). + // We can use any local with multiple registers, except for implicit + // byrefs (they may turn into indirections). if (op->OperIs(GT_LCL_VAR) && !lvaIsImplicitByRefLocal(op->AsLclVar()->GetLclNum())) { - // Note that this is a multi-reg return. - unsigned lclNum = op->AsLclVarCommon()->GetLclNum(); - lvaTable[lclNum].lvIsMultiRegRet = true; - - // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns. - op->gtFlags |= GTF_DONT_CSE; - return op; } @@ -4537,7 +4530,6 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op) } // The backend does not support other struct-producing nodes (e. g. OBJs) as sources of multi-reg returns. - // It also does not support assembling a multi-reg node into one register (for RETURN nodes at least). return impStoreMultiRegValueToVar(op, info.compMethodInfo->args.retTypeClass DEBUGARG(info.compCallConv)); } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 9d84378af78b7a..740f8b15bb302e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5473,11 +5473,6 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig lvaSetVarAddrExposed(newRetLcl DEBUGARG(AddressExposedReason::DISPATCH_RET_BUF)); - if (varTypeIsStruct(origCall) && compMethodReturnsMultiRegRetType()) - { - lvaGetDesc(newRetLcl)->lvIsMultiRegRet = true; - } - retValArg = gtNewLclVarAddrNode(newRetLcl); retVal = gtNewLclvNode(newRetLcl, genActualType(lvaTable[newRetLcl].lvType)); } From bf80210a8e8b83098b6781eb200a1c71cdef2813 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Mar 2025 23:50:01 +0100 Subject: [PATCH 10/12] Revert "Remove some restrictions" This reverts commit eb11af9f62ba230f08cba211aab9c218cc85ed16. --- src/coreclr/jit/codegencommon.cpp | 1 + src/coreclr/jit/flowgraph.cpp | 5 +++++ src/coreclr/jit/forwardsub.cpp | 24 +++++++++++++++++++++++- src/coreclr/jit/importer.cpp | 12 ++++++++++-- src/coreclr/jit/morph.cpp | 5 +++++ 5 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 5e2a871f3b8eac..7fb0d7982f93df 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -7390,6 +7390,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) { GenTreeLclVar* lclNode = actualOp1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); + assert(varDsc->lvIsMultiRegRet); #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) var_types type = retTypeDesc.GetReturnRegType(0); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 39d5ea57478a8c..e5eb99a3bd3bd2 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2046,6 +2046,11 @@ class MergedReturns if (varTypeIsStruct(retLclType)) { comp->lvaSetStruct(retLclNum, comp->info.compMethodInfo->args.retTypeClass, false); + + if (comp->compMethodReturnsMultiRegRetType()) + { + retVarDsc->lvIsMultiRegRet = true; + } } else { diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 649c4d4964a346..9d57fa3a4a6dff 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -788,7 +788,9 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) dstVarDsc->lvIsMultiRegRet = true; } - // If a method returns a multi-reg type only forward sub locals. + // If a method returns a multi-reg type, only forward sub locals, + // and ensure the local and operand have the required markup. + // (see eg impFixupStructReturnType). // // TODO-Cleanup: this constraint only exists for multi-reg **struct** // returns, it does not exist for LONGs. However, enabling substitution @@ -816,6 +818,26 @@ bool Compiler::fgForwardSubStatement(Statement* stmt) return false; } } + else if (varTypeIsStruct(fwdSubNode)) + { + GenTreeLclVar* const fwdSubNodeLocal = fwdSubNode->AsLclVar(); + unsigned const fwdLclNum = fwdSubNodeLocal->GetLclNum(); + + // These may later turn into indirections and the backend does not support + // those as sources of multi-reg returns. + // + if (lvaIsImplicitByRefLocal(fwdLclNum)) + { + JITDUMP(" parent is multi-reg return; fwd sub node is implicit byref\n"); + return false; + } + + LclVarDsc* const fwdVarDsc = lvaGetDesc(fwdLclNum); + + JITDUMP(" [marking V%02u as multi-reg-ret]", fwdLclNum); + fwdVarDsc->lvIsMultiRegRet = true; + fwdSubNodeLocal->gtFlags |= GTF_DONT_CSE; + } } // If the value is being roundtripped through a small-typed local then we diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 8f477f1d34ca0e..323951bf64ba91 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -4501,10 +4501,17 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op) if (compMethodReturnsMultiRegRetType() || op->IsMultiRegNode()) { - // We can use any local with multiple registers, except for implicit - // byrefs (they may turn into indirections). + // We can use any local with multiple registers (it will be forced to memory on mismatch), + // except for implicit byrefs (they may turn into indirections). if (op->OperIs(GT_LCL_VAR) && !lvaIsImplicitByRefLocal(op->AsLclVar()->GetLclNum())) { + // Note that this is a multi-reg return. + unsigned lclNum = op->AsLclVarCommon()->GetLclNum(); + lvaTable[lclNum].lvIsMultiRegRet = true; + + // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns. + op->gtFlags |= GTF_DONT_CSE; + return op; } @@ -4530,6 +4537,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op) } // The backend does not support other struct-producing nodes (e. g. OBJs) as sources of multi-reg returns. + // It also does not support assembling a multi-reg node into one register (for RETURN nodes at least). return impStoreMultiRegValueToVar(op, info.compMethodInfo->args.retTypeClass DEBUGARG(info.compCallConv)); } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 740f8b15bb302e..9d84378af78b7a 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5473,6 +5473,11 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig lvaSetVarAddrExposed(newRetLcl DEBUGARG(AddressExposedReason::DISPATCH_RET_BUF)); + if (varTypeIsStruct(origCall) && compMethodReturnsMultiRegRetType()) + { + lvaGetDesc(newRetLcl)->lvIsMultiRegRet = true; + } + retValArg = gtNewLclVarAddrNode(newRetLcl); retVal = gtNewLclvNode(newRetLcl, genActualType(lvaTable[newRetLcl].lvType)); } From bf36ee2190bd62e6aefac5a21401487a24326817 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 6 Mar 2025 00:07:20 +0100 Subject: [PATCH 11/12] Remove commented code --- src/coreclr/jit/promotion.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index e8610268196d7f..bbd14401bc53eb 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -2295,11 +2295,6 @@ bool ReplaceVisitor::ReplaceReturnedStructLocal(GenTreeOp* ret, GenTreeLclVarCom return false; } - // if (!IsReturnProfitableAsFieldList(value)) - //{ - // return false; - // } - StructDeaths deaths = m_liveness->GetDeathsForStructLocal(value); GenTreeFieldList* fieldList = m_compiler->gtNewFieldList(); From 442e7187b80509a21dffbf9f55765f706be96181 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 7 Mar 2025 22:41:56 +0100 Subject: [PATCH 12/12] Check DNER --- src/coreclr/jit/morph.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 9d84378af78b7a..3351f05d7ded0e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -8431,7 +8431,9 @@ void Compiler::fgTryReplaceStructLocalWithFields(GenTree** use) return; } - if (lvaGetDesc((*use)->AsLclVar())->lvPromoted) + LclVarDsc* varDsc = lvaGetDesc((*use)->AsLclVar()); + + if (!varDsc->lvDoNotEnregister && varDsc->lvPromoted) { *use = fgMorphLclToFieldList((*use)->AsLclVar()); }