From 6ce0ed97df245271130e84e3e663839a4d9dab22 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 Jul 2024 16:33:17 +0200 Subject: [PATCH 1/8] JIT: Transform multi-reg args to FIELD_LIST in physical promotion This allows promoted fields to stay in registers when they are passed as multi-reg arguments. --- src/coreclr/jit/CMakeLists.txt | 2 + src/coreclr/jit/compiler.cpp | 64 ++ src/coreclr/jit/compiler.h | 17 + src/coreclr/jit/gentree.cpp | 3 + src/coreclr/jit/gentree.h | 6 +- src/coreclr/jit/morph.cpp | 66 +- src/coreclr/jit/promotion.cpp | 671 +++++++++------------ src/coreclr/jit/promotion.h | 58 +- src/coreclr/jit/promotiondecomposition.cpp | 2 +- src/coreclr/jit/structsegments.cpp | 304 ++++++++++ src/coreclr/jit/structsegments.h | 56 ++ 11 files changed, 800 insertions(+), 449 deletions(-) create mode 100644 src/coreclr/jit/structsegments.cpp create mode 100644 src/coreclr/jit/structsegments.h diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 7932e0d452c43..68155021d8eb7 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -178,6 +178,7 @@ set( JIT_SOURCES ssabuilder.cpp ssarenamestate.cpp stacklevelsetter.cpp + structsegments.cpp switchrecognition.cpp treelifeupdater.cpp unwind.cpp @@ -379,6 +380,7 @@ set( JIT_HEADERS ssaconfig.h ssarenamestate.h stacklevelsetter.h + structsegments.h target.h targetx86.h targetamd64.h diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 299d65f4f33cd..2707720525f49 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1990,6 +1990,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc, m_outlinedCompositeSsaNums = nullptr; m_nodeToLoopMemoryBlockMap = nullptr; m_signatureToLookupInfoMap = nullptr; + m_significantSegmentsMap = nullptr; fgSsaPassesCompleted = 0; fgSsaValid = false; fgVNPassesCompleted = 0; @@ -8373,6 +8374,69 @@ void Compiler::TransferTestDataToNode(GenTree* from, GenTree* to) #endif // DEBUG +//------------------------------------------------------------------------ +// GetSignificantSegments: +// Compute a segment tree containing all significant (non-padding) segments +// for the specified class layout. +// +// Parameters: +// layout - The layout +// +// Returns: +// Segment tree containing all significant parts of the layout. 
+// +const StructSegments& Compiler::GetSignificantSegments(ClassLayout* layout) +{ + StructSegments* cached; + if ((m_significantSegmentsMap != nullptr) && m_significantSegmentsMap->Lookup(layout, &cached)) + { + return *cached; + } + + COMP_HANDLE compHnd = info.compCompHnd; + + StructSegments* newSegments = new (this, CMK_Promotion) StructSegments(getAllocator(CMK_Promotion)); + + if (layout->IsBlockLayout()) + { + newSegments->Add(StructSegments::Segment(0, layout->GetSize())); + } + else + { + CORINFO_TYPE_LAYOUT_NODE nodes[256]; + size_t numNodes = ArrLen(nodes); + GetTypeLayoutResult result = compHnd->getTypeLayout(layout->GetClassHandle(), nodes, &numNodes); + + if (result != GetTypeLayoutResult::Success) + { + newSegments->Add(StructSegments::Segment(0, layout->GetSize())); + } + else + { + for (size_t i = 0; i < numNodes; i++) + { + const CORINFO_TYPE_LAYOUT_NODE& node = nodes[i]; + if ((node.type != CORINFO_TYPE_VALUECLASS) || (node.simdTypeHnd != NO_CLASS_HANDLE) || + node.hasSignificantPadding) + { + newSegments->Add(StructSegments::Segment(node.offset, node.offset + node.size)); + } + } + } + } + + if (m_significantSegmentsMap == nullptr) + { + m_significantSegmentsMap = + new (this, CMK_Promotion) ClassLayoutStructSegmentsMap(getAllocator(CMK_Promotion)); + } + + m_significantSegmentsMap->Set(layout, newSegments); + + return *newSegments; +} + + /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index bb6adaaa5472a..c55c4ff37ff48 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -43,6 +43,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "valuenum.h" #include "scev.h" #include "namedintrinsiclist.h" +#include "structsegments.h" #ifdef LATE_DISASM #include "disasm.h" #endif @@ -5630,12 +5631,28 @@ class Compiler return m_signatureToLookupInfoMap; } + const StructSegments& GetSignificantSegments(ClassLayout* layout); + #ifdef SWIFT_SUPPORT typedef JitHashTable, CORINFO_SWIFT_LOWERING*> SwiftLoweringMap; SwiftLoweringMap* m_swiftLoweringCache; const CORINFO_SWIFT_LOWERING* GetSwiftLowering(CORINFO_CLASS_HANDLE clsHnd); #endif + typedef JitHashTable, class StructSegments*> ClassLayoutStructSegmentsMap; + ClassLayoutStructSegmentsMap* m_significantSegmentsMap; + + ClassLayoutStructSegmentsMap* GetSignificantSegmentsMap() + { + if (m_significantSegmentsMap == nullptr) + { + m_significantSegmentsMap = new (this, CMK_Promotion) ClassLayoutStructSegmentsMap(getAllocator(CMK_Promotion)); + } + + return m_significantSegmentsMap; + } + + void optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, ValueNum memoryVN); void optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5279a800a484d..bc3dcda636489 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1389,6 +1389,7 @@ CallArgs::CallArgs() , m_hasRetBuffer(false) , m_isVarArgs(false) , m_abiInformationDetermined(false) + , m_newAbiInformationDetermined(false) , m_hasRegArgs(false) , m_hasStackArgs(false) , m_argsComplete(false) @@ -2623,6 +2624,8 @@ bool GenTreeCall::Equals(GenTreeCall* c1, GenTreeCall* c2) // void CallArgs::ResetFinalArgsAndABIInfo() { + m_newAbiInformationDetermined = false; + if (!IsAbiInformationDetermined()) { return; diff --git 
a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index df0b8d73d5ea2..6e3949c981110 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4817,6 +4817,7 @@ class CallArgs bool m_hasRetBuffer : 1; bool m_isVarArgs : 1; bool m_abiInformationDetermined : 1; + bool m_newAbiInformationDetermined : 1; // True if we have one or more register arguments. bool m_hasRegArgs : 1; // True if we have one or more stack arguments. @@ -4874,8 +4875,10 @@ class CallArgs PushFront(comp, arg); } - void ResetFinalArgsAndABIInfo(); void AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call); + void ResetFinalArgsAndABIInfo(); + + void DetermineNewABIInfo(Compiler* comp, GenTreeCall* call); void ArgsComplete(Compiler* comp, GenTreeCall* call); void EvalArgsToTemps(Compiler* comp, GenTreeCall* call); @@ -4892,6 +4895,7 @@ class CallArgs void SetIsVarArgs() { m_isVarArgs = true; } void ClearIsVarArgs() { m_isVarArgs = false; } bool IsAbiInformationDetermined() const { return m_abiInformationDetermined; } + bool IsNewAbiInformationDetermined() const { return m_newAbiInformationDetermined; } bool AreArgsComplete() const { return m_argsComplete; } bool HasRegArgs() const { return m_hasRegArgs; } bool HasStackArgs() const { return m_hasStackArgs; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 24cee582055c9..fca58f7114afc 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1123,7 +1123,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // Spill multireg struct arguments that have stores or calls embedded in them. SetNeedsTemp(&arg); } - else if (!argx->OperIsLocalRead() && !argx->OperIsLoad()) + else if (!argx->OperIsLocalRead() && !argx->OperIsLoad() && !argx->OperIs(GT_FIELD_LIST)) { // TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing. SetNeedsTemp(&arg); @@ -2313,6 +2313,53 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call #endif m_abiInformationDetermined = true; + m_newAbiInformationDetermined = true; +} + +//------------------------------------------------------------------------ +// DetermineNewABIInfo: +// Determine the new ABI info for all call args without making any IR +// changes. +// +// Parameters: +// comp - The compiler object. +// call - The call to which the CallArgs belongs. +// +void CallArgs::DetermineNewABIInfo(Compiler* comp, GenTreeCall* call) +{ + ClassifierInfo info; + info.CallConv = call->GetUnmanagedCallConv(); + // X86 tailcall helper is considered varargs, but not for ABI classification purposes. + info.IsVarArgs = call->IsVarargs() && !call->IsTailCallViaJitHelper(); + info.HasThis = call->gtArgs.HasThisPointer(); + info.HasRetBuff = call->gtArgs.HasRetBuffer(); + PlatformClassifier classifier(info); + + for (CallArg& arg : Args()) + { + const var_types argSigType = arg.GetSignatureType(); + const CORINFO_CLASS_HANDLE argSigClass = arg.GetSignatureClassHandle(); + ClassLayout* argLayout = argSigClass == NO_CLASS_HANDLE ? nullptr : comp->typGetObjLayout(argSigClass); + + // Some well known args have custom register assignment. + // These should not affect the placement of any other args or stack space required. + // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. + // TODO-Cleanup: Integrate this into the new style ABI classifiers. 
+ regNumber nonStdRegNum = GetCustomRegister(comp, call->GetUnmanagedCallConv(), arg.GetWellKnownArg()); + + if (nonStdRegNum == REG_NA) + { + arg.NewAbiInfo = classifier.Classify(comp, argSigType, argLayout, arg.GetWellKnownArg()); + } + else + { + ABIPassingSegment segment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE); + arg.NewAbiInfo = ABIPassingInformation::FromSegment(comp, segment); + } + } + + m_argsStackSize = classifier.StackSize(); + m_newAbiInformationDetermined = true; } //------------------------------------------------------------------------ @@ -2483,8 +2530,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) unsigned originalSize; if (argObj->TypeGet() == TYP_STRUCT) { - assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); - originalSize = argObj->GetLayout(this)->GetSize(); + if (argObj->OperIs(GT_FIELD_LIST)) + { + originalSize = typGetObjLayout(arg.GetSignatureClassHandle())->GetSize(); + } + else + { + assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); + originalSize = argObj->GetLayout(this)->GetSize(); + } } else { @@ -2564,6 +2618,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) makeOutArgCopy = true; } } + else if (argObj->OperIs(GT_FIELD_LIST)) + { + // Fall through with makeOutArgCopy = false + } else if (!argIsLocal) { // This should only be the case of a value directly producing a known struct type. @@ -2805,12 +2863,12 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) { if ((arg.AbiInfo.ArgType == TYP_STRUCT) && !arg.AbiInfo.PassedByRef) { + foundStructArg = true; GenTree*& argx = (arg.GetLateNode() != nullptr) ? arg.LateNodeRef() : arg.EarlyNodeRef(); if (!argx->OperIs(GT_FIELD_LIST)) { argx = fgMorphMultiregStructArg(&arg); - foundStructArg = true; } } } diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index b656f1576d0d9..11824173974e9 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -58,10 +58,14 @@ struct Access // Number of times this is passed as a call arg. We insert writebacks // before these. unsigned CountCallArgs = 0; + // Number of times this is passed as a register call arg. We may be able to + // avoid the writeback for some overlapping replacements for these. + unsigned CountRegCallArgs = 0; weight_t CountWtd = 0; weight_t CountStoredFromCallWtd = 0; weight_t CountCallArgsWtd = 0; + weight_t CountRegCallArgsWtd = 0; #ifdef DEBUG // Number of times this access is the source of a store. 
@@ -111,14 +115,15 @@ struct Access enum class AccessKindFlags : uint32_t { - None = 0, - IsCallArg = 1, - IsStoredFromCall = 2, - IsCallRetBuf = 4, + None = 0, + IsCallArg = 1, + IsRegCallArg = 2, + IsStoredFromCall = 4, + IsCallRetBuf = 8, #ifdef DEBUG - IsStoreSource = 8, - IsStoreDestination = 16, - IsReturned = 32, + IsStoreSource = 16, + IsStoreDestination = 32, + IsReturned = 64, #endif }; @@ -352,6 +357,12 @@ class LocalUses { access->CountCallArgs++; access->CountCallArgsWtd += weight; + + if ((flags & AccessKindFlags::IsRegCallArg) != AccessKindFlags::None) + { + access->CountRegCallArgs++; + access->CountRegCallArgsWtd += weight; + } } if ((flags & (AccessKindFlags::IsStoredFromCall | AccessKindFlags::IsCallRetBuf)) != AccessKindFlags::None) @@ -688,6 +699,43 @@ class LocalUses countOverlappedCallArgWtd += otherAccess.CountCallArgsWtd; countOverlappedStoredFromCallWtd += otherAccess.CountStoredFromCallWtd; + + if (otherAccess.CountRegCallArgs > 0) + { + auto willPassFieldInRegister = [=, &access, &otherAccess]() { + if (access.Offset < otherAccess.Offset) + { + return false; + } + + unsigned layoutOffset = access.Offset - otherAccess.Offset; + if ((layoutOffset % TARGET_POINTER_SIZE) != 0) + { + return false; + } + + unsigned accessSize = genTypeSize(access.AccessType); + if (accessSize == TARGET_POINTER_SIZE) + { + return true; + } + + const StructSegments& significantSegments = comp->GetSignificantSegments(otherAccess.Layout); + if (!significantSegments.Intersects(StructSegments::Segment(layoutOffset + accessSize, layoutOffset + TARGET_POINTER_SIZE))) + { + return true; + } + + return false; + }; + // We may be able to decompose the call argument to require no + // write-back. + if (willPassFieldInRegister()) + { + countOverlappedCallArg -= otherAccess.CountRegCallArgs; + countOverlappedCallArgWtd -= otherAccess.CountRegCallArgsWtd; + } + } } // We cost any normal access (which is a struct load or store) without promotion at 3 cycles. 
@@ -881,6 +929,8 @@ class LocalUses access.CountStoreDestinationWtd); printf(" # as call arg: (%u, " FMT_WT ")\n", access.CountCallArgs, access.CountCallArgsWtd); + printf(" # as reg call arg: (%u, " FMT_WT ")\n", access.CountRegCallArgs, + access.CountRegCallArgsWtd); printf(" # as retbuf: (%u, " FMT_WT ")\n", access.CountPassedAsRetbuf, access.CountPassedAsRetbufWtd); printf(" # as returned value: (%u, " FMT_WT ")\n\n", access.CountReturns, @@ -1245,7 +1295,7 @@ class LocalsUseVisitor : public GenTreeVisitor } #endif - agg->Unpromoted = m_prom->SignificantSegments(m_compiler->lvaGetDesc(agg->LclNum)->GetLayout()); + agg->Unpromoted = m_compiler->GetSignificantSegments(m_compiler->lvaGetDesc(agg->LclNum)->GetLayout()); for (Replacement& rep : reps) { agg->Unpromoted.Subtract(StructSegments::Segment(rep.Offset, rep.Offset + genTypeSize(rep.AccessType))); @@ -1430,13 +1480,31 @@ class LocalsUseVisitor : public GenTreeVisitor if (user->IsCall()) { - for (CallArg& arg : user->AsCall()->gtArgs.Args()) + GenTreeCall* call = user->AsCall(); + for (CallArg& arg : call->gtArgs.Args()) { - if (arg.GetNode()->gtEffectiveVal() == lcl) + if (arg.GetNode()->gtEffectiveVal() != lcl) { - flags |= AccessKindFlags::IsCallArg; - break; + continue; } + + flags |= AccessKindFlags::IsCallArg; + +#if FEATURE_MULTIREG_ARGS + if (!call->gtArgs.IsNewAbiInformationDetermined()) + { + call->gtArgs.DetermineNewABIInfo(m_compiler, call); + } + + if (!arg.NewAbiInfo.HasAnyStackSegment() && !arg.NewAbiInfo.HasExactlyOneRegisterSegment()) + { + // TODO-CQ: Support for other register args than multireg + // args as well. + flags |= AccessKindFlags::IsRegCallArg; + } +#endif + + break; } } @@ -1484,365 +1552,6 @@ bool Replacement::Overlaps(unsigned otherStart, unsigned otherSize) const return true; } -//------------------------------------------------------------------------ -// IntersectsOrAdjacent: -// Check if this segment intersects or is adjacent to another segment. -// -// Parameters: -// other - The other segment. -// -// Returns: -// True if so. -// -bool StructSegments::Segment::IntersectsOrAdjacent(const Segment& other) const -{ - if (End < other.Start) - { - return false; - } - - if (other.End < Start) - { - return false; - } - - return true; -} - -//------------------------------------------------------------------------ -// Intersects: -// Check if this segment intersects another segment. -// -// Parameters: -// other - The other segment. -// -// Returns: -// True if so. -// -bool StructSegments::Segment::Intersects(const Segment& other) const -{ - if (End <= other.Start) - { - return false; - } - - if (other.End <= Start) - { - return false; - } - - return true; -} - -//------------------------------------------------------------------------ -// Contains: -// Check if this segment contains another segment. -// -// Parameters: -// other - The other segment. -// -// Returns: -// True if so. -// -bool StructSegments::Segment::Contains(const Segment& other) const -{ - return (other.Start >= Start) && (other.End <= End); -} - -//------------------------------------------------------------------------ -// Merge: -// Update this segment to also contain another segment. -// -// Parameters: -// other - The other segment. -// -void StructSegments::Segment::Merge(const Segment& other) -{ - Start = min(Start, other.Start); - End = max(End, other.End); -} - -//------------------------------------------------------------------------ -// Add: -// Add a segment to the data structure. 
-// -// Parameters: -// segment - The segment to add. -// -void StructSegments::Add(const Segment& segment) -{ - size_t index = Promotion::BinarySearch(m_segments, segment.Start); - - if ((ssize_t)index < 0) - { - index = ~index; - } - - m_segments.insert(m_segments.begin() + index, segment); - size_t endIndex; - for (endIndex = index + 1; endIndex < m_segments.size(); endIndex++) - { - if (!m_segments[index].IntersectsOrAdjacent(m_segments[endIndex])) - { - break; - } - - m_segments[index].Merge(m_segments[endIndex]); - } - - m_segments.erase(m_segments.begin() + index + 1, m_segments.begin() + endIndex); -} - -//------------------------------------------------------------------------ -// Subtract: -// Subtract a segment from the data structure. -// -// Parameters: -// segment - The segment to subtract. -// -void StructSegments::Subtract(const Segment& segment) -{ - size_t index = Promotion::BinarySearch(m_segments, segment.Start); - if ((ssize_t)index < 0) - { - index = ~index; - } - else - { - // Start == segment[index].End, which makes it non-interesting. - index++; - } - - if (index >= m_segments.size()) - { - return; - } - - // Here we know Start < segment[index].End. Do they not intersect at all? - if (m_segments[index].Start >= segment.End) - { - // Does not intersect any segment. - return; - } - - assert(m_segments[index].Intersects(segment)); - - if (m_segments[index].Contains(segment)) - { - if (segment.Start > m_segments[index].Start) - { - // New segment (existing.Start, segment.Start) - if (segment.End < m_segments[index].End) - { - m_segments.insert(m_segments.begin() + index, Segment(m_segments[index].Start, segment.Start)); - - // And new segment (segment.End, existing.End) - m_segments[index + 1].Start = segment.End; - return; - } - - m_segments[index].End = segment.Start; - return; - } - if (segment.End < m_segments[index].End) - { - // New segment (segment.End, existing.End) - m_segments[index].Start = segment.End; - return; - } - - // Full segment is being removed - m_segments.erase(m_segments.begin() + index); - return; - } - - if (segment.Start > m_segments[index].Start) - { - m_segments[index].End = segment.Start; - index++; - } - - size_t endIndex = Promotion::BinarySearch(m_segments, segment.End); - if ((ssize_t)endIndex >= 0) - { - m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex + 1); - return; - } - - endIndex = ~endIndex; - if (endIndex == m_segments.size()) - { - m_segments.erase(m_segments.begin() + index, m_segments.end()); - return; - } - - if (segment.End > m_segments[endIndex].Start) - { - m_segments[endIndex].Start = segment.End; - } - - m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex); -} - -//------------------------------------------------------------------------ -// IsEmpty: -// Check if the segment tree is empty. -// -// Returns: -// True if so. -// -bool StructSegments::IsEmpty() -{ - return m_segments.size() == 0; -} - -//------------------------------------------------------------------------ -// CoveringSegment: -// Compute a segment that covers all contained segments in this segment tree. -// -// Parameters: -// result - [out] The single segment. Only valid if the method returns true. -// -// Returns: -// True if this segment tree was non-empty; otherwise false. 
-// -bool StructSegments::CoveringSegment(Segment* result) -{ - if (m_segments.size() == 0) - { - return false; - } - - result->Start = m_segments[0].Start; - result->End = m_segments[m_segments.size() - 1].End; - return true; -} - -//------------------------------------------------------------------------ -// Intersects: -// Check if a segment intersects with any segment in this segment tree. -// -// Parameters: -// segment - The segment. -// -// Returns: -// True if the input segment intersects with any segment in the tree; -// otherwise false. -// -bool StructSegments::Intersects(const Segment& segment) -{ - size_t index = Promotion::BinarySearch(m_segments, segment.Start); - if ((ssize_t)index < 0) - { - index = ~index; - } - else - { - // Start == segment[index].End, which makes it non-interesting. - index++; - } - - if (index >= m_segments.size()) - { - return false; - } - - // Here we know Start < segment[index].End. Do they not intersect at all? - if (m_segments[index].Start >= segment.End) - { - // Does not intersect any segment. - return false; - } - - assert(m_segments[index].Intersects(segment)); - return true; -} - -#ifdef DEBUG -//------------------------------------------------------------------------ -// Dump: -// Dump a string representation of the segment tree to stdout. -// -void StructSegments::Dump() -{ - if (m_segments.size() == 0) - { - printf(""); - } - else - { - const char* sep = ""; - for (const Segment& segment : m_segments) - { - printf("%s[%03u..%03u)", sep, segment.Start, segment.End); - sep = " "; - } - } -} -#endif - -//------------------------------------------------------------------------ -// SignificantSegments: -// Compute a segment tree containing all significant (non-padding) segments -// for the specified class layout. -// -// Parameters: -// layout - The layout -// -// Returns: -// Segment tree containing all significant parts of the layout. 
-// -StructSegments Promotion::SignificantSegments(ClassLayout* layout) -{ - StructSegments* cached; - if ((m_significantSegmentsCache != nullptr) && m_significantSegmentsCache->Lookup(layout, &cached)) - { - return StructSegments(*cached); - } - - COMP_HANDLE compHnd = m_compiler->info.compCompHnd; - - StructSegments segments(m_compiler->getAllocator(CMK_Promotion)); - - if (layout->IsBlockLayout()) - { - segments.Add(StructSegments::Segment(0, layout->GetSize())); - } - else - { - CORINFO_TYPE_LAYOUT_NODE nodes[256]; - size_t numNodes = ArrLen(nodes); - GetTypeLayoutResult result = compHnd->getTypeLayout(layout->GetClassHandle(), nodes, &numNodes); - - if (result != GetTypeLayoutResult::Success) - { - segments.Add(StructSegments::Segment(0, layout->GetSize())); - } - else - { - for (size_t i = 0; i < numNodes; i++) - { - const CORINFO_TYPE_LAYOUT_NODE& node = nodes[i]; - if ((node.type != CORINFO_TYPE_VALUECLASS) || (node.simdTypeHnd != NO_CLASS_HANDLE) || - node.hasSignificantPadding) - { - segments.Add(StructSegments::Segment(node.offset, node.offset + node.size)); - } - } - } - } - - if (m_significantSegmentsCache == nullptr) - { - m_significantSegmentsCache = - new (m_compiler, CMK_Promotion) ClassLayoutStructSegmentsMap(m_compiler->getAllocator(CMK_Promotion)); - } - - m_significantSegmentsCache->Set(layout, new (m_compiler, CMK_Promotion) StructSegments(segments)); - - return segments; -} - //------------------------------------------------------------------------ // CreateWriteBack: // Create IR that writes a replacement local's value back to its struct local: @@ -2218,15 +1927,18 @@ void ReplaceVisitor::InsertPreStatementReadBacks() // lcl - The local // offs - Start offset of the segment // size - Size of the segment -// func - Callback +// func - Callback of type bool(Replacement&). If the callback returns false, the visit aborts. +// +// Return Value: +// false if the visitor aborted. // template -void ReplaceVisitor::VisitOverlappingReplacements(unsigned lcl, unsigned offs, unsigned size, Func func) +bool ReplaceVisitor::VisitOverlappingReplacements(unsigned lcl, unsigned offs, unsigned size, Func func) { AggregateInfo* agg = m_aggregates.Lookup(lcl); if (agg == nullptr) { - return; + return true; } jitstd::vector& replacements = agg->Replacements; @@ -2245,10 +1957,15 @@ void ReplaceVisitor::VisitOverlappingReplacements(unsigned lcl, unsigned offs, u while ((index < replacements.size()) && (replacements[index].Offset < end)) { Replacement& rep = replacements[index]; - func(rep); + if (!func(rep)) + { + return false; + } index++; } + + return true; } //------------------------------------------------------------------------ @@ -2294,11 +2011,17 @@ void ReplaceVisitor::InsertPreStatementWriteBacks() for (CallArg& arg : call->gtArgs.Args()) { GenTree* node = arg.GetNode()->gtEffectiveVal(); - if (!node->TypeIs(TYP_STRUCT) || !node->OperIsLocalRead()) + if (!node->TypeIs(TYP_STRUCT) || !node->OperIsLocalRead() || (m_replacer->m_aggregates.Lookup(node->AsLclVarCommon()->GetLclNum()) == nullptr)) { continue; } + if (m_replacer->CanReplaceCallArgWithFieldListOfReplacements(call, &arg, node->AsLclVarCommon())) + { + // Multi-reg arg; can decompose into FIELD_LIST. 
+ continue; + } + GenTreeLclVarCommon* lcl = node->AsLclVarCommon(); m_replacer->WriteBackBeforeCurrentStatement(lcl->GetLclNum(), lcl->GetLclOffs(), lcl->GetLayout(m_compiler)->GetSize()); @@ -2380,6 +2103,171 @@ GenTree** ReplaceVisitor::InsertMidTreeReadBacks(GenTree** use) return use; } +//------------------------------------------------------------------------ +// ReplaceCallArgWithFieldList: +// Handle a call that may pass a struct local with replacements as the +// retbuf. +// +// Parameters: +// call - The call +// argNode - The argument node +// +bool ReplaceVisitor::ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVarCommon* argNode) +{ + CallArg* callArg = call->gtArgs.FindByNode(argNode); + if (callArg == nullptr) + { + // TODO-CQ: Could be wrapped in a comma? Does this happen? + return false; + } + + if (!CanReplaceCallArgWithFieldListOfReplacements(call, callArg, argNode)) + { + return false; + } + + AggregateInfo* agg = m_aggregates.Lookup(argNode->GetLclNum()); + ClassLayout* layout = argNode->GetLayout(m_compiler); + assert(layout != nullptr); + StructDeaths deaths = m_liveness->GetDeathsForStructLocal(argNode); + GenTreeFieldList* fieldList = new (m_compiler, GT_FIELD_LIST) GenTreeFieldList; + for (unsigned i = 0; i < callArg->NewAbiInfo.NumSegments; i++) + { + const ABIPassingSegment& seg = callArg->NewAbiInfo.Segment(i); + + Replacement* rep = nullptr; + if (agg->OverlappingReplacements(argNode->GetLclOffs() + seg.Offset, seg.Size, &rep, nullptr) && rep->NeedsWriteBack) + { + GenTreeLclVar* fieldValue = m_compiler->gtNewLclvNode(rep->LclNum, rep->AccessType); + + if (deaths.IsReplacementDying(static_cast(rep - agg->Replacements.data()))) + { + fieldValue->gtFlags |= GTF_VAR_DEATH; + CheckForwardSubForLastUse(rep->LclNum); + } + + fieldList->AddField(m_compiler, fieldValue, seg.Offset, rep->AccessType); + } + else + { + // Unpromoted part, or replacement local is not up to date. + var_types type; + if (rep != nullptr) + { + type = rep->AccessType; + } + else + { + if ((seg.Offset % TARGET_POINTER_SIZE) == 0 && (seg.Size == TARGET_POINTER_SIZE)) + { + type = layout->GetGCPtrType(seg.Offset / TARGET_POINTER_SIZE); + } + else + { + type = seg.GetRegisterType(); + } + } + + GenTree* fieldValue = m_compiler->gtNewLclFldNode(argNode->GetLclNum(), type, argNode->GetLclOffs() + seg.Offset); + fieldList->AddField(m_compiler, fieldValue, seg.Offset, type); + + if (!m_compiler->lvaGetDesc(argNode->GetLclNum())->lvDoNotEnregister) + { + m_compiler->lvaSetVarDoNotEnregister(argNode->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); + } + } + } + + assert(callArg->GetEarlyNode() == argNode); + callArg->SetEarlyNode(fieldList); + + m_madeChanges = true; + return true; +} + +//------------------------------------------------------------------------ +// CanReplaceCallArgWithFieldListOfReplacements: +// Returns true if a struct arg is replaceable by a FIELD_LIST containing +// some replacement field. +// +// Parameters: +// call - The call +// callArg - The call argument +// lcl - The local that is the node of the call argument +// +// Returns: +// True if the arg can be replaced by a FIELD_LIST that contains at least one +// replacement. +// +bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* call, CallArg* callArg, GenTreeLclVarCommon* lcl) +{ +#if !FEATURE_MULTIREG_ARGS + // TODO-CQ: We should do a similar thing for structs passed in a single + // register. 
+ return false; +#else + // We should have computed ABI information during the costing phase. + assert(call->gtArgs.IsNewAbiInformationDetermined()); + + if (callArg->NewAbiInfo.HasAnyStackSegment() || callArg->NewAbiInfo.HasExactlyOneRegisterSegment()) + { + return false; + } + + AggregateInfo* agg = m_aggregates.Lookup(lcl->GetLclNum()); + assert(agg != nullptr); + + bool anyReplacements = false; + for (unsigned i = 0; i < callArg->NewAbiInfo.NumSegments; i++) + { + const ABIPassingSegment& seg = callArg->NewAbiInfo.Segment(i); + assert(seg.IsPassedInRegister()); + + auto callback = [=, &anyReplacements, &seg](Replacement& rep) { + anyReplacements = true; + + // Replacement must start at the right offset... + if (rep.Offset != lcl->GetLclOffs() + seg.Offset) + { + return false; + } + + // It must not be too long.. + unsigned repSize = genTypeSize(rep.AccessType); + if (repSize > seg.Size) + { + return false; + } + + // If it is too short, the remainder that would be passed in the + // register should be padding. We can check that by only checking + // whether the remainder intersects anything unpromoted, since if + // the remainder is a different promotion we will return false when + // the replacement is visited in this callback. + if ((repSize < seg.Size) && agg->Unpromoted.Intersects(StructSegments::Segment(rep.Offset + repSize, rep.Offset + seg.Size))) + { + return false; + } + + // Finally, the backend requires the register types to match. + if (!varTypeUsesSameRegType(rep.AccessType, seg.GetRegisterType())) + { + return false; + } + + return true; + }; + + if (!VisitOverlappingReplacements(lcl->GetLclNum(), lcl->GetLclOffs() + seg.Offset, seg.Size, callback)) + { + return false; + } + } + + return anyReplacements; +#endif +} + //------------------------------------------------------------------------ // ReadBackAfterCall: // Handle a call that may pass a struct local with replacements as the @@ -2546,22 +2434,27 @@ void ReplaceVisitor::ReplaceLocal(GenTree** use, GenTree* user) offs + lcl->GetLayout(m_compiler)->GetSize()); assert(effectiveUser->OperIs(GT_CALL, GT_RETURN, GT_SWIFT_ERROR_RET)); - unsigned size = lcl->GetLayout(m_compiler)->GetSize(); - WriteBackBeforeUse(use, lclNum, lcl->GetLclOffs(), size); - if (IsPromotedStructLocalDying(lcl)) + if (!effectiveUser->IsCall() || !ReplaceCallArgWithFieldList(effectiveUser->AsCall(), lcl)) { - lcl->gtFlags |= GTF_VAR_DEATH; - CheckForwardSubForLastUse(lclNum); + unsigned size = lcl->GetLayout(m_compiler)->GetSize(); + WriteBackBeforeUse(use, lclNum, lcl->GetLclOffs(), size); - // Relying on the values in the struct local after this struct use - // would effectively introduce another use of the struct, so - // indicate that no replacements are up to date. - for (Replacement& rep : replacements) + if (IsPromotedStructLocalDying(lcl)) { - SetNeedsWriteBack(rep); + lcl->gtFlags |= GTF_VAR_DEATH; + CheckForwardSubForLastUse(lclNum); + + // Relying on the values in the struct local after this struct use + // would effectively introduce another use of the struct, so + // indicate that no replacements are up to date. 
+ for (Replacement& rep : replacements) + { + SetNeedsWriteBack(rep); + } } } + return; } @@ -2690,7 +2583,7 @@ void ReplaceVisitor::WriteBackBeforeCurrentStatement(unsigned lcl, unsigned offs VisitOverlappingReplacements(lcl, offs, size, [this, lcl](Replacement& rep) { if (!rep.NeedsWriteBack) { - return; + return true; } GenTree* readBack = Promotion::CreateWriteBack(m_compiler, lcl, rep); @@ -2699,6 +2592,7 @@ void ReplaceVisitor::WriteBackBeforeCurrentStatement(unsigned lcl, unsigned offs DISPSTMT(stmt); m_compiler->fgInsertStmtBefore(m_currentBlock, m_currentStmt, stmt); ClearNeedsWriteBack(rep); + return true; }); } @@ -2718,7 +2612,7 @@ void ReplaceVisitor::WriteBackBeforeUse(GenTree** use, unsigned lcl, unsigned of VisitOverlappingReplacements(lcl, offs, size, [this, &use, lcl](Replacement& rep) { if (!rep.NeedsWriteBack) { - return; + return true; } GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), @@ -2728,6 +2622,7 @@ void ReplaceVisitor::WriteBackBeforeUse(GenTree** use, unsigned lcl, unsigned of ClearNeedsWriteBack(rep); m_madeChanges = true; + return true; }); } diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index 89097d78cd106..37cc06a2153b5 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -40,55 +40,6 @@ struct Replacement bool Overlaps(unsigned otherStart, unsigned otherSize) const; }; -// Represents significant segments of a struct operation. -// -// Essentially a segment tree (but not stored as a tree) that supports boolean -// Add/Subtract operations of segments. Used to compute the remainder after -// replacements have been handled as part of a decomposed block operation. -class StructSegments -{ -public: - struct Segment - { - unsigned Start = 0; - unsigned End = 0; - - Segment() - { - } - - Segment(unsigned start, unsigned end) - : Start(start) - , End(end) - { - } - - bool IntersectsOrAdjacent(const Segment& other) const; - bool Intersects(const Segment& other) const; - bool Contains(const Segment& other) const; - void Merge(const Segment& other); - }; - -private: - jitstd::vector m_segments; - -public: - explicit StructSegments(CompAllocator allocator) - : m_segments(allocator) - { - } - - void Add(const Segment& segment); - void Subtract(const Segment& segment); - bool IsEmpty(); - bool CoveringSegment(Segment* result); - bool Intersects(const Segment& segment); - -#ifdef DEBUG - void Dump(); -#endif -}; - // Represents information about an aggregate that now has replacements in it. 
struct AggregateInfo { @@ -137,12 +88,9 @@ class AggregateInfoMap } }; -typedef JitHashTable, class StructSegments*> ClassLayoutStructSegmentsMap; - class Promotion { Compiler* m_compiler; - ClassLayoutStructSegmentsMap* m_significantSegmentsCache = nullptr; friend class LocalUses; friend class LocalsUseVisitor; @@ -152,8 +100,6 @@ class Promotion friend class DecompositionPlan; friend class StructSegments; - StructSegments SignificantSegments(ClassLayout* layout); - void ExplicitlyZeroInitReplacementLocals(unsigned lclNum, const jitstd::vector& replacements, Statement** prevStmt); @@ -341,12 +287,14 @@ class ReplaceVisitor : public GenTreeVisitor void ClearNeedsReadBack(Replacement& rep); template - void VisitOverlappingReplacements(unsigned lcl, unsigned offs, unsigned size, Func func); + bool VisitOverlappingReplacements(unsigned lcl, unsigned offs, unsigned size, Func func); void InsertPreStatementReadBacks(); void InsertPreStatementWriteBacks(); GenTree** InsertMidTreeReadBacks(GenTree** use); + bool ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVarCommon* callArg); + bool CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* call, CallArg* callArg, GenTreeLclVarCommon* lcl); void ReadBackAfterCall(GenTreeCall* call, GenTree* user); bool IsPromotedStructLocalDying(GenTreeLclVarCommon* structLcl); void ReplaceLocal(GenTree** use, GenTree* user); diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index d4f71b9983520..bf89852547fd7 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -238,7 +238,7 @@ class DecompositionPlan { ClassLayout* dstLayout = m_store->GetLayout(m_compiler); - StructSegments segments = m_promotion->SignificantSegments(dstLayout); + StructSegments segments = m_compiler->GetSignificantSegments(dstLayout); for (int i = 0; i < m_entries.Height(); i++) { diff --git a/src/coreclr/jit/structsegments.cpp b/src/coreclr/jit/structsegments.cpp new file mode 100644 index 0000000000000..d1364be0d909e --- /dev/null +++ b/src/coreclr/jit/structsegments.cpp @@ -0,0 +1,304 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" +#include "structsegments.h" +#include "promotion.h" + +//------------------------------------------------------------------------ +// IntersectsOrAdjacent: +// Check if this segment intersects or is adjacent to another segment. +// +// Parameters: +// other - The other segment. +// +// Returns: +// True if so. +// +bool StructSegments::Segment::IntersectsOrAdjacent(const Segment& other) const +{ + if (End < other.Start) + { + return false; + } + + if (other.End < Start) + { + return false; + } + + return true; +} + +//------------------------------------------------------------------------ +// Intersects: +// Check if this segment intersects another segment. +// +// Parameters: +// other - The other segment. +// +// Returns: +// True if so. +// +bool StructSegments::Segment::Intersects(const Segment& other) const +{ + if (End <= other.Start) + { + return false; + } + + if (other.End <= Start) + { + return false; + } + + return true; +} + +//------------------------------------------------------------------------ +// Contains: +// Check if this segment contains another segment. +// +// Parameters: +// other - The other segment. +// +// Returns: +// True if so. 
+// +bool StructSegments::Segment::Contains(const Segment& other) const +{ + return (other.Start >= Start) && (other.End <= End); +} + +//------------------------------------------------------------------------ +// Merge: +// Update this segment to also contain another segment. +// +// Parameters: +// other - The other segment. +// +void StructSegments::Segment::Merge(const Segment& other) +{ + Start = min(Start, other.Start); + End = max(End, other.End); +} + +//------------------------------------------------------------------------ +// Add: +// Add a segment to the data structure. +// +// Parameters: +// segment - The segment to add. +// +void StructSegments::Add(const Segment& segment) +{ + size_t index = Promotion::BinarySearch(m_segments, segment.Start); + + if ((ssize_t)index < 0) + { + index = ~index; + } + + m_segments.insert(m_segments.begin() + index, segment); + size_t endIndex; + for (endIndex = index + 1; endIndex < m_segments.size(); endIndex++) + { + if (!m_segments[index].IntersectsOrAdjacent(m_segments[endIndex])) + { + break; + } + + m_segments[index].Merge(m_segments[endIndex]); + } + + m_segments.erase(m_segments.begin() + index + 1, m_segments.begin() + endIndex); +} + +//------------------------------------------------------------------------ +// Subtract: +// Subtract a segment from the data structure. +// +// Parameters: +// segment - The segment to subtract. +// +void StructSegments::Subtract(const Segment& segment) +{ + size_t index = Promotion::BinarySearch(m_segments, segment.Start); + if ((ssize_t)index < 0) + { + index = ~index; + } + else + { + // Start == segment[index].End, which makes it non-interesting. + index++; + } + + if (index >= m_segments.size()) + { + return; + } + + // Here we know Start < segment[index].End. Do they not intersect at all? + if (m_segments[index].Start >= segment.End) + { + // Does not intersect any segment. + return; + } + + assert(m_segments[index].Intersects(segment)); + + if (m_segments[index].Contains(segment)) + { + if (segment.Start > m_segments[index].Start) + { + // New segment (existing.Start, segment.Start) + if (segment.End < m_segments[index].End) + { + m_segments.insert(m_segments.begin() + index, Segment(m_segments[index].Start, segment.Start)); + + // And new segment (segment.End, existing.End) + m_segments[index + 1].Start = segment.End; + return; + } + + m_segments[index].End = segment.Start; + return; + } + if (segment.End < m_segments[index].End) + { + // New segment (segment.End, existing.End) + m_segments[index].Start = segment.End; + return; + } + + // Full segment is being removed + m_segments.erase(m_segments.begin() + index); + return; + } + + if (segment.Start > m_segments[index].Start) + { + m_segments[index].End = segment.Start; + index++; + } + + size_t endIndex = Promotion::BinarySearch(m_segments, segment.End); + if ((ssize_t)endIndex >= 0) + { + m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex + 1); + return; + } + + endIndex = ~endIndex; + if (endIndex == m_segments.size()) + { + m_segments.erase(m_segments.begin() + index, m_segments.end()); + return; + } + + if (segment.End > m_segments[endIndex].Start) + { + m_segments[endIndex].Start = segment.End; + } + + m_segments.erase(m_segments.begin() + index, m_segments.begin() + endIndex); +} + +//------------------------------------------------------------------------ +// IsEmpty: +// Check if the segment tree is empty. +// +// Returns: +// True if so. 
+// +bool StructSegments::IsEmpty() const +{ + return m_segments.size() == 0; +} + +//------------------------------------------------------------------------ +// CoveringSegment: +// Compute a segment that covers all contained segments in this segment tree. +// +// Parameters: +// result - [out] The single segment. Only valid if the method returns true. +// +// Returns: +// True if this segment tree was non-empty; otherwise false. +// +bool StructSegments::CoveringSegment(Segment* result) const +{ + if (m_segments.size() == 0) + { + return false; + } + + result->Start = m_segments[0].Start; + result->End = m_segments[m_segments.size() - 1].End; + return true; +} + +//------------------------------------------------------------------------ +// Intersects: +// Check if a segment intersects with any segment in this segment tree. +// +// Parameters: +// segment - The segment. +// +// Returns: +// True if the input segment intersects with any segment in the tree; +// otherwise false. +// +bool StructSegments::Intersects(const Segment& segment) const +{ + size_t index = Promotion::BinarySearch(m_segments, segment.Start); + if ((ssize_t)index < 0) + { + index = ~index; + } + else + { + // Start == segment[index].End, which makes it non-interesting. + index++; + } + + if (index >= m_segments.size()) + { + return false; + } + + // Here we know Start < segment[index].End. Do they not intersect at all? + if (m_segments[index].Start >= segment.End) + { + // Does not intersect any segment. + return false; + } + + assert(m_segments[index].Intersects(segment)); + return true; +} + +#ifdef DEBUG +//------------------------------------------------------------------------ +// Dump: +// Dump a string representation of the segment tree to stdout. +// +void StructSegments::Dump() +{ + if (m_segments.size() == 0) + { + printf(""); + } + else + { + const char* sep = ""; + for (const Segment& segment : m_segments) + { + printf("%s[%03u..%03u)", sep, segment.Start, segment.End); + sep = " "; + } + } +} +#endif + diff --git a/src/coreclr/jit/structsegments.h b/src/coreclr/jit/structsegments.h new file mode 100644 index 0000000000000..2a78a181a5b18 --- /dev/null +++ b/src/coreclr/jit/structsegments.h @@ -0,0 +1,56 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#pragma once + +#include "alloc.h" +#include "jitstd/vector.h" + +// Represents significant segments of a struct operation. +// +// Essentially a segment tree (but not stored as a tree) that supports boolean +// Add/Subtract operations of segments. Used to compute the remainder after +// replacements have been handled as part of a decomposed block operation. 
+class StructSegments +{ +public: + struct Segment + { + unsigned Start = 0; + unsigned End = 0; + + Segment() + { + } + + Segment(unsigned start, unsigned end) + : Start(start) + , End(end) + { + } + + bool IntersectsOrAdjacent(const Segment& other) const; + bool Intersects(const Segment& other) const; + bool Contains(const Segment& other) const; + void Merge(const Segment& other); + }; + +private: + jitstd::vector m_segments; + +public: + explicit StructSegments(CompAllocator allocator) + : m_segments(allocator) + { + } + + void Add(const Segment& segment); + void Subtract(const Segment& segment); + bool IsEmpty() const; + bool CoveringSegment(Segment* result) const; + bool Intersects(const Segment& segment) const; + +#ifdef DEBUG + void Dump(); +#endif +}; From ae28f6f8f6941718271d6fe58c3a5fc9890797e4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 Jul 2024 17:00:32 +0200 Subject: [PATCH 2/8] Few fixes --- src/coreclr/jit/forwardsub.cpp | 17 ++++++++++++----- src/coreclr/jit/morph.cpp | 4 ++-- src/coreclr/jit/promotion.cpp | 4 ++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 9d57fa3a4a6df..3813fe3be0362 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -222,14 +222,21 @@ class ForwardSubVisitor final : public GenTreeVisitor // // fgGetStubAddrArg cannot handle complex trees (it calls gtClone) // - bool isCallTarget = false; - if ((parent != nullptr) && parent->IsCall()) + bool canSubstituteIntoParent = true; + if (parent != nullptr) { - GenTreeCall* const parentCall = parent->AsCall(); - isCallTarget = (parentCall->gtCallType == CT_INDIRECT) && (parentCall->gtCallAddr == node); + if (parent->IsCall()) + { + GenTreeCall* const parentCall = parent->AsCall(); + canSubstituteIntoParent = (parentCall->gtCallType != CT_INDIRECT) || (parentCall->gtCallAddr != node); + } + else if (parent->OperIs(GT_FIELD_LIST)) + { + canSubstituteIntoParent = false; + } } - if (!isCallTarget && IsLastUse(node->AsLclVar())) + if (canSubstituteIntoParent && IsLastUse(node->AsLclVar())) { m_node = node; m_use = use; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index fca58f7114afc..76a42f9b327a4 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1114,7 +1114,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // here we consider spilling it into a local. We also need to spill it in case we have a node that we do not // currently handle in multi-reg morphing. // - if (varTypeIsStruct(argx) && !arg.m_needTmp) + if (varTypeIsStruct(argx) && !arg.m_needTmp && !argx->OperIs(GT_FIELD_LIST)) { if ((arg.AbiInfo.NumRegs > 0) && ((arg.AbiInfo.NumRegs + arg.AbiInfo.GetStackSlotsNumber()) > 1)) { @@ -1123,7 +1123,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // Spill multireg struct arguments that have stores or calls embedded in them. SetNeedsTemp(&arg); } - else if (!argx->OperIsLocalRead() && !argx->OperIsLoad() && !argx->OperIs(GT_FIELD_LIST)) + else if (!argx->OperIsLocalRead() && !argx->OperIsLoad()) { // TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing. 
SetNeedsTemp(&arg); diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 11824173974e9..ac788e2dc8a6b 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -2156,6 +2156,10 @@ bool ReplaceVisitor::ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVa { type = rep->AccessType; } + else if (genIsValidFloatReg(seg.GetRegister())) + { + type = seg.GetRegisterType(); + } else { if ((seg.Offset % TARGET_POINTER_SIZE) == 0 && (seg.Size == TARGET_POINTER_SIZE)) From 034a992c16b075d4d3cfa58116923a0ccba052cf Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 3 Jul 2024 17:00:52 +0200 Subject: [PATCH 3/8] Run jit-format --- src/coreclr/jit/compiler.cpp | 6 ++--- src/coreclr/jit/forwardsub.cpp | 3 ++- src/coreclr/jit/gentree.h | 8 +++--- src/coreclr/jit/morph.cpp | 6 ++--- src/coreclr/jit/promotion.cpp | 41 +++++++++++++++++------------- src/coreclr/jit/promotion.h | 2 +- src/coreclr/jit/structsegments.cpp | 1 - 7 files changed, 36 insertions(+), 31 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 2707720525f49..f8080a73cc211 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1990,7 +1990,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc, m_outlinedCompositeSsaNums = nullptr; m_nodeToLoopMemoryBlockMap = nullptr; m_signatureToLookupInfoMap = nullptr; - m_significantSegmentsMap = nullptr; + m_significantSegmentsMap = nullptr; fgSsaPassesCompleted = 0; fgSsaValid = false; fgVNPassesCompleted = 0; @@ -8427,8 +8427,7 @@ const StructSegments& Compiler::GetSignificantSegments(ClassLayout* layout) if (m_significantSegmentsMap == nullptr) { - m_significantSegmentsMap = - new (this, CMK_Promotion) ClassLayoutStructSegmentsMap(getAllocator(CMK_Promotion)); + m_significantSegmentsMap = new (this, CMK_Promotion) ClassLayoutStructSegmentsMap(getAllocator(CMK_Promotion)); } m_significantSegmentsMap->Set(layout, newSegments); @@ -8436,7 +8435,6 @@ const StructSegments& Compiler::GetSignificantSegments(ClassLayout* layout) return *newSegments; } - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 3813fe3be0362..8a4fff11c6019 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -228,7 +228,8 @@ class ForwardSubVisitor final : public GenTreeVisitor if (parent->IsCall()) { GenTreeCall* const parentCall = parent->AsCall(); - canSubstituteIntoParent = (parentCall->gtCallType != CT_INDIRECT) || (parentCall->gtCallAddr != node); + canSubstituteIntoParent = + (parentCall->gtCallType != CT_INDIRECT) || (parentCall->gtCallAddr != node); } else if (parent->OperIs(GT_FIELD_LIST)) { diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 6e3949c981110..d008cf7ee296a 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4813,10 +4813,10 @@ class CallArgs // made for this call. unsigned m_padStkAlign; #endif - bool m_hasThisPointer : 1; - bool m_hasRetBuffer : 1; - bool m_isVarArgs : 1; - bool m_abiInformationDetermined : 1; + bool m_hasThisPointer : 1; + bool m_hasRetBuffer : 1; + bool m_isVarArgs : 1; + bool m_abiInformationDetermined : 1; bool m_newAbiInformationDetermined : 1; // True if we have one or more register arguments. 
bool m_hasRegArgs : 1; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 76a42f9b327a4..e575eb81336b6 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2312,7 +2312,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } #endif - m_abiInformationDetermined = true; + m_abiInformationDetermined = true; m_newAbiInformationDetermined = true; } @@ -2358,7 +2358,7 @@ void CallArgs::DetermineNewABIInfo(Compiler* comp, GenTreeCall* call) } } - m_argsStackSize = classifier.StackSize(); + m_argsStackSize = classifier.StackSize(); m_newAbiInformationDetermined = true; } @@ -2868,7 +2868,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) if (!argx->OperIs(GT_FIELD_LIST)) { - argx = fgMorphMultiregStructArg(&arg); + argx = fgMorphMultiregStructArg(&arg); } } } diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index ac788e2dc8a6b..7fe9bffa70e94 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -65,7 +65,7 @@ struct Access weight_t CountWtd = 0; weight_t CountStoredFromCallWtd = 0; weight_t CountCallArgsWtd = 0; - weight_t CountRegCallArgsWtd = 0; + weight_t CountRegCallArgsWtd = 0; #ifdef DEBUG // Number of times this access is the source of a store. @@ -115,11 +115,11 @@ struct Access enum class AccessKindFlags : uint32_t { - None = 0, - IsCallArg = 1, - IsRegCallArg = 2, - IsStoredFromCall = 4, - IsCallRetBuf = 8, + None = 0, + IsCallArg = 1, + IsRegCallArg = 2, + IsStoredFromCall = 4, + IsCallRetBuf = 8, #ifdef DEBUG IsStoreSource = 16, IsStoreDestination = 32, @@ -721,13 +721,14 @@ class LocalUses } const StructSegments& significantSegments = comp->GetSignificantSegments(otherAccess.Layout); - if (!significantSegments.Intersects(StructSegments::Segment(layoutOffset + accessSize, layoutOffset + TARGET_POINTER_SIZE))) + if (!significantSegments.Intersects( + StructSegments::Segment(layoutOffset + accessSize, layoutOffset + TARGET_POINTER_SIZE))) { return true; } return false; - }; + }; // We may be able to decompose the call argument to require no // write-back. 
if (willPassFieldInRegister()) @@ -2011,7 +2012,8 @@ void ReplaceVisitor::InsertPreStatementWriteBacks() for (CallArg& arg : call->gtArgs.Args()) { GenTree* node = arg.GetNode()->gtEffectiveVal(); - if (!node->TypeIs(TYP_STRUCT) || !node->OperIsLocalRead() || (m_replacer->m_aggregates.Lookup(node->AsLclVarCommon()->GetLclNum()) == nullptr)) + if (!node->TypeIs(TYP_STRUCT) || !node->OperIsLocalRead() || + (m_replacer->m_aggregates.Lookup(node->AsLclVarCommon()->GetLclNum()) == nullptr)) { continue; } @@ -2126,17 +2128,18 @@ bool ReplaceVisitor::ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVa return false; } - AggregateInfo* agg = m_aggregates.Lookup(argNode->GetLclNum()); - ClassLayout* layout = argNode->GetLayout(m_compiler); + AggregateInfo* agg = m_aggregates.Lookup(argNode->GetLclNum()); + ClassLayout* layout = argNode->GetLayout(m_compiler); assert(layout != nullptr); - StructDeaths deaths = m_liveness->GetDeathsForStructLocal(argNode); + StructDeaths deaths = m_liveness->GetDeathsForStructLocal(argNode); GenTreeFieldList* fieldList = new (m_compiler, GT_FIELD_LIST) GenTreeFieldList; for (unsigned i = 0; i < callArg->NewAbiInfo.NumSegments; i++) { const ABIPassingSegment& seg = callArg->NewAbiInfo.Segment(i); Replacement* rep = nullptr; - if (agg->OverlappingReplacements(argNode->GetLclOffs() + seg.Offset, seg.Size, &rep, nullptr) && rep->NeedsWriteBack) + if (agg->OverlappingReplacements(argNode->GetLclOffs() + seg.Offset, seg.Size, &rep, nullptr) && + rep->NeedsWriteBack) { GenTreeLclVar* fieldValue = m_compiler->gtNewLclvNode(rep->LclNum, rep->AccessType); @@ -2172,7 +2175,8 @@ bool ReplaceVisitor::ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVa } } - GenTree* fieldValue = m_compiler->gtNewLclFldNode(argNode->GetLclNum(), type, argNode->GetLclOffs() + seg.Offset); + GenTree* fieldValue = + m_compiler->gtNewLclFldNode(argNode->GetLclNum(), type, argNode->GetLclOffs() + seg.Offset); fieldList->AddField(m_compiler, fieldValue, seg.Offset, type); if (!m_compiler->lvaGetDesc(argNode->GetLclNum())->lvDoNotEnregister) @@ -2203,7 +2207,9 @@ bool ReplaceVisitor::ReplaceCallArgWithFieldList(GenTreeCall* call, GenTreeLclVa // True if the arg can be replaced by a FIELD_LIST that contains at least one // replacement. // -bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* call, CallArg* callArg, GenTreeLclVarCommon* lcl) +bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* call, + CallArg* callArg, + GenTreeLclVarCommon* lcl) { #if !FEATURE_MULTIREG_ARGS // TODO-CQ: We should do a similar thing for structs passed in a single @@ -2248,7 +2254,8 @@ bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* c // whether the remainder intersects anything unpromoted, since if // the remainder is a different promotion we will return false when // the replacement is visited in this callback. 
- if ((repSize < seg.Size) && agg->Unpromoted.Intersects(StructSegments::Segment(rep.Offset + repSize, rep.Offset + seg.Size))) + if ((repSize < seg.Size) && + agg->Unpromoted.Intersects(StructSegments::Segment(rep.Offset + repSize, rep.Offset + seg.Size))) { return false; } @@ -2260,7 +2267,7 @@ bool ReplaceVisitor::CanReplaceCallArgWithFieldListOfReplacements(GenTreeCall* c } return true; - }; + }; if (!VisitOverlappingReplacements(lcl->GetLclNum(), lcl->GetLclOffs() + seg.Offset, seg.Size, callback)) { diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index 37cc06a2153b5..b002c6d55e9f8 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -90,7 +90,7 @@ class AggregateInfoMap class Promotion { - Compiler* m_compiler; + Compiler* m_compiler; friend class LocalUses; friend class LocalsUseVisitor; diff --git a/src/coreclr/jit/structsegments.cpp b/src/coreclr/jit/structsegments.cpp index d1364be0d909e..bbd54af6500e6 100644 --- a/src/coreclr/jit/structsegments.cpp +++ b/src/coreclr/jit/structsegments.cpp @@ -301,4 +301,3 @@ void StructSegments::Dump() } } #endif - From 96d3946bb16f95f5bd88fbd776de333a90557b4f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 Jul 2024 13:35:13 +0200 Subject: [PATCH 4/8] Revert forward sub change --- src/coreclr/jit/forwardsub.cpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/forwardsub.cpp b/src/coreclr/jit/forwardsub.cpp index 8a4fff11c6019..9d57fa3a4a6df 100644 --- a/src/coreclr/jit/forwardsub.cpp +++ b/src/coreclr/jit/forwardsub.cpp @@ -222,22 +222,14 @@ class ForwardSubVisitor final : public GenTreeVisitor // // fgGetStubAddrArg cannot handle complex trees (it calls gtClone) // - bool canSubstituteIntoParent = true; - if (parent != nullptr) + bool isCallTarget = false; + if ((parent != nullptr) && parent->IsCall()) { - if (parent->IsCall()) - { - GenTreeCall* const parentCall = parent->AsCall(); - canSubstituteIntoParent = - (parentCall->gtCallType != CT_INDIRECT) || (parentCall->gtCallAddr != node); - } - else if (parent->OperIs(GT_FIELD_LIST)) - { - canSubstituteIntoParent = false; - } + GenTreeCall* const parentCall = parent->AsCall(); + isCallTarget = (parentCall->gtCallType == CT_INDIRECT) && (parentCall->gtCallAddr == node); } - if (canSubstituteIntoParent && IsLastUse(node->AsLclVar())) + if (!isCallTarget && IsLastUse(node->AsLclVar())) { m_node = node; m_use = use; From d2cbbc72c5e212c2b081ca01855808f7ec1952ba Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 Jul 2024 13:15:58 +0200 Subject: [PATCH 5/8] JIT: Remove CallArg::m_tmpNum Before this change the JIT has two places where it may introduce temps during call args morphing: 1. Directly during `fgMorphArgs` as part of making struct copies for some struct args, like implicit byref args and other struct args requiring to be of `LCL_VAR` shape 2. During `EvalArgsToTemps` as part of making sure evaluation order is maintained while we get the call into the right shape with registers To make this work we have `CallArg::m_tmpNum` that communicates the local from 1 to 2; the responsibility of creating the actual `LCL_VAR` node to put in the late argument list was left to `EvalArgsToTemps` while `fgMorphArgs` would just change the early setup node to a store into the temporary. This PR changes it so that 1 directly introduces the right late node when it creates its temporary. 
That is, it puts the call argument into the right shape immediately and avoids relying on `EvalArgsToTemps` to create the local node in the late list. The benefit of that is that we no longer need to communicate the temporary as part of every `CallArg`. However, the motivation here is something else: as part of #104370 we may have arguments that need multiple temporaries to copy, so having things working in this way introduces complications for that. --- src/coreclr/jit/gentree.cpp | 2 - src/coreclr/jit/gentree.h | 10 +- src/coreclr/jit/morph.cpp | 176 ++++++++++++++---------------------- 3 files changed, 69 insertions(+), 119 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index bc3dcda636489..92ea5f3cad613 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9741,12 +9741,10 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co carg->m_earlyNode = arg.m_earlyNode != nullptr ? copyNode(arg.m_earlyNode) : nullptr; carg->m_lateNode = arg.m_lateNode != nullptr ? copyNode(arg.m_lateNode) : nullptr; carg->m_signatureClsHnd = arg.m_signatureClsHnd; - carg->m_tmpNum = arg.m_tmpNum; carg->m_signatureType = arg.m_signatureType; carg->m_wellKnownArg = arg.m_wellKnownArg; carg->m_needTmp = arg.m_needTmp; carg->m_needPlace = arg.m_needPlace; - carg->m_isTmp = arg.m_isTmp; carg->m_processed = arg.m_processed; carg->AbiInfo = arg.AbiInfo; carg->NewAbiInfo = arg.NewAbiInfo; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index d008cf7ee296a..8c0327b5222b8 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4706,8 +4706,6 @@ class CallArg // The class handle for the signature type (when varTypeIsStruct(SignatureType)). CORINFO_CLASS_HANDLE m_signatureClsHnd; - // The LclVar number if we had to force evaluation of this arg. - unsigned m_tmpNum; // The type of the argument in the signature. var_types m_signatureType : 5; // The type of well-known argument this is. @@ -4716,8 +4714,6 @@ class CallArg bool m_needTmp : 1; // True when we must replace this argument with a placeholder node. bool m_needPlace : 1; - // True when we setup a temp LclVar for this argument. - bool m_isTmp : 1; // True when we have decided the evaluation order for this argument in LateArgs bool m_processed : 1; @@ -4728,12 +4724,10 @@ class CallArg , m_next(nullptr) , m_lateNext(nullptr) , m_signatureClsHnd(NO_CLASS_HANDLE) - , m_tmpNum(BAD_VAR_NUM) , m_signatureType(TYP_UNDEF) , m_wellKnownArg(WellKnownArg::None) , m_needTmp(false) , m_needPlace(false) - , m_isTmp(false) , m_processed(false) { } @@ -4770,7 +4764,6 @@ class CallArg CORINFO_CLASS_HANDLE GetSignatureClassHandle() { return m_signatureClsHnd; } var_types GetSignatureType() { return m_signatureType; } WellKnownArg GetWellKnownArg() { return m_wellKnownArg; } - bool IsTemp() { return m_isTmp; } // clang-format on // Get the real argument node, i.e. not a setup or placeholder node. 
@@ -4885,8 +4878,7 @@ class CallArgs void SetNeedsTemp(CallArg* arg); bool IsNonStandard(Compiler* comp, GenTreeCall* call, CallArg* arg); - GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg); - void SetTemp(CallArg* arg, unsigned tmpNum); + GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum); // clang-format off bool HasThisPointer() const { return m_hasThisPointer; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index e575eb81336b6..971704e8d941c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -821,18 +821,10 @@ void CallArg::Dump(Compiler* comp) { printf(", isSplit"); } - if (m_needTmp) - { - printf(", tmpNum=V%02u", m_tmpNum); - } if (m_needPlace) { printf(", needPlace"); } - if (m_isTmp) - { - printf(", isTmp"); - } if (m_processed) { printf(", processed"); @@ -849,15 +841,6 @@ void CallArg::Dump(Compiler* comp) } #endif -//------------------------------------------------------------------------ -// SetTemp: Set that the specified argument was evaluated into a temp. -// -void CallArgs::SetTemp(CallArg* arg, unsigned tmpNum) -{ - arg->m_tmpNum = tmpNum; - arg->m_isTmp = true; -} - //------------------------------------------------------------------------ // ArgsComplete: Make final decisions on which arguments to evaluate into temporaries. // @@ -874,13 +857,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) for (CallArg& arg : Args()) { GenTree* argx = arg.GetEarlyNode(); - - if (argx == nullptr) - { - // Should only happen if remorphing in which case we do not need to - // make a decision about temps. - continue; - } + assert(argx != nullptr); bool canEvalToTemp = true; if (arg.AbiInfo.GetRegNum() == REG_STK) @@ -909,19 +886,16 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // if ((argx->gtFlags & GTF_ASG) != 0) { - // If this is not the only argument, or it's a copyblk, or it - // already evaluates the expression to a tmp then we need a temp in - // the late arg list. - // In the latter case this might not even be a value; - // fgMakeOutgoingStructArgCopy will leave the copying nodes here - // for FEATURE_FIXED_OUT_ARGS. - if (canEvalToTemp && ((argCount > 1) || argx->OperIsCopyBlkOp() || (FEATURE_FIXED_OUT_ARGS && arg.m_isTmp))) + // fgMakeOutgoingStructArgCopy can have introduced a temp already, + // in which case it will have created a setup node in the early + // node. + if (!argx->IsValue()) { - SetNeedsTemp(&arg); + assert(arg.m_needTmp); } - else + else if (canEvalToTemp && (argCount > 1)) { - assert(argx->IsValue()); + SetNeedsTemp(&arg); } // For all previous arguments that may interfere with the store we @@ -1318,8 +1292,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) regCount++; } - assert(arg->GetLateNode() == nullptr); - // Skip any already processed args // if (!arg->m_processed) @@ -1556,9 +1528,8 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // Return Value: // the newly created temp var tree. 
// -GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg) +GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum) { - unsigned lclNum = arg->m_tmpNum; LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); var_types argType = varDsc->TypeGet(); assert(genActualType(argType) == genActualType(arg->GetSignatureType())); @@ -1633,7 +1604,16 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) for (size_t i = 0; i < numArgs; i++) { CallArg& arg = *(sortedArgs[i]); - assert(arg.GetLateNode() == nullptr); + + if (arg.GetLateNode() != nullptr) + { + // We may already have created the temp as part of + // fgMakeOutgoingStructArgCopy. In that case there is no work to be + // done. + *lateTail = &arg; + lateTail = &arg.LateNextRef(); + continue; + } GenTree* argx = arg.GetEarlyNode(); assert(argx != nullptr); @@ -1655,82 +1635,60 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) if (arg.m_needTmp) { - if (arg.m_isTmp) - { - // Create a copy of the temp to go into the late argument list - defArg = MakeTmpArgNode(comp, &arg); - } - else - { - // Create a temp store for the argument - // Put the temp in the late arg list + // Create a temp store for the argument + // Put the temp in the late arg list #ifdef DEBUG - if (comp->verbose) - { - printf("Argument with 'side effect'...\n"); - comp->gtDispTree(argx); - } + if (comp->verbose) + { + printf("Argument with 'side effect'...\n"); + comp->gtDispTree(argx); + } #endif #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - noway_assert(argx->gtType != TYP_STRUCT); + noway_assert(argx->gtType != TYP_STRUCT); #endif - unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); + unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); - if (setupArg != nullptr) - { - // Now keep the mkrefany for the late argument list - defArg = argx; + setupArg = comp->gtNewTempStore(tmpVarNum, argx); - // Clear the side-effect flags because now both op1 and op2 have no side-effects - defArg->gtFlags &= ~GTF_ALL_EFFECT; - } - else - { - setupArg = comp->gtNewTempStore(tmpVarNum, argx); + LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum); + var_types lclVarType = genActualType(argx->gtType); + var_types scalarType = TYP_UNKNOWN; - LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum); - var_types lclVarType = genActualType(argx->gtType); - var_types scalarType = TYP_UNKNOWN; - - if (setupArg->OperIsCopyBlkOp()) - { - setupArg = comp->fgMorphCopyBlock(setupArg); + if (setupArg->OperIsCopyBlkOp()) + { + setupArg = comp->fgMorphCopyBlock(setupArg); #if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT)) - { - scalarType = arg.AbiInfo.ArgType; - } + if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT)) + { + scalarType = arg.AbiInfo.ArgType; + } #endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - } - - // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => - // 8) - if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) - { - // Create a GT_LCL_FLD using the wider type to go to the late argument list - defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); - } - else - { - // Create a copy of the temp to go to the late argument list - defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); - } + } - arg.m_isTmp = 
true; - arg.m_tmpNum = tmpVarNum; - } + // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => + // 8) + if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) + { + // Create a GT_LCL_FLD using the wider type to go to the late argument list + defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); + } + else + { + // Create a copy of the temp to go to the late argument list + defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); + } #ifdef DEBUG - if (comp->verbose) - { - printf("\n Evaluate to a temp:\n"); - comp->gtDispTree(setupArg); - } -#endif + if (comp->verbose) + { + printf("\n Evaluate to a temp:\n"); + comp->gtDispTree(setupArg); } +#endif } else // curArgTabEntry->needTmp == false { @@ -3322,29 +3280,31 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) assert(!fgGlobalMorph); } + call->gtArgs.SetNeedsTemp(arg); + // Copy the valuetype to the temp GenTree* copyBlk = gtNewStoreLclVarNode(tmp, argx); copyBlk = fgMorphCopyBlock(copyBlk); - call->gtArgs.SetTemp(arg, tmp); #if FEATURE_FIXED_OUT_ARGS - // Do the copy early, and evaluate the temp later (see EvalArgsToTemps) - // When on Unix create LCL_FLD for structs passed in more than one registers. See fgMakeTmpArgNode - GenTree* argNode = copyBlk; + // For fixed out args we create the setup node here; EvalArgsToTemps knows + // to handle the case of "already have a setup node" properly. + arg->SetEarlyNode(copyBlk); + arg->SetLateNode(call->gtArgs.MakeTmpArgNode(this, arg, tmp)); #else // !FEATURE_FIXED_OUT_ARGS // Structs are always on the stack, and thus never need temps // so we have to put the copy and temp all into one expression. - GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg); + GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp); // Change the expression to "(tmp=val),tmp" argNode = gtNewOperNode(GT_COMMA, argNode->TypeGet(), copyBlk, argNode); -#endif // !FEATURE_FIXED_OUT_ARGS - arg->SetEarlyNode(argNode); + +#endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** @@ -6805,12 +6765,12 @@ Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, // TODO-CQ: enable calls with struct arguments passed in registers. noway_assert(!varTypeIsStruct(arg->TypeGet())); - if (callArg->IsTemp() || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) + if (arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) { // The argument is already assigned to a temp or is a const. argInTemp = arg; } - else if (arg->OperGet() == GT_LCL_VAR) + else if (arg->OperIs(GT_LCL_VAR)) { unsigned lclNum = arg->AsLclVar()->GetLclNum(); LclVarDsc* varDsc = lvaGetDesc(lclNum); From 6674dd0723b7f39cc7c7391a065ccb51408b26c2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 Jul 2024 13:59:27 +0200 Subject: [PATCH 6/8] Support copying FIELD_LISTs as part of call args morphing --- src/coreclr/jit/morph.cpp | 94 +++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 971704e8d941c..f76cadd83ab02 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1087,6 +1087,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // In "fgMorphMultiRegStructArg" we will expand the arg into a GT_FIELD_LIST with multiple indirections, so // here we consider spilling it into a local. 
We also need to spill it in case we have a node that we do not // currently handle in multi-reg morphing. + // This logic can be skipped when the arg is already in the right multireg arg shape. // if (varTypeIsStruct(argx) && !arg.m_needTmp && !argx->OperIs(GT_FIELD_LIST)) { @@ -1650,36 +1651,64 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) noway_assert(argx->gtType != TYP_STRUCT); #endif - unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); - - setupArg = comp->gtNewTempStore(tmpVarNum, argx); - - LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum); - var_types lclVarType = genActualType(argx->gtType); - var_types scalarType = TYP_UNKNOWN; - - if (setupArg->OperIsCopyBlkOp()) + if (argx->OperIs(GT_FIELD_LIST)) { - setupArg = comp->fgMorphCopyBlock(setupArg); -#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT)) + GenTreeFieldList* fieldList = argx->AsFieldList(); + fieldList->gtFlags &= ~GTF_ALL_EFFECT; + for (GenTreeFieldList::Use& use : fieldList->Uses()) { - scalarType = arg.AbiInfo.ArgType; + unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); + GenTree* store = comp->gtNewTempStore(tmpVarNum, use.GetNode()); + + if (setupArg == nullptr) + { + setupArg = store; + } + else + { + setupArg = comp->gtNewOperNode(GT_COMMA, TYP_VOID, setupArg, store); + } + + use.SetNode(comp->gtNewLclvNode(tmpVarNum, genActualType(use.GetNode()))); + fieldList->AddAllEffectsFlags(use.GetNode()); } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - } - // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => - // 8) - if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) - { - // Create a GT_LCL_FLD using the wider type to go to the late argument list - defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); + // Keep the field list in the late list + defArg = fieldList; } else { - // Create a copy of the temp to go to the late argument list - defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); + unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); + + setupArg = comp->gtNewTempStore(tmpVarNum, argx); + + LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum); + var_types lclVarType = genActualType(argx->gtType); + var_types scalarType = TYP_UNKNOWN; + + if (setupArg->OperIsCopyBlkOp()) + { + setupArg = comp->fgMorphCopyBlock(setupArg); +#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT)) + { + scalarType = arg.AbiInfo.ArgType; + } +#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + } + + // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => + // 8) + if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) + { + // Create a GT_LCL_FLD using the wider type to go to the late argument list + defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); + } + else + { + // Create a copy of the temp to go to the late argument list + defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); + } } #ifdef DEBUG @@ -2483,20 +2512,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) GenTree* argObj = argx->gtEffectiveVal(); 
bool makeOutArgCopy = false; - if (isStructArg && !reMorphing) + if (isStructArg && !reMorphing && !argObj->OperIs(GT_FIELD_LIST)) { unsigned originalSize; - if (argObj->TypeGet() == TYP_STRUCT) + if (argObj->TypeIs(TYP_STRUCT)) { - if (argObj->OperIs(GT_FIELD_LIST)) - { - originalSize = typGetObjLayout(arg.GetSignatureClassHandle())->GetSize(); - } - else - { - assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); - originalSize = argObj->GetLayout(this)->GetSize(); - } + assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); + originalSize = argObj->GetLayout(this)->GetSize(); } else { @@ -2576,10 +2598,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) makeOutArgCopy = true; } } - else if (argObj->OperIs(GT_FIELD_LIST)) - { - // Fall through with makeOutArgCopy = false - } else if (!argIsLocal) { // This should only be the case of a value directly producing a known struct type. From 93917ca9515f5bde155fafbd0143edc7326e2658 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 10 Jul 2024 12:57:00 +0200 Subject: [PATCH 7/8] Fix bad merge --- src/coreclr/jit/morph.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 58679979c3a36..da5c33c318bd7 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1672,21 +1672,6 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) use.SetNode(comp->gtNewLclvNode(tmpVarNum, genActualType(use.GetNode()))); fieldList->AddAllEffectsFlags(use.GetNode()); } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - } - - // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => - // 8) - if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) - { - // Create a GT_LCL_FLD using the wider type to go to the late argument list - defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); - } - else - { - // Create a copy of the temp to go to the late argument list - defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); - } // Keep the field list in the late list defArg = fieldList; From 82af8d9296c620e186a87701f49ef378428f1d93 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 11 Jul 2024 16:06:34 +0200 Subject: [PATCH 8/8] Work around bad codegen --- src/coreclr/jit/gentree.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 163965efbad0a..3395f3e242afc 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4890,7 +4890,14 @@ class CallArgs void ClearIsVarArgs() { m_isVarArgs = false; } bool IsAbiInformationDetermined() const { return m_abiInformationDetermined; } bool IsNewAbiInformationDetermined() const { return m_newAbiInformationDetermined; } - bool AreArgsComplete() const { return m_argsComplete; } + + // TODO-Remove: Workaround for bad codegen in MSVC versions < 19.41, see + // https://github.com/dotnet/runtime/pull/104370#issuecomment-2222910359 +#ifdef _MSC_VER + __declspec(noinline) +#endif + bool AreArgsComplete() const { return m_argsComplete; } + bool HasRegArgs() const { return m_hasRegArgs; } bool HasStackArgs() const { return m_hasStackArgs; } bool NeedsTemps() const { return m_needsTemps; }
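For reference, a minimal standalone sketch of the workaround pattern used in the final hunk above: gate __declspec(noinline) behind _MSC_VER so that only MSVC builds lose inlining of the accessor and other compilers are unaffected. The macro and class names (NOINLINE_FOR_MSVC_WORKAROUND, CallArgsModel) are illustrative only and do not appear in the patch.

#ifdef _MSC_VER
#define NOINLINE_FOR_MSVC_WORKAROUND __declspec(noinline)
#else
#define NOINLINE_FOR_MSVC_WORKAROUND
#endif

// Toy stand-in for the real CallArgs class; only the shape of the accessor matters.
class CallArgsModel
{
    bool m_argsComplete = false;

public:
    // The attribute only suppresses inlining under MSVC; observable behavior is
    // unchanged, so it can be dropped once the affected compiler versions age out.
    NOINLINE_FOR_MSVC_WORKAROUND
    bool AreArgsComplete() const
    {
        return m_argsComplete;
    }
};

Keeping the attribute behind the conditional macro mirrors the #ifdef _MSC_VER guard in the patch itself, so clang and gcc builds never see the MSVC-specific declspec.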