From dc857590c03bb8b0a73115c9e6ea2978877f261d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 15 Jun 2024 09:19:38 +0200 Subject: [PATCH 01/16] JIT: Start using new-style ABI classifiers for call args --- src/coreclr/jit/abi.cpp | 20 + src/coreclr/jit/abi.h | 4 + src/coreclr/jit/gentree.cpp | 85 +--- src/coreclr/jit/gentree.h | 18 +- src/coreclr/jit/lclvars.cpp | 7 +- src/coreclr/jit/lower.cpp | 3 +- src/coreclr/jit/morph.cpp | 968 ++++-------------------------------- 7 files changed, 127 insertions(+), 978 deletions(-) diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index 524eedb48ee4d..f11a84583ca2e 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -275,6 +275,26 @@ bool ABIPassingInformation::IsSplitAcrossRegistersAndStack() const return false; } +//----------------------------------------------------------------------------- +// CountRegisterSegments: +// Count the number of registers used to pass this argument. +// +// Return Value: +// Count of registers. +// +unsigned ABIPassingInformation::CountRegisterSegments() const +{ + unsigned count = 0; + for (unsigned i = 0; i < NumSegments; i++) + { + if (Segments[i].IsPassedInRegister()) + { + count++; + } + } + return count; +} + //----------------------------------------------------------------------------- // FromSegment: // Create ABIPassingInformation from a single segment. 
diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 7b8baf1052784..2ab0268d4cee2 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -3,6 +3,9 @@ #pragma once +class ClassLayout; +enum class WellKnownArg : unsigned; + class ABIPassingSegment { regNumber m_register = REG_NA; @@ -65,6 +68,7 @@ struct ABIPassingInformation bool HasExactlyOneRegisterSegment() const; bool HasExactlyOneStackSegment() const; bool IsSplitAcrossRegistersAndStack() const; + unsigned CountRegisterSegments() const; static ABIPassingInformation FromSegment(Compiler* comp, const ABIPassingSegment& segment); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 98d6bb788d450..8d65a0cea7ab3 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1258,87 +1258,6 @@ void CallArgABIInformation::SetHfaType(var_types type, unsigned hfaSlots) } } -//--------------------------------------------------------------- -// SetByteSize: Set information related to this argument's size and alignment. -// -// Arguments: -// byteSize - The size in bytes of the argument. -// byteAlignment - The alignment in bytes of the argument. -// isStruct - Whether this arg is a struct. -// isFloatHfa - Whether this is a float HFA. -// -// Remarks: -// This function will determine how the argument size needs to be rounded. On -// most ABIs all arguments are rounded to stack pointer size, but Apple arm64 -// ABI is an exception as it allows packing some small arguments into the -// same stack slot. -// -void CallArgABIInformation::SetByteSize(unsigned byteSize, unsigned byteAlignment, bool isStruct, bool isFloatHfa) -{ - unsigned roundedByteSize; - if (compAppleArm64Abi()) - { - // Only struct types need extension or rounding to pointer size, but HFA does not. 
- if (isStruct && !isFloatHfa) - { - roundedByteSize = roundUp(byteSize, TARGET_POINTER_SIZE); - } - else - { - roundedByteSize = byteSize; - } - } - else - { - roundedByteSize = roundUp(byteSize, TARGET_POINTER_SIZE); - } - -#if !defined(TARGET_ARM) - // Arm32 could have a struct with 8 byte alignment - // which rounded size % 8 is not 0. - assert(byteAlignment != 0); - assert(roundedByteSize % byteAlignment == 0); -#endif // TARGET_ARM - - ByteSize = roundedByteSize; - ByteAlignment = byteAlignment; -} - -//--------------------------------------------------------------- -// SetMultiRegsNumw: Set the registers for a multi-reg arg using 'sequential' registers. -// -// Remarks: -// This assumes that `NumRegs` and the first reg num has already been set and -// determines how many sequential registers are necessary to pass the -// argument. -// Note that on ARM the registers set may skip odd float registers if the arg -// is a HFA of doubles, since double and float registers overlap. -void CallArgABIInformation::SetMultiRegNums() -{ -#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) - if (NumRegs == 1) - { - return; - } - - regNumber argReg = GetRegNum(0); -#ifdef TARGET_ARM - unsigned int regSize = (GetHfaType() == TYP_DOUBLE) ? 2 : 1; -#else - unsigned int regSize = 1; -#endif - - if (NumRegs > MAX_ARG_REG_COUNT) - NO_WAY("Multireg argument exceeds the maximum length"); - - for (unsigned int regIndex = 1; regIndex < NumRegs; regIndex++) - { - argReg = (regNumber)(argReg + regSize); - SetRegNum(regIndex, argReg); - } -#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) -} - //--------------------------------------------------------------- // GetStackByteSize: Get the number of stack bytes used to pass this argument. 
// @@ -1468,7 +1387,7 @@ void CallArg::CheckIsStruct() CallArgs::CallArgs() : m_head(nullptr) , m_lateHead(nullptr) - , m_nextStackByteOffset(0) + , m_argsStackSize(0) #ifdef UNIX_X86_ABI , m_stkSizeBytes(0) , m_padStkAlign(0) @@ -9817,7 +9736,7 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co { assert((m_head == nullptr) && (m_lateHead == nullptr)); - m_nextStackByteOffset = other->m_nextStackByteOffset; + m_argsStackSize = other->m_argsStackSize; m_hasThisPointer = other->m_hasThisPointer; m_hasRetBuffer = other->m_hasRetBuffer; m_isVarArgs = other->m_isVarArgs; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index cc1c4917604ac..b29c619585ef9 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -18,6 +18,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define _GENTREE_H_ /*****************************************************************************/ +#include "abi.h" #include "vartype.h" // For "var_types" #include "target.h" // For "regNumber" #include "ssaconfig.h" // For "SsaConfig::RESERVED_SSA_NUM" @@ -4489,12 +4490,10 @@ struct CallArgABIInformation : NumRegs(0) , ByteOffset(0) , ByteSize(0) - , ByteAlignment(0) #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) , StructFloatFieldType() #endif , ArgType(TYP_UNDEF) - , IsBackFilled(false) , PassedByRef(false) #if FEATURE_ARG_SPLIT , m_isSplit(false) @@ -4520,7 +4519,6 @@ struct CallArgABIInformation unsigned NumRegs; unsigned ByteOffset; unsigned ByteSize; - unsigned ByteAlignment; #if defined(UNIX_AMD64_ABI) // Unix amd64 will split floating point types and integer types in structs // between floating point and general purpose registers. Keep track of that @@ -4538,9 +4536,6 @@ struct CallArgABIInformation // that type. Note that if a struct is passed by reference, this will still // be the struct type. 
var_types ArgType : 5; - // True when the argument fills a register slot skipped due to alignment - // requirements of previous arguments. - bool IsBackFilled : 1; // True iff the argument is passed by reference. bool PassedByRef : 1; @@ -4638,18 +4633,11 @@ struct CallArgABIInformation #endif // TARGET_LOONGARCH64 || TARGET_RISCV64 } - void SetByteSize(unsigned byteSize, unsigned byteAlignment, bool isStruct, bool isFloatHfa); - // Get the number of bytes that this argument is occupying on the stack, // including padding up to the target pointer size for platforms // where a stack argument can't take less. unsigned GetStackByteSize() const; - // Set the register numbers for a multireg argument. - // There's nothing to do on x64/Ux because the structDesc has already been used to set the - // register numbers. - void SetMultiRegNums(); - // Return number of stack slots that this argument is taking. // This value is not meaningful on Apple arm64 where multiple arguments can // be passed in the same stack slot. @@ -4752,6 +4740,7 @@ class CallArg public: CallArgABIInformation AbiInfo; + ABIPassingInformation NewAbiInfo; CallArg(const NewCallArg& arg) : CallArg() @@ -4814,7 +4803,7 @@ class CallArgs CallArg* m_head; CallArg* m_lateHead; - unsigned m_nextStackByteOffset; + unsigned m_argsStackSize; #ifdef UNIX_X86_ABI // Number of stack bytes pushed before we start pushing these arguments. 
unsigned m_stkSizeBytes; @@ -4843,7 +4832,6 @@ class CallArgs void AddedWellKnownArg(WellKnownArg arg); void RemovedWellKnownArg(WellKnownArg arg); regNumber GetCustomRegister(Compiler* comp, CorInfoCallConvExtension cc, WellKnownArg arg); - void SplitArg(CallArg* arg, unsigned numRegs, unsigned numSlots); void SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs); public: diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 889eacefd29f1..354f0d827db8e 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1692,7 +1692,7 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, template void Compiler::lvaClassifyParameterABI(Classifier& classifier) { - lvaParameterPassingInfo = new (this, CMK_LvaTable) ABIPassingInformation[info.compArgsCount]; + lvaParameterPassingInfo = info.compArgsCount == 0 ? nullptr : new (this, CMK_LvaTable) ABIPassingInformation[info.compArgsCount]; for (unsigned i = 0; i < info.compArgsCount; i++) { @@ -1735,11 +1735,6 @@ void Compiler::lvaClassifyParameterABI(Classifier& classifier) // void Compiler::lvaClassifyParameterABI() { - if (info.compArgsCount == 0) - { - return; - } - ClassifierInfo cInfo; cInfo.CallConv = info.compCallConv; cInfo.IsVarArgs = info.compIsVarArgs; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2137874887e70..c0bb658e55a45 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3443,10 +3443,11 @@ void Lowering::LowerCFGCall(GenTreeCall* call) call->gtArgs.PushLateBack(targetArg); // Set up ABI information for this arg. 
+ targetArg->NewAbiInfo = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_DISPATCH_INDIRECT_CALL_ADDR, 0, TARGET_POINTER_SIZE)); targetArg->AbiInfo.ArgType = callTarget->TypeGet(); targetArg->AbiInfo.SetRegNum(0, REG_DISPATCH_INDIRECT_CALL_ADDR); targetArg->AbiInfo.NumRegs = 1; - targetArg->AbiInfo.SetByteSize(TARGET_POINTER_SIZE, TARGET_POINTER_SIZE, false, false); + targetArg->AbiInfo.ByteSize = TARGET_POINTER_SIZE; // Lower the newly added args now that call is updated LowerArg(call, targetArg, true /* late */); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8f0d1009d0f63..b6485c6c04306 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -811,7 +811,6 @@ void CallArg::Dump(Compiler* comp) { printf(", byteSize=%u, byteOffset=%u", AbiInfo.ByteSize, AbiInfo.ByteOffset); } - printf(", byteAlignment=%u", AbiInfo.ByteAlignment); if (GetLateNode() != nullptr) { printf(", isLate"); @@ -840,10 +839,6 @@ void CallArg::Dump(Compiler* comp) { printf(", isHfa(%s)", varTypeName(AbiInfo.GetHfaType())); } - if (AbiInfo.IsBackFilled) - { - printf(", isBackFilled"); - } if (m_wellKnownArg != WellKnownArg::None) { printf(", wellKnown[%s]", getWellKnownArgName(m_wellKnownArg)); @@ -852,37 +847,6 @@ void CallArg::Dump(Compiler* comp) } #endif -//------------------------------------------------------------------------ -// SplitArg: -// Record that the arg will be split over registers and stack, increasing the -// current stack usage. -// -// Parameters: -// arg - The argument. -// numRegs - The number of registers that will be used. -// numSlots - The number of stack slots that will be used. 
-// -void CallArgs::SplitArg(CallArg* arg, unsigned numRegs, unsigned numSlots) -{ - assert(numRegs > 0); - assert(numSlots > 0); - - if (m_argsComplete) - { - assert(arg->AbiInfo.IsSplit() == true); - assert(arg->AbiInfo.NumRegs == numRegs); - assert(m_hasStackArgs); - } - else - { - arg->AbiInfo.SetSplit(true); - arg->AbiInfo.NumRegs = numRegs; - arg->AbiInfo.ByteOffset = 0; - m_hasStackArgs = true; - } - m_nextStackByteOffset += numSlots * TARGET_POINTER_SIZE; -} - //------------------------------------------------------------------------ // SetTemp: Set that the specified argument was evaluated into a temp. // @@ -1951,23 +1915,6 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree) void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call) { assert(&call->gtArgs == this); - unsigned argIndex = 0; - unsigned intArgRegNum = 0; - unsigned fltArgRegNum = 0; - - bool callHasRetBuffArg = HasRetBuffer(); - bool callIsVararg = IsVarArgs(); - -#ifdef TARGET_ARM - regMaskTP argSkippedRegMask = RBM_NONE; - regMaskTP fltArgSkippedRegMask = RBM_NONE; -#endif // TARGET_ARM - -#if defined(TARGET_X86) - unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated -#else - const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number -#endif if (IsAbiInformationDetermined()) { @@ -1976,13 +1923,12 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); - m_nextStackByteOffset = INIT_ARG_STACK_SLOT * TARGET_POINTER_SIZE; m_hasRegArgs = false; m_hasStackArgs = false; // At this point, we should not have any late args, as this needs to be done before those are determined. assert(m_lateHead == nullptr); - if (TargetOS::IsUnix && callIsVararg) + if (TargetOS::IsUnix && IsVarArgs()) { // Currently native varargs is not implemented on non windows targets. 
// @@ -2119,102 +2065,14 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } #endif - unsigned numArgs = CountArgs(); - -#ifdef TARGET_X86 - // Compute the maximum number of arguments that can be passed in registers. - // For X86 we handle the varargs and unmanaged calling conventions - -#ifndef UNIX_X86_ABI - if (call->gtFlags & GTF_CALL_POP_ARGS) - { - noway_assert(intArgRegNum < MAX_REG_ARG); - // No more register arguments for varargs (CALL_POP_ARGS) - maxRegArgs = intArgRegNum; - - // Add in this arg - if (HasThisPointer()) - { - maxRegArgs++; - } - // Add in the ret buff arg - if (callHasRetBuffArg) - { - maxRegArgs++; - } - } -#endif // UNIX_X86_ABI - - if (call->IsUnmanaged()) - { - noway_assert(intArgRegNum == 0); - - if (call->unmgdCallConv == CorInfoCallConvExtension::Thiscall) - { - noway_assert((call->gtArgs.GetArgByIndex(0)->GetEarlyNode() == nullptr) || - (call->gtArgs.GetArgByIndex(0)->GetEarlyNode()->TypeGet() == TYP_I_IMPL) || - (call->gtArgs.GetArgByIndex(0)->GetEarlyNode()->TypeGet() == TYP_BYREF)); - maxRegArgs = 1; - } - else - { - maxRegArgs = 0; - } -#ifdef UNIX_X86_ABI - // Add in the ret buff arg - if (callHasRetBuffArg && - call->unmgdCallConv != CorInfoCallConvExtension::C && // C and Stdcall calling conventions do not - call->unmgdCallConv != CorInfoCallConvExtension::Stdcall) // use registers to pass arguments. - maxRegArgs++; -#endif - } -#endif // TARGET_X86 - - /* Morph the user arguments */ - -#if defined(TARGET_ARM) - - // The ARM ABI has a concept of back-filling of floating-point argument registers, according - // to the "Procedure Call Standard for the ARM Architecture" document, especially - // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can - // appear in a lower-numbered register than floating point argument N. That is, argument - // register allocation is not strictly increasing. 
To support this, we need to keep track of unused - // floating-point argument registers that we can back-fill. We only support 4-byte float and - // 8-byte double types, and one to four element HFAs composed of these types. With this, we will - // only back-fill single registers, since there is no way with these types to create - // an alignment hole greater than one register. However, there can be up to 3 back-fill slots - // available (with 16 FP argument registers). Consider this code: - // - // struct HFA { float x, y, z; }; // a three element HFA - // void bar(float a1, // passed in f0 - // double a2, // passed in f2/f3; skip f1 for alignment - // HFA a3, // passed in f4/f5/f6 - // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot - // HFA a5, // passed in f10/f11/f12 - // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill - // // slots - // float a7, // passed in f1 (back-filled) - // float a8, // passed in f7 (back-filled) - // float a9, // passed in f13 (back-filled) - // float a10) // passed on the stack in [OutArg+0] - // - // Note that if we ever support FP types with larger alignment requirements, then there could - // be more than single register back-fills. - // - // Once we assign a floating-pointer register to the stack, they all must be on the stack. - // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling - // continues only so long as no VFP CPRC has been allocated to a slot on the stack." - // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack - // and prevent any additional floating-point arguments from going in registers. 
- - bool anyFloatStackArgs = false; - -#endif // TARGET_ARM - -#ifdef UNIX_AMD64_ABI - SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; -#endif // UNIX_AMD64_ABI + ClassifierInfo info; + info.CallConv = call->GetUnmanagedCallConv(); + info.IsVarArgs = call->IsVarargs(); + info.HasThis = call->gtArgs.HasThisPointer(); + info.HasRetBuff = call->gtArgs.HasRetBuffer(); + PlatformClassifier classifier(info); + // Morph the user arguments for (CallArg& arg : Args()) { assert(arg.GetEarlyNode() != nullptr); @@ -2231,778 +2089,144 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // where the "argx" node can legally have a type that is not ABI-compatible with the one in the signature. const var_types argSigType = arg.GetSignatureType(); const CORINFO_CLASS_HANDLE argSigClass = arg.GetSignatureClassHandle(); + ClassLayout* argLayout = argSigClass == NO_CLASS_HANDLE ? nullptr : comp->typGetObjLayout(argSigClass); - // Setup any HFA information about the argument. - bool isHfaArg = false; - var_types hfaType = TYP_UNDEF; - unsigned hfaSlots = 0; - - bool passUsingFloatRegs; - unsigned argAlignBytes = TARGET_POINTER_SIZE; - unsigned size = 0; - unsigned byteSize = 0; - - if (GlobalJitOptions::compFeatureHfa) - { - hfaType = comp->GetHfaType(argSigClass); - isHfaArg = varTypeIsValidHfaType(hfaType); - - if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && callIsVararg) - { - // Make sure for vararg methods isHfaArg is not true. - isHfaArg = false; - } - - if (isHfaArg) - { - hfaSlots = comp->GetHfaCount(argSigClass); - - // If we have a HFA struct it's possible we transition from a method that originally - // only had integer types to now start having FP types. We have to communicate this - // through this flag since LSRA later on will use this flag to determine whether - // or not to track the FP register set. 
- // - comp->compFloatingPointUsed = true; - } - } - - const bool isFloatHfa = (hfaType == TYP_FLOAT); - -#ifdef TARGET_ARM - passUsingFloatRegs = - !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argSigType)) && !comp->opts.compUseSoftFP; - bool passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG); - - // TODO-Cleanup: use "eeGetArgSizeAlignment" here. See also: https://github.com/dotnet/runtime/issues/46026. - if (varTypeIsStruct(argSigType)) - { - argAlignBytes = comp->info.compCompHnd->getClassAlignmentRequirement(argSigClass); - } - else - { - argAlignBytes = genTypeSize(argSigType); - } - - argAlignBytes = roundUp(argAlignBytes, TARGET_POINTER_SIZE); - - if (argAlignBytes == 2 * TARGET_POINTER_SIZE) - { - if (passUsingFloatRegs) - { - if (fltArgRegNum % 2 == 1) - { - fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); - fltArgRegNum++; - } - } - else if (passUsingIntRegs) - { - if (intArgRegNum % 2 == 1) - { - argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); - intArgRegNum++; - } - } - } - -#elif defined(TARGET_ARM64) - - assert(!callIsVararg || !isHfaArg); - passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argSigType)); - -#elif defined(TARGET_AMD64) - - passUsingFloatRegs = varTypeIsFloating(argSigType); - -#elif defined(TARGET_X86) - - passUsingFloatRegs = false; - -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - - assert(!callIsVararg && !isHfaArg); - passUsingFloatRegs = varTypeUsesFloatReg(argSigType); - DWORD floatFieldFlags = STRUCT_NO_FLOAT_FIELD; + ABIPassingInformation abiInfo; + ABIPassingSegment inlineSegment; -#else -#error Unsupported or unset target architecture -#endif // TARGET* - - bool isBackFilled = false; - unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use - bool isStructArg = varTypeIsStruct(argSigType); - var_types structBaseType = TYP_STRUCT; - unsigned structSize = 0; - 
- bool passStructByRef = false; - - // - // Figure out the size of the argument. This is either in number of registers, or number of - // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and - // the stack. - // - - if (isStructArg) - { - GenTree* actualArg = argx->gtEffectiveVal(); - - // Here we look at "actualArg" to avoid calling "getClassSize". - structSize = actualArg->TypeIs(TYP_STRUCT) ? actualArg->GetLayout(comp)->GetSize() : genTypeSize(actualArg); + // Some well known args have custom register assignment. These should not affect the placement of any + // other args or stack space required. Example: on AMD64 R10 and R11 are used for indirect VSD (generic + // interface) and cookie calls. arg.NewAbiInfo outlives this iteration: never point it at inlineSegment. + // TODO-Cleanup: Integrate this into the new style ABI classifiers. + regNumber nonStdRegNum = GetCustomRegister(comp, call->GetUnmanagedCallConv(), arg.GetWellKnownArg()); - assert(structSize == comp->info.compCompHnd->getClassSize(argSigClass)); - } -#if defined(TARGET_AMD64) -#ifdef UNIX_AMD64_ABI - if (!isStructArg) + if (nonStdRegNum == REG_NA) { - size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' - byteSize = genTypeSize(argSigType); + abiInfo = classifier.Classify(comp, argSigType, argLayout, arg.GetWellKnownArg()); } else { - size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - byteSize = structSize; - comp->eeGetSystemVAmd64PassStructInRegisterDescriptor(argSigClass, &structDesc); - } -#else // !UNIX_AMD64_ABI - size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' - if (!isStructArg) - { - byteSize = genTypeSize(argSigType); + inlineSegment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE); + abiInfo = ABIPassingInformation::FromSegment(comp, inlineSegment); } -#endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (isStructArg) - { - if (isHfaArg) - { - // HFA structs are 
passed by value in multiple registers. - // The "size" in registers may differ the size in pointer-sized units. - size = hfaSlots; - byteSize = structSize; - } - else - { - // Structs are either passed in 1 or 2 (64-bit) slots. - // Structs that are the size of 2 pointers are passed by value in multiple registers, - // if sufficient registers are available. - // Structs that are larger than 2 pointers (except for HFAs) are passed by - // reference (to a copy) - size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - byteSize = structSize; - if (size > 2) - { - size = 1; - } - } - // Note that there are some additional rules for multireg structs on ARM64. - // (i.e they cannot be split between registers and the stack) - } - else - { - size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot' - byteSize = genTypeSize(argSigType); - } -#elif defined(TARGET_ARM) || defined(TARGET_X86) - if (isStructArg) - { - size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - byteSize = structSize; - } - else - { - // The typical case. - // Long/double type argument(s) will be modified as needed in Lowering. 
- size = genTypeStSz(argSigType); - byteSize = genTypeSize(argSigType); - } -#else -#error Unsupported or unset target architecture -#endif // TARGET_XXX + arg.NewAbiInfo = abiInfo; + arg.AbiInfo = CallArgABIInformation(); + arg.AbiInfo.NumRegs = abiInfo.CountRegisterSegments(); - if (isStructArg) + if (varTypeIsStruct(argSigType)) { assert(argx == arg.GetEarlyNode()); - assert(structSize != 0); Compiler::structPassingKind howToPassStruct; - structBaseType = comp->getArgTypeForStruct(argSigClass, &howToPassStruct, callIsVararg, structSize); - passStructByRef = (howToPassStruct == Compiler::SPK_ByReference); -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (!passStructByRef) - { - assert((howToPassStruct == Compiler::SPK_ByValue) || (howToPassStruct == Compiler::SPK_PrimitiveType)); + var_types structBaseType = comp->getArgTypeForStruct(argSigClass, &howToPassStruct, IsVarArgs(), argLayout->GetSize()); + arg.AbiInfo.PassedByRef = howToPassStruct == Compiler::SPK_ByReference; + arg.AbiInfo.ArgType = structBaseType == TYP_UNKNOWN ? argx->TypeGet() : structBaseType; -#if defined(TARGET_LOONGARCH64) - floatFieldFlags = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(argSigClass); -#else - floatFieldFlags = comp->info.compCompHnd->getRISCV64PassStructInRegisterFlags(argSigClass); -#endif - - passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; - comp->compFloatingPointUsed |= passUsingFloatRegs; - - if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) != 0) - { - // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" - // for "struct { float, float }", and retyping to a primitive here will cause the - // multi-reg morphing to not kick in (the struct in question needs to be passed in - // two FP registers). Here is just keep "structBaseType" as "TYP_STRUCT". - // TODO-LoongArch64: fix "getPrimitiveTypeForStruct". 
- structBaseType = TYP_STRUCT; - } - - if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) != 0) - { - size = 1; - } - else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) - { - size = 2; - } - } - else // if (passStructByRef) - { - size = 1; - byteSize = TARGET_POINTER_SIZE; - } -#else - if (howToPassStruct == Compiler::SPK_ByReference) - { - byteSize = TARGET_POINTER_SIZE; - } - else - { - byteSize = structSize; - } - - if (howToPassStruct == Compiler::SPK_PrimitiveType) - { -#ifdef TARGET_ARM - // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct, - // or for a struct of two floats. This causes the struct to be address-taken. - if (structBaseType == TYP_DOUBLE) - { - size = 2; - } - else -#endif // TARGET_ARM - { - size = 1; - } - } - else if (passStructByRef) - { - size = 1; - } +#ifdef UNIX_AMD64_ABI + comp->eeGetSystemVAmd64PassStructInRegisterDescriptor(argSigClass, &arg.AbiInfo.StructDesc); #endif } - - // The 'size' value has now must have been set. (the original value of zero is an invalid value) - assert(size != 0); - assert(byteSize != 0); - - if (compAppleArm64Abi()) + else { - // Arm64 Apple has a special ABI for passing small size arguments on stack, - // bytes are aligned to 1-byte, shorts to 2-byte, int/float to 4-byte, etc. - // It means passing 8 1-byte arguments on stack can take as small as 8 bytes. - argAlignBytes = comp->eeGetArgSizeAlignment(argSigType, isFloatHfa); + arg.AbiInfo.ArgType = argx->TypeGet(); } -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - regNumber nextOtherRegNum = REG_STK; -#endif - // - // Figure out if the argument will be passed in a register. - // - bool isRegArg = false; - regNumber nonStdRegNum = REG_NA; + // TODO-Fixme: remove HFA information from VarDsc. 
+ var_types hfaType = TYP_UNDEF; + bool isHfaArg = false; + unsigned hfaSlots = 0; - if (isRegParamType(genActualType(argSigType)) -#ifdef UNIX_AMD64_ABI - && (!isStructArg || structDesc.passedInRegisters) -#elif defined(TARGET_X86) - || (isStructArg && comp->isTrivialPointerSizedStruct(argSigClass)) -#endif - ) + if (GlobalJitOptions::compFeatureHfa) { -#ifdef TARGET_ARM - if (passUsingFloatRegs) - { - // First, see if it can be back-filled - if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet) - (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot? - (size == 1)) // The size to back-fill is one float register - { - // Back-fill the register. - isBackFilled = true; - regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask); - fltArgSkippedRegMask &= - ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask - nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask)); - assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG); - } - - // Does the entire float, double, or HFA fit in the FP arg registers? - // Check if the last register needed is still in the argument register range. - isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG; - - if (!isRegArg) - { - anyFloatStackArgs = true; - } - } - else - { - isRegArg = intArgRegNum < MAX_REG_ARG; - } -#elif defined(TARGET_ARM64) - if (passUsingFloatRegs) - { - // Check if the last register needed is still in the fp argument register range. - isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; - - // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers? 
- if (isHfaArg && !isRegArg) - { - // recompute the 'size' so that it represent the number of stack slots rather than the number of - // registers - // - unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE); - size = roundupSize / TARGET_POINTER_SIZE; + hfaType = comp->GetHfaType(argSigClass); + isHfaArg = varTypeIsValidHfaType(hfaType); - // We also must update fltArgRegNum so that we no longer try to - // allocate any new floating point registers for args - // This prevents us from backfilling a subsequent arg into d7 - // - fltArgRegNum = MAX_FLOAT_REG_ARG; - } - } - else + if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && IsVarArgs()) { - // Check if the last register needed is still in the int argument register range. - isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; - - // Did we run out of registers when we had a 16-byte struct (size===2) ? - // (i.e we only have one register remaining but we needed two registers to pass this arg) - // This prevents us from backfilling a subsequent arg into x7 - // - if (!isRegArg && (size > 1)) - { - // Arm64 windows native varargs allows splitting a 16 byte struct (or SIMD type) between stack - // and the last general purpose register. - if (TargetOS::IsWindows && callIsVararg) - { - // Override the decision and force a split. - isRegArg = (intArgRegNum + (size - 1)) <= maxRegArgs; - } - else - { - // We also must update intArgRegNum so that we no longer try to - // allocate any new general purpose registers for args - // - intArgRegNum = maxRegArgs; - } - } + // Make sure for vararg methods isHfaArg is not true. + isHfaArg = false; } -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (passUsingFloatRegs) + if (isHfaArg) { - // Check if the last register needed is still in the fp argument register range. 
- passUsingFloatRegs = isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; - - if (isStructArg) - { - if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) && - passUsingFloatRegs) - { - passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; - } - - if (!passUsingFloatRegs) - { - size = structSize > 8 ? 2 : 1; - structBaseType = structSize <= 8 ? TYP_I_IMPL : TYP_STRUCT; - floatFieldFlags = 0; - } - else if (passUsingFloatRegs) - { - if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) - { - nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); - } - else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) - { - assert(size == 1); - size = 2; - passUsingFloatRegs = false; - nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); - } - else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) - { - assert(size == 1); - size = 2; - nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum, call->GetUnmanagedCallConv()); - } - } - } + hfaSlots = comp->GetHfaCount(argSigClass); // depends only on the type, not on where the arg is passed - assert(!isHfaArg); // LoongArch64 does not support HFA. - } + assert(abiInfo.HasExactlyOneStackSegment() || (hfaSlots == abiInfo.NumSegments)); - // if we run out of floating-point argument registers, try the int argument registers. - if (!isRegArg) - { - // Check if the last register needed is still in the int argument register range. - isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; - if (!passUsingFloatRegs && isRegArg && (size > 1)) - { - nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1, call->GetUnmanagedCallConv()); - } - - // Did we run out of registers when we had a 16-byte struct (size===2) ? - // (i.e we only have one register remaining but we needed two registers to pass this arg) + // If we have a HFA struct it's possible we transition from a method that originally + // only had integer types to now start having FP types. 
We have to communicate this + // through this flag since LSRA later on will use this flag to determine whether + // or not to track the FP register set. // - if (!isRegArg && (size > 1)) - { - // We also must update intArgRegNum so that we no longer try to - // allocate any new general purpose registers for args - // - isRegArg = intArgRegNum < maxRegArgs; // the split-struct case. - nextOtherRegNum = REG_STK; - } + comp->compFloatingPointUsed = true; } -#else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 or TARGET_RISCV64 + } -#if defined(UNIX_AMD64_ABI) + if (isHfaArg) + { + arg.AbiInfo.SetHfaType(hfaType, hfaSlots); + } - // Here a struct can be passed in register following the classifications of its members and size. - // Now make sure there are actually enough registers to do so. - if (isStructArg) - { - unsigned int structFloatRegs = 0; - unsigned int structIntRegs = 0; - for (unsigned int i = 0; i < structDesc.eightByteCount; i++) - { - if (structDesc.IsIntegralSlot(i)) - { - structIntRegs++; - } - else if (structDesc.IsSseSlot(i)) - { - structFloatRegs++; - } - } + if (abiInfo.IsSplitAcrossRegistersAndStack()) + { + m_hasStackArgs = true; + m_hasRegArgs = true; - isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) && - ((intArgRegNum + structIntRegs) <= MAX_REG_ARG); - } - else + arg.AbiInfo.SetSplit(true); + // All of our ABIs have their split args at offset 0 relative to + // the stack arguments passed. 
+ arg.AbiInfo.ByteOffset = 0; + arg.AbiInfo.ByteSize = 0; + for (unsigned i = 0; i < abiInfo.NumSegments; i++) { - if (passUsingFloatRegs) + const ABIPassingSegment& segment = abiInfo.Segments[i]; + arg.AbiInfo.ByteSize += segment.Size; + if (segment.IsPassedInRegister()) { - isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG; + arg.AbiInfo.SetRegNum(i, segment.GetRegister()); } else { - isRegArg = intArgRegNum < MAX_REG_ARG; + assert(segment.GetStackOffset() == 0); } } -#else // !defined(UNIX_AMD64_ABI) - isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; -#endif // !defined(UNIX_AMD64_ABI) -#endif // TARGET_ARM - } - else - { - isRegArg = false; - } - - // Some well known args have custom register assignment. - // These should not affect the placement of any other args or stack space required. - // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. - nonStdRegNum = GetCustomRegister(comp, call->GetUnmanagedCallConv(), arg.GetWellKnownArg()); - if (nonStdRegNum != REG_NA) - { - isRegArg = true; - } - else if (call->IsTailCallViaJitHelper()) - { - // We have already (before calling fgMorphArgs()) appended the 4 special args - // required by the x86 tailcall helper. These args are required to go on the - // stack. Force them to the stack here. - assert(numArgs >= 4); - if (argIndex >= numArgs - 4) - { - isRegArg = false; - } - } - - // Now we know if the argument goes in registers or not and how big it is. - -#ifdef TARGET_ARM - // If we ever allocate a floating point argument to the stack, then all - // subsequent HFA/float/double arguments go on the stack. - if (!isRegArg && passUsingFloatRegs) - { - for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum) - { - fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT); - } } - - // If we think we're going to split a struct between integer registers and the stack, check to - // see if we've already assigned a floating-point arg to the stack. 
- if (isRegArg && // We decided above to use a register for the argument - !passUsingFloatRegs && // We're using integer registers - (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack - anyFloatStackArgs) // We've already used the stack for a floating-point argument + else if (abiInfo.HasAnyRegisterSegment()) { - isRegArg = false; // Change our mind; don't pass this struct partially in registers - - // Skip the rest of the integer argument registers - for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum) - { - argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL); - } - } -#endif // TARGET_ARM - - arg.AbiInfo = CallArgABIInformation(); - arg.AbiInfo.ArgType = argx->TypeGet(); - - if (isRegArg) - { - regNumber nextRegNum = REG_STK; - -#if defined(UNIX_AMD64_ABI) - regNumber nextOtherRegNum = REG_STK; - unsigned int structFloatRegs = 0; - unsigned int structIntRegs = 0; -#endif // defined(UNIX_AMD64_ABI) - - if (nonStdRegNum != REG_NA) - { - nextRegNum = nonStdRegNum; - } -#if defined(UNIX_AMD64_ABI) - else if (isStructArg && structDesc.passedInRegisters) - { - // It is a struct passed in registers. Assign the next available register. - assert((structDesc.eightByteCount <= 2) && "Too many eightbytes."); - regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum}; - for (unsigned int i = 0; i < structDesc.eightByteCount; i++) - { - if (structDesc.IsIntegralSlot(i)) - { - *nextRegNumPtrs[i] = - genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs, call->GetUnmanagedCallConv()); - ++structIntRegs; - } - else if (structDesc.IsSseSlot(i)) - { - *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs); - ++structFloatRegs; - } - } - } -#endif // defined(UNIX_AMD64_ABI) - else - { - // fill in or update the argInfo table - nextRegNum = passUsingFloatRegs - ? 
genMapFloatRegArgNumToRegNum(nextFltArgRegNum) - : genMapIntRegArgNumToRegNum(intArgRegNum, call->GetUnmanagedCallConv()); - } - -#ifdef WINDOWS_AMD64_ABI - assert(size == 1); -#endif - // This is a register argument m_hasRegArgs = true; - arg.AbiInfo.SetRegNum(0, nextRegNum); - arg.AbiInfo.NumRegs = size; - arg.AbiInfo.SetByteSize(byteSize, argAlignBytes, isStructArg, isFloatHfa); -#ifdef UNIX_AMD64_ABI - if (isStructArg) + unsigned numRegsToWrite = min(abiInfo.NumSegments, (unsigned)MAX_ARG_REG_COUNT); + arg.AbiInfo.ByteSize = 0; + for (unsigned i = 0; i < numRegsToWrite; i++) { - arg.AbiInfo.StructDesc.CopyFrom(structDesc); + const ABIPassingSegment& segment = abiInfo.Segments[i]; + arg.AbiInfo.SetRegNum(0, segment.GetRegister()); + arg.AbiInfo.ByteSize += segment.Size; } -#endif -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - assert(size <= 2); - - if (size == 2) - { - arg.AbiInfo.SetRegNum(1, nextOtherRegNum); - } +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) INDEBUG(arg.CheckIsStruct()); #endif - - arg.AbiInfo.IsBackFilled = isBackFilled; - - // Set up the next intArgRegNum and fltArgRegNum values. 
- if (!isBackFilled) - { -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // Increment intArgRegNum by 'size' registers - if (nonStdRegNum == REG_NA) - { - if ((size > 1) && ((intArgRegNum + 1) == maxRegArgs) && (nextOtherRegNum == REG_STK)) - { - // This indicates a partial enregistration of a struct type - assert((isStructArg) || argx->OperIs(GT_FIELD_LIST) || argx->OperIsCopyBlkOp() || - (argx->gtOper == GT_COMMA && (argx->gtFlags & GTF_ASG))); - unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum; - assert((unsigned char)numRegsPartial == numRegsPartial); - SplitArg(&arg, numRegsPartial, size - numRegsPartial); - assert(!passUsingFloatRegs); - assert(size == 2); - intArgRegNum = maxRegArgs; - } - else if ((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) == 0x0) - { - if (passUsingFloatRegs) - { - fltArgRegNum += 1; - } - else - { - intArgRegNum += size; - } - } - else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) - { - structBaseType = structSize == 8 ? TYP_DOUBLE : TYP_FLOAT; - fltArgRegNum += 1; - arg.AbiInfo.StructFloatFieldType[0] = structBaseType; - } - else if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) != 0) - { - fltArgRegNum += 1; - intArgRegNum += 1; - if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) - { - arg.AbiInfo.StructFloatFieldType[0] = - (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg.AbiInfo.StructFloatFieldType[1] = - (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; - } - else - { - arg.AbiInfo.StructFloatFieldType[0] = - (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; - arg.AbiInfo.StructFloatFieldType[1] = - (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - } - } - else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) - { - fltArgRegNum += 2; - arg.AbiInfo.StructFloatFieldType[0] = - (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? 
TYP_DOUBLE : TYP_FLOAT; - arg.AbiInfo.StructFloatFieldType[1] = - (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - } - } -#else - -#if defined(UNIX_AMD64_ABI) - if (isStructArg) - { - // For this case, we've already set the regNums in the argTabEntry - intArgRegNum += structIntRegs; - fltArgRegNum += structFloatRegs; - } - else -#endif // defined(UNIX_AMD64_ABI) - { - if (nonStdRegNum == REG_NA) - { -#if FEATURE_ARG_SPLIT - // Check for a split (partially enregistered) struct - if (compFeatureArgSplit() && !passUsingFloatRegs && ((intArgRegNum + size) > MAX_REG_ARG)) - { - // This indicates a partial enregistration of a struct type - assert((isStructArg) || argx->OperIs(GT_FIELD_LIST) || argx->OperIsCopyBlkOp() || - (argx->gtOper == GT_COMMA && (argx->gtFlags & GTF_ASG))); - unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum; - assert((unsigned char)numRegsPartial == numRegsPartial); - SplitArg(&arg, numRegsPartial, size - numRegsPartial); - } -#endif // FEATURE_ARG_SPLIT - - if (passUsingFloatRegs) - { - fltArgRegNum += size; - -#ifdef WINDOWS_AMD64_ABI - // Whenever we pass an integer register argument - // we skip the corresponding floating point register argument - intArgRegNum = min(intArgRegNum + size, (unsigned)MAX_REG_ARG); -#endif // WINDOWS_AMD64_ABI - // No supported architecture supports partial structs using float registers. - assert(fltArgRegNum <= MAX_FLOAT_REG_ARG); - } - else - { - // Increment intArgRegNum by 'size' registers - intArgRegNum += size; - -#ifdef WINDOWS_AMD64_ABI - fltArgRegNum = min(fltArgRegNum + size, (unsigned)MAX_FLOAT_REG_ARG); -#endif // WINDOWS_AMD64_ABI - } - } - } -#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - } } - else // We have an argument that is not passed in a register + else { + assert(abiInfo.HasAnyStackSegment()); + // We only expect to see one stack segment in these cases. 
+ assert(abiInfo.NumSegments == 1); // This is a stack argument m_hasStackArgs = true; + const ABIPassingSegment& segment = abiInfo.Segments[0]; arg.AbiInfo.SetRegNum(0, REG_STK); - m_nextStackByteOffset = roundUp(m_nextStackByteOffset, argAlignBytes); - arg.AbiInfo.ByteOffset = m_nextStackByteOffset; - arg.AbiInfo.SetByteSize(byteSize, argAlignBytes, isStructArg, isFloatHfa); - - m_nextStackByteOffset += arg.AbiInfo.ByteSize; -#ifdef UNIX_AMD64_ABI - // TODO-Amd64-Unix-CQ: This is temporary (see also in fgMorphArgs). - if (structDesc.passedInRegisters) - { - arg.AbiInfo.StructDesc.CopyFrom(structDesc); - } -#endif - } - - if (isHfaArg) - { - arg.AbiInfo.SetHfaType(hfaType, hfaSlots); - } - - arg.AbiInfo.SetMultiRegNums(); - - if (varTypeIsStruct(arg.GetSignatureType())) - { - arg.AbiInfo.PassedByRef = passStructByRef; - arg.AbiInfo.ArgType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; + arg.AbiInfo.ByteSize = segment.Size; + arg.AbiInfo.ByteOffset = segment.GetStackOffset(); } - else - { - arg.AbiInfo.ArgType = argx->TypeGet(); - } - - argIndex++; } // end foreach argument loop + m_argsStackSize = classifier.StackSize(); + #ifdef DEBUG if (VERBOSE) { @@ -3029,7 +2253,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // unsigned CallArgs::OutgoingArgsStackSize() const { - unsigned aligned = Compiler::GetOutgoingArgByteSize(m_nextStackByteOffset); + unsigned aligned = Compiler::GetOutgoingArgByteSize(m_argsStackSize); return max(aligned, (unsigned)MIN_ARG_AREA_FOR_CALL); } @@ -5350,8 +4574,11 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) callee->gtArgs.AddFinalArgsAndDetermineABIInfo(this, callee); - unsigned calleeArgStackSize = 0; - unsigned callerArgStackSize = info.compArgStackSize; + unsigned calleeArgStackSize = callee->gtArgs.OutgoingArgsStackSize(); + unsigned callerArgStackSize = roundUp(lvaParameterStackSize, TARGET_POINTER_SIZE); + + JITDUMP("Caller parameter 
stack size: %u\n", callerArgStackSize); + JITDUMP("Callee arguments stack size: %u", calleeArgStackSize); auto reportFastTailCallDecision = [&](const char* thisFailReason) { if (failReason != nullptr) @@ -5403,21 +4630,16 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) #endif // DEBUG }; +#if defined(TARGET_ARM) || defined(TARGET_RISCV64) for (CallArg& arg : callee->gtArgs.Args()) { - calleeArgStackSize = roundUp(calleeArgStackSize, arg.AbiInfo.ByteAlignment); - calleeArgStackSize += arg.AbiInfo.GetStackByteSize(); - -#if defined(TARGET_ARM) || defined(TARGET_RISCV64) - if (arg.AbiInfo.IsSplit()) + if (arg.NewAbiInfo.IsSplitAcrossRegistersAndStack()) { reportFastTailCallDecision("Argument splitting in callee is not supported on " TARGET_READABLE_NAME); return false; } -#endif // TARGET_ARM || TARGET_RISCV64 } - - calleeArgStackSize = GetOutgoingArgByteSize(calleeArgStackSize); +#endif // TARGET_ARM || TARGET_RISCV64 #if defined(TARGET_ARM) || defined(TARGET_RISCV64) if (compHasSplitParam) From a95f74504de174af4b2edd86fbecdcc147aed5d3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 15 Jun 2024 09:20:43 +0200 Subject: [PATCH 02/16] Run jit-format --- src/coreclr/jit/abi.h | 10 +++++----- src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/lclvars.cpp | 3 ++- src/coreclr/jit/lower.cpp | 6 ++++-- src/coreclr/jit/morph.cpp | 38 ++++++++++++++++++------------------- 5 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 2ab0268d4cee2..b03f43435e3dd 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -63,11 +63,11 @@ struct ABIPassingInformation { } - bool HasAnyRegisterSegment() const; - bool HasAnyStackSegment() const; - bool HasExactlyOneRegisterSegment() const; - bool HasExactlyOneStackSegment() const; - bool IsSplitAcrossRegistersAndStack() const; + bool HasAnyRegisterSegment() const; + bool HasAnyStackSegment() const; + bool 
HasExactlyOneRegisterSegment() const; + bool HasExactlyOneStackSegment() const; + bool IsSplitAcrossRegistersAndStack() const; unsigned CountRegisterSegments() const; static ABIPassingInformation FromSegment(Compiler* comp, const ABIPassingSegment& segment); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 8d65a0cea7ab3..c53522154981e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9736,7 +9736,7 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co { assert((m_head == nullptr) && (m_lateHead == nullptr)); - m_argsStackSize = other->m_argsStackSize; + m_argsStackSize = other->m_argsStackSize; m_hasThisPointer = other->m_hasThisPointer; m_hasRetBuffer = other->m_hasRetBuffer; m_isVarArgs = other->m_isVarArgs; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 354f0d827db8e..f2f008ad4c9bd 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1692,7 +1692,8 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, template void Compiler::lvaClassifyParameterABI(Classifier& classifier) { - lvaParameterPassingInfo = info.compArgsCount == 0 ? nullptr : new (this, CMK_LvaTable) ABIPassingInformation[info.compArgsCount]; + lvaParameterPassingInfo = + info.compArgsCount == 0 ? nullptr : new (this, CMK_LvaTable) ABIPassingInformation[info.compArgsCount]; for (unsigned i = 0; i < info.compArgsCount; i++) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c0bb658e55a45..e5a0f932f6b30 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3443,10 +3443,12 @@ void Lowering::LowerCFGCall(GenTreeCall* call) call->gtArgs.PushLateBack(targetArg); // Set up ABI information for this arg. 
- targetArg->NewAbiInfo = ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_DISPATCH_INDIRECT_CALL_ADDR, 0, TARGET_POINTER_SIZE)); + targetArg->NewAbiInfo = + ABIPassingInformation::FromSegment(comp, ABIPassingSegment::InRegister(REG_DISPATCH_INDIRECT_CALL_ADDR, + 0, TARGET_POINTER_SIZE)); targetArg->AbiInfo.ArgType = callTarget->TypeGet(); targetArg->AbiInfo.SetRegNum(0, REG_DISPATCH_INDIRECT_CALL_ADDR); - targetArg->AbiInfo.NumRegs = 1; + targetArg->AbiInfo.NumRegs = 1; targetArg->AbiInfo.ByteSize = TARGET_POINTER_SIZE; // Lower the newly added args now that call is updated diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b6485c6c04306..bd989a904e745 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1923,8 +1923,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper)); - m_hasRegArgs = false; - m_hasStackArgs = false; + m_hasRegArgs = false; + m_hasStackArgs = false; // At this point, we should not have any late args, as this needs to be done before those are determined. assert(m_lateHead == nullptr); @@ -2066,9 +2066,9 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call #endif ClassifierInfo info; - info.CallConv = call->GetUnmanagedCallConv(); - info.IsVarArgs = call->IsVarargs(); - info.HasThis = call->gtArgs.HasThisPointer(); + info.CallConv = call->GetUnmanagedCallConv(); + info.IsVarArgs = call->IsVarargs(); + info.HasThis = call->gtArgs.HasThisPointer(); info.HasRetBuff = call->gtArgs.HasRetBuffer(); PlatformClassifier classifier(info); @@ -2092,7 +2092,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call ClassLayout* argLayout = argSigClass == NO_CLASS_HANDLE ? 
nullptr : comp->typGetObjLayout(argSigClass); ABIPassingInformation abiInfo; - ABIPassingSegment inlineSegment; + ABIPassingSegment inlineSegment; // Some well known args have custom register assignment. // These should not affect the placement of any other args or stack space required. @@ -2107,10 +2107,10 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call else { inlineSegment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE); - abiInfo = ABIPassingInformation(1, &inlineSegment); + abiInfo = ABIPassingInformation(1, &inlineSegment); } - arg.NewAbiInfo = abiInfo; + arg.NewAbiInfo = abiInfo; arg.AbiInfo = CallArgABIInformation(); arg.AbiInfo.NumRegs = abiInfo.CountRegisterSegments(); @@ -2119,7 +2119,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call assert(argx == arg.GetEarlyNode()); Compiler::structPassingKind howToPassStruct; - var_types structBaseType = comp->getArgTypeForStruct(argSigClass, &howToPassStruct, IsVarArgs(), argLayout->GetSize()); + var_types structBaseType = + comp->getArgTypeForStruct(argSigClass, &howToPassStruct, IsVarArgs(), argLayout->GetSize()); arg.AbiInfo.PassedByRef = howToPassStruct == Compiler::SPK_ByReference; arg.AbiInfo.ArgType = structBaseType == TYP_UNKNOWN ? argx->TypeGet() : structBaseType; @@ -2133,13 +2134,13 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } // TODO-Fixme: remove HFA information from VarDsc. 
- var_types hfaType = TYP_UNDEF; - bool isHfaArg = false; - unsigned hfaSlots = 0; + var_types hfaType = TYP_UNDEF; + bool isHfaArg = false; + unsigned hfaSlots = 0; if (GlobalJitOptions::compFeatureHfa) { - hfaType = comp->GetHfaType(argSigClass); + hfaType = comp->GetHfaType(argSigClass); isHfaArg = varTypeIsValidHfaType(hfaType); if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && IsVarArgs()) @@ -2171,13 +2172,13 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call if (abiInfo.IsSplitAcrossRegistersAndStack()) { m_hasStackArgs = true; - m_hasRegArgs = true; + m_hasRegArgs = true; arg.AbiInfo.SetSplit(true); // All of our ABIs have their split args at offset 0 relative to // the stack arguments passed. arg.AbiInfo.ByteOffset = 0; - arg.AbiInfo.ByteSize = 0; + arg.AbiInfo.ByteSize = 0; for (unsigned i = 0; i < abiInfo.NumSegments; i++) { const ABIPassingSegment& segment = abiInfo.Segments[i]; @@ -2198,7 +2199,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call m_hasRegArgs = true; unsigned numRegsToWrite = min(abiInfo.NumSegments, (unsigned)MAX_ARG_REG_COUNT); - arg.AbiInfo.ByteSize = 0; + arg.AbiInfo.ByteSize = 0; for (unsigned i = 0; i < numRegsToWrite; i++) { const ABIPassingSegment& segment = abiInfo.Segments[i]; @@ -2206,7 +2207,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call arg.AbiInfo.ByteSize += segment.Size; } - #if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) INDEBUG(arg.CheckIsStruct()); #endif @@ -2217,10 +2217,10 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // We only expect to see one stack segment in these cases. 
assert(abiInfo.NumSegments == 1); // This is a stack argument - m_hasStackArgs = true; + m_hasStackArgs = true; const ABIPassingSegment& segment = abiInfo.Segments[0]; arg.AbiInfo.SetRegNum(0, REG_STK); - arg.AbiInfo.ByteSize = segment.Size; + arg.AbiInfo.ByteSize = segment.Size; arg.AbiInfo.ByteOffset = segment.GetStackOffset(); } } // end foreach argument loop From c1cdad44243a2e587b6680d2c283e3a288eef230 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 10:25:46 +0200 Subject: [PATCH 03/16] Fix x86 special tailcall args --- src/coreclr/jit/gentree.cpp | 2 ++ src/coreclr/jit/gentree.h | 1 + src/coreclr/jit/morph.cpp | 16 +++++++++++----- src/coreclr/jit/targetx86.cpp | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index c53522154981e..0c104d4d8c18d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13093,6 +13093,8 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg) return "swift error"; case WellKnownArg::SwiftSelf: return "swift self"; + case WellKnownArg::X86TailCallSpecialArg: + return "tail call"; default: return nullptr; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index b29c619585ef9..382057c0c7b8b 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4478,6 +4478,7 @@ enum class WellKnownArg : unsigned DispatchIndirectCallTarget, SwiftError, SwiftSelf, + X86TailCallSpecialArg, }; #ifdef DEBUG diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index bd989a904e745..996da921702c2 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -786,6 +786,8 @@ const char* getWellKnownArgName(WellKnownArg arg) return "SwiftError"; case WellKnownArg::SwiftSelf: return "SwiftSelf"; + case WellKnownArg::X86TailCallSpecialArg: + return "X86TailCallSpecialArg"; } return "N/A"; @@ -2067,7 +2069,8 @@ void 
CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call ClassifierInfo info; info.CallConv = call->GetUnmanagedCallConv(); - info.IsVarArgs = call->IsVarargs(); + // X86 tailcall helper is considered varargs, but not for ABI classification purposes. + info.IsVarArgs = call->IsVarargs() && !call->IsTailCallViaJitHelper(); info.HasThis = call->gtArgs.HasThisPointer(); info.HasRetBuff = call->gtArgs.HasRetBuffer(); PlatformClassifier classifier(info); @@ -2110,6 +2113,9 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call abiInfo = ABIPassingInformation(1, &inlineSegment); } + JITDUMP("Argument %u ABI info: ", GetIndex(&arg)); + DBEXEC(VERBOSE, abiInfo.Dump()); + arg.NewAbiInfo = abiInfo; arg.AbiInfo = CallArgABIInformation(); arg.AbiInfo.NumRegs = abiInfo.CountRegisterSegments(); @@ -6368,19 +6374,19 @@ void Compiler::fgMorphTailCallViaJitHelper(GenTreeCall* call) unsigned nOldStkArgsWords = (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; GenTree* arg3Node = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); - CallArg* arg3 = call->gtArgs.PushBack(this, NewCallArg::Primitive(arg3Node)); + CallArg* arg3 = call->gtArgs.PushBack(this, NewCallArg::Primitive(arg3Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate. // The constant will be replaced. GenTree* arg2Node = gtNewIconNode(9, TYP_I_IMPL); - CallArg* arg2 = call->gtArgs.InsertAfter(this, arg3, NewCallArg::Primitive(arg2Node)); + CallArg* arg2 = call->gtArgs.InsertAfter(this, arg3, NewCallArg::Primitive(arg2Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // Inject a placeholder for the flags. // The constant will be replaced. 
GenTree* arg1Node = gtNewIconNode(8, TYP_I_IMPL); - CallArg* arg1 = call->gtArgs.InsertAfter(this, arg2, NewCallArg::Primitive(arg1Node)); + CallArg* arg1 = call->gtArgs.InsertAfter(this, arg2, NewCallArg::Primitive(arg1Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // Inject a placeholder for the real call target that the Lowering phase will generate. // The constant will be replaced. GenTree* arg0Node = gtNewIconNode(7, TYP_I_IMPL); - CallArg* arg0 = call->gtArgs.InsertAfter(this, arg1, NewCallArg::Primitive(arg0Node)); + CallArg* arg0 = call->gtArgs.InsertAfter(this, arg1, NewCallArg::Primitive(arg0Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // It is now a varargs tail call. call->gtArgs.SetIsVarArgs(); diff --git a/src/coreclr/jit/targetx86.cpp b/src/coreclr/jit/targetx86.cpp index eb6ced79ce1eb..e00bba78a3a53 100644 --- a/src/coreclr/jit/targetx86.cpp +++ b/src/coreclr/jit/targetx86.cpp @@ -84,7 +84,7 @@ ABIPassingInformation X86Classifier::Classify(Compiler* comp, unsigned numSlots = (size + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; bool canEnreg = false; - if (m_regs.Count() >= numSlots) + if ((m_regs.Count() >= numSlots) && (wellKnownParam != WellKnownArg::X86TailCallSpecialArg)) { switch (type) { From 49d9e6eb11316c750e70c1c69d34293ccab018ec Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 10:54:42 +0200 Subject: [PATCH 04/16] Round up stack segments in old ABI info --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 996da921702c2..5dea38c73bfb3 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2226,7 +2226,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call m_hasStackArgs = true; const ABIPassingSegment& segment = abiInfo.Segments[0]; arg.AbiInfo.SetRegNum(0, REG_STK); - arg.AbiInfo.ByteSize = segment.Size; + arg.AbiInfo.ByteSize = 
roundUp(segment.Size, TARGET_POINTER_SIZE); arg.AbiInfo.ByteOffset = segment.GetStackOffset(); } } // end foreach argument loop From cfe0f1d1cee359ecbd78fba570562be5357215cc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 11:20:43 +0200 Subject: [PATCH 05/16] Fix multiple regs --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 5dea38c73bfb3..e0a1d2d657463 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2209,7 +2209,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call for (unsigned i = 0; i < numRegsToWrite; i++) { const ABIPassingSegment& segment = abiInfo.Segments[i]; - arg.AbiInfo.SetRegNum(0, segment.GetRegister()); + arg.AbiInfo.SetRegNum(i, segment.GetRegister()); arg.AbiInfo.ByteSize += segment.Size; } From 11a3bc2d0dee894b3ba66aa7a4019b725ba156b8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 11:20:52 +0200 Subject: [PATCH 06/16] Reorder HFA --- src/coreclr/jit/morph.cpp | 70 +++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index e0a1d2d657463..9ed1d615e0c95 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2139,42 +2139,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call arg.AbiInfo.ArgType = argx->TypeGet(); } - // TODO-Fixme: remove HFA information from VarDsc. - var_types hfaType = TYP_UNDEF; - bool isHfaArg = false; - unsigned hfaSlots = 0; - - if (GlobalJitOptions::compFeatureHfa) - { - hfaType = comp->GetHfaType(argSigClass); - isHfaArg = varTypeIsValidHfaType(hfaType); - - if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && IsVarArgs()) - { - // Make sure for vararg methods isHfaArg is not true. 
- isHfaArg = false; - } - - if (isHfaArg) - { - hfaSlots = abiInfo.NumSegments; - - assert(hfaSlots == comp->GetHfaCount(argSigClass)); - - // If we have a HFA struct it's possible we transition from a method that originally - // only had integer types to now start having FP types. We have to communicate this - // through this flag since LSRA later on will use this flag to determine whether - // or not to track the FP register set. - // - comp->compFloatingPointUsed = true; - } - } - - if (isHfaArg) - { - arg.AbiInfo.SetHfaType(hfaType, hfaSlots); - } - if (abiInfo.IsSplitAcrossRegistersAndStack()) { m_hasStackArgs = true; @@ -2229,6 +2193,40 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call arg.AbiInfo.ByteSize = roundUp(segment.Size, TARGET_POINTER_SIZE); arg.AbiInfo.ByteOffset = segment.GetStackOffset(); } + + // TODO-Cleanup: remove HFA information from VarDsc. + var_types hfaType = TYP_UNDEF; + bool isHfaArg = false; + unsigned hfaSlots = 0; + + if (GlobalJitOptions::compFeatureHfa) + { + hfaType = comp->GetHfaType(argSigClass); + isHfaArg = varTypeIsValidHfaType(hfaType); + + if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && IsVarArgs()) + { + // Make sure for vararg methods isHfaArg is not true. + isHfaArg = false; + } + + if (isHfaArg) + { + hfaSlots = comp->GetHfaCount(argSigClass); + + // If we have a HFA struct it's possible we transition from a method that originally + // only had integer types to now start having FP types. We have to communicate this + // through this flag since LSRA later on will use this flag to determine whether + // or not to track the FP register set. 
+ // + comp->compFloatingPointUsed = true; + } + } + + if (isHfaArg) + { + arg.AbiInfo.SetHfaType(hfaType, hfaSlots); + } } // end foreach argument loop m_argsStackSize = classifier.StackSize(); From 03d33c07d135ef7ba6d1827edafbf5bd1000f9f0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 12:22:18 +0200 Subject: [PATCH 07/16] Quirk reg counting for arm32 --- src/coreclr/jit/morph.cpp | 42 +++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 9ed1d615e0c95..09a10bc7c1e87 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2118,7 +2118,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call arg.NewAbiInfo = abiInfo; arg.AbiInfo = CallArgABIInformation(); - arg.AbiInfo.NumRegs = abiInfo.CountRegisterSegments(); if (varTypeIsStruct(argSigType)) { @@ -2145,20 +2144,26 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call m_hasRegArgs = true; arg.AbiInfo.SetSplit(true); - // All of our ABIs have their split args at offset 0 relative to - // the stack arguments passed. 
arg.AbiInfo.ByteOffset = 0; arg.AbiInfo.ByteSize = 0; + unsigned regNumIndex = 0; for (unsigned i = 0; i < abiInfo.NumSegments; i++) { const ABIPassingSegment& segment = abiInfo.Segments[i]; - arg.AbiInfo.ByteSize += segment.Size; if (segment.IsPassedInRegister()) { - arg.AbiInfo.SetRegNum(i, segment.GetRegister()); + arg.AbiInfo.ByteSize += segment.Size; + if (regNumIndex < MAX_ARG_REG_COUNT) + { + arg.AbiInfo.SetRegNum(regNumIndex, segment.GetRegister()); + regNumIndex++; + } + + arg.AbiInfo.NumRegs++; } else { + arg.AbiInfo.ByteSize += roundUp(segment.Size, TARGET_POINTER_SIZE); assert(segment.GetStackOffset() == 0); } } @@ -2168,13 +2173,34 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // This is a register argument m_hasRegArgs = true; - unsigned numRegsToWrite = min(abiInfo.NumSegments, (unsigned)MAX_ARG_REG_COUNT); arg.AbiInfo.ByteSize = 0; - for (unsigned i = 0; i < numRegsToWrite; i++) + unsigned regNumIndex = 0; + for (unsigned i = 0; i < abiInfo.NumSegments; i++) { const ABIPassingSegment& segment = abiInfo.Segments[i]; - arg.AbiInfo.SetRegNum(i, segment.GetRegister()); + + if (regNumIndex < MAX_ARG_REG_COUNT) + { + arg.AbiInfo.SetRegNum(regNumIndex, segment.GetRegister()); + regNumIndex++; + } + arg.AbiInfo.ByteSize += segment.Size; + arg.AbiInfo.NumRegs++; + +#ifdef TARGET_ARM + // Old style ABI info expects two registers counted for these segments. 
+ if (segment.GetRegisterType() == TYP_DOUBLE) + { + arg.AbiInfo.NumRegs++; + + if (argSigType == TYP_DOUBLE) + { + arg.AbiInfo.SetRegNum(regNumIndex, REG_NEXT(segment.GetRegister())); + regNumIndex++; + } + } +#endif } #if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) From 786111665171a2f436a46066b94a79e177fbf967 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 12:27:23 +0200 Subject: [PATCH 08/16] Run jit-format --- src/coreclr/jit/morph.cpp | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 09a10bc7c1e87..143a3902c5857 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2068,7 +2068,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call #endif ClassifierInfo info; - info.CallConv = call->GetUnmanagedCallConv(); + info.CallConv = call->GetUnmanagedCallConv(); // X86 tailcall helper is considered varargs, but not for ABI classification purposes. 
info.IsVarArgs = call->IsVarargs() && !call->IsTailCallViaJitHelper(); info.HasThis = call->gtArgs.HasThisPointer(); @@ -2116,8 +2116,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call JITDUMP("Argument %u ABI info: ", GetIndex(&arg)); DBEXEC(VERBOSE, abiInfo.Dump()); - arg.NewAbiInfo = abiInfo; - arg.AbiInfo = CallArgABIInformation(); + arg.NewAbiInfo = abiInfo; + arg.AbiInfo = CallArgABIInformation(); if (varTypeIsStruct(argSigType)) { @@ -2146,7 +2146,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call arg.AbiInfo.SetSplit(true); arg.AbiInfo.ByteOffset = 0; arg.AbiInfo.ByteSize = 0; - unsigned regNumIndex = 0; + unsigned regNumIndex = 0; for (unsigned i = 0; i < abiInfo.NumSegments; i++) { const ABIPassingSegment& segment = abiInfo.Segments[i]; @@ -2173,7 +2173,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // This is a register argument m_hasRegArgs = true; - arg.AbiInfo.ByteSize = 0; + arg.AbiInfo.ByteSize = 0; unsigned regNumIndex = 0; for (unsigned i = 0; i < abiInfo.NumSegments; i++) { @@ -6398,19 +6398,26 @@ void Compiler::fgMorphTailCallViaJitHelper(GenTreeCall* call) unsigned nOldStkArgsWords = (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES; GenTree* arg3Node = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL); - CallArg* arg3 = call->gtArgs.PushBack(this, NewCallArg::Primitive(arg3Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); + CallArg* arg3 = + call->gtArgs.PushBack(this, NewCallArg::Primitive(arg3Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate. // The constant will be replaced. 
GenTree* arg2Node = gtNewIconNode(9, TYP_I_IMPL); - CallArg* arg2 = call->gtArgs.InsertAfter(this, arg3, NewCallArg::Primitive(arg2Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); + CallArg* arg2 = + call->gtArgs.InsertAfter(this, arg3, + NewCallArg::Primitive(arg2Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // Inject a placeholder for the flags. // The constant will be replaced. GenTree* arg1Node = gtNewIconNode(8, TYP_I_IMPL); - CallArg* arg1 = call->gtArgs.InsertAfter(this, arg2, NewCallArg::Primitive(arg1Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); + CallArg* arg1 = + call->gtArgs.InsertAfter(this, arg2, + NewCallArg::Primitive(arg1Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // Inject a placeholder for the real call target that the Lowering phase will generate. // The constant will be replaced. GenTree* arg0Node = gtNewIconNode(7, TYP_I_IMPL); - CallArg* arg0 = call->gtArgs.InsertAfter(this, arg1, NewCallArg::Primitive(arg0Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); + CallArg* arg0 = + call->gtArgs.InsertAfter(this, arg1, + NewCallArg::Primitive(arg0Node).WellKnown(WellKnownArg::X86TailCallSpecialArg)); // It is now a varargs tail call. call->gtArgs.SetIsVarArgs(); From 693a980b867f789a398ffe01df7e2da32c43c698 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 12:29:34 +0200 Subject: [PATCH 09/16] Clean up --- src/coreclr/jit/abi.cpp | 20 -------------------- src/coreclr/jit/abi.h | 1 - 2 files changed, 21 deletions(-) diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index f11a84583ca2e..524eedb48ee4d 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -275,26 +275,6 @@ bool ABIPassingInformation::IsSplitAcrossRegistersAndStack() const return false; } -//----------------------------------------------------------------------------- -// CountRegisterSegments: -// Count the number of registers used to pass this argument. 
-// -// Return Value: -// Count of registers. -// -unsigned ABIPassingInformation::CountRegisterSegments() const -{ - unsigned count = 0; - for (unsigned i = 0; i < NumSegments; i++) - { - if (Segments[i].IsPassedInRegister()) - { - count++; - } - } - return count; -} - //----------------------------------------------------------------------------- // FromSegment: // Create ABIPassingInformation from a single segment. diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index b03f43435e3dd..f7ea0d080f6de 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -68,7 +68,6 @@ struct ABIPassingInformation bool HasExactlyOneRegisterSegment() const; bool HasExactlyOneStackSegment() const; bool IsSplitAcrossRegistersAndStack() const; - unsigned CountRegisterSegments() const; static ABIPassingInformation FromSegment(Compiler* comp, const ABIPassingSegment& segment); From 00f5bbc4851cbd3e78580b1c1d744d17196d674d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 12:46:46 +0200 Subject: [PATCH 10/16] Run jit-format --- src/coreclr/jit/abi.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index f7ea0d080f6de..4172844c4d553 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -63,11 +63,11 @@ struct ABIPassingInformation { } - bool HasAnyRegisterSegment() const; - bool HasAnyStackSegment() const; - bool HasExactlyOneRegisterSegment() const; - bool HasExactlyOneStackSegment() const; - bool IsSplitAcrossRegistersAndStack() const; + bool HasAnyRegisterSegment() const; + bool HasAnyStackSegment() const; + bool HasExactlyOneRegisterSegment() const; + bool HasExactlyOneStackSegment() const; + bool IsSplitAcrossRegistersAndStack() const; static ABIPassingInformation FromSegment(Compiler* comp, const ABIPassingSegment& segment); From d37cba78555952614cb9555e1ab421a8bbf7a0a9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 16 Jun 2024 15:42:41 
+0200 Subject: [PATCH 11/16] Compute ByteSize the old way --- src/coreclr/jit/morph.cpp | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 143a3902c5857..d3ac281b3ec66 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2145,14 +2145,12 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call arg.AbiInfo.SetSplit(true); arg.AbiInfo.ByteOffset = 0; - arg.AbiInfo.ByteSize = 0; unsigned regNumIndex = 0; for (unsigned i = 0; i < abiInfo.NumSegments; i++) { const ABIPassingSegment& segment = abiInfo.Segments[i]; if (segment.IsPassedInRegister()) { - arg.AbiInfo.ByteSize += segment.Size; if (regNumIndex < MAX_ARG_REG_COUNT) { arg.AbiInfo.SetRegNum(regNumIndex, segment.GetRegister()); @@ -2163,7 +2161,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } else { - arg.AbiInfo.ByteSize += roundUp(segment.Size, TARGET_POINTER_SIZE); assert(segment.GetStackOffset() == 0); } } @@ -2173,7 +2170,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call // This is a register argument m_hasRegArgs = true; - arg.AbiInfo.ByteSize = 0; unsigned regNumIndex = 0; for (unsigned i = 0; i < abiInfo.NumSegments; i++) { @@ -2185,7 +2181,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call regNumIndex++; } - arg.AbiInfo.ByteSize += segment.Size; arg.AbiInfo.NumRegs++; #ifdef TARGET_ARM @@ -2216,7 +2211,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call m_hasStackArgs = true; const ABIPassingSegment& segment = abiInfo.Segments[0]; arg.AbiInfo.SetRegNum(0, REG_STK); - arg.AbiInfo.ByteSize = roundUp(segment.Size, TARGET_POINTER_SIZE); arg.AbiInfo.ByteOffset = segment.GetStackOffset(); } @@ -2249,6 +2243,28 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } } + if 
(arg.AbiInfo.PassedByRef) + { + arg.AbiInfo.ByteSize = TARGET_POINTER_SIZE; + } + else + { + unsigned size = argLayout != nullptr ? argLayout->GetSize() : genTypeSize(argSigType); + + // Apple arm64 reuses the same stack slot for multiple args in some + // cases; old ABI info reflects that in the size. + // Primitives and float HFAs do not necessarily take up full stack + // slots. + if (compAppleArm64Abi() && (!varTypeIsStruct(argSigType) || (isHfaArg && (hfaType == TYP_FLOAT)))) + { + arg.AbiInfo.ByteSize = size; + } + else + { + arg.AbiInfo.ByteSize = roundUp(size, TARGET_POINTER_SIZE); + } + } + if (isHfaArg) { arg.AbiInfo.SetHfaType(hfaType, hfaSlots); @@ -4607,9 +4623,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) unsigned calleeArgStackSize = callee->gtArgs.OutgoingArgsStackSize(); unsigned callerArgStackSize = roundUp(lvaParameterStackSize, TARGET_POINTER_SIZE); - JITDUMP("Caller parameter stack size: %u\n", callerArgStackSize); - JITDUMP("Callee arguments stack size: %u", calleeArgStackSize); - auto reportFastTailCallDecision = [&](const char* thisFailReason) { if (failReason != nullptr) { From 9348fd1d40047b6b6a15826cece3772341b0b9d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Sowi=C5=84ski?= Date: Thu, 20 Jun 2024 10:48:56 +0200 Subject: [PATCH 12/16] [RISC-V] Fix struct types passed by floating-point calling convention (#4) * Classify stack arguments as single segment * Bring back type fix-ups for structs passed according to hardware floating-point calling convention * Use more strict reg masking condition for fixed refs like on other platforms because after #97368 we don't need an exception --- src/coreclr/jit/abi.cpp | 19 +++++++++++++++ src/coreclr/jit/abi.h | 1 + src/coreclr/jit/lsrabuild.cpp | 7 ------ src/coreclr/jit/morph.cpp | 27 +++++++++++++++++++++ src/coreclr/jit/targetriscv64.cpp | 40 ++++++++++++++++++------------- 5 files changed, 70 insertions(+), 24 deletions(-) diff --git 
a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index 524eedb48ee4d..d007e82577339 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -206,6 +206,25 @@ bool ABIPassingInformation::HasAnyRegisterSegment() const return false; } +//----------------------------------------------------------------------------- +// HasAnyFloatingRegisterSegment: +// Check if any part of this value is passed in a floating-point register. +// +// Return Value: +// True if so. +// +bool ABIPassingInformation::HasAnyFloatingRegisterSegment() const +{ + for (unsigned i = 0; i < NumSegments; i++) + { + if (Segments[i].IsPassedInRegister() && genIsValidFloatReg(Segments[i].GetRegister())) + { + return true; + } + } + return false; +} + //----------------------------------------------------------------------------- // HasAnyStackSegment: // Check if any part of this value is passed on the stack. diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 4172844c4d553..2d2690f159795 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -64,6 +64,7 @@ struct ABIPassingInformation } bool HasAnyRegisterSegment() const; + bool HasAnyFloatingRegisterSegment() const; bool HasAnyStackSegment() const; bool HasExactlyOneRegisterSegment() const; bool HasExactlyOneStackSegment() const; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e130d9fc600cf..831d5f3e80396 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -603,14 +603,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, regNumber physicalReg = genRegNumFromMask(mask, theInterval->registerType); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); -#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // The LoongArch64's ABI which the float args maybe passed by integer register - // when no float register left but free integer register. 
- assert((regType(theInterval->registerType) == FloatRegisterType) || - (allRegs(theInterval->registerType) & mask) != 0); -#else assert((allRegs(theInterval->registerType) & mask) != 0); -#endif } RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d3ac281b3ec66..0e416a8952784 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2126,6 +2126,33 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call Compiler::structPassingKind howToPassStruct; var_types structBaseType = comp->getArgTypeForStruct(argSigClass, &howToPassStruct, IsVarArgs(), argLayout->GetSize()); +#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) + if (arg.NewAbiInfo.HasAnyFloatingRegisterSegment()) + { + // Struct passed according to hardware floating-point calling convention + assert(arg.NewAbiInfo.NumSegments <= 2); + assert(!arg.NewAbiInfo.HasAnyStackSegment()); + if (arg.NewAbiInfo.NumSegments == 2) + { + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). Here we just keep "structBaseType" as "TYP_STRUCT". + // TODO-LoongArch64: fix "getPrimitiveTypeForStruct". + structBaseType = TYP_STRUCT; + } + else + { + assert(arg.NewAbiInfo.NumSegments == 1); + structBaseType = arg.NewAbiInfo.Segments[0].GetRegisterType(); + } + + for (unsigned i = 0; i < arg.NewAbiInfo.NumSegments; ++i) + { + arg.AbiInfo.StructFloatFieldType[i] = arg.NewAbiInfo.Segments[i].GetRegisterType(); + } + } +#endif // defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) arg.AbiInfo.PassedByRef = howToPassStruct == Compiler::SPK_ByReference; arg.AbiInfo.ArgType = structBaseType == TYP_UNKNOWN ? 
argx->TypeGet() : structBaseType; diff --git a/src/coreclr/jit/targetriscv64.cpp b/src/coreclr/jit/targetriscv64.cpp index ea4c99872b794..4df767d8cbfcc 100644 --- a/src/coreclr/jit/targetriscv64.cpp +++ b/src/coreclr/jit/targetriscv64.cpp @@ -138,32 +138,38 @@ ABIPassingInformation RiscV64Classifier::Classify(Compiler* comp, else { // Integer calling convention - auto passSlot = [this](unsigned offset, unsigned size) -> ABIPassingSegment { + auto passOnStack = [this](unsigned offset, unsigned size) -> ABIPassingSegment { assert(size > 0); - assert(size <= TARGET_POINTER_SIZE); - if (m_intRegs.Count() > 0) + assert(size <= 2 * TARGET_POINTER_SIZE); + assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0); + ABIPassingSegment seg = ABIPassingSegment::OnStack(m_stackArgSize, offset, size); + m_stackArgSize += (size > TARGET_POINTER_SIZE) ? (2 * TARGET_POINTER_SIZE) : TARGET_POINTER_SIZE; + return seg; + }; + + if (m_intRegs.Count() > 0) + { + if (passedSize <= TARGET_POINTER_SIZE) { - return ABIPassingSegment::InRegister(m_intRegs.Dequeue(), offset, size); + ABIPassingSegment seg = ABIPassingSegment::InRegister(m_intRegs.Dequeue(), 0, passedSize); + return ABIPassingInformation::FromSegment(comp, seg); } else { - assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0); - ABIPassingSegment seg = ABIPassingSegment::OnStack(m_stackArgSize, offset, size); - m_stackArgSize += TARGET_POINTER_SIZE; - return seg; + assert(varTypeIsStruct(type)); + unsigned int tailSize = passedSize - TARGET_POINTER_SIZE; + + ABIPassingSegment head = ABIPassingSegment::InRegister(m_intRegs.Dequeue(), 0, TARGET_POINTER_SIZE); + ABIPassingSegment tail = + (m_intRegs.Count() > 0) + ? 
ABIPassingSegment::InRegister(m_intRegs.Dequeue(), TARGET_POINTER_SIZE, tailSize) + : passOnStack(TARGET_POINTER_SIZE, tailSize); + return {2, new (comp, CMK_ABI) ABIPassingSegment[2]{head, tail}}; } - }; - - if (passedSize <= TARGET_POINTER_SIZE) - { - return ABIPassingInformation::FromSegment(comp, passSlot(0, passedSize)); } else { - assert(varTypeIsStruct(type)); - return {2, new (comp, CMK_ABI) - ABIPassingSegment[2]{passSlot(0, TARGET_POINTER_SIZE), - passSlot(TARGET_POINTER_SIZE, passedSize - TARGET_POINTER_SIZE)}}; + return ABIPassingInformation::FromSegment(comp, passOnStack(0, passedSize)); } } } From 221d46418c93290dd10ce4ae0abdf5bbb8e4099d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 20 Jun 2024 11:23:29 +0200 Subject: [PATCH 13/16] Fix build --- src/coreclr/jit/morph.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 6b67bd8e17954..f3ac5dba57c8e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2095,7 +2095,6 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call ClassLayout* argLayout = argSigClass == NO_CLASS_HANDLE ? nullptr : comp->typGetObjLayout(argSigClass); ABIPassingInformation abiInfo; - ABIPassingSegment inlineSegment; // Some well known args have custom register assignment. // These should not affect the placement of any other args or stack space required. 
@@ -2109,8 +2108,8 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call } else { - inlineSegment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE); - abiInfo = ABIPassingInformation(1, &inlineSegment); + ABIPassingSegment segment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE); + abiInfo = ABIPassingInformation::FromSegment(comp, segment); } JITDUMP("Argument %u ABI info: ", GetIndex(&arg)); @@ -2175,7 +2174,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call unsigned regNumIndex = 0; for (unsigned i = 0; i < abiInfo.NumSegments; i++) { - const ABIPassingSegment& segment = abiInfo.Segments[i]; + const ABIPassingSegment& segment = abiInfo.Segment(i); if (segment.IsPassedInRegister()) { if (regNumIndex < MAX_ARG_REG_COUNT) @@ -2200,7 +2199,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call unsigned regNumIndex = 0; for (unsigned i = 0; i < abiInfo.NumSegments; i++) { - const ABIPassingSegment& segment = abiInfo.Segments[i]; + const ABIPassingSegment& segment = abiInfo.Segment(i); if (regNumIndex < MAX_ARG_REG_COUNT) { @@ -2236,7 +2235,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call assert(abiInfo.NumSegments == 1); // This is a stack argument m_hasStackArgs = true; - const ABIPassingSegment& segment = abiInfo.Segments[0]; + const ABIPassingSegment& segment = abiInfo.Segment(0); arg.AbiInfo.SetRegNum(0, REG_STK); arg.AbiInfo.ByteOffset = segment.GetStackOffset(); } From fe7d665f90437e7429359a5a3073dfd03154b280 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 20 Jun 2024 11:53:04 +0200 Subject: [PATCH 14/16] Fix RISC-V build --- src/coreclr/jit/morph.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f3ac5dba57c8e..b85f7ea0e9a7e 100644 --- a/src/coreclr/jit/morph.cpp +++ 
b/src/coreclr/jit/morph.cpp @@ -2143,12 +2143,12 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call else { assert(arg.NewAbiInfo.NumSegments == 1); - structBaseType = arg.NewAbiInfo.Segments[0].GetRegisterType(); + structBaseType = arg.NewAbiInfo.Segment(0).GetRegisterType(); } for (unsigned i = 0; i < arg.NewAbiInfo.NumSegments; ++i) { - arg.AbiInfo.StructFloatFieldType[i] = arg.NewAbiInfo.Segments[i].GetRegisterType(); + arg.AbiInfo.StructFloatFieldType[i] = arg.NewAbiInfo.Segment(i).GetRegisterType(); } } #endif // defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) From b8595f00180f7240c747e34e4948df21d107a557 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 20 Jun 2024 12:03:41 +0200 Subject: [PATCH 15/16] Copy new ABI info in CallArgs::InternalCopyFrom --- src/coreclr/jit/gentree.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 8ba9c58b1cde0..20ed1cc15eef2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9764,6 +9764,7 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co carg->m_isTmp = arg.m_isTmp; carg->m_processed = arg.m_processed; carg->AbiInfo = arg.AbiInfo; + carg->NewAbiInfo = arg.NewAbiInfo; *tail = carg; tail = &carg->m_next; } From 46e461b09e0411f39c9d81543817e60a13d0fd9b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 20 Jun 2024 12:43:33 +0200 Subject: [PATCH 16/16] Fix HasAnyFloatingRegisterSegment for new representation, rename private fields --- src/coreclr/jit/abi.cpp | 14 +++++++------- src/coreclr/jit/abi.h | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index f5615b820f3b6..3eb3c853e41a9 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -206,7 +206,7 @@ ABIPassingInformation::ABIPassingInformation(Compiler* comp, unsigned numSegment if (numSegments > 1) { - 
Segments = new (comp, CMK_ABI) ABIPassingSegment[numSegments]; + m_segments = new (comp, CMK_ABI) ABIPassingSegment[numSegments]; } } @@ -225,10 +225,10 @@ const ABIPassingSegment& ABIPassingInformation::Segment(unsigned index) const assert(index < NumSegments); if (NumSegments == 1) { - return SingleSegment; + return m_singleSegment; } - return Segments[index]; + return m_segments[index]; } //----------------------------------------------------------------------------- @@ -276,7 +276,7 @@ bool ABIPassingInformation::HasAnyFloatingRegisterSegment() const { for (unsigned i = 0; i < NumSegments; i++) { - if (Segments[i].IsPassedInRegister() && genIsValidFloatReg(Segments[i].GetRegister())) + if (Segment(i).IsPassedInRegister() && genIsValidFloatReg(Segment(i).GetRegister())) { return true; } @@ -367,8 +367,8 @@ bool ABIPassingInformation::IsSplitAcrossRegistersAndStack() const ABIPassingInformation ABIPassingInformation::FromSegment(Compiler* comp, const ABIPassingSegment& segment) { ABIPassingInformation info; - info.NumSegments = 1; - info.SingleSegment = segment; + info.NumSegments = 1; + info.m_singleSegment = segment; return info; } @@ -390,7 +390,7 @@ ABIPassingInformation ABIPassingInformation::FromSegments(Compiler* { ABIPassingInformation info; info.NumSegments = 2; - info.Segments = new (comp, CMK_ABI) ABIPassingSegment[2]{firstSegment, secondSegment}; + info.m_segments = new (comp, CMK_ABI) ABIPassingSegment[2]{firstSegment, secondSegment}; return info; } diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 4bb7e926b38a3..95c62e0b9f9c0 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -45,8 +45,8 @@ struct ABIPassingInformation private: union { - ABIPassingSegment* Segments; - ABIPassingSegment SingleSegment; + ABIPassingSegment* m_segments; + ABIPassingSegment m_singleSegment; }; public: