diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index bf0412dd6642..4ec13f719420 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -9676,7 +9676,12 @@ void CodeGen::genFnEpilog(BasicBlock* block) // Call target = REG_FASTTAILCALL_TARGET // https://github.com/dotnet/coreclr/issues/4827 // Do we need a special encoding for stack walker like rex.w prefix for x64? + CLANG_FORMAT_COMMENT_ANCHOR; +#ifdef _TARGET_ARM_ + getEmitter()->emitIns_R(INS_bx, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET); +#else // !_TARGET_ARM_ getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET); +#endif // !_TARGET_ARM_ } #endif // FEATURE_FASTTAILCALL } diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index ebd1894cca6f..f3fd89000ff7 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -673,7 +673,7 @@ inline bool isRegParamType(var_types type) #endif // !_TARGET_X86_ } -#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) +#if defined(_TARGET_AMD64_) || defined(_TARGET_ARMARCH_) /*****************************************************************************/ // Returns true if 'type' is a struct that can be enregistered for call args // or can be returned by value in multiple registers. 
@@ -724,7 +724,7 @@ inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type, return result; } -#endif //_TARGET_AMD64_ || _TARGET_ARM64_ +#endif // _TARGET_AMD64_ || _TARGET_ARMARCH_ /*****************************************************************************/ diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 84c7f61aad7d..9c055563dac8 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -6708,7 +6708,7 @@ bool Compiler::impTailCallRetTypeCompatible(var_types callerRetType, return true; } -#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) +#if defined(_TARGET_AMD64_) || defined(_TARGET_ARMARCH_) // Jit64 compat: if (callerRetType == TYP_VOID) { @@ -6738,7 +6738,7 @@ bool Compiler::impTailCallRetTypeCompatible(var_types callerRetType, { return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize); } -#endif // _TARGET_AMD64_ || _TARGET_ARM64_ +#endif // _TARGET_AMD64_ || _TARGET_ARMARCH_ return false; } diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index bc2470c71f7c..15454b592fbe 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -948,6 +948,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) #endif // _TARGET_XXX_ #if FEATURE_FASTTAILCALL +#ifdef _TARGET_ARM_ + if ((varDscInfo->stackArgSize / TARGET_POINTER_SIZE) % cAlign != 0) + { + varDscInfo->stackArgSize += TARGET_POINTER_SIZE; + } +#endif // _TARGET_ARM_ varDscInfo->stackArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE); #endif // FEATURE_FASTTAILCALL } @@ -3571,7 +3577,7 @@ var_types LclVarDsc::lvaArgType() } } #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING -#elif defined(_TARGET_ARM64_) +#elif defined(_TARGET_ARMARCH_) if (type == TYP_STRUCT) { NYI("lvaArgType"); diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index f2118bf5a931..55abe884ed84 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -1027,7 +1027,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, 
fgArgTabEntryP if (info->isSplit) { assert(arg->OperGet() == GT_OBJ || arg->OperGet() == GT_FIELD_LIST); - // TODO: Need to check correctness for FastTailCall if (call->IsFastTailCall()) { NYI_ARM("lower: struct argument by fast tail call"); diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index f85bafb19af6..e3d4c283788b 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7419,6 +7419,19 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) ++nCalleeArgs; assert(args->OperIsList()); GenTreePtr argx = args->gtOp.gtOp1; +#ifdef _TARGET_ARM_ + unsigned argAlign = 1; + codeGen->InferOpSizeAlign(argx, &argAlign); + + argAlign = roundUp(argAlign, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; + + // Float registers are not tracked here: a callee that takes float register + // arguments is rejected for fast tail call further down in this loop. + if (calleeArgRegCount % argAlign != 0) + { + calleeArgRegCount++; + } +#endif // _TARGET_ARM_ if (varTypeIsStruct(argx)) { @@ -7441,8 +7454,9 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } if (objClass != nullptr) { -#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) +#if defined(_TARGET_AMD64_) || defined(_TARGET_ARMARCH_) +#ifndef _TARGET_ARM_ // hasMultiByteStackArgs will determine if the struct can be passed // in registers. If it cannot we will break the loop and not // fastTailCall. 
This is an implementation limitation @@ -7451,6 +7465,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) unsigned typeSize = 0; hasMultiByteStackArgs = hasMultiByteStackArgs || !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false); +#endif // !_TARGET_ARM_ #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; @@ -7517,6 +7532,45 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) calleeArgRegCount += size; } +#elif defined(_TARGET_ARM_) // ARM + var_types hfaType = GetHfaType(argx); + bool isHfaArg = varTypeIsFloating(hfaType); + size_t size = 1; + + if (isHfaArg) + { + reportFastTailCallDecision("Callee uses float register arguments.", 0, 0); + return false; + } + else + { + size = (unsigned)(roundUp(info.compCompHnd->getClassSize(objClass), TARGET_POINTER_SIZE)) / + TARGET_POINTER_SIZE; + // A struct that starts in the argument registers but does not fit in them (a split struct) is not handled yet. + // TODO: Calculate the exact count. + if ((calleeArgRegCount < MAX_REG_ARG) && (size + calleeArgRegCount > MAX_REG_ARG)) + { + reportFastTailCallDecision("Callee uses split struct argument.", 0, 0); + return false; + } + + if (size > 1) + { + // hasTwoSlotSizedStruct records that a struct argument takes more than one slot. + // It is set whenever size > 1 because all structs are passed by value on ARM32. + hasTwoSlotSizedStruct = true; + if (calleeArgRegCount >= MAX_REG_ARG) + { + // hasMultiByteStackArgs will determine if the struct can be passed + // in registers. If it cannot we will break the loop and not + // fastTailCall. This is an implementation limitation + // where the callee only is checked for non enregisterable structs. + // It is tracked with https://github.com/dotnet/coreclr/issues/12644. 
+ hasMultiByteStackArgs = true; + } + } + calleeArgRegCount += size; + } #elif defined(WINDOWS_AMD64_ABI) ++calleeArgRegCount; @@ -7526,7 +7580,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) #else assert(!"Target platform ABI rules regarding passing struct type args in registers"); unreached(); -#endif //_TARGET_AMD64_ || _TARGET_ARM64_ +#endif //_TARGET_AMD64_ || _TARGET_ARMARCH_ } else { @@ -7535,7 +7589,18 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } else { +#ifdef _TARGET_ARM_ + if (varTypeIsFloating(argx)) + { + reportFastTailCallDecision("Callee uses float register arguments.", 0, 0); + return false; + } + + // Floating-point arguments were rejected above, so only integer argument registers are counted. + calleeArgRegCount += genTypeStSz(argx->gtType); +#else // !_TARGET_ARM_ varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount; +#endif // !_TARGET_ARM_ } // We can break early on multiByte cases. @@ -7587,7 +7652,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) return false; } -#elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) +#elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARMARCH_) // For *nix Amd64 and Arm64 check to see if all arguments for the callee // and caller are passing in registers. 
If not, ensure that the outgoing argument stack size diff --git a/src/jit/target.h b/src/jit/target.h index 1753f440e9f4..92fb0eadb3d5 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -1191,7 +1191,11 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers +#ifdef LEGACY_BACKEND #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp +#else // !LEGACY_BACKEND + #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp +#endif // !LEGACY_BACKEND #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register (including HFA support)