-
Notifications
You must be signed in to change notification settings - Fork 2.7k
[RyuJIT/ARM32] Enabling fast tail call feature #14056
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7419,6 +7419,19 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) | |
++nCalleeArgs; | ||
assert(args->OperIsList()); | ||
GenTreePtr argx = args->gtOp.gtOp1; | ||
#ifdef _TARGET_ARM_ | ||
unsigned argAlign = 1; | ||
codeGen->InferOpSizeAlign(argx, &argAlign); | ||
|
||
argAlign = roundUp(argAlign, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; | ||
|
||
// We don't care float register because we will not use fast tailcall | ||
// for callee method using float register | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: Do not track floating-point registers for arm32. It is NYI (not yet implemented). |
||
if (calleeArgRegCount % argAlign != 0) | ||
{ | ||
calleeArgRegCount++; | ||
} | ||
#endif | ||
|
||
if (varTypeIsStruct(argx)) | ||
{ | ||
|
@@ -7441,8 +7454,9 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) | |
} | ||
if (objClass != nullptr) | ||
{ | ||
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) | ||
#if defined(_TARGET_AMD64_) || defined(_TARGET_ARMARCH_) | ||
|
||
#ifndef _TARGET_ARM_ | ||
// hasMultiByteStackArgs will determine if the struct can be passed | ||
// in registers. If it cannot we will break the loop and not | ||
// fastTailCall. This is an implementation limitation | ||
|
@@ -7451,6 +7465,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) | |
unsigned typeSize = 0; | ||
hasMultiByteStackArgs = hasMultiByteStackArgs || | ||
!VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false); | ||
#endif // !_TARGET_ARM_ | ||
|
||
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) | ||
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; | ||
|
@@ -7517,6 +7532,45 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) | |
calleeArgRegCount += size; | ||
} | ||
|
||
#elif defined(_TARGET_ARM_) // ARM | ||
var_types hfaType = GetHfaType(argx); | ||
bool isHfaArg = varTypeIsFloating(hfaType); | ||
size_t size = 1; | ||
|
||
if (isHfaArg) | ||
{ | ||
reportFastTailCallDecision("Callee uses float register arguments.", 0, 0); | ||
return false; | ||
} | ||
else | ||
{ | ||
size = (unsigned)(roundUp(info.compCompHnd->getClassSize(objClass), TARGET_POINTER_SIZE)) / | ||
TARGET_POINTER_SIZE; | ||
// We cannot handle split struct yet | ||
// TODO: Fix to calculate exact count | ||
if ((calleeArgRegCount < MAX_REG_ARG) && (size + calleeArgRegCount > MAX_REG_ARG)) | ||
{ | ||
reportFastTailCallDecision("Callee uses split struct argument.", 0, 0); | ||
return false; | ||
} | ||
|
||
if (size > 1) | ||
{ | ||
// hasTwoSlotSizedStruct will determine if the struct value can be passed multiple slot. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: there are two spaces between "passed" and "multiple". |
||
// We set hasTwoSlotSizedStruct if size > 1 because all struct are passed by value on ARM32. | ||
hasTwoSlotSizedStruct = true; | ||
if (calleeArgRegCount >= MAX_REG_ARG) | ||
{ | ||
// hasMultiByteStackArgs will determine if the struct can be passed | ||
// in registers. If it cannot we will break the loop and not | ||
// fastTailCall. This is an implementation limitation | ||
// where the callee only is checked for non enregisterable structs. | ||
// It is tracked with https://github.com/dotnet/coreclr/issues/12644. | ||
hasMultiByteStackArgs = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems to be using hasMultiByteStackArgs in an undesired way, as this struct can be passed in registers; there are just more callee arguments and we have to spill. Plus, I am not a big fan of the original hasMultiByteStackArgs code path, and I would like to avoid building on it. I think the preferred way of dealing with the case of calleeArgRegCount >= MAX_REG_ARG && hasTwoSlotSizedStruct is here: https://github.com/dotnet/coreclr/blob/master/src/jit/morph.cpp#L7524. |
||
} | ||
} | ||
calleeArgRegCount += size; | ||
} | ||
#elif defined(WINDOWS_AMD64_ABI) | ||
|
||
++calleeArgRegCount; | ||
|
@@ -7526,7 +7580,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) | |
#else | ||
assert(!"Target platform ABI rules regarding passing struct type args in registers"); | ||
unreached(); | ||
#endif //_TARGET_AMD64_ || _TARGET_ARM64_ | ||
#endif //_TARGET_AMD64_ || _TARGET_ARMARCH_ | ||
} | ||
else | ||
{ | ||
|
@@ -7535,7 +7589,17 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) | |
} | ||
else | ||
{ | ||
#ifdef _TARGET_ARM_ | ||
if (varTypeIsFloating(argx)) | ||
{ | ||
return false; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add logging to this return using reportFastTailCallDecision. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, adding some sort of NYI or a comment here explaining why this returns false would be nice. |
||
} | ||
unsigned size = genTypeStSz(argx->gtType); | ||
|
||
varTypeIsFloating(argx) ? calleeFloatArgRegCount += size : calleeArgRegCount += size; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems better to just have this as:
|
||
#else // !_TARGET_ARM_ | ||
varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount; | ||
#endif // !_TARGET_ARM_ | ||
} | ||
|
||
// We can break early on multiByte cases. | ||
|
@@ -7587,7 +7651,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) | |
return false; | ||
} | ||
|
||
#elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) | ||
#elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARMARCH_) | ||
|
||
// For *nix Amd64 and Arm64 check to see if all arguments for the callee | ||
// and caller are passing in registers. If not, ensure that the outgoing argument stack size | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems like it is worth tracking fastTailCall support for callee functions with floating-point arguments. Is there an issue to track adding support for this later?
In addition, what is the rationale for not implementing it now?