Skip to content

Commit

Permalink
Fix compFastTailCalls check.
Browse files Browse the repository at this point in the history
compFastTailCalls is true if COMPlus_FastTailCalls is not 0.
It was introduced in #341 to help with testing:
setting COMPlus_FastTailCalls to 0 forces all tail-prefixed
calls to be dispatched via helpers.

This fixes a subtle bug with checking compFastTailCalls:
it needs to be checked after `fgInitargInfo` has been called in
`fgCanFastTailCall`. `fgInitArgInfo` adds the stub address for VSD calls
and `fgMorphTailCallViaHelpers` then removes it.

This change also factors out the logic for checking whether the call
has byref parameters that must be copied by the caller.
  • Loading branch information
erozenfeld committed Apr 29, 2020
1 parent e1ffadd commit 9960d15
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 150 deletions.
3 changes: 3 additions & 0 deletions src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5450,6 +5450,9 @@ class Compiler
private:
GenTree* fgMorphField(GenTree* tree, MorphAddrContext* mac);
bool fgCanFastTailCall(GenTreeCall* call, const char** failReason);
#if FEATURE_FASTTAILCALL
bool fgCallHasMustCopyByrefParameter(GenTreeCall* callee);
#endif
bool fgCheckStmtAfterTailCall();
GenTree* fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL_HELPERS& help);
bool fgCanTailCallViaJitHelper();
Expand Down
325 changes: 175 additions & 150 deletions src/coreclr/src/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6670,15 +6670,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
{
#if FEATURE_FASTTAILCALL

if (!opts.compFastTailCalls)
{
if (failReason)
{
*failReason = "Configuration doesn't allow fast tail calls";
}
return false;
}

// To reach here means that the return types of the caller and callee are tail call compatible.
// In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
CLANG_FORMAT_COMMENT_ANCHOR;
Expand All @@ -6694,17 +6685,182 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
assert(!callee->AreArgsComplete());

fgInitArgInfo(callee);

fgArgInfo* argInfo = callee->fgArgInfo;

bool hasMustCopyByrefParameter = false;
size_t calleeArgStackSize = 0;
size_t callerArgStackSize = info.compArgStackSize;
size_t calleeArgStackSize = 0;
size_t callerArgStackSize = info.compArgStackSize;

for (unsigned index = 0; index < argInfo->ArgCount(); ++index)
{
fgArgTabEntry* arg = argInfo->GetArgEntry(index, false);

calleeArgStackSize += arg->stackSize();
}

auto reportFastTailCallDecision = [&](const char* thisFailReason) {
if (failReason != nullptr)
{
*failReason = thisFailReason;
}

#ifdef DEBUG
if ((JitConfig.JitReportFastTailCallDecisions()) == 1)
{
if (callee->gtCallType != CT_INDIRECT)
{
const char* methodName;

methodName = eeGetMethodFullName(callee->gtCallMethHnd);

printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ",
info.compFullName, methodName);
}
else
{
printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- "
"Decision: ",
info.compFullName);
}

if (thisFailReason == nullptr)
{
printf("Will fast tailcall");
}
else
{
printf("Will not fast tailcall (%s)", thisFailReason);
}

printf(" (CallerArgStackSize: %d, CalleeArgStackSize: %d)\n\n", callerArgStackSize, calleeArgStackSize);
}
else
{
if (thisFailReason == nullptr)
{
JITDUMP("[Fast tailcall decision]: Will fast tailcall\n");
}
else
{
JITDUMP("[Fast tailcall decision]: Will not fast tailcall (%s)\n", thisFailReason);
}
}
#endif // DEBUG
};

if (!opts.compFastTailCalls)
{
reportFastTailCallDecision("Configuration doesn't allow fast tail calls");
return false;
}

// Note on vararg methods:
// If the caller is vararg method, we don't know the number of arguments passed by caller's caller.
// But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its
// fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
// out-going area required for callee is bounded by caller's fixed argument space.
//
// Note that callee being a vararg method is not a problem since we can account the params being passed.
//
// We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg
// method. This is due to the ABI differences for native vararg methods for these platforms. There is
// work required to shuffle arguments to the correct locations.
CLANG_FORMAT_COMMENT_ANCHOR;

#if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64))
if (info.compIsVarArgs || callee->IsVarargs())
{
reportFastTailCallDecision("Fast tail calls with varargs not supported on Windows ARM/ARM64");
return false;
}
#endif // (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || defined(TARGET_WINDOWS) && defined(TARGET_ARM64))

if (compLocallocUsed)
{
reportFastTailCallDecision("Localloc used");
return false;
}

#ifdef TARGET_AMD64
// Needed for Jit64 compat.
// In future, enabling fast tail calls from methods that need GS cookie
// check would require codegen side work to emit GS cookie check before a
// tail call.
if (getNeedsGSSecurityCookie())
{
reportFastTailCallDecision("GS Security cookie check required");
return false;
}
#endif

// If the NextCallReturnAddress intrinsic is used we should do normal calls.
if (info.compHasNextCallRetAddr)
{
reportFastTailCallDecision("Uses NextCallReturnAddress intrinsic");
return false;
}

if (callee->HasRetBufArg()) // RetBuf
{
// If callee has RetBuf param, caller too must have it.
// Otherwise go the slow route.
if (info.compRetBuffArg == BAD_VAR_NUM)
{
reportFastTailCallDecision("Callee has RetBuf but caller does not.");
return false;
}
}

// For a fast tail call the caller will use its incoming arg stack space to place
// arguments, so if the callee requires more arg stack space than is available here
// the fast tail call cannot be performed. This is common to all platforms.
// Note that the GC'ness of on stack args need not match since the arg setup area is marked
// as non-interruptible for fast tail calls.
if (calleeArgStackSize > callerArgStackSize)
{
reportFastTailCallDecision("Not enough incoming arg space");
return false;
}

// For Windows some struct parameters are copied on the local frame
// and then passed by reference. We cannot fast tail call in these situation
// as we need to keep our frame around.
if (fgCallHasMustCopyByrefParameter(callee))
{
reportFastTailCallDecision("Callee has a byref parameter");
return false;
}

reportFastTailCallDecision(nullptr);
return true;
#else // FEATURE_FASTTAILCALL
if (failReason)
*failReason = "Fast tailcalls are not supported on this platform";
return false;
#endif
}

//------------------------------------------------------------------------
// fgCallHasMustCopyByrefParameter: Check to see if this call has a byref parameter that
// requires a struct copy in the caller.
//
// Arguments:
// callee - The callee to check
//
// Return Value:
// Returns true or false based on whether this call has a byref parameter that
// requires a struct copy in the caller.

#if FEATURE_FASTTAILCALL
bool Compiler::fgCallHasMustCopyByrefParameter(GenTreeCall* callee)
{
fgArgInfo* argInfo = callee->fgArgInfo;

bool hasMustCopyByrefParameter = false;

for (unsigned index = 0; index < argInfo->ArgCount(); ++index)
{
fgArgTabEntry* arg = argInfo->GetArgEntry(index, false);

if (arg->isStruct)
{
Expand All @@ -6715,7 +6871,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
hasMustCopyByrefParameter = true;

// If we're optimizing, we may be able to pass our caller's byref to our callee,
// and so still be able to tail call.
// and so still be able to avoid a struct copy.
if (opts.OptimizationEnabled())
{
// First, see if this arg is an implicit byref param.
Expand Down Expand Up @@ -6921,148 +7077,16 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)

if (hasMustCopyByrefParameter)
{
// This arg blocks the tail call. No reason to keep scanning the remaining args.
// This arg requires a struct copy. No reason to keep scanning the remaining args.
break;
}
}
}
}

auto reportFastTailCallDecision = [&](const char* thisFailReason) {
if (failReason != nullptr)
{
*failReason = thisFailReason;
}

#ifdef DEBUG
if ((JitConfig.JitReportFastTailCallDecisions()) == 1)
{
if (callee->gtCallType != CT_INDIRECT)
{
const char* methodName;

methodName = eeGetMethodFullName(callee->gtCallMethHnd);

printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ",
info.compFullName, methodName);
}
else
{
printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- "
"Decision: ",
info.compFullName);
}

if (thisFailReason == nullptr)
{
printf("Will fast tailcall");
}
else
{
printf("Will not fast tailcall (%s)", thisFailReason);
}

printf(" (CallerArgStackSize: %d, CalleeArgStackSize: %d)\n\n", callerArgStackSize, calleeArgStackSize);
}
else
{
if (thisFailReason == nullptr)
{
JITDUMP("[Fast tailcall decision]: Will fast tailcall\n");
}
else
{
JITDUMP("[Fast tailcall decision]: Will not fast tailcall (%s)\n", thisFailReason);
}
}
#endif // DEBUG
};

// Note on vararg methods:
// If the caller is vararg method, we don't know the number of arguments passed by caller's caller.
// But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its
// fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
// out-going area required for callee is bounded by caller's fixed argument space.
//
// Note that callee being a vararg method is not a problem since we can account the params being passed.
//
// We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg
// method. This is due to the ABI differences for native vararg methods for these platforms. There is
// work required to shuffle arguments to the correct locations.
CLANG_FORMAT_COMMENT_ANCHOR;

#if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64))
if (info.compIsVarArgs || callee->IsVarargs())
{
reportFastTailCallDecision("Fast tail calls with varargs not supported on Windows ARM/ARM64");
return false;
}
#endif // (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || defined(TARGET_WINDOWS) && defined(TARGET_ARM64))

if (compLocallocUsed)
{
reportFastTailCallDecision("Localloc used");
return false;
}

#ifdef TARGET_AMD64
// Needed for Jit64 compat.
// In future, enabling fast tail calls from methods that need GS cookie
// check would require codegen side work to emit GS cookie check before a
// tail call.
if (getNeedsGSSecurityCookie())
{
reportFastTailCallDecision("GS Security cookie check required");
return false;
}
#endif

// If the NextCallReturnAddress intrinsic is used we should do normal calls.
if (info.compHasNextCallRetAddr)
{
reportFastTailCallDecision("Uses NextCallReturnAddress intrinsic");
return false;
}

if (callee->HasRetBufArg()) // RetBuf
{
// If callee has RetBuf param, caller too must have it.
// Otherwise go the slow route.
if (info.compRetBuffArg == BAD_VAR_NUM)
{
reportFastTailCallDecision("Callee has RetBuf but caller does not.");
return false;
}
}

// For Windows some struct parameters are copied on the local frame
// and then passed by reference. We cannot fast tail call in these situation
// as we need to keep our frame around.
if (hasMustCopyByrefParameter)
{
reportFastTailCallDecision("Callee has a byref parameter");
return false;
}

// For a fast tail call the caller will use its incoming arg stack space to place
// arguments, so if the callee requires more arg stack space than is available here
// the fast tail call cannot be performed. This is common to all platforms.
// Note that the GC'ness of on stack args need not match since the arg setup area is marked
// as non-interruptible for fast tail calls.
if (calleeArgStackSize > callerArgStackSize)
{
reportFastTailCallDecision("Not enough incoming arg space");
return false;
}

reportFastTailCallDecision(nullptr);
return true;
#else // FEATURE_FASTTAILCALL
if (failReason)
*failReason = "Fast tailcalls are not supported on this platform";
return false;
#endif
return hasMustCopyByrefParameter;
}
#endif

//------------------------------------------------------------------------
// fgMorphPotentialTailCall: Attempt to morph a call that the importer has
Expand Down Expand Up @@ -7644,8 +7668,9 @@ GenTree* Compiler::fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL
if (call->IsVirtualStub())
{
JITDUMP("This is a VSD\n");
// X86/ARM32 do not include the stub arg in the arg list.
#if !defined(TARGET_X86) && !defined(TARGET_ARM)
#if FEATURE_FASTTAILCALL
// fgInitArgInfo has been called from fgCanFastTailCall and it added the stub address
// to the arg list. Remove it now.
call->gtCallArgs = call->gtCallArgs->GetNext();
// We changed args so recompute info.
call->fgArgInfo = nullptr;
Expand Down

0 comments on commit 9960d15

Please sign in to comment.