-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NativeAOT/ARM64] Generate frames compatible with Apple compact unwinding #107766
Changes from all commits
a39270e
38eea0e
5e3182f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5644,7 +5644,9 @@ void Compiler::lvaFixVirtualFrameOffsets() | |
#endif | ||
|
||
// The delta to be added to virtual offset to adjust it relative to frame pointer or SP | ||
int delta = 0; | ||
int delta = 0; | ||
int frameLocalsDelta = 0; | ||
int frameBoundary = 0; | ||
|
||
#ifdef TARGET_XARCH | ||
delta += REGSIZE_BYTES; // pushed PC (return address) for x86/x64 | ||
|
@@ -5669,7 +5671,25 @@ void Compiler::lvaFixVirtualFrameOffsets() | |
// We set FP to be after LR, FP | ||
delta += 2 * REGSIZE_BYTES; | ||
} | ||
#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) | ||
#elif defined(TARGET_ARM64) | ||
else | ||
{ | ||
// FP is used. | ||
delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); | ||
|
||
// If we placed FP/LR at the bottom of the frame we need to shift all the variables | ||
// on the new frame to account for it. See lvaAssignVirtualFrameOffsetsToLocals. | ||
if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters()) | ||
{ | ||
// We set FP to be after LR, FP | ||
frameLocalsDelta = 2 * REGSIZE_BYTES; | ||
frameBoundary = opts.IsOSR() ? -info.compPatchpointInfo->TotalFrameSize() : 0; | ||
if (info.compIsVarArgs) | ||
frameBoundary -= MAX_REG_ARG * REGSIZE_BYTES; | ||
} | ||
JITDUMP("--- delta bump %d for FP frame, %d inside frame for FP/LR relocation\n", delta, frameLocalsDelta); | ||
} | ||
#elif defined(TARGET_AMD64) | ||
else | ||
{ | ||
// FP is used. | ||
|
@@ -5737,7 +5757,7 @@ void Compiler::lvaFixVirtualFrameOffsets() | |
|
||
#if defined(TARGET_X86) | ||
// On x86, we set the stack offset for a promoted field | ||
// to match a struct parameter in lvAssignFrameOffsetsToPromotedStructs. | ||
// to match a struct parameter in lvaAssignFrameOffsetsToPromotedStructs. | ||
if ((!varDsc->lvIsParam || parentvarDsc->lvIsParam) && promotionType == PROMOTION_TYPE_DEPENDENT) | ||
#else | ||
if (!varDsc->lvIsParam && promotionType == PROMOTION_TYPE_DEPENDENT) | ||
|
@@ -5757,15 +5777,23 @@ void Compiler::lvaFixVirtualFrameOffsets() | |
|
||
if (doAssignStkOffs) | ||
{ | ||
JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta); | ||
varDsc->SetStackOffset(varDsc->GetStackOffset() + delta); | ||
int localDelta = delta; | ||
|
||
if (frameLocalsDelta != 0 && varDsc->GetStackOffset() < frameBoundary) | ||
{ | ||
localDelta += frameLocalsDelta; | ||
} | ||
|
||
JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), | ||
varDsc->GetStackOffset() + localDelta); | ||
varDsc->SetStackOffset(varDsc->GetStackOffset() + localDelta); | ||
|
||
#if DOUBLE_ALIGN | ||
if (genDoubleAlign() && !codeGen->isFramePointerUsed()) | ||
{ | ||
if (varDsc->lvFramePointerBased) | ||
{ | ||
varDsc->SetStackOffset(varDsc->GetStackOffset() - delta); | ||
varDsc->SetStackOffset(varDsc->GetStackOffset() - localDelta); | ||
|
||
// We need to re-adjust the offsets of the parameters so they are EBP | ||
// relative rather than stack/frame pointer relative | ||
|
@@ -5787,9 +5815,13 @@ void Compiler::lvaFixVirtualFrameOffsets() | |
assert(codeGen->regSet.tmpAllFree()); | ||
for (TempDsc* temp = codeGen->regSet.tmpListBeg(); temp != nullptr; temp = codeGen->regSet.tmpListNxt(temp)) | ||
{ | ||
temp->tdAdjustTempOffs(delta); | ||
temp->tdAdjustTempOffs(delta + frameLocalsDelta); | ||
} | ||
|
||
if (lvaCachedGenericContextArgOffs < frameBoundary) | ||
{ | ||
lvaCachedGenericContextArgOffs += frameLocalsDelta; | ||
} | ||
lvaCachedGenericContextArgOffs += delta; | ||
|
||
#if FEATURE_FIXED_OUT_ARGS | ||
|
@@ -6045,30 +6077,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() | |
codeGen->setFramePointerUsed(codeGen->isFramePointerRequired()); | ||
} | ||
|
||
#ifdef TARGET_ARM64 | ||
// Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is | ||
// a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we | ||
// need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value, | ||
// and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the | ||
// frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress. | ||
// (It should be legal to use these frame types for every frame). | ||
|
||
if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 0) | ||
{ | ||
// Default configuration | ||
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || | ||
opts.compDbgEnC || compStressCompile(STRESS_GENERIC_VARN, 20)); | ||
} | ||
else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1) | ||
{ | ||
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames | ||
} | ||
else if ((opts.compJitSaveFpLrWithCalleeSavedRegisters == 2) || (opts.compJitSaveFpLrWithCalleeSavedRegisters == 3)) | ||
{ | ||
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames | ||
} | ||
#endif // TARGET_ARM64 | ||
|
||
#ifdef TARGET_XARCH | ||
// On x86/amd64, the return address has already been pushed by the call instruction in the caller. | ||
stkOffs -= TARGET_POINTER_SIZE; // return address; | ||
|
@@ -6117,9 +6125,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() | |
#endif // !TARGET_ARM | ||
|
||
#ifdef TARGET_ARM64 | ||
// If the frame pointer is used, then we'll save FP/LR at the bottom of the stack. | ||
// Otherwise, we won't store FP, and we'll store LR at the top, with the other callee-save | ||
// registers (if any). | ||
// If the frame pointer is used, then we'll save FP/LR either at the bottom of the stack | ||
// or at the top of the stack depending on frame type. We make the decision after assigning | ||
// the variables on the frame and then fix up the offsets in lvaFixVirtualFrameOffsets. | ||
// For now, we proceed as if FP/LR were saved with the callee registers. If we later | ||
// decide to move the FP/LR to the bottom of the frame it shifts all the assigned | ||
// variables and temporaries by 16 bytes. The largest alignment we currently make is 16 | ||
// bytes for SIMD. | ||
|
||
int initialStkOffs = 0; | ||
if (info.compIsVarArgs) | ||
|
@@ -6130,17 +6142,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() | |
stkOffs -= initialStkOffs; | ||
} | ||
|
||
if (codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() || !isFramePointerUsed()) // Note that currently we always have | ||
// a frame pointer | ||
{ | ||
stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; | ||
} | ||
else | ||
{ | ||
// Subtract off FP and LR. | ||
assert(compCalleeRegsPushed >= 2); | ||
stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; | ||
} | ||
stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; | ||
|
||
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) | ||
|
||
|
@@ -6810,15 +6812,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() | |
} | ||
#endif // TARGET_AMD64 | ||
|
||
#ifdef TARGET_ARM64 | ||
if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() && isFramePointerUsed()) // Note that currently we always have | ||
// a frame pointer | ||
{ | ||
// Create space for saving FP and LR. | ||
stkOffs -= 2 * REGSIZE_BYTES; | ||
} | ||
#endif // TARGET_ARM64 | ||
|
||
#if FEATURE_FIXED_OUT_ARGS | ||
if (lvaOutgoingArgSpaceSize > 0) | ||
{ | ||
|
@@ -6856,6 +6849,44 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() | |
|
||
noway_assert(compLclFrameSize + originalFrameSize == | ||
(unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE))); | ||
|
||
#ifdef TARGET_ARM64 | ||
// Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is | ||
// a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we | ||
// need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value, | ||
// and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the | ||
// frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress. | ||
// (It should be legal to use these frame types for every frame). | ||
// | ||
// For Apple NativeAOT ABI we try to save the FP/LR registers on top to get canonical frame layout that can | ||
// be represented with compact unwinding information. In order to maintain code quality we only do it when | ||
// we can use SP-based addressing (!isFramePointerRequired) through lvaFrameAddress optimization, or if the | ||
// whole frame is small enough that the negative FP-based addressing can address the whole frame. | ||
|
||
if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 0) | ||
{ | ||
if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsApplePlatform && | ||
(!codeGen->isFramePointerRequired() || codeGen->genTotalFrameSize() < 0x100)) | ||
{ | ||
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); | ||
} | ||
else | ||
{ | ||
// Default configuration | ||
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's just a moved code from here with unchanged condition. I think it was intentional.
|
||
opts.compDbgEnC || | ||
compStressCompile(Compiler::STRESS_GENERIC_VARN, 20)); | ||
} | ||
} | ||
else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1) | ||
{ | ||
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames | ||
} | ||
else if ((opts.compJitSaveFpLrWithCalleeSavedRegisters == 2) || (opts.compJitSaveFpLrWithCalleeSavedRegisters == 3)) | ||
{ | ||
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames | ||
} | ||
#endif // TARGET_ARM64 | ||
} | ||
|
||
//------------------------------------------------------------------------ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you might want to consider to have
#ifdef
for NativeAOT to see if that erases any TP regression that we are seeing now:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure how an
#ifdef
could be used here. There's a single universal build of the ARM64 Unix JIT and it's the same for Crossgen2 and ILC.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I meant
compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI)
checkThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd expect that to make things worse than checking a single [more local] boolean. If this is a concern we can try to manually duplicate the loop and move the check outside of the loop.