Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NativeAOT/ARM64] Generate frames compatible with Apple compact unwinding #107766

Merged
merged 3 commits into from
Jan 12, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
@@ -659,6 +659,7 @@ class CodeGen final : public CodeGenInterface
virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() const;
bool genSaveFpLrWithAllCalleeSavedRegisters;
bool genForceFuncletFrameType5;
bool genReverseAndPairCalleeSavedRegisters;
#endif // TARGET_ARM64

//-------------------------------------------------------------------------
46 changes: 38 additions & 8 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
@@ -845,12 +845,19 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i

for (int i = 0; i < regStack.Height(); ++i)
{
RegPair regPair = regStack.Bottom(i);
RegPair regPair = genReverseAndPairCalleeSavedRegisters ? regStack.Top(i) : regStack.Bottom(i);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to consider adding an #ifdef for NativeAOT to see if that eliminates the TP regression we are seeing now:

image

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure how an #ifdef could be used here. There's a single universal build of the ARM64 Unix JIT and it's the same for Crossgen2 and ILC.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) check

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd expect that to make things worse than checking a single [more local] boolean. If this is a concern we can try to manually duplicate the loop and move the check outside of the loop.

if (regPair.reg2 != REG_NA)
{
// We can use a STP instruction.
genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_IP0,
nullptr);
if (genReverseAndPairCalleeSavedRegisters)
{
genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr);
}
else
{
genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_IP0,
nullptr);
}

spOffset += 2 * slotSize;
}
@@ -926,8 +933,9 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe

// Save integer registers at higher addresses than floating-point registers.

regMaskTP maskSaveRegsFrame = regsToSaveMask & (RBM_FP | RBM_LR);
regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat & ~maskSaveRegsFrame;

if (maskSaveRegsFloat != RBM_NONE)
{
@@ -939,6 +947,13 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
if (maskSaveRegsInt != RBM_NONE)
{
genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset);
spDelta = 0;
lowestCalleeSavedOffset += genCountBits(maskSaveRegsInt) * FPSAVE_REGSIZE_BYTES;
}

if (maskSaveRegsFrame != RBM_NONE)
{
genPrologSaveRegPair(REG_FP, REG_LR, lowestCalleeSavedOffset, spDelta, false, REG_IP0, nullptr);
// No need to update spDelta, lowestCalleeSavedOffset since they're not used after this.
}
}
@@ -970,13 +985,20 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta
stackDelta = spDelta;
}

RegPair regPair = regStack.Top(i);
RegPair regPair = genReverseAndPairCalleeSavedRegisters ? regStack.Bottom(i) : regStack.Top(i);
if (regPair.reg2 != REG_NA)
{
spOffset -= 2 * slotSize;

genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, REG_IP1,
nullptr);
if (genReverseAndPairCalleeSavedRegisters)
{
genEpilogRestoreRegPair(regPair.reg2, regPair.reg1, spOffset, stackDelta, false, REG_IP1, nullptr);
}
else
{
genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair,
REG_IP1, nullptr);
}
}
else
{
@@ -1043,11 +1065,19 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in

// Save integer registers at higher addresses than floating-point registers.

regMaskTP maskRestoreRegsFrame = regsToRestoreMask & (RBM_FP | RBM_LR);
regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat & ~maskRestoreRegsFrame;

// Restore in the opposite order of saving.

if (maskRestoreRegsFrame != RBM_NONE)
{
int spFrameDelta = (maskRestoreRegsFloat != RBM_NONE || maskRestoreRegsInt != RBM_NONE) ? 0 : spDelta;
spOffset -= 2 * REGSIZE_BYTES;
genEpilogRestoreRegPair(REG_FP, REG_LR, spOffset, spFrameDelta, false, REG_IP1, nullptr);
}

if (maskRestoreRegsInt != RBM_NONE)
{
int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment?
24 changes: 24 additions & 0 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
@@ -255,6 +255,7 @@ CodeGen::CodeGen(Compiler* theCompiler)
#ifdef TARGET_ARM64
genSaveFpLrWithAllCalleeSavedRegisters = false;
genForceFuncletFrameType5 = false;
genReverseAndPairCalleeSavedRegisters = false;
#endif // TARGET_ARM64
}

@@ -4840,6 +4841,29 @@ void CodeGen::genFinalizeFrame()
}
#endif // TARGET_ARM

#ifdef TARGET_ARM64
if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsApplePlatform)
{
JITDUMP("Setting genReverseAndPairCalleeSavedRegisters = true");

genReverseAndPairCalleeSavedRegisters = true;

// Make sure we push the registers in pairs if possible. If we only allocate a contiguous
// block of registers this should add at most one integer and at most one floating point
// register to the list. The stack has to be 16-byte aligned, so in worst case it results
// in allocating 16 bytes more space on stack if odd number of integer and odd number of
// FP registers were occupied. Same number of instructions will be generated, just the
// STR instructions are replaced with STP (store pair).
regMaskTP maskModifiedRegs = regSet.rsGetModifiedRegsMask();
regMaskTP maskPairRegs = ((maskModifiedRegs & (RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14)).getLow() << 1) |
((maskModifiedRegs & (RBM_R19 | RBM_R21 | RBM_R23 | RBM_R25 | RBM_R27)).getLow() << 1);
if (maskPairRegs != RBM_NONE)
{
regSet.rsSetRegsModified(maskPairRegs);
}
}
#endif

#ifdef DEBUG
if (verbose)
{
10 changes: 10 additions & 0 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
@@ -2808,6 +2808,16 @@ inline
{
*pBaseReg = REG_SPBASE;
}
#elif defined(TARGET_ARM64)
if (FPbased && !codeGen->isFramePointerRequired() && varOffset < 0 && !opts.IsOSR() &&
lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT && codeGen->IsSaveFpLrWithAllCalleeSavedRegisters())
{
int spVarOffset = varOffset + codeGen->genSPtoFPdelta();
JITDUMP("lvaFrameAddress optimization for V%02u: [FP-%d] -> [SP+%d]\n", varNum, -varOffset, spVarOffset);
FPbased = false;
varOffset = spVarOffset;
}
*pFPbased = FPbased;
#else
*pFPbased = FPbased;
#endif
139 changes: 85 additions & 54 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
@@ -5644,7 +5644,9 @@ void Compiler::lvaFixVirtualFrameOffsets()
#endif

// The delta to be added to virtual offset to adjust it relative to frame pointer or SP
int delta = 0;
int delta = 0;
int frameLocalsDelta = 0;
int frameBoundary = 0;

#ifdef TARGET_XARCH
delta += REGSIZE_BYTES; // pushed PC (return address) for x86/x64
@@ -5669,7 +5671,25 @@ void Compiler::lvaFixVirtualFrameOffsets()
// We set FP to be after LR, FP
delta += 2 * REGSIZE_BYTES;
}
#elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
#elif defined(TARGET_ARM64)
else
{
// FP is used.
delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta();

// If we placed FP/LR at the bottom of the frame we need to shift all the variables
// on the new frame to account for it. See lvaAssignVirtualFrameOffsetsToLocals.
if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters())
{
// We set FP to be after LR, FP
frameLocalsDelta = 2 * REGSIZE_BYTES;
frameBoundary = opts.IsOSR() ? -info.compPatchpointInfo->TotalFrameSize() : 0;
if (info.compIsVarArgs)
frameBoundary -= MAX_REG_ARG * REGSIZE_BYTES;
}
JITDUMP("--- delta bump %d for FP frame, %d inside frame for FP/LR relocation\n", delta, frameLocalsDelta);
}
#elif defined(TARGET_AMD64)
else
{
// FP is used.
@@ -5737,7 +5757,7 @@ void Compiler::lvaFixVirtualFrameOffsets()

#if defined(TARGET_X86)
// On x86, we set the stack offset for a promoted field
// to match a struct parameter in lvAssignFrameOffsetsToPromotedStructs.
// to match a struct parameter in lvaAssignFrameOffsetsToPromotedStructs.
if ((!varDsc->lvIsParam || parentvarDsc->lvIsParam) && promotionType == PROMOTION_TYPE_DEPENDENT)
#else
if (!varDsc->lvIsParam && promotionType == PROMOTION_TYPE_DEPENDENT)
@@ -5757,15 +5777,23 @@ void Compiler::lvaFixVirtualFrameOffsets()

if (doAssignStkOffs)
{
JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta);
varDsc->SetStackOffset(varDsc->GetStackOffset() + delta);
int localDelta = delta;

if (frameLocalsDelta != 0 && varDsc->GetStackOffset() < frameBoundary)
{
localDelta += frameLocalsDelta;
}

JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(),
varDsc->GetStackOffset() + localDelta);
varDsc->SetStackOffset(varDsc->GetStackOffset() + localDelta);

#if DOUBLE_ALIGN
if (genDoubleAlign() && !codeGen->isFramePointerUsed())
{
if (varDsc->lvFramePointerBased)
{
varDsc->SetStackOffset(varDsc->GetStackOffset() - delta);
varDsc->SetStackOffset(varDsc->GetStackOffset() - localDelta);

// We need to re-adjust the offsets of the parameters so they are EBP
// relative rather than stack/frame pointer relative
@@ -5787,9 +5815,13 @@ void Compiler::lvaFixVirtualFrameOffsets()
assert(codeGen->regSet.tmpAllFree());
for (TempDsc* temp = codeGen->regSet.tmpListBeg(); temp != nullptr; temp = codeGen->regSet.tmpListNxt(temp))
{
temp->tdAdjustTempOffs(delta);
temp->tdAdjustTempOffs(delta + frameLocalsDelta);
}

if (lvaCachedGenericContextArgOffs < frameBoundary)
{
lvaCachedGenericContextArgOffs += frameLocalsDelta;
}
lvaCachedGenericContextArgOffs += delta;

#if FEATURE_FIXED_OUT_ARGS
@@ -6045,30 +6077,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
codeGen->setFramePointerUsed(codeGen->isFramePointerRequired());
}

#ifdef TARGET_ARM64
// Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is
// a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we
// need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value,
// and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the
// frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress.
// (It should be legal to use these frame types for every frame).

if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 0)
{
// Default configuration
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) ||
opts.compDbgEnC || compStressCompile(STRESS_GENERIC_VARN, 20));
}
else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1)
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames
}
else if ((opts.compJitSaveFpLrWithCalleeSavedRegisters == 2) || (opts.compJitSaveFpLrWithCalleeSavedRegisters == 3))
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames
}
#endif // TARGET_ARM64

#ifdef TARGET_XARCH
// On x86/amd64, the return address has already been pushed by the call instruction in the caller.
stkOffs -= TARGET_POINTER_SIZE; // return address;
@@ -6117,9 +6125,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
#endif // !TARGET_ARM

#ifdef TARGET_ARM64
// If the frame pointer is used, then we'll save FP/LR at the bottom of the stack.
// Otherwise, we won't store FP, and we'll store LR at the top, with the other callee-save
// registers (if any).
// If the frame pointer is used, then we'll save FP/LR either at the bottom of the stack
// or at the top of the stack depending on frame type. We make the decision after assigning
// the variables on the frame and then fix up the offsets in lvaFixVirtualFrameOffsets.
// For now, we proceed as if FP/LR were saved with the callee registers. If we later
// decide to move the FP/LR to the bottom of the frame it shifts all the assigned
// variables and temporaries by 16 bytes. The largest alignment we currently make is 16
// bytes for SIMD.

int initialStkOffs = 0;
if (info.compIsVarArgs)
@@ -6130,17 +6142,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
stkOffs -= initialStkOffs;
}

if (codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() || !isFramePointerUsed()) // Note that currently we always have
// a frame pointer
{
stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
}
else
{
// Subtract off FP and LR.
assert(compCalleeRegsPushed >= 2);
stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
}
stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;

#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

@@ -6810,15 +6812,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
}
#endif // TARGET_AMD64

#ifdef TARGET_ARM64
if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() && isFramePointerUsed()) // Note that currently we always have
// a frame pointer
{
// Create space for saving FP and LR.
stkOffs -= 2 * REGSIZE_BYTES;
}
#endif // TARGET_ARM64

#if FEATURE_FIXED_OUT_ARGS
if (lvaOutgoingArgSpaceSize > 0)
{
@@ -6856,6 +6849,44 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()

noway_assert(compLclFrameSize + originalFrameSize ==
(unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE)));

#ifdef TARGET_ARM64
// Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is
// a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we
// need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value,
// and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the
// frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress.
// (It should be legal to use these frame types for every frame).
//
// For Apple NativeAOT ABI we try to save the FP/LR registers on top to get canonical frame layout that can
// be represented with compact unwinding information. In order to maintain code quality we only do it when
// we can use SP-based addressing (!isFramePointerRequired) through lvaFrameAddress optimization, or if the
// whole frame is small enough that the negative FP-based addressing can address the whole frame.

if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 0)
{
if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsApplePlatform &&
(!codeGen->isFramePointerRequired() || codeGen->genTotalFrameSize() < 0x100))
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true);
}
else
{
// Default configuration
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) ||
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be (getNeedsGSSecurityCookie() || compLocallocUsed)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's just code moved from here, with the condition unchanged. I think it was intentional.

    // Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is
    // a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we
    // need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value,
    // and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the
    // frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress.
    // (It should be legal to use these frame types for every frame).

opts.compDbgEnC ||
compStressCompile(Compiler::STRESS_GENERIC_VARN, 20));
}
}
else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1)
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames
}
else if ((opts.compJitSaveFpLrWithCalleeSavedRegisters == 2) || (opts.compJitSaveFpLrWithCalleeSavedRegisters == 3))
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames
}
#endif // TARGET_ARM64
}

//------------------------------------------------------------------------
Original file line number Diff line number Diff line change
@@ -120,5 +120,18 @@ internal static class MachNative
public const uint PLATFORM_TVOSSIMULATOR = 8;
public const uint PLATFORM_WATCHOSSIMULATOR = 9;
public const uint PLATFORM_DRIVERKIT = 10;

public const uint UNWIND_ARM64_MODE_FRAMELESS = 0x02000000;
public const uint UNWIND_ARM64_MODE_DWARF = 0x03000000;
public const uint UNWIND_ARM64_MODE_FRAME = 0x04000000;
public const uint UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001;
public const uint UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002;
public const uint UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004;
public const uint UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008;
public const uint UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010;
public const uint UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100;
public const uint UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200;
public const uint UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400;
public const uint UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800;
}
}
Original file line number Diff line number Diff line change
@@ -752,38 +752,172 @@ void EmitCompactUnwindSymbol(string symbolName)

private protected override string ExternCName(string name) => "_" + name;

// This represents the following DWARF code:
// DW_CFA_advance_loc: 4
// DW_CFA_def_cfa_offset: +16
// DW_CFA_offset: W29 -16
// DW_CFA_offset: W30 -8
// DW_CFA_advance_loc: 4
// DW_CFA_def_cfa_register: W29
// which is generated for the following frame prolog/epilog:
// stp fp, lr, [sp, #-0x10]!
// mov fp, sp
// ...
// ldp fp, lr, [sp], #0x10
// ret
private static ReadOnlySpan<byte> DwarfArm64EmptyFrame => new byte[]
/// <summary>
/// Derives the ARM64 compact unwinding encoding for a method from its CFI blob.
/// </summary>
/// <param name="blobData">
/// CFI records of 8 bytes each: code offset (1 byte), opcode (1 byte), DWARF register
/// number (little-endian int16) and offset (little-endian int32). May be null or empty.
/// </param>
/// <returns>
/// A UNWIND_ARM64_MODE_FRAME or UNWIND_ARM64_MODE_FRAMELESS encoding with the flags of the
/// saved register pairs set, or UNWIND_ARM64_MODE_DWARF when the described frame layout
/// cannot be expressed in the compact format.
/// </returns>
private static uint GetArm64CompactUnwindCode(byte[] blobData)
{
    if (blobData == null || blobData.Length == 0)
    {
        return UNWIND_ARM64_MODE_FRAMELESS;
    }

    Debug.Assert(blobData.Length % 8 == 0);

    // Sentinel register number meaning "the CFA is still SP-based".
    const short spReg = -1;

    short cfaRegister = spReg;
    int cfaOffset = 0;
    int spOffset = 0;

    const int REG_DWARF_X19 = 19;
    const int REG_DWARF_X30 = 30;
    const int REG_DWARF_FP = 29;
    const int REG_DWARF_D8 = 72;
    const int REG_DWARF_D15 = 79;
    const int REG_IDX_X19 = 0;
    const int REG_IDX_X28 = 9;
    const int REG_IDX_FP = 10;
    const int REG_IDX_LR = 11;
    const int REG_IDX_D8 = 12;
    const int REG_IDX_D15 = 19;

    // Slot per tracked register (X19-X28, FP, LR, D8-D15); int.MinValue means "not saved".
    Span<int> registerOffset = stackalloc int[20];
    registerOffset.Fill(int.MinValue);

    // First process all the CFI codes to figure out the layout of X19-X28, FP, LR, and
    // D8-D15 on the stack.
    int offset = 0;
    while (offset < blobData.Length)
    {
        // The leading code-offset byte of each record is not needed for the layout analysis.
        offset++;
        CFI_OPCODE opcode = (CFI_OPCODE)blobData[offset++];
        short dwarfReg = BinaryPrimitives.ReadInt16LittleEndian(blobData.AsSpan(offset));
        offset += sizeof(short);
        int cfiOffset = BinaryPrimitives.ReadInt32LittleEndian(blobData.AsSpan(offset));
        offset += sizeof(int);

        switch (opcode)
        {
            case CFI_OPCODE.CFI_DEF_CFA_REGISTER:
                cfaRegister = dwarfReg;

                if (spOffset != 0)
                {
                    // Rebase the offsets recorded so far by the SP adjustment seen so far
                    // and fold that adjustment into the CFA offset.
                    for (int i = 0; i < registerOffset.Length; i++)
                    {
                        if (registerOffset[i] != int.MinValue)
                        {
                            registerOffset[i] -= spOffset;
                        }
                    }

                    cfaOffset += spOffset;
                    spOffset = 0;
                }

                break;

            case CFI_OPCODE.CFI_REL_OFFSET:
                Debug.Assert(cfaRegister == spReg);
                if (dwarfReg >= REG_DWARF_X19 && dwarfReg <= REG_DWARF_X30) // X19 - X28, FP, LR
                {
                    registerOffset[dwarfReg - REG_DWARF_X19 + REG_IDX_X19] = cfiOffset;
                }
                else if (dwarfReg >= REG_DWARF_D8 && dwarfReg <= REG_DWARF_D15) // D8 - D15
                {
                    registerOffset[dwarfReg - REG_DWARF_D8 + REG_IDX_D8] = cfiOffset;
                }
                else
                {
                    // We cannot represent this register in the compact unwinding format,
                    // fallback to DWARF immediately.
                    return UNWIND_ARM64_MODE_DWARF;
                }
                break;

            case CFI_OPCODE.CFI_ADJUST_CFA_OFFSET:
                if (cfaRegister != spReg)
                {
                    cfaOffset += cfiOffset;
                }
                else
                {
                    spOffset += cfiOffset;

                    // Previously recorded saves shift along with the SP adjustment.
                    for (int i = 0; i < registerOffset.Length; i++)
                    {
                        if (registerOffset[i] != int.MinValue)
                        {
                            registerOffset[i] += cfiOffset;
                        }
                    }
                }
                break;
        }
    }

    uint unwindCode;
    int nextOffset;

    if (cfaRegister == REG_DWARF_FP &&
        cfaOffset == 16 &&
        registerOffset[REG_IDX_FP] == -16 &&
        registerOffset[REG_IDX_LR] == -8)
    {
        // Frame format - FP/LR are saved on the top. SP is restored to FP+16
        unwindCode = UNWIND_ARM64_MODE_FRAME;
        nextOffset = -24;
    }
    else if (cfaRegister == spReg && spOffset <= 65520 &&
        registerOffset[REG_IDX_FP] == int.MinValue && registerOffset[REG_IDX_LR] == int.MinValue)
    {
        // Frameless format - FP/LR are not saved, SP must fit within the representable range
        uint encodedSpOffset = (uint)(spOffset / 16) << 12;
        unwindCode = UNWIND_ARM64_MODE_FRAMELESS | encodedSpOffset;
        nextOffset = spOffset - 8;
    }
    else
    {
        return UNWIND_ARM64_MODE_DWARF;
    }

    // Check that the integer register pairs are in the right order and mark
    // a flag for each successive pair that is present.
    for (int i = REG_IDX_X19; i < REG_IDX_X28; i += 2)
    {
        if (registerOffset[i] == int.MinValue)
        {
            // A pair is either saved completely or not at all.
            if (registerOffset[i + 1] != int.MinValue)
                return UNWIND_ARM64_MODE_DWARF;
        }
        else if (registerOffset[i] == nextOffset && registerOffset[i + 1] == nextOffset - 8)
        {
            nextOffset -= 16;
            unwindCode |= UNWIND_ARM64_FRAME_X19_X20_PAIR << ((i - REG_IDX_X19) >> 1);
        }
        else
        {
            // The register is saved, but not in the slot the compact format implies.
            // Claiming it is unsaved would be wrong, so fall back to DWARF.
            return UNWIND_ARM64_MODE_DWARF;
        }
    }

    // Check that the floating point register pairs are in the right order and mark
    // a flag for each successive pair that is present.
    for (int i = REG_IDX_D8; i < REG_IDX_D15; i += 2)
    {
        if (registerOffset[i] == int.MinValue)
        {
            // A pair is either saved completely or not at all.
            if (registerOffset[i + 1] != int.MinValue)
                return UNWIND_ARM64_MODE_DWARF;
        }
        else if (registerOffset[i] == nextOffset && registerOffset[i + 1] == nextOffset - 8)
        {
            nextOffset -= 16;
            // The pair index must be relative to D8; shifting by (i >> 1) directly would move
            // the flag past the D8-D15 pair bits (0x100..0x800).
            unwindCode |= UNWIND_ARM64_FRAME_D8_D9_PAIR << ((i - REG_IDX_D8) >> 1);
        }
        else
        {
            // The register is saved, but not in the slot the compact format implies.
            return UNWIND_ARM64_MODE_DWARF;
        }
    }

    return unwindCode;
}

private protected override bool EmitCompactUnwinding(string startSymbolName, ulong length, string lsdaSymbolName, byte[] blob)
{
uint encoding = _compactUnwindDwarfCode;

if (_cpuType == CPU_TYPE_ARM64)
{
if (blob.AsSpan().SequenceEqual(DwarfArm64EmptyFrame))
{
// Frame-based encoding, no saved registers
encoding = 0x04000000;
}
encoding = GetArm64CompactUnwindCode(blob);
}

_compactUnwindCodes.Add(new CompactUnwindCode(