Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid calling GC write barrier for byrefs #89064

Closed
wants to merge 14 commits into from
7 changes: 2 additions & 5 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
@@ -866,12 +866,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = layout->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC);
@@ -881,11 +879,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
else
{
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}

++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
10 changes: 3 additions & 7 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
@@ -3656,15 +3656,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use ldp/stp
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
@@ -3684,11 +3682,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC reference (TYP_REF slot) we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
10 changes: 3 additions & 7 deletions src/coreclr/jit/codegenloongarch64.cpp
Original file line number Diff line number Diff line change
@@ -2839,15 +2839,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use two load/store
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use two load/store
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
if ((i + 2) == slots)
{
@@ -2883,11 +2881,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC reference (TYP_REF slot) we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
10 changes: 3 additions & 7 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
@@ -2475,15 +2475,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use two ld/sd
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use two ld/sd
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
if ((i + 2) == slots)
{
@@ -2519,11 +2517,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC reference (TYP_REF slot) we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
22 changes: 9 additions & 13 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
@@ -4164,32 +4164,31 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
ClassLayout* layout = cpObjNode->GetLayout();
unsigned gcPtrCount = layout->GetGCPtrCount();
ClassLayout* layout = cpObjNode->GetLayout();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Let's see if we can use rep movsp instead of a sequence of movsp instructions
// to save cycles and code size.
unsigned nonGcSlotCount = 0;
unsigned nonRefSlotCount = 0;

do
{
nonGcSlotCount++;
nonRefSlotCount++;
i++;
} while ((i < slots) && !layout->IsGCPtr(i));
} while ((i < slots) && !layout->IsGCRef(i));

// If we have a very small contiguous non-ref region (no TYP_REF slots), it's better just to
// emit a sequence of movsp instructions
if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
{
while (nonGcSlotCount > 0)
while (nonRefSlotCount > 0)
{
instGen(INS_movsp);
nonGcSlotCount--;
nonRefSlotCount--;
}
}
else
@@ -4198,19 +4197,16 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
// rep movsp (alias for movsd/movsq for x86/x64)
assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);

GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonRefSlotCount);
instGen(INS_r_movsp);
}
}
else
{
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
i++;
}
}

assert(gcPtrCount == 0);
}

// Clear the gcInfo for RSI and RDI.
18 changes: 9 additions & 9 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
@@ -422,37 +422,37 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
// we can use REP MOVSD/Q instead of a sequence of MOVSD/Q instructions. According to the
// Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where
// the entire operation takes 20 cycles and encodes in 5 bytes (loading RCX and REP MOVSD/Q).
unsigned nonGCSlots = 0;
unsigned nonRefSlots = 0;

if (dstAddr->OperIs(GT_LCL_ADDR))
{
// If the destination is on the stack then no write barriers are needed.
nonGCSlots = layout->GetSlotCount();
nonRefSlots = layout->GetSlotCount();
}
else
{
// Otherwise a write barrier is needed for every GC pointer in the layout
// so we need to check if there's a long enough sequence of non-GC slots.
// Otherwise a write barrier is needed for every TYP_REF pointer in the layout
// so we need to check if there's a long enough sequence of non-TYP_REF slots.
unsigned slots = layout->GetSlotCount();
for (unsigned i = 0; i < slots; i++)
{
if (layout->IsGCPtr(i))
if (layout->IsGCRef(i))
{
nonGCSlots = 0;
nonRefSlots = 0;
}
else
{
nonGCSlots++;
nonRefSlots++;

if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
break;
}
}
}
}

if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjRepInstr;
}