@@ -3626,7 +3626,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
3626
3626
unsigned slots = layout->GetSlotCount ();
3627
3627
3628
3628
// Temp register(s) used to perform the sequence of loads and stores.
3629
- regNumber tmpReg = cpObjNode->ExtractTempReg (RBM_ALLINT );
3629
+ regNumber tmpReg = cpObjNode->ExtractTempReg ();
3630
3630
regNumber tmpReg2 = REG_NA;
3631
3631
3632
3632
assert (genIsValidIntReg (tmpReg));
@@ -3635,7 +3635,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
3635
3635
3636
3636
if (slots > 1 )
3637
3637
{
3638
- tmpReg2 = cpObjNode->ExtractTempReg (RBM_ALLINT );
3638
+ tmpReg2 = cpObjNode->GetSingleTempReg ( );
3639
3639
assert (tmpReg2 != tmpReg);
3640
3640
assert (genIsValidIntReg (tmpReg2));
3641
3641
assert (tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
@@ -3682,69 +3682,35 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
3682
3682
{
3683
3683
unsigned gcPtrCount = cpObjNode->GetLayout ()->GetGCPtrCount ();
3684
3684
3685
- // We might also need SIMD regs if we have 4 or more continuous non-gc slots
3686
- // On ARM64, SIMD loads/stores provide 8-byte atomicity guarantees when aligned to 8 bytes.
3687
- regNumber tmpSimdReg1 = REG_NA;
3688
- regNumber tmpSimdReg2 = REG_NA;
3689
- if ((slots >= 4 ) && compiler->IsBaselineSimdIsaSupported ())
3690
- {
3691
- tmpSimdReg1 = cpObjNode->ExtractTempReg (RBM_ALLFLOAT);
3692
- tmpSimdReg2 = cpObjNode->ExtractTempReg (RBM_ALLFLOAT);
3693
- }
3694
-
3695
3685
unsigned i = 0 ;
3696
3686
while (i < slots)
3697
3687
{
3698
3688
if (!layout->IsGCPtr (i))
3699
3689
{
3700
- // How many continuous non-gc slots do we have?
3701
- unsigned nonGcSlots = 0 ;
3702
- do
3690
+ // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
3691
+ if ((i + 1 < slots) && !layout->IsGCPtr (i + 1 ))
3703
3692
{
3704
- nonGcSlots++;
3705
- i++ ;
3706
- } while ((i < slots) && !layout-> IsGCPtr (i));
3707
-
3708
- const regNumber srcReg = REG_WRITE_BARRIER_SRC_BYREF;
3709
- const regNumber dstReg = REG_WRITE_BARRIER_DST_BYREF;
3710
- while (nonGcSlots > 0 )
3693
+ emit-> emitIns_R_R_R_I (INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
3694
+ 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX) ;
3695
+ emit-> emitIns_R_R_R_I (INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
3696
+ 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
3697
+ ++i; // extra increment of i, since we are copying two items
3698
+ }
3699
+ else
3711
3700
{
3712
- regNumber tmp1 = tmpReg;
3713
- regNumber tmp2 = tmpReg2;
3714
- emitAttr size = EA_8BYTE;
3715
- insOpts opts = INS_OPTS_POST_INDEX;
3716
-
3717
- // Copy at least two slots at a time
3718
- if (nonGcSlots >= 2 )
3719
- {
3720
- // Do 4 slots at a time if SIMD is supported
3721
- if ((nonGcSlots >= 4 ) && compiler->IsBaselineSimdIsaSupported ())
3722
- {
3723
- // We need SIMD temp regs now
3724
- tmp1 = tmpSimdReg1;
3725
- tmp2 = tmpSimdReg2;
3726
- size = EA_16BYTE;
3727
- nonGcSlots -= 2 ;
3728
- }
3729
- nonGcSlots -= 2 ;
3730
- emit->emitIns_R_R_R_I (INS_ldp, size, tmp1, tmp2, srcReg, EA_SIZE (size) * 2 , opts);
3731
- emit->emitIns_R_R_R_I (INS_stp, size, tmp1, tmp2, dstReg, EA_SIZE (size) * 2 , opts);
3732
- }
3733
- else
3734
- {
3735
- nonGcSlots--;
3736
- emit->emitIns_R_R_I (INS_ldr, EA_8BYTE, tmp1, srcReg, EA_SIZE (size), opts);
3737
- emit->emitIns_R_R_I (INS_str, EA_8BYTE, tmp1, dstReg, EA_SIZE (size), opts);
3738
- }
3701
+ emit->emitIns_R_R_I (INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
3702
+ INS_OPTS_POST_INDEX);
3703
+ emit->emitIns_R_R_I (INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
3704
+ INS_OPTS_POST_INDEX);
3739
3705
}
3740
3706
}
3741
3707
else
3742
3708
{
3743
3709
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
3744
3710
genEmitHelperCall (CORINFO_HELP_ASSIGN_BYREF, 0 , EA_PTRSIZE);
3745
3711
gcPtrCount--;
3746
- i++;
3747
3712
}
3713
+ ++i;
3748
3714
}
3749
3715
assert (gcPtrCount == 0 );
3750
3716
}
0 commit comments