unroll byref struct copies #86820

Merged (5 commits) on Jul 18, 2023
19 changes: 19 additions & 0 deletions src/coreclr/jit/layout.cpp
@@ -416,6 +416,25 @@ void ClassLayout::InitializeGCPtrs(Compiler* compiler)
    INDEBUG(m_gcPtrsInitialized = true;)
}

//------------------------------------------------------------------------
// HasGCByRef: check whether this layout contains at least one GC ByRef slot.
//
// Return value:
//    true if at least one slot is a GC ByRef, false otherwise.
//
bool ClassLayout::HasGCByRef() const
{
    unsigned slots = GetSlotCount();
    for (unsigned i = 0; i < slots; i++)
[Inline review thread]

Contributor: Is there an intuition that the GCByRef slots are at the beginning, at the end, or scattered? In other words, are we more likely to fast-return if we walk the list backwards?

Contributor Author: I don't have any intuition or measurements on this. If necessary, this could be cached, similar to whether the layout has any GC pointers, though if I recall correctly we are out of bits at this size.

    {
        if (IsGCByRef(i))
        {
            return true;
        }
    }

    return false;
}

//------------------------------------------------------------------------
// AreCompatible: check if 2 layouts are the same for copying.
//
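As a side note to the inline thread above: if profiling ever showed that GC byref slots cluster at the end of layouts, the scan could simply run in reverse. A sketch of that variant, purely illustrative and not part of this PR:

// Illustrative only: reverse-order scan of the slots. Worthwhile only if
// byref slots tend to sit near the end of a layout, which (per the author's
// reply above) has not been measured.
bool ClassLayout::HasGCByRef() const
{
    for (unsigned i = GetSlotCount(); i != 0; i--)
    {
        if (IsGCByRef(i - 1))
        {
            return true;
        }
    }

    return false;
}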
12 changes: 12 additions & 0 deletions src/coreclr/jit/layout.h
@@ -184,11 +184,23 @@ class ClassLayout
        return m_gcPtrCount != 0;
    }

    bool HasGCByRef() const;

    bool IsGCPtr(unsigned slot) const
    {
        return GetGCPtr(slot) != TYPE_GC_NONE;
    }

    bool IsGCRef(unsigned slot) const
    {
        return GetGCPtr(slot) == TYPE_GC_REF;
    }

    bool IsGCByRef(unsigned slot) const
    {
        return GetGCPtr(slot) == TYPE_GC_BYREF;
    }

    var_types GetGCPtrType(unsigned slot) const
    {
        switch (GetGCPtr(slot))
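The remainder of GetGCPtrType is unchanged and folded out of the diff view. For orientation, here is a minimal sketch of how a caller might use the new predicates; the loop is hypothetical and not part of the PR:

// Hypothetical caller: classify each slot of a layout via the predicates above.
void ClassifySlots(const ClassLayout* layout)
{
    for (unsigned i = 0; i < layout->GetSlotCount(); i++)
    {
        if (layout->IsGCByRef(i))
        {
            // TYPE_GC_BYREF slot: a managed pointer, which can only live on
            // the stack or in registers, never on the GC heap.
        }
        else if (layout->IsGCRef(i))
        {
            // TYPE_GC_REF slot: an object reference; stores to the heap
            // require a write barrier.
        }
        // Any other slot holds no GC pointer (TYPE_GC_NONE).
    }
}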
22 changes: 15 additions & 7 deletions src/coreclr/jit/lowerarmarch.cpp
@@ -552,8 +552,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
     GenTree* src  = blkNode->Data();
     unsigned size = blkNode->Size();
 
-    const bool isDstAddrLocal = dstAddr->OperIs(GT_LCL_ADDR);
-
     if (blkNode->OperIsInitBlkOp())
     {
         if (src->OperIs(GT_INIT_VAL))
@@ -617,12 +615,22 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
     }
 
-    bool     doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
-    unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
-    if (doCpObj && isDstAddrLocal && (size <= copyBlockUnrollLimit))
+    ClassLayout* layout               = blkNode->GetLayout();
+    bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+    unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
+
+    if (doCpObj && (size <= copyBlockUnrollLimit))
     {
-        doCpObj                  = false;
-        blkNode->gtBlkOpGcUnsafe = true;
+        // No write barriers are needed on the stack.
+        // If the layout contains a byref, then we know it must live on the stack.
+        if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
+        {
+            // If the size is small enough to unroll then we need to mark the block as non-interruptible
+            // to actually allow unrolling. The generated code does not report GC references loaded in the
+            // temporary register(s) used for copying.
+            doCpObj                  = false;
+            blkNode->gtBlkOpGcUnsafe = true;
+        }
     }
 
     if (doCpObj)
23 changes: 15 additions & 8 deletions src/coreclr/jit/lowerloongarch64.cpp
@@ -343,15 +343,22 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
     }
 
-    bool     doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
-    unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
-    if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) && (size <= copyBlockUnrollLimit))
+    ClassLayout* layout               = blkNode->GetLayout();
+    bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+    unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
+
+    if (doCpObj && (size <= copyBlockUnrollLimit))
     {
-        // If the size is small enough to unroll then we need to mark the block as non-interruptible
-        // to actually allow unrolling. The generated code does not report GC references loaded in the
-        // temporary register(s) used for copying.
-        doCpObj                  = false;
-        blkNode->gtBlkOpGcUnsafe = true;
+        // No write barriers are needed on the stack.
+        // If the layout contains a byref, then we know it must live on the stack.
+        if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
+        {
+            // If the size is small enough to unroll then we need to mark the block as non-interruptible
+            // to actually allow unrolling. The generated code does not report GC references loaded in the
+            // temporary register(s) used for copying.
+            doCpObj                  = false;
+            blkNode->gtBlkOpGcUnsafe = true;
+        }
     }
 
     // CopyObj or CopyBlk
21 changes: 14 additions & 7 deletions src/coreclr/jit/lowerriscv64.cpp
@@ -297,14 +297,21 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
     }
 
-    bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
-    if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) && (size <= CPBLK_UNROLL_LIMIT))
+    ClassLayout* layout  = blkNode->GetLayout();
+    bool         doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+
+    if (doCpObj && (size <= CPBLK_UNROLL_LIMIT))
     {
-        // If the size is small enough to unroll then we need to mark the block as non-interruptible
-        // to actually allow unrolling. The generated code does not report GC references loaded in the
-        // temporary register(s) used for copying.
-        doCpObj                  = false;
-        blkNode->gtBlkOpGcUnsafe = true;
+        // No write barriers are needed on the stack.
+        // If the layout contains a byref, then we know it must live on the stack.
+        if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
+        {
+            // If the size is small enough to unroll then we need to mark the block as non-interruptible
+            // to actually allow unrolling. The generated code does not report GC references loaded in the
+            // temporary register(s) used for copying.
+            doCpObj                  = false;
+            blkNode->gtBlkOpGcUnsafe = true;
+        }
     }
 
     // CopyObj or CopyBlk
25 changes: 15 additions & 10 deletions src/coreclr/jit/lowerxarch.cpp
@@ -394,18 +394,23 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
         comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::StoreBlkSrc));
     }
 
-    ClassLayout* layout  = blkNode->GetLayout();
-    bool         doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+    ClassLayout* layout               = blkNode->GetLayout();
+    bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+    unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false);
 
 #ifndef JIT32_GCENCODER
-    if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) &&
-        (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false)))
-    {
-        // If the size is small enough to unroll then we need to mark the block as non-interruptible
-        // to actually allow unrolling. The generated code does not report GC references loaded in the
-        // temporary register(s) used for copying. This is not supported for the JIT32_GCENCODER.
-        doCpObj                  = false;
-        blkNode->gtBlkOpGcUnsafe = true;
+    if (doCpObj && (size <= copyBlockUnrollLimit))
+    {
+        // No write barriers are needed on the stack.
+        // If the layout contains a byref, then we know it must live on the stack.
+        if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
+        {
+            // If the size is small enough to unroll then we need to mark the block as non-interruptible
+            // to actually allow unrolling. The generated code does not report GC references loaded in the
+            // temporary register(s) used for copying. This is not supported for the JIT32_GCENCODER.
+            doCpObj                  = false;
+            blkNode->gtBlkOpGcUnsafe = true;
+        }
     }
 #endif
 
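The same two-level check now appears in all four LowerBlockStore variants above. A hypothetical consolidation, with an invented name and not part of this PR, could hoist the new condition into a shared helper:

// Hypothetical helper (illustrative only): may an unrollable GC-containing
// block store skip CpObj? A GT_LCL_ADDR destination is known to be on the
// stack, and a layout containing a byref must live on the stack as well;
// stack stores never need write barriers or GC reporting via CpObj.
static bool IsStackOnlyDst(GenTree* dstAddr, ClassLayout* layout)
{
    return dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef();
}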
70 changes: 70 additions & 0 deletions src/tests/JIT/opt/Misc/Runtime_80086/Runtime_80086.cs
@@ -0,0 +1,70 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Xunit;

#pragma warning disable CS8500

namespace Runtime_80086
{
    public static unsafe class Test
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        public static Span<int> Marshal1(Span<int> a)
        {
            return MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(a), a.Length);
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        public static Span<int> Marshal2(Span<int>* a)
        {
            return MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(*a), a->Length);
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy1(Span<int> s) => s;

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy2(Span<int> a)
        {
            ref Span<int> ra = ref a;
            return ra;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy3(Span<int>* a)
        {
            return *a;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy4(scoped ref Span<int> a)
        {
            return a;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        static Span<int> Copy5(ReadOnlySpan<int> a)
        {
            // Example is used to check code generation but shouldn't be used elsewhere
            return *(Span<int>*)&a;
        }

        [Fact]
        public static void TestEntryPoint()
        {
            Span<int> s = new int[1] { 13 };
            s = Marshal1(s);
            s = Marshal2(&s);
            s = Copy1(s);
            s = Copy2(s);
            s = Copy3(&s);
            s = Copy4(ref s);
            s = Copy5(s);
            Assert.Equal(13, s[0]);
        }
    }
}
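Each helper above forces a Span<int>, a struct whose first field is a GC byref, through a block copy from a different IR shape: by value, through a raw pointer, through a ref local, through a scoped ref parameter, and via a pointer-punned ReadOnlySpan. Together they exercise the new unrolled, write-barrier-free copy path before the final assert checks that the payload survived.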
9 changes: 9 additions & 0 deletions src/tests/JIT/opt/Misc/Runtime_80086/Runtime_80086.csproj
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
    <Optimize>True</Optimize>
  </PropertyGroup>
  <ItemGroup>
    <Compile Include="$(MSBuildProjectName).cs" />
  </ItemGroup>
</Project>