diff --git a/docs/design/coreclr/botr/guide-for-porting.md b/docs/design/coreclr/botr/guide-for-porting.md index 5d2c01aa52d066..f7ca105bf165db 100644 --- a/docs/design/coreclr/botr/guide-for-porting.md +++ b/docs/design/coreclr/botr/guide-for-porting.md @@ -413,12 +413,6 @@ Here is an annotated list of the stubs implemented for Unix on Arm64. Today use of this feature on Unix requires hand-written IL. On Windows this feature is commonly used by C++/CLI -3. EH Correctness. Some helpers are written in assembly to provide well known - locations for NullReferenceExceptions to be generated out of a SIGSEGV - signal. - - 1. `JIT_MemSet`, and `JIT_MemCpy` have this requirement - #### cgencpu.h This header is included by various code in the VM directory. It provides a large diff --git a/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs index 16d9067567ee58..de7b3021c458fe 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs @@ -74,7 +74,7 @@ private static unsafe void CopyImpl(Array sourceArray, int sourceIndex, Array de if (pMT->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); // GC.KeepAlive(sourceArray) not required. pMT kept alive via sourceArray return; @@ -184,7 +184,7 @@ private static unsafe void CopyImplUnBoxEachElement(Array sourceArray, int sourc } else { - Buffer.Memmove(ref dest, ref obj.GetRawData(), destSize); + SpanHelpers.Memmove(ref dest, ref obj.GetRawData(), destSize); } } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs index 70cff629fc28e6..88c929dbe74cbe 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs @@ -30,7 +30,7 @@ protected internal unsafe object MemberwiseClone() if (RuntimeHelpers.GetMethodTable(clone)->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); return clone; } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs index c04665aa6c22f4..bbdccc6cd2eed4 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs @@ -266,7 +266,7 @@ public static unsafe void StructureToPtr(object structure, IntPtr ptr, bool fDel } else { - Buffer.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size); + SpanHelpers.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size); } } @@ -291,7 +291,7 @@ private static unsafe void PtrToStructureHelper(IntPtr ptr, object structure, bo } else { - Buffer.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size); + SpanHelpers.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size); } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs index f15ad03d82182b..d2785251613efa 100644 --- 
a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs @@ -39,7 +39,7 @@ internal static unsafe void InternalCopy(string src, IntPtr dest, int len) { if (len != 0) { - Buffer.Memmove(ref *(byte*)dest, ref Unsafe.As(ref src.GetRawStringData()), (nuint)len); + SpanHelpers.Memmove(ref *(byte*)dest, ref Unsafe.As(ref src.GetRawStringData()), (nuint)len); } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs b/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs index 9874eef6dc2292..81c0dd8e1afecd 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs @@ -103,7 +103,7 @@ internal static unsafe IntPtr ConvertToNative(int flags, string strManaged, IntP // + 1 for the null character from the user. + 1 for the null character we put in. pbNativeBuffer = (byte*)Marshal.AllocCoTaskMem(nb + 2); - Buffer.Memmove(ref *pbNativeBuffer, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nb); + SpanHelpers.Memmove(ref *pbNativeBuffer, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nb); } } @@ -360,7 +360,7 @@ internal static unsafe IntPtr ConvertToNative(string strManaged, bool fBestFit, Debug.Assert(nbytesused >= 0 && nbytesused < nbytes, "Insufficient buffer allocated in VBByValStrMarshaler.ConvertToNative"); - Buffer.Memmove(ref *pNative, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nbytesused); + SpanHelpers.Memmove(ref *pNative, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nbytesused); pNative[nbytesused] = 0; *pLength = nbytesused; @@ -409,7 +409,7 @@ internal static unsafe IntPtr ConvertToNative(int flags, string strManaged) IntPtr bstr = Marshal.AllocBSTRByteLen(length); if (bytes != null) { - Buffer.Memmove(ref *(byte*)bstr, ref MemoryMarshal.GetArrayDataReference(bytes), length); + SpanHelpers.Memmove(ref *(byte*)bstr, ref MemoryMarshal.GetArrayDataReference(bytes), length); } return bstr; @@ -1484,7 +1484,7 @@ internal static unsafe void FmtClassUpdateNativeInternal(object obj, byte* pNati } else { - Buffer.Memmove(ref *pNative, ref obj.GetRawData(), size); + SpanHelpers.Memmove(ref *pNative, ref obj.GetRawData(), size); } } @@ -1503,7 +1503,7 @@ internal static unsafe void FmtClassUpdateCLRInternal(object obj, byte* pNative) } else { - Buffer.Memmove(ref obj.GetRawData(), ref *pNative, size); + SpanHelpers.Memmove(ref obj.GetRawData(), ref *pNative, size); } } diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 111c6488df073f..5fad5e4b2429e4 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -572,7 +572,10 @@ enum CorInfoHelpFunc CORINFO_HELP_INIT_PINVOKE_FRAME, // initialize an inlined PInvoke Frame for the JIT-compiler CORINFO_HELP_MEMSET, // Init block of memory + CORINFO_HELP_MEMZERO, // Init block of memory with zeroes CORINFO_HELP_MEMCPY, // Copy block of memory + CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, + // not safe for unbounded size, does not trigger GC) CORINFO_HELP_RUNTIMEHANDLE_METHOD, // determine a type/field/method handle at run-time CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG, // determine a type/field/method handle at run-time, with IBC logging diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 646f1b169330a2..11675936acfa37 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h 
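Reviewer note on the corinfo.h hunk above: this change splits block-init into three helpers with distinct contracts. A minimal C# sketch of those contracts, assuming the semantics stated in the corinfo.h comments (names and types here are illustrative, not runtime code):

```csharp
using System;

static class BlockInitSketch
{
    // CORINFO_HELP_MEMSET: arbitrary fill value; now resolves to managed code,
    // so a null destination surfaces as a NullReferenceException.
    public static void MemSet(Span<byte> dest, byte value) => dest.Fill(value);

    // CORINFO_HELP_MEMZERO: same, but the fill value is known to be zero.
    public static void MemZero(Span<byte> dest) => dest.Clear();

    // CORINFO_HELP_NATIVE_MEMSET: forwards to CRT memset; per the comment in
    // corinfo.h it is NOT null-safe, NOT safe for unbounded sizes, and never
    // triggers GC. Callers must establish those preconditions themselves.
    public static unsafe void NativeMemSet(byte* dest, int value, nuint count)
        => new Span<byte>(dest, checked((int)count)).Fill((byte)value);
}
```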
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 1f30d12b-38f1-4f1e-a08a-831def882aa4 */ - 0x1f30d12b, - 0x38f1, - 0x4f1e, - {0xa0, 0x8a, 0x83, 0x1d, 0xef, 0x88, 0x2a, 0xa4} +constexpr GUID JITEEVersionIdentifier = { /* 86eab154-5d93-4fad-bc07-e94fd9268b70 */ + 0x86eab154, + 0x5d93, + 0x4fad, + {0xbc, 0x07, 0xe9, 0x4f, 0xd9, 0x26, 0x8b, 0x70} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 65167abd6a4dd6..a0982f3ac6520f 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -235,13 +235,10 @@ DYNAMICJITHELPER(CORINFO_HELP_INIT_PINVOKE_FRAME, NULL, CORINFO_HELP_SIG_REG_ONLY) #endif -#ifdef TARGET_X86 - JITHELPER(CORINFO_HELP_MEMSET, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB) - JITHELPER(CORINFO_HELP_MEMCPY, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB) -#else - JITHELPER(CORINFO_HELP_MEMSET, JIT_MemSet, CORINFO_HELP_SIG_REG_ONLY) - JITHELPER(CORINFO_HELP_MEMCPY, JIT_MemCpy, CORINFO_HELP_SIG_REG_ONLY) -#endif + DYNAMICJITHELPER(CORINFO_HELP_MEMSET, NULL, CORINFO_HELP_SIG_REG_ONLY) + DYNAMICJITHELPER(CORINFO_HELP_MEMZERO, NULL, CORINFO_HELP_SIG_REG_ONLY) + DYNAMICJITHELPER(CORINFO_HELP_MEMCPY, NULL, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_NATIVE_MEMSET, Jit_NativeMemSet, CORINFO_HELP_SIG_REG_ONLY) // Generics JITHELPER(CORINFO_HELP_RUNTIMEHANDLE_METHOD, JIT_GenericHandleMethod, CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index b3128cb00e4b73..41a4aa251fa742 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -20,7 +20,7 @@ // If you update this, ensure you run `git grep MINIMUM_READYTORUN_MAJOR_VERSION` // and handle pending work. 
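The GUID rotation above and the ReadyToRun minor-version bump below are the two compatibility levers for this change. A hedged sketch of the checks they feed (logic is illustrative; the constants mirror this diff):

```csharp
using System;

static class VersionGates
{
    // JIT-EE interface: adding CorInfoHelpFunc entries changes the contract,
    // so the JIT and the VM must agree on the exact GUID; there is no
    // partial compatibility.
    public static bool JitIsCompatible(Guid jitGuid, Guid vmGuid)
        => jitGuid == vmGuid;

    // ReadyToRun images: a minor bump (9.1 -> 9.2 for the MemZero and
    // NativeMemSet helpers) keeps images loadable as long as the major
    // version stays within the supported range.
    public static bool R2RIsLoadable(ushort imageMajor, ushort minMajor, ushort curMajor)
        => imageMajor >= minMajor && imageMajor <= curMajor;
}
```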
#define READYTORUN_MAJOR_VERSION 0x0009 -#define READYTORUN_MINOR_VERSION 0x0001 +#define READYTORUN_MINOR_VERSION 0x0002 #define MINIMUM_READYTORUN_MAJOR_VERSION 0x009 @@ -33,6 +33,8 @@ // R2R Version 8.0 Changes the alignment of the Int128 type // R2R Version 9.0 adds support for the Vector512 type // R2R Version 9.1 adds new helpers to allocate objects on frozen segments +// R2R Version 9.2 adds MemZero and NativeMemSet helpers + struct READYTORUN_CORE_HEADER { @@ -325,7 +327,9 @@ enum ReadyToRunHelper READYTORUN_HELPER_Stelem_Ref = 0x38, READYTORUN_HELPER_Ldelema_Ref = 0x39, - READYTORUN_HELPER_MemSet = 0x40, + READYTORUN_HELPER_MemZero = 0x3E, + READYTORUN_HELPER_MemSet = 0x3F, + READYTORUN_HELPER_NativeMemSet = 0x40, READYTORUN_HELPER_MemCpy = 0x41, // PInvoke helpers @@ -441,10 +445,6 @@ enum ReadyToRunHelper READYTORUN_HELPER_StackProbe = 0x111, READYTORUN_HELPER_GetCurrentManagedThreadId = 0x112, - - // Array helpers for use with native ints - READYTORUN_HELPER_Stelem_Ref_I = 0x113, - READYTORUN_HELPER_Ldelema_Ref_I = 0x114, }; #include "readytoruninstructionset.h" diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h index 8691f9b9cb8c0c..bbb586e8eb4a30 100644 --- a/src/coreclr/inc/readytorunhelpers.h +++ b/src/coreclr/inc/readytorunhelpers.h @@ -29,6 +29,8 @@ HELPER(READYTORUN_HELPER_Stelem_Ref, CORINFO_HELP_ARRADDR_ST, HELPER(READYTORUN_HELPER_Ldelema_Ref, CORINFO_HELP_LDELEMA_REF, ) HELPER(READYTORUN_HELPER_MemSet, CORINFO_HELP_MEMSET, ) +HELPER(READYTORUN_HELPER_MemZero, CORINFO_HELP_MEMZERO, ) +HELPER(READYTORUN_HELPER_NativeMemSet, CORINFO_HELP_NATIVE_MEMSET, ) HELPER(READYTORUN_HELPER_MemCpy, CORINFO_HELP_MEMCPY, ) HELPER(READYTORUN_HELPER_LogMethodEnter, CORINFO_HELP_BBT_FCN_ENTER, ) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index d965d5c91f42ec..f5a32d241cc341 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8403,7 +8403,9 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn) GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_0, (int)varNum, 0); instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_1, static_cast(poisonVal)); instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_ARG_2, size); - genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); + + // Call non-managed memset + genEmitHelperCall(CORINFO_HELP_NATIVE_MEMSET, 0, EA_UNKNOWN); // May kill REG_SCRATCH, so we need to reload it. 
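On the genPoisonFrame change just above: poisoning writes a known pattern into the method's own frame, so the destination is never null and the size is bounded, which is exactly the contract CORINFO_HELP_NATIVE_MEMSET demands; routing this through the managed MEMSET helper would be unnecessary here. Roughly, in C# terms (illustrative stand-in for the emitted call):

```csharp
using System;

static unsafe class PoisonSketch
{
    // frameBase/size come from the JIT itself, so the native-memset
    // preconditions (non-null destination, bounded size) hold by construction.
    public static void PoisonFrame(byte* frameBase, int size, byte poisonVal)
        => new Span<byte>(frameBase, size).Fill(poisonVal);
}
```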
hasPoisonImm = false; #endif diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 27aa0966095eac..d1a7bc7fda298a 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1332,7 +1332,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed case NI_System_SpanHelpers_ClearWithoutReferences: case NI_System_SpanHelpers_Fill: case NI_System_SpanHelpers_SequenceEqual: - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: { if (FgStack::IsConstArgument(pushedStack.Top(), impInlineInfo)) { diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index a92e7d260c3043..a1c9da833fd3f4 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -1947,7 +1947,7 @@ class ValueHistogramProbeVisitor final : public GenTreeVisitorIsCall() && node->AsCall()->IsSpecialIntrinsic()) { const NamedIntrinsic ni = m_compiler->lookupNamedIntrinsic(node->AsCall()->gtCallMethHnd); - if ((ni == NI_System_Buffer_Memmove) || (ni == NI_System_SpanHelpers_SequenceEqual)) + if ((ni == NI_System_SpanHelpers_Memmove) || (ni == NI_System_SpanHelpers_SequenceEqual)) { m_functor(m_compiler, node); } @@ -2274,7 +2274,7 @@ class ValueHistogramProbeInserter return; } - assert(node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_Buffer_Memmove) || + assert(node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_Memmove) || node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_SequenceEqual)); const ICorJitInfo::PgoInstrumentationSchema& countEntry = m_schema[*m_currentSchemaIndex]; @@ -2540,7 +2540,7 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod() // These are marked as [Intrinsic] only to be handled (unrolled) for constant inputs. // In other cases they have large managed implementations we want to profile. case NI_System_String_Equals: - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: case NI_System_MemoryExtensions_Equals: case NI_System_MemoryExtensions_SequenceEqual: case NI_System_MemoryExtensions_StartsWith: diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 5469904c4ce554..e0d472b2538e1c 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10295,9 +10295,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #endif - op3 = impPopStack().val; // Size - op2 = impPopStack().val; // Value / Src addr - op1 = impPopStack().val; // Dst addr + op3 = gtFoldExpr(impPopStack().val); // Size + op2 = gtFoldExpr(impPopStack().val); // Value / Src addr + op1 = impPopStack().val; // Dst addr if (op3->IsCnsIntOrI()) { @@ -10343,18 +10343,34 @@ void Compiler::impImportBlockCode(BasicBlock* block) // TODO: enable for X86 as well, it currently doesn't support memset/memcpy helpers // Then, get rid of GT_STORE_DYN_BLK entirely. #ifndef TARGET_X86 - const unsigned helper = opcode == CEE_INITBLK ? 
CORINFO_HELP_MEMSET : CORINFO_HELP_MEMCPY; + GenTreeCall* call; + if (opcode == CEE_INITBLK) + { + // value is zero -> memzero, otherwise -> memset + if (op2->IsIntegralConst(0)) + { + call = gtNewHelperCallNode(CORINFO_HELP_MEMZERO, TYP_VOID, op1, op3); + } + else + { + call = gtNewHelperCallNode(CORINFO_HELP_MEMSET, TYP_VOID, op1, op2, op3); + } + } + else + { + call = gtNewHelperCallNode(CORINFO_HELP_MEMCPY, TYP_VOID, op1, op2, op3); + } + if (isVolatile) { // Wrap with memory barriers: full-barrier + call + load-barrier impAppendTree(gtNewMemoryBarrier(), CHECK_SPILL_ALL, impCurStmtDI); - impAppendTree(gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3), CHECK_SPILL_ALL, - impCurStmtDI); + impAppendTree(call, CHECK_SPILL_ALL, impCurStmtDI); op1 = gtNewMemoryBarrier(true); } else { - op1 = gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3); + op1 = call; } #else if (opcode == CEE_INITBLK) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b897e783a7268b..a1f5768ee68321 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1290,7 +1290,7 @@ var_types Compiler::impImportCall(OPCODE opcode, impAppendTree(call, verCurrentState.esStackDepth - 1, impCurStmtDI); } else if (JitConfig.JitProfileValues() && call->IsCall() && - call->AsCall()->IsSpecialIntrinsic(this, NI_System_Buffer_Memmove)) + call->AsCall()->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove)) { if (opts.IsOptimizedWithProfile()) { @@ -1555,7 +1555,7 @@ GenTree* Compiler::impDuplicateWithProfiledArg(GenTreeCall* call, IL_OFFSET ilOf unsigned argNum = 0; ssize_t minValue = 0; ssize_t maxValue = 0; - if (call->IsSpecialIntrinsic(this, NI_System_Buffer_Memmove)) + if (call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove)) { // dst(0), src(1), len(2) argNum = 2; @@ -2761,7 +2761,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, betterToExpand = true; break; - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: case NI_System_SpanHelpers_SequenceEqual: // We're going to instrument these betterToExpand = opts.IsInstrumented(); @@ -3983,7 +3983,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8: case NI_System_SpanHelpers_SequenceEqual: case NI_System_SpanHelpers_ClearWithoutReferences: - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: { if (sig->sigInst.methInstCount == 0) { @@ -8874,13 +8874,6 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) result = NI_System_BitConverter_Int64BitsToDouble; } } - else if (strcmp(className, "Buffer") == 0) - { - if (strcmp(methodName, "Memmove") == 0) - { - result = NI_System_Buffer_Memmove; - } - } break; } @@ -9040,6 +9033,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_SpanHelpers_ClearWithoutReferences; } + else if (strcmp(methodName, "Memmove") == 0) + { + result = NI_System_SpanHelpers_Memmove; + } } else if (strcmp(className, "String") == 0) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f9cd17f6510046..79935bffcae23e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2013,7 +2013,7 @@ bool Lowering::LowerCallMemmove(GenTreeCall* call, GenTree** next) { JITDUMP("Considering Memmove [%06d] for unrolling.. 
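The importer change above now folds the init value first (`gtFoldExpr`) so that `initblk` with a provably-zero value can take the cheaper MEMZERO path; `cpblk` keeps going to MEMCPY, and volatile blocks are still fenced with a full barrier before and a load barrier after the call. The decision table, as a small C# sketch (enum and method names illustrative):

```csharp
enum BlockHelper { MemSet, MemZero, MemCpy }

static class InitBlkSelection
{
    // Mirrors the new importer logic: CEE_CPBLK -> MEMCPY; CEE_INITBLK picks
    // MEMZERO when the folded value is a constant zero, otherwise MEMSET.
    public static BlockHelper Choose(bool isInitBlk, bool valueIsConstZero)
        => !isInitBlk ? BlockHelper.MemCpy
         : valueIsConstZero ? BlockHelper.MemZero
         : BlockHelper.MemSet;
}
```

The same selection shows up again in Lowering::LowerBlockStoreAsHelperCall further down, where the data argument is dropped entirely on the MEMZERO path.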
", comp->dspTreeID(call)) assert(call->IsHelperCall(comp, CORINFO_HELP_MEMCPY) || - (comp->lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Buffer_Memmove)); + (comp->lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_SpanHelpers_Memmove)); assert(call->gtArgs.CountUserArgs() == 3); @@ -2374,7 +2374,7 @@ GenTree* Lowering::LowerCall(GenTree* node) { switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd)) { - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: if (LowerCallMemmove(call, &nextNode)) { return nextNode; @@ -8125,7 +8125,18 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode) GenTree* dataPlaceholder = comp->gtNewZeroConNode(genActualType(data)); GenTree* sizePlaceholder = comp->gtNewZeroConNode(genActualType(size)); - GenTreeCall* call = comp->gtNewHelperCallNode(helper, TYP_VOID, destPlaceholder, dataPlaceholder, sizePlaceholder); + const bool isMemzero = helper == CORINFO_HELP_MEMSET ? data->IsIntegralConst(0) : false; + + GenTreeCall* call; + if (isMemzero) + { + BlockRange().Remove(data); + call = comp->gtNewHelperCallNode(CORINFO_HELP_MEMZERO, TYP_VOID, destPlaceholder, sizePlaceholder); + } + else + { + call = comp->gtNewHelperCallNode(helper, TYP_VOID, destPlaceholder, dataPlaceholder, sizePlaceholder); + } comp->fgMorphArgs(call); LIR::Range range = LIR::SeqTree(comp, call); @@ -8136,18 +8147,22 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode) blkNode->gtBashToNOP(); LIR::Use destUse; - LIR::Use dataUse; LIR::Use sizeUse; BlockRange().TryGetUse(destPlaceholder, &destUse); - BlockRange().TryGetUse(dataPlaceholder, &dataUse); BlockRange().TryGetUse(sizePlaceholder, &sizeUse); destUse.ReplaceWith(dest); - dataUse.ReplaceWith(data); sizeUse.ReplaceWith(size); destPlaceholder->SetUnusedValue(); - dataPlaceholder->SetUnusedValue(); sizePlaceholder->SetUnusedValue(); + if (!isMemzero) + { + LIR::Use dataUse; + BlockRange().TryGetUse(dataPlaceholder, &dataUse); + dataUse.ReplaceWith(data); + dataPlaceholder->SetUnusedValue(); + } + LowerRange(rangeStart, rangeEnd); // Finally move all GT_PUTARG_* nodes @@ -8155,8 +8170,11 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode) MoveCFGCallArgs(call); BlockRange().Remove(destPlaceholder); - BlockRange().Remove(dataPlaceholder); BlockRange().Remove(sizePlaceholder); + if (!isMemzero) + { + BlockRange().Remove(dataPlaceholder); + } // Wrap with memory barriers on weak memory models // if the block store was volatile diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3b9ec7f388aec1..df1af6a419a3ec 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2481,7 +2481,7 @@ void LinearScan::buildIntervals() killed = RBM_EDI | RBM_ECX | RBM_EAX; #else // Poisoning uses REG_SCRATCH for small vars and memset helper for big vars. 
- killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET); + killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET); #endif addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true); currentLoc += 2; diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index a68b88f06a4502..9fa128c38f74ee 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -20,7 +20,7 @@ enum NamedIntrinsic : unsigned short NI_System_BitConverter_Int64BitsToDouble, NI_System_BitConverter_SingleToInt32Bits, - NI_System_Buffer_Memmove, + NI_System_SpanHelpers_Memmove, NI_SYSTEM_MATH_START, NI_System_Math_Abs, diff --git a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h index 750faccc828383..6a3b24a3944870 100644 --- a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h +++ b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h @@ -12,7 +12,7 @@ struct ReadyToRunHeaderConstants static const uint32_t Signature = 0x00525452; // 'RTR' static const uint32_t CurrentMajorVersion = 9; - static const uint32_t CurrentMinorVersion = 1; + static const uint32_t CurrentMinorVersion = 2; }; struct ReadyToRunHeader diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs index 64ba6597446a81..61f70e212483c1 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs @@ -494,7 +494,7 @@ private static unsafe void CopyImplValueTypeArrayNoInnerGcRefs(Array sourceArray // Copy scenario: ValueType-array to value-type array with no embedded gc-refs. nuint elementSize = sourceArray.ElementSize; - Buffer.Memmove( + SpanHelpers.Memmove( ref Unsafe.AddByteOffset(ref MemoryMarshal.GetArrayDataReference(destinationArray), (nuint)destinationIndex * elementSize), ref Unsafe.AddByteOffset(ref MemoryMarshal.GetArrayDataReference(sourceArray), (nuint)sourceIndex * elementSize), elementSize * (nuint)length); @@ -534,7 +534,7 @@ private static unsafe void CopyImplPrimitiveTypeWithWidening(Array sourceArray, if (sourceElementType == destElementType) { // Multidim arrays and enum->int copies can still reach this path. 
- Buffer.Memmove(ref *data, ref *srcData, (nuint)length * srcElementSize); + SpanHelpers.Memmove(ref *data, ref *srcData, (nuint)length * srcElementSize); return; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs index 727fbc9fbfdd2b..9f8dbe11a212eb 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs @@ -41,7 +41,7 @@ protected internal unsafe object MemberwiseClone() if (this.GetMethodTable()->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); return clone; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs index a3ccfc5a8c431f..490997c1da90f5 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs @@ -93,7 +93,7 @@ internal static unsafe void PtrToStructureImpl(IntPtr ptr, object structure) { nuint size = (nuint)RuntimeInteropData.GetStructUnsafeStructSize(structureTypeHandle); - Buffer.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size); + SpanHelpers.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size); } } @@ -180,7 +180,7 @@ public static unsafe void StructureToPtr(object structure, IntPtr ptr, bool fDel { nuint size = (nuint)RuntimeInteropData.GetStructUnsafeStructSize(structureTypeHandle); - Buffer.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size); + SpanHelpers.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size); } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs index 01071442f962b6..6fc5d9542e1609 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs @@ -16,7 +16,7 @@ internal struct ReadyToRunHeaderConstants public const uint Signature = 0x00525452; // 'RTR' public const ushort CurrentMajorVersion = 9; - public const ushort CurrentMinorVersion = 1; + public const ushort CurrentMinorVersion = 2; } #if READYTORUN #pragma warning disable 0169 diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index 63383b7ddfa679..a37945534865bf 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -239,7 +239,9 @@ public enum ReadyToRunHelper Stelem_Ref = 0x38, Ldelema_Ref = 0x39, - MemSet = 0x40, + MemZero = 0x3E, + MemSet = 0x3F, + NativeMemSet = 0x40, MemCpy = 0x41, // P/Invoke support diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 2a9dbe302dac2e..5346806c1aff60 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -214,7 +214,10 @@ which is the right helper to use to allocate an object of a given type. 
*/ CORINFO_HELP_INIT_PINVOKE_FRAME, // initialize an inlined PInvoke Frame for the JIT-compiler CORINFO_HELP_MEMSET, // Init block of memory + CORINFO_HELP_MEMZERO, // Init block of memory with zeroes CORINFO_HELP_MEMCPY, // Copy block of memory + CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, + // not safe for unbounded size, does not trigger GC) CORINFO_HELP_RUNTIMEHANDLE_METHOD, // determine a type/field/method handle at run-time CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG, // determine a type/field/method handle at run-time, with IBC logging diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 8bcf658230b08a..1845b5ce7848c6 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -139,10 +139,16 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, break; case ReadyToRunHelper.MemCpy: - mangledName = "memcpy"; // TODO: Null reference handling + mangledName = "RhSpanHelpers_MemCopy"; break; case ReadyToRunHelper.MemSet: - mangledName = "memset"; // TODO: Null reference handling + mangledName = "RhSpanHelpers_MemSet"; + break; + case ReadyToRunHelper.MemZero: + mangledName = "RhSpanHelpers_MemZero"; + break; + case ReadyToRunHelper.NativeMemSet: + mangledName = "memset"; break; case ReadyToRunHelper.GetRuntimeTypeHandle: diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index f7527c96dd9ddb..ad83b1eb42a5d6 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -1028,9 +1028,15 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_MEMSET: id = ReadyToRunHelper.MemSet; break; + case CorInfoHelpFunc.CORINFO_HELP_MEMZERO: + id = ReadyToRunHelper.MemZero; + break; case CorInfoHelpFunc.CORINFO_HELP_MEMCPY: id = ReadyToRunHelper.MemCpy; break; + case CorInfoHelpFunc.CORINFO_HELP_NATIVE_MEMSET: + id = ReadyToRunHelper.NativeMemSet; + break; case CorInfoHelpFunc.CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD: id = ReadyToRunHelper.GetRuntimeMethodHandle; diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index 8d325f467d600f..0eae2f10cb8f00 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -1685,10 +1685,18 @@ private void ParseHelper(StringBuilder builder) builder.Append("MEM_SET"); break; + case ReadyToRunHelper.MemZero: + builder.Append("MEM_ZERO"); + break; + case ReadyToRunHelper.MemCpy: builder.Append("MEM_CPY"); break; + case ReadyToRunHelper.NativeMemSet: + builder.Append("NATIVE_MEM_SET"); + break; + // PInvoke helpers case ReadyToRunHelper.PInvokeBegin: builder.Append("PINVOKE_BEGIN"); diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 4495e0322d4a78..8755580e3f2903 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ 
b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -542,9 +542,15 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_MEMSET: id = ReadyToRunHelper.MemSet; break; + case CorInfoHelpFunc.CORINFO_HELP_MEMZERO: + id = ReadyToRunHelper.MemZero; + break; case CorInfoHelpFunc.CORINFO_HELP_MEMCPY: id = ReadyToRunHelper.MemCpy; break; + case CorInfoHelpFunc.CORINFO_HELP_NATIVE_MEMSET: + id = ReadyToRunHelper.NativeMemSet; + break; case CorInfoHelpFunc.CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE: id = ReadyToRunHelper.GetRuntimeType; diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index eb00b7c6a6757d..ccd8bc35c8bf4e 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -636,7 +636,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/AsmHelpers.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerAMD64.asm ${ARCH_SOURCES_DIR}/ComCallPreStub.asm - ${ARCH_SOURCES_DIR}/CrtHelpers.asm ${ARCH_SOURCES_DIR}/GenericComCallStubs.asm ${ARCH_SOURCES_DIR}/GenericComPlusCallStubs.asm ${ARCH_SOURCES_DIR}/getstate.asm @@ -676,7 +675,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/AsmHelpers.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm - ${ARCH_SOURCES_DIR}/CrtHelpers.asm ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm @@ -693,7 +691,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S ${ARCH_SOURCES_DIR}/getstate.S ${ARCH_SOURCES_DIR}/jithelpers_fast.S @@ -723,7 +720,6 @@ else(CLR_CMAKE_TARGET_WIN32) elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/ehhelpers.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S @@ -733,7 +729,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S @@ -742,7 +737,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerloongarch64.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) @@ -750,7 +744,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerriscv64.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) diff --git a/src/coreclr/vm/amd64/CrtHelpers.asm b/src/coreclr/vm/amd64/CrtHelpers.asm deleted file mode 100644 index 09f48fa5879bd1..00000000000000 --- a/src/coreclr/vm/amd64/CrtHelpers.asm +++ /dev/null @@ -1,79 +0,0 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. - -include AsmMacros.inc - -extern memset:proc -extern memmove:proc - -; JIT_MemSet/JIT_MemCpy -; -; It is IMPORTANT that the exception handling code is able to find these guys -; on the stack, but on windows platforms we can just defer to the platform -; implementation. 
-; - -; void JIT_MemSet(void* dest, int c, size_t count) -; -; Purpose: -; Sets the first "count" bytes of the block of memory pointed byte -; "dest" to the specified value (interpreted as an unsigned char). -; -; Entry: -; RCX: void* dest - Pointer to the block of memory to fill. -; RDX: int c - Value to be set. -; R8: size_t count - Number of bytes to be set to the value. -; -; Exit: -; -; Uses: -; -; Exceptions: -; -LEAF_ENTRY JIT_MemSet, _TEXT - test r8, r8 ; check if count is zero - jz Exit_MemSet ; if zero, no bytes to set - - cmp byte ptr [rcx], 0 ; check dest for null - - jmp memset ; forward to the CRT implementation - -Exit_MemSet: - ret - -LEAF_END_MARKED JIT_MemSet, _TEXT - -; void JIT_MemCpy(void* dest, const void* src, size_t count) -; -; Purpose: -; Copies the values of "count" bytes from the location pointed to -; by "src" to the memory block pointed by "dest". -; -; Entry: -; RCX: void* dest - Pointer to the destination array where content is to be copied. -; RDX: const void* src - Pointer to the source of the data to be copied. -; R8: size_t count - Number of bytes to copy. -; -; Exit: -; -; Uses: -; -; Exceptions: -; -LEAF_ENTRY JIT_MemCpy, _TEXT - test r8, r8 ; check if count is zero - jz Exit_MemCpy ; if zero, no bytes to copy - - cmp byte ptr [rcx], 0 ; check dest for null - cmp byte ptr [rdx], 0 ; check src for null - - ; Use memmove to handle overlapping buffers for better - ; compatibility with .NET Framework. Needing to handle - ; overlapping buffers in cpblk is undefined by the spec. - jmp memmove ; forward to the CRT implementation - -Exit_MemCpy: - ret - -LEAF_END_MARKED JIT_MemCpy, _TEXT - end diff --git a/src/coreclr/vm/amd64/crthelpers.S b/src/coreclr/vm/amd64/crthelpers.S deleted file mode 100644 index 82219e574092da..00000000000000 --- a/src/coreclr/vm/amd64/crthelpers.S +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -.intel_syntax noprefix -#include "unixasmmacros.inc" -#include "asmconstants.h" - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORTANT that the exception handling code is able to find these guys -// on the stack, but on non-windows platforms we can just defer to the platform -// implementation. -// - -// void JIT_MemSet(void* dest, int c, size_t count) -// -// Purpose: -// Sets the first "count" bytes of the block of memory pointed byte -// "dest" to the specified value (interpreted as an unsigned char). -// -// Entry: -// RDI: void* dest - Pointer to the block of memory to fill. -// RSI: int c - Value to be set. -// RDX: size_t count - Number of bytes to be set to the value. -// -// Exit: -// -// Uses: -// -// Exceptions: -// -LEAF_ENTRY JIT_MemSet, _TEXT - test rdx, rdx // check if count is zero - jz Exit_MemSet // if zero, no bytes to set - - cmp byte ptr [rdi], 0 // check dest for null - - jmp C_PLTFUNC(memset) // forward to the CRT implementation - -Exit_MemSet: - ret - -LEAF_END_MARKED JIT_MemSet, _TEXT - -// void JIT_MemCpy(void* dest, const void* src, size_t count) -// -// Purpose: -// Copies the values of "count" bytes from the location pointed to -// by "src" to the memory block pointed by "dest". -// -// Entry: -// RDI: void* dest - Pointer to the destination array where content is to be copied. -// RSI: const void* src - Pointer to the source of the data to be copied. -// RDX: size_t count - Number of bytes to copy. 
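For context on these stub deletions: the probe loads (`cmp byte ptr [rcx], 0` here, `ldrb wzr, [x0]` on arm64) were the whole point of the stubs. They force a null input to fault at a well-known helper address that the EH code (the removed `IsIPInMarkedJitHelper` ranges) could map to a NullReferenceException before tail-calling the CRT; this matches the "EH Correctness" paragraph removed from guide-for-porting.md above. With MEMSET/MEMCPY now resolving to managed SpanHelpers code, the managed null check provides that behavior for free. The old trick, sketched in C# (illustrative):

```csharp
using System;

static unsafe class ProbeSketch
{
    // Spirit of the deleted JIT_MemCpy: touch both pointers once so a null
    // input faults inside a recognized helper range, then defer to the CRT.
    public static void MemCpy(byte* dest, byte* src, nuint count)
    {
        if (count == 0)
            return;
        _ = *dest; // faults here if dest is null
        _ = *src;  // faults here if src is null
        Buffer.MemoryCopy(src, dest, count, count); // stands in for memmove
    }
}
```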
-// -// Exit: -// -// Uses: -// -// Exceptions: -// -LEAF_ENTRY JIT_MemCpy, _TEXT - test rdx, rdx // check if count is zero - jz Exit_MemCpy // if zero, no bytes to set - - cmp byte ptr [rdi], 0 // check dest for null - cmp byte ptr [rsi], 0 // check src for null - - jmp C_PLTFUNC(memcpy) // forward to the CRT implementation - -Exit_MemCpy: - ret - -LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index feafd1f8abad6d..bb5d3d17e00534 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -1352,7 +1352,7 @@ void SystemDomain::LoadBaseSystemClasses() // further loading of nonprimitive types may need casting support. // initialize cast cache here. CastCache::Initialize(); - ECall::PopulateManagedCastHelpers(); + ECall::PopulateManagedHelpers(); // used by IsImplicitInterfaceOfSZArray CoreLibBinder::GetClass(CLASS__IENUMERABLEGENERIC); diff --git a/src/coreclr/vm/arm/crthelpers.S b/src/coreclr/vm/arm/crthelpers.S deleted file mode 100644 index db0ed192c4d60f..00000000000000 --- a/src/coreclr/vm/arm/crthelpers.S +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// *********************************************************************** -// File: crthelpers.S -// -// *********************************************************************** - -#include "unixasmmacros.inc" -#include "asmconstants.h" - -.syntax unified -.thumb - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORANT that the exception handling code is able to find these guys -// on the stack, but to keep them from being tailcalled by VC++ we need to turn -// off optimization and it ends up being a wasteful implementation. -// -// Hence these assembly helpers. -// -//EXTERN_C void __stdcall JIT_MemSet(void* _dest, int c, size_t count) -LEAF_ENTRY JIT_MemSet, _TEXT - - cmp r2, #0 - it eq - bxeq lr - - ldrb r3, [r0] - - b C_PLTFUNC(memset) - -LEAF_END_MARKED JIT_MemSet, _TEXT - - -//EXTERN_C void __stdcall JIT_MemCpy(void* _dest, const void *_src, size_t count) -LEAF_ENTRY JIT_MemCpy, _TEXT -// - - cmp r2, #0 - it eq - bxeq lr - - ldrb r3, [r0] - ldrb r3, [r1] - - b C_PLTFUNC(memcpy) - -LEAF_END_MARKED JIT_MemCpy, _TEXT - diff --git a/src/coreclr/vm/arm64/crthelpers.S b/src/coreclr/vm/arm64/crthelpers.S deleted file mode 100644 index e123fc82808d16..00000000000000 --- a/src/coreclr/vm/arm64/crthelpers.S +++ /dev/null @@ -1,33 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "unixasmmacros.inc" - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORTANT that the exception handling code is able to find these guys -// on the stack, but on non-windows platforms we can just defer to the platform -// implementation. 
-// -LEAF_ENTRY JIT_MemSet, _TEXT - cbz x2, LOCAL_LABEL(JIT_MemSet_ret) - - ldrb wzr, [x0] - - b C_PLTFUNC(memset) - -LOCAL_LABEL(JIT_MemSet_ret): - ret lr -LEAF_END_MARKED JIT_MemSet, _TEXT - -LEAF_ENTRY JIT_MemCpy, _TEXT - cbz x2, LOCAL_LABEL(JIT_MemCpy_ret) - - ldrb wzr, [x0] - ldrb wzr, [x1] - - b C_PLTFUNC(memcpy) - -LOCAL_LABEL(JIT_MemCpy_ret): - ret lr -LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/coreclr/vm/arm64/crthelpers.asm b/src/coreclr/vm/arm64/crthelpers.asm deleted file mode 100644 index d4d13351365c95..00000000000000 --- a/src/coreclr/vm/arm64/crthelpers.asm +++ /dev/null @@ -1,81 +0,0 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. - -#include "ksarm64.h" -#include "asmconstants.h" -#include "asmmacros.h" - - IMPORT memset - IMPORT memmove - -; JIT_MemSet/JIT_MemCpy -; -; It is IMPORTANT that the exception handling code is able to find these guys -; on the stack, but on windows platforms we can just defer to the platform -; implementation. -; - -; void JIT_MemSet(void* dest, int c, size_t count) -; -; Purpose: -; Sets the first "count" bytes of the block of memory pointed byte -; "dest" to the specified value (interpreted as an unsigned char). -; -; Entry: -; RCX: void* dest - Pointer to the block of memory to fill. -; RDX: int c - Value to be set. -; R8: size_t count - Number of bytes to be set to the value. -; -; Exit: -; -; Uses: -; -; Exceptions: -; - - TEXTAREA - - LEAF_ENTRY JIT_MemSet - cbz x2, JIT_MemSet_ret ; check if count is zero, no bytes to set - - ldrb wzr, [x0] ; check dest for null - - b memset ; forward to the CRT implementation - -JIT_MemSet_ret - ret lr - - LEAF_END_MARKED JIT_MemSet - -; void JIT_MemCpy(void* dest, const void* src, size_t count) -; -; Purpose: -; Copies the values of "count" bytes from the location pointed to -; by "src" to the memory block pointed by "dest". -; -; Entry: -; RCX: void* dest - Pointer to the destination array where content is to be copied. -; RDX: const void* src - Pointer to the source of the data to be copied. -; R8: size_t count - Number of bytes to copy. -; -; Exit: -; -; Uses: -; -; Exceptions: -; - LEAF_ENTRY JIT_MemCpy - cbz x2, JIT_MemCpy_ret ; check if count is zero, no bytes to set - - ldrb wzr, [x0] ; check dest for null - ldrb wzr, [x1] ; check src for null - - b memmove ; forward to the CRT implementation - -JIT_MemCpy_ret - ret lr - - LEAF_END_MARKED JIT_MemCpy - -; Must be at very end of file - END diff --git a/src/coreclr/vm/callcounting.cpp b/src/coreclr/vm/callcounting.cpp index c464949f7aeee0..e7c70dccf40964 100644 --- a/src/coreclr/vm/callcounting.cpp +++ b/src/coreclr/vm/callcounting.cpp @@ -660,7 +660,7 @@ bool CallCountingManager::SetCodeEntryPoint( CallCount callCountThreshold = g_pConfig->TieredCompilation_CallCountThreshold(); _ASSERTE(callCountThreshold != 0); - // Let's tier up all cast helpers faster than other methods. This is because we want to import them as + // Let's tier up all cast and runtime helpers faster than other methods. This is because we want to import them as // direct calls in codegen and they need to be promoted earlier than their callers. 
if (methodDesc->GetMethodTable() == g_pCastHelpers) { diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index 8e68900686a7e0..c52c58954165a2 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -633,6 +633,11 @@ DEFINE_METHOD(RUNTIME_HELPERS, ALLOC_TAILCALL_ARG_BUFFER, AllocTailCallArgB DEFINE_METHOD(RUNTIME_HELPERS, GET_TAILCALL_INFO, GetTailCallInfo, NoSig) DEFINE_METHOD(RUNTIME_HELPERS, DISPATCH_TAILCALLS, DispatchTailCalls, NoSig) +DEFINE_CLASS(SPAN_HELPERS, System, SpanHelpers) +DEFINE_METHOD(SPAN_HELPERS, MEMSET, Fill, SM_RefByte_Byte_UIntPtr_RetVoid) +DEFINE_METHOD(SPAN_HELPERS, MEMZERO, ClearWithoutReferences, SM_RefByte_UIntPtr_RetVoid) +DEFINE_METHOD(SPAN_HELPERS, MEMCOPY, Memmove, SM_RefByte_RefByte_UIntPtr_RetVoid) + DEFINE_CLASS(UNSAFE, CompilerServices, Unsafe) DEFINE_METHOD(UNSAFE, AS_POINTER, AsPointer, NoSig) DEFINE_METHOD(UNSAFE, BYREF_IS_NULL, IsNullRef, NoSig) diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp index 37ac50d124f6f6..7a9538d8ea7dd9 100644 --- a/src/coreclr/vm/ecall.cpp +++ b/src/coreclr/vm/ecall.cpp @@ -96,7 +96,7 @@ void ECall::PopulateManagedStringConstructors() INDEBUG(fInitialized = true); } -void ECall::PopulateManagedCastHelpers() +void ECall::PopulateManagedHelpers() { STANDARD_VM_CONTRACT; @@ -144,6 +144,18 @@ void ECall::PopulateManagedCastHelpers() pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__CASTHELPERS__LDELEMAREF)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_LDELEMA_REF, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMSET)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_MEMSET, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMZERO)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_MEMZERO, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMCOPY)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_MEMCPY, pDest); } static CrstStatic gFCallLock; diff --git a/src/coreclr/vm/ecall.h b/src/coreclr/vm/ecall.h index bc9d63ae467137..792eea633e8f7a 100644 --- a/src/coreclr/vm/ecall.h +++ b/src/coreclr/vm/ecall.h @@ -94,7 +94,7 @@ class ECall static void PopulateManagedStringConstructors(); - static void PopulateManagedCastHelpers(); + static void PopulateManagedHelpers(); #ifdef DACCESS_COMPILE // Enumerates all gFCallMethods for minidumps. 
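Tying the corelib.h, metasig.h, and ecall.cpp pieces together: at startup the VM resolves three SpanHelpers methods and installs them (via SetJitHelperFunction) as the code behind the now-dynamic helpers. The new metasig shorthand decodes to these C# signatures, assuming the usual metasig key (r(b) = ref byte, b = byte, U = nuint, v = void):

```csharp
static class SpanHelperSignatures
{
    // CORINFO_HELP_MEMCPY  -> SM_RefByte_RefByte_UIntPtr_RetVoid
    static void Memmove(ref byte dest, ref byte src, nuint len) { /* SpanHelpers.Memmove */ }

    // CORINFO_HELP_MEMSET  -> SM_RefByte_Byte_UIntPtr_RetVoid
    static void Fill(ref byte dest, byte value, nuint len) { /* SpanHelpers.Fill */ }

    // CORINFO_HELP_MEMZERO -> SM_RefByte_UIntPtr_RetVoid
    static void ClearWithoutReferences(ref byte dest, nuint len) { /* SpanHelpers.ClearWithoutReferences */ }
}
```

This is also why the callcounting.cpp comment above now says "cast and runtime helpers": like the cast helpers, these methods are imported as direct calls and need to tier up ahead of their callers.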
diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index bc09fd73a469d4..190006e07eb175 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6292,9 +6292,6 @@ EXTERN_C void JIT_StackProbe_End(); #ifdef FEATURE_EH_FUNCLETS #ifndef TARGET_X86 -EXTERN_C void JIT_MemSet_End(); -EXTERN_C void JIT_MemCpy_End(); - EXTERN_C void JIT_WriteBarrier_End(); EXTERN_C void JIT_CheckedWriteBarrier_End(); EXTERN_C void JIT_ByRefWriteBarrier_End(); @@ -6345,9 +6342,6 @@ bool IsIPInMarkedJitHelper(UINT_PTR uControlPc) if (GetEEFuncEntryPoint(name) <= uControlPc && uControlPc < GetEEFuncEntryPoint(name##_End)) return true; #ifndef TARGET_X86 - CHECK_RANGE(JIT_MemSet) - CHECK_RANGE(JIT_MemCpy) - CHECK_RANGE(JIT_WriteBarrier) CHECK_RANGE(JIT_CheckedWriteBarrier) CHECK_RANGE(JIT_ByRefWriteBarrier) diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 4cf5bb3ad4b018..18dab19a271873 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ -5451,7 +5451,7 @@ BOOL HandleHardwareException(PAL_SEHException* ex) if (ex->GetExceptionRecord()->ExceptionCode != STATUS_BREAKPOINT && ex->GetExceptionRecord()->ExceptionCode != STATUS_SINGLE_STEP) { // A hardware exception is handled only if it happened in a jitted code or - // in one of the JIT helper functions (JIT_MemSet, ...) + // in one of the JIT helper functions PCODE controlPc = GetIP(ex->GetContextRecord()); if (ExecutionManager::IsManagedCode(controlPc) && IsGcMarker(ex->GetContextRecord(), ex->GetExceptionRecord())) { diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 450752ae367789..1da02114960293 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -3596,6 +3596,14 @@ NOINLINE HCIMPL3(CORINFO_MethodPtr, JIT_VirtualFunctionPointer_Framed, Object * } HCIMPLEND +HCIMPL3(void, Jit_NativeMemSet, void* pDest, int value, size_t length) +{ + _ASSERTE(pDest != nullptr); + FCALL_CONTRACT; + memset(pDest, value, length); +} +HCIMPLEND + HCIMPL1(Object*, JIT_GetRuntimeFieldStub, CORINFO_FIELD_HANDLE field) { FCALL_CONTRACT; diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index aa756361340b6e..5e6b0cbeeafdd1 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -10689,7 +10689,10 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTCLASS_SPECIAL || dynamicFtnNum == DYNAMIC_CORINFO_HELP_UNBOX || dynamicFtnNum == DYNAMIC_CORINFO_HELP_ARRADDR_ST || - dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF) + dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMSET || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMZERO || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMCPY) { Precode* pPrecode = Precode::GetPrecodeFromEntryPoint((PCODE)hlpDynamicFuncTable[dynamicFtnNum].pfnHelper); _ASSERTE(pPrecode->GetType() == PRECODE_FIXUP); diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 63666b46552cf9..bbca5c355fbb97 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -400,9 +400,6 @@ extern "C" #endif // TARGET_AMD64 || TARGET_ARM - void STDCALL JIT_MemSet(void *dest, int c, SIZE_T count); - void STDCALL JIT_MemCpy(void *dest, const void *src, SIZE_T count); - void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle); #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && 
!defined(TARGET_RISCV64) void STDCALL JIT_StackProbe(); diff --git a/src/coreclr/vm/loongarch64/crthelpers.S b/src/coreclr/vm/loongarch64/crthelpers.S deleted file mode 100644 index 88fd21938fdaa2..00000000000000 --- a/src/coreclr/vm/loongarch64/crthelpers.S +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "unixasmmacros.inc" - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORTANT that the exception handling code is able to find these guys -// on the stack, but on non-windows platforms we can just defer to the platform -// implementation. -// -LEAF_ENTRY JIT_MemSet, _TEXT - beq $a2, $zero, LOCAL_LABEL(JIT_MemSet_ret) - - ld.b $zero, $a0, 0 //Is this really needed ? - - b memset - -LOCAL_LABEL(JIT_MemSet_ret): - jirl $r0, $ra, 0 - -////NOTO: Here must use LEAF_END_MARKED! not LEAF_END !!! -LEAF_END_MARKED JIT_MemSet, _TEXT - -LEAF_ENTRY JIT_MemCpy, _TEXT - beq $a2, $zero, LOCAL_LABEL(JIT_MemCpy_ret) - - ld.b $zero, $a0, 0 - ld.b $zero, $a1, 0 //Is this really needed ? - - b memcpy - -LOCAL_LABEL(JIT_MemCpy_ret): - jirl $r0, $ra, 0 - -////NOTO: Here must use LEAF_END_MARKED! not LEAF_END !!! -LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/coreclr/vm/metasig.h b/src/coreclr/vm/metasig.h index 45cb5700db5293..182acc55e643fe 100644 --- a/src/coreclr/vm/metasig.h +++ b/src/coreclr/vm/metasig.h @@ -237,6 +237,9 @@ DEFINE_METASIG(SM(PtrSByt_RetInt, P(B), i)) DEFINE_METASIG(SM(IntPtr_RetIntPtr, I, I)) DEFINE_METASIG(SM(UIntPtr_RetIntPtr, U, I)) DEFINE_METASIG(SM(PtrByte_PtrByte_Int_RetVoid, P(b) P(b) i, v)) +DEFINE_METASIG(SM(RefByte_RefByte_UIntPtr_RetVoid, r(b) r(b) U, v)) +DEFINE_METASIG(SM(RefByte_Byte_UIntPtr_RetVoid, r(b) b U, v)) +DEFINE_METASIG(SM(RefByte_UIntPtr_RetVoid, r(b) U, v)) DEFINE_METASIG(SM(PtrVoid_Byte_UInt_RetVoid, P(v) b K, v)) DEFINE_METASIG(SM(RefObj_IntPtr_RetVoid, r(j) I, v)) DEFINE_METASIG(SM(RefObj_RefIntPtr_RetVoid, r(j) r(I), v)) diff --git a/src/coreclr/vm/riscv64/crthelpers.S b/src/coreclr/vm/riscv64/crthelpers.S deleted file mode 100644 index 3151387b3cafd3..00000000000000 --- a/src/coreclr/vm/riscv64/crthelpers.S +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "unixasmmacros.inc" - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORTANT that the exception handling code is able to find these guys -// on the stack, but on non-windows platforms we can just defer to the platform -// implementation. -// -LEAF_ENTRY JIT_MemSet, _TEXT - beq a2, zero, LOCAL_LABEL(JIT_MemSet_ret) - - lb zero, 0(a0) // Is this really needed ? - - tail memset - -LOCAL_LABEL(JIT_MemSet_ret): - ret -LEAF_END_MARKED JIT_MemSet, _TEXT - -////NOTE: Here must use LEAF_END_MARKED! not LEAF_END !!! -LEAF_ENTRY JIT_MemCpy, _TEXT - beq a2, zero, LOCAL_LABEL(JIT_MemCpy_ret) - - lb zero, 0(a0) - lb zero, 0(a1) // Is this really needed ? - - tail memcpy - -LOCAL_LABEL(JIT_MemCpy_ret): - ret - -////NOTE: Here must use LEAF_END_MARKED! not LEAF_END !!! 
-LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index a73e8247a58e72..671d5a3dec585c 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1118,6 +1118,7 @@ + @@ -2140,7 +2141,6 @@ - @@ -2458,7 +2458,6 @@ - diff --git a/src/libraries/System.Private.CoreLib/src/System/Array.cs b/src/libraries/System.Private.CoreLib/src/System/Array.cs index c21caa8cc1d70a..84bd5ed20eed1f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Array.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Array.cs @@ -58,7 +58,7 @@ public static void Resize([NotNull] ref T[]? array, int newSize) // actually of type U[], where U:T; or that an int[] <-> uint[] or // similar cast has occurred. In any case, since it's always legal // to reinterpret U as T in this scenario (but not necessarily the - // other way around), we can use Buffer.Memmove here. + // other way around), we can use SpanHelpers.Memmove here. T[] newArray = new T[newSize]; Buffer.Memmove( @@ -377,7 +377,7 @@ public static unsafe void Copy(Array sourceArray, Array destinationArray, int le if (pMT->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); // GC.KeepAlive(sourceArray) not required. pMT kept alive via sourceArray return; @@ -408,7 +408,7 @@ public static unsafe void Copy(Array sourceArray, int sourceIndex, Array destina if (pMT->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); // GC.KeepAlive(sourceArray) not required. pMT kept alive via sourceArray return; diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs deleted file mode 100644 index 008bc9310a2417..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -namespace System -{ - public static partial class Buffer - { -#if TARGET_ARM64 || TARGET_LOONGARCH64 - // Managed code is currently faster than glibc unoptimized memmove - // TODO-ARM64-UNIX-OPT revisit when glibc optimized memmove is in Linux distros - // https://github.com/dotnet/runtime/issues/8897 - private static nuint MemmoveNativeThreshold => nuint.MaxValue; -#elif TARGET_ARM - private const nuint MemmoveNativeThreshold = 512; -#else - private const nuint MemmoveNativeThreshold = 2048; -#endif - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs deleted file mode 100644 index 4dea08790b91a1..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs +++ /dev/null @@ -1,16 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -namespace System -{ - public static partial class Buffer - { -#if TARGET_ARM64 - // Determine optimal value for Windows. 
- // https://github.com/dotnet/runtime/issues/8896 - private static nuint MemmoveNativeThreshold => nuint.MaxValue; -#else - private const nuint MemmoveNativeThreshold = 2048; -#endif - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs index 51ec733aaef590..543bf79beba7ef 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs @@ -1,14 +1,9 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -#if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64 -#define HAS_CUSTOM_BLOCKS -#endif - using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; namespace System { @@ -128,227 +123,16 @@ public static unsafe void MemoryCopy(void* source, void* destination, ulong dest Memmove(ref *(byte*)destination, ref *(byte*)source, checked((nuint)sourceBytesToCopy)); } - [Intrinsic] // Unrolled for small constant lengths - internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len) - { - // P/Invoke into the native version when the buffers are overlapping. - if (((nuint)(nint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)(nint)Unsafe.ByteOffset(ref dest, ref src) < len)) - { - goto BuffersOverlap; - } - - // Use "(IntPtr)(nint)len" to avoid overflow checking on the explicit cast to IntPtr - - ref byte srcEnd = ref Unsafe.Add(ref src, (IntPtr)(nint)len); - ref byte destEnd = ref Unsafe.Add(ref dest, (IntPtr)(nint)len); - - if (len <= 16) - goto MCPY02; - if (len > 64) - goto MCPY05; - - MCPY00: - // Copy bytes which are multiples of 16 and leave the remainder for MCPY01 to handle. 
- Debug.Assert(len > 16 && len <= 64); -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref dest) = Unsafe.As(ref src); // [0,16] -#elif TARGET_64BIT - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); // [0,16] -#else - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); - Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); - Unsafe.As(ref Unsafe.Add(ref dest, 12)) = Unsafe.As(ref Unsafe.Add(ref src, 12)); // [0,16] -#endif - if (len <= 32) - goto MCPY01; -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); // [0,32] -#elif TARGET_64BIT - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); - Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); // [0,32] -#else - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); - Unsafe.As(ref Unsafe.Add(ref dest, 20)) = Unsafe.As(ref Unsafe.Add(ref src, 20)); - Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); - Unsafe.As(ref Unsafe.Add(ref dest, 28)) = Unsafe.As(ref Unsafe.Add(ref src, 28)); // [0,32] -#endif - if (len <= 48) - goto MCPY01; -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); // [0,48] -#elif TARGET_64BIT - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); - Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); // [0,48] -#else - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); - Unsafe.As(ref Unsafe.Add(ref dest, 36)) = Unsafe.As(ref Unsafe.Add(ref src, 36)); - Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); - Unsafe.As(ref Unsafe.Add(ref dest, 44)) = Unsafe.As(ref Unsafe.Add(ref src, 44)); // [0,48] -#endif - - MCPY01: - // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return. - Debug.Assert(len > 16 && len <= 64); -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); -#elif TARGET_64BIT - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); -#else - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -12)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); -#endif - return; - - MCPY02: - // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return. 
- if ((len & 24) == 0) - goto MCPY03; - Debug.Assert(len >= 8 && len <= 16); -#if TARGET_64BIT - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); -#else - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); -#endif - return; - - MCPY03: - // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return. - if ((len & 4) == 0) - goto MCPY04; - Debug.Assert(len >= 4 && len < 8); - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); - return; - - MCPY04: - // Copy the first byte. For pending bytes, do an unconditionally copy of the last 2 bytes and return. - Debug.Assert(len < 4); - if (len == 0) - return; - dest = src; - if ((len & 2) == 0) - return; - Unsafe.As(ref Unsafe.Add(ref destEnd, -2)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -2)); - return; - - MCPY05: - // PInvoke to the native version when the copy length exceeds the threshold. - if (len > MemmoveNativeThreshold) - { - goto PInvoke; - } - -#if HAS_CUSTOM_BLOCKS - if (len >= 256) - { - // Try to opportunistically align the destination below. The input isn't pinned, so the GC - // is free to move the references. We're therefore assuming that reads may still be unaligned. - // - // dest is more important to align than src because an unaligned store is more expensive - // than an unaligned load. - nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref dest) & 63; - Unsafe.As(ref dest) = Unsafe.As(ref src); - src = ref Unsafe.Add(ref src, misalignedElements); - dest = ref Unsafe.Add(ref dest, misalignedElements); - len -= misalignedElements; - } -#endif - - // Copy 64-bytes at a time until the remainder is less than 64. - // If remainder is greater than 16 bytes, then jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return. 
- Debug.Assert(len > 64 && len <= MemmoveNativeThreshold); - nuint n = len >> 6; - - MCPY06: -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref dest) = Unsafe.As(ref src); -#elif TARGET_64BIT - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); - Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); - Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); - Unsafe.As(ref Unsafe.Add(ref dest, 48)) = Unsafe.As(ref Unsafe.Add(ref src, 48)); - Unsafe.As(ref Unsafe.Add(ref dest, 56)) = Unsafe.As(ref Unsafe.Add(ref src, 56)); -#else - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); - Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); - Unsafe.As(ref Unsafe.Add(ref dest, 12)) = Unsafe.As(ref Unsafe.Add(ref src, 12)); - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); - Unsafe.As(ref Unsafe.Add(ref dest, 20)) = Unsafe.As(ref Unsafe.Add(ref src, 20)); - Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); - Unsafe.As(ref Unsafe.Add(ref dest, 28)) = Unsafe.As(ref Unsafe.Add(ref src, 28)); - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); - Unsafe.As(ref Unsafe.Add(ref dest, 36)) = Unsafe.As(ref Unsafe.Add(ref src, 36)); - Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); - Unsafe.As(ref Unsafe.Add(ref dest, 44)) = Unsafe.As(ref Unsafe.Add(ref src, 44)); - Unsafe.As(ref Unsafe.Add(ref dest, 48)) = Unsafe.As(ref Unsafe.Add(ref src, 48)); - Unsafe.As(ref Unsafe.Add(ref dest, 52)) = Unsafe.As(ref Unsafe.Add(ref src, 52)); - Unsafe.As(ref Unsafe.Add(ref dest, 56)) = Unsafe.As(ref Unsafe.Add(ref src, 56)); - Unsafe.As(ref Unsafe.Add(ref dest, 60)) = Unsafe.As(ref Unsafe.Add(ref src, 60)); -#endif - dest = ref Unsafe.Add(ref dest, 64); - src = ref Unsafe.Add(ref src, 64); - n--; - if (n != 0) - goto MCPY06; - - len %= 64; - if (len > 16) - goto MCPY00; -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); -#elif TARGET_64BIT - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); -#else - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -12)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); -#endif - return; - - BuffersOverlap: - // If the buffers overlap perfectly, there's no point to copying the data. - if (Unsafe.AreSame(ref dest, ref src)) - { - return; - } - - PInvoke: - _Memmove(ref dest, ref src, len); - } - // Non-inlinable wrapper around the QCall that avoids polluting the fast path // with P/Invoke prolog/epilog. 
[MethodImpl(MethodImplOptions.NoInlining)] - private static unsafe void _Memmove(ref byte dest, ref byte src, nuint len) + internal static unsafe void _Memmove(ref byte dest, ref byte src, nuint len) { fixed (byte* pDest = &dest) fixed (byte* pSrc = &src) __Memmove(pDest, pSrc, len); } -#if HAS_CUSTOM_BLOCKS - [StructLayout(LayoutKind.Sequential, Size = 16)] - private struct Block16 { } - - [StructLayout(LayoutKind.Sequential, Size = 64)] - private struct Block64 { } -#endif // HAS_CUSTOM_BLOCKS - // Non-inlinable wrapper around the QCall that avoids polluting the fast path // with P/Invoke prolog/epilog. [MethodImpl(MethodImplOptions.NoInlining)] @@ -370,7 +154,7 @@ internal static unsafe void Memmove<T>(ref T destination, ref T source, nuint el if (!RuntimeHelpers.IsReferenceOrContainsReferences<T>()) { // Blittable memmove - Memmove( + SpanHelpers.Memmove( ref Unsafe.As<T, byte>(ref destination), ref Unsafe.As<T, byte>(ref source), elementCount * (nuint)sizeof(T)); diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs b/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs index b1b18a2c343135..68adbf72bc6b96 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs @@ -390,7 +390,7 @@ internal int ReadCore(Span<byte> buffer) try { _buffer.AcquirePointer(ref pointer); - Buffer.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(pointer + pos + _offset), (nuint)nInt); + SpanHelpers.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(pointer + pos + _offset), (nuint)nInt); } finally { @@ -402,7 +402,7 @@ internal int ReadCore(Span<byte> buffer) } else { - Buffer.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(_mem + pos), (nuint)nInt); + SpanHelpers.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(_mem + pos), (nuint)nInt); } } @@ -669,7 +669,7 @@ internal unsafe void WriteCore(ReadOnlySpan<byte> buffer) try { _buffer.AcquirePointer(ref pointer); - Buffer.Memmove(ref *(pointer + pos + _offset), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length); + SpanHelpers.Memmove(ref *(pointer + pos + _offset), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length); } finally { @@ -681,7 +681,7 @@ internal unsafe void WriteCore(ReadOnlySpan<byte> buffer) } else { - Buffer.Memmove(ref *(_mem + pos), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length); + SpanHelpers.Memmove(ref *(_mem + pos), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length); } _position = n; diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs index cf213590851497..38b923764e9764 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs @@ -50,7 +50,7 @@ public static T[] GetSubArray<T>(T[] array, Range range) } // In either case, the newly-allocated array is the exact same type as the - // original incoming array. It's safe for us to Buffer.Memmove the contents + // original incoming array. It's safe for us to SpanHelpers.Memmove the contents // from the source array to the destination array, otherwise the contents wouldn't have been valid for the source array in the first place.
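As a reading aid, every call site touched above follows the same dispatch shape, condensed here into one sketch using the internal CoreLib names from this diff (illustrative only, not part of the change):

```csharp
// Condensed sketch of the dispatch used at the call sites above: GC-visible
// element types must move under the write barrier; anything else can be
// moved as raw, unbarriered bytes.
static unsafe void CopyElements<T>(ref T destination, ref T source, nuint elementCount)
{
#pragma warning disable 8500 // sizeof of managed types, as in the Buffer.Mono.cs change below
    nuint byteCount = elementCount * (nuint)sizeof(T);
#pragma warning restore 8500
    if (RuntimeHelpers.IsReferenceOrContainsReferences<T>())
    {
        Buffer.BulkMoveWithWriteBarrier(
            ref Unsafe.As<T, byte>(ref destination),
            ref Unsafe.As<T, byte>(ref source),
            byteCount);
    }
    else
    {
        SpanHelpers.Memmove(
            ref Unsafe.As<T, byte>(ref destination),
            ref Unsafe.As<T, byte>(ref source),
            byteCount);
    }
}
```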
@@ -125,9 +125,6 @@ internal static bool IsPrimitiveType(this CorElementType et) [Intrinsic] internal static bool IsKnownConstant(char t) => false; - - [Intrinsic] - internal static bool IsKnownConstant(int t) => false; #pragma warning restore IDE0060 } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs index 069d67e5e4621e..7fb4af35480a9b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs @@ -61,7 +61,7 @@ public static unsafe void Clear(void* ptr, nuint byteCount) [CLSCompliant(false)] public static void Copy(void* source, void* destination, nuint byteCount) { - Buffer.Memmove(ref *(byte*)destination, ref *(byte*)source, byteCount); + SpanHelpers.Memmove(ref *(byte*)destination, ref *(byte*)source, byteCount); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs index d35b5dd174fc73..76858298feb217 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs @@ -194,7 +194,7 @@ public T Read<T>(ulong byteOffset) where T : struct { DangerousAddRef(ref mustCallRelease); - Buffer.Memmove(ref Unsafe.As<T, byte>(ref value), ref *ptr, sizeofT); + SpanHelpers.Memmove(ref Unsafe.As<T, byte>(ref value), ref *ptr, sizeofT); } finally { @@ -281,7 +281,7 @@ public void Write<T>(ulong byteOffset, T value) where T : struct { DangerousAddRef(ref mustCallRelease); - Buffer.Memmove(ref *ptr, ref Unsafe.As<T, byte>(ref value), sizeofT); + SpanHelpers.Memmove(ref *ptr, ref Unsafe.As<T, byte>(ref value), sizeofT); } finally { diff --git a/src/libraries/System.Private.CoreLib/src/System/Span.cs b/src/libraries/System.Private.CoreLib/src/System/Span.cs index aaf3763d81b755..1c66a341b0fde1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Span.cs @@ -300,19 +300,7 @@ public unsafe void Clear() [MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe void Fill(T value) { - if (sizeof(T) == 1) - { - // Special-case single-byte types like byte / sbyte / bool. - // The runtime eventually calls memset, which can efficiently support large buffers. - // We don't need to check IsReferenceOrContainsReferences because no references - // can ever be stored in types this small. - Unsafe.InitBlockUnaligned(ref Unsafe.As<T, byte>(ref _reference), *(byte*)&value, (uint)_length); - } - else - { - // Call our optimized workhorse method for all other types. - SpanHelpers.Fill(ref _reference, (uint)_length, value); - } + SpanHelpers.Fill(ref _reference, (uint)_length, value); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs new file mode 100644 index 00000000000000..ed54c495d60d4a --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs @@ -0,0 +1,538 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license.
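+ +// This new file gathers the byte-wise Memmove, ClearWithoutReferences (memzero) and +// Fill (memset) primitives that were previously spread across Buffer.cs, Buffer.Unix.cs, +// Buffer.Windows.cs and SpanHelpers.cs, so CoreCLR, NativeAOT and Mono share one managed +// implementation with explicit thresholds for falling back to the native routines.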
+ +#if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64 +// JIT is guaranteed to unroll blocks up to 64 bytes in size +#define HAS_CUSTOM_BLOCKS +#endif + +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace System +{ + internal static partial class SpanHelpers // .ByteMemOps + { +#if TARGET_ARM64 || TARGET_LOONGARCH64 + private const ulong MemmoveNativeThreshold = ulong.MaxValue; +#elif TARGET_ARM + private const nuint MemmoveNativeThreshold = 512; +#else + private const nuint MemmoveNativeThreshold = 2048; +#endif + // TODO: Determine optimal value + private const nuint ZeroMemoryNativeThreshold = 1024; + + +#if HAS_CUSTOM_BLOCKS + [StructLayout(LayoutKind.Sequential, Size = 16)] + private struct Block16 {} + + [StructLayout(LayoutKind.Sequential, Size = 64)] + private struct Block64 {} +#endif // HAS_CUSTOM_BLOCKS + +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhSpanHelpers_MemCopy")] +#endif + [Intrinsic] // Unrolled for small constant lengths + internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len) + { + // P/Invoke into the native version when the buffers are overlapping. + if ((nuint)Unsafe.ByteOffset(ref src, ref dest) < len || + (nuint)Unsafe.ByteOffset(ref dest, ref src) < len) + { + goto BuffersOverlap; + } + + ref byte srcEnd = ref Unsafe.Add(ref src, len); + ref byte destEnd = ref Unsafe.Add(ref dest, len); + + if (len <= 16) + goto MCPY02; + if (len > 64) + goto MCPY05; + + MCPY00: + // Copy bytes which are multiples of 16 and leave the remainder for MCPY01 to handle. + Debug.Assert(len > 16 && len <= 64); +#if HAS_CUSTOM_BLOCKS + Unsafe.As(ref dest) = Unsafe.As(ref src); // [0,16] +#elif TARGET_64BIT + Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); // [0,16] +#else + Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); + Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); + Unsafe.As(ref Unsafe.Add(ref dest, 12)) = Unsafe.As(ref Unsafe.Add(ref src, 12)); // [0,16] +#endif + if (len <= 32) + goto MCPY01; +#if HAS_CUSTOM_BLOCKS + Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); // [0,32] +#elif TARGET_64BIT + Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); + Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); // [0,32] +#else + Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); + Unsafe.As(ref Unsafe.Add(ref dest, 20)) = Unsafe.As(ref Unsafe.Add(ref src, 20)); + Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); + Unsafe.As(ref Unsafe.Add(ref dest, 28)) = Unsafe.As(ref Unsafe.Add(ref src, 28)); // [0,32] +#endif + if (len <= 48) + goto MCPY01; +#if HAS_CUSTOM_BLOCKS + Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); // [0,48] +#elif TARGET_64BIT + Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); + Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); // [0,48] +#else + Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); + Unsafe.As(ref Unsafe.Add(ref dest, 36)) = Unsafe.As(ref Unsafe.Add(ref src, 36)); + Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 
40)); + Unsafe.As(ref Unsafe.Add(ref dest, 44)) = Unsafe.As(ref Unsafe.Add(ref src, 44)); // [0,48] +#endif + + MCPY01: + // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return. + Debug.Assert(len > 16 && len <= 64); +#if HAS_CUSTOM_BLOCKS + Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); +#elif TARGET_64BIT + Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); +#else + Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -12)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); +#endif + return; + + MCPY02: + // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return. + if ((len & 24) == 0) + goto MCPY03; + Debug.Assert(len >= 8 && len <= 16); +#if TARGET_64BIT + Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); +#else + Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); +#endif + return; + + MCPY03: + // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return. + if ((len & 4) == 0) + goto MCPY04; + Debug.Assert(len >= 4 && len < 8); + Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); + return; + + MCPY04: + // Copy the first byte. For pending bytes, do an unconditionally copy of the last 2 bytes and return. + Debug.Assert(len < 4); + if (len == 0) + return; + dest = src; + if ((len & 2) == 0) + return; + Unsafe.As(ref Unsafe.Add(ref destEnd, -2)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -2)); + return; + + MCPY05: + // PInvoke to the native version when the copy length exceeds the threshold. + if (len > MemmoveNativeThreshold) + { + goto PInvoke; + } + +#if HAS_CUSTOM_BLOCKS + if (len >= 256) + { + // Try to opportunistically align the destination below. The input isn't pinned, so the GC + // is free to move the references. We're therefore assuming that reads may still be unaligned. + // + // dest is more important to align than src because an unaligned store is more expensive + // than an unaligned load. + nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref dest) & 63; + Unsafe.As(ref dest) = Unsafe.As(ref src); + src = ref Unsafe.Add(ref src, misalignedElements); + dest = ref Unsafe.Add(ref dest, misalignedElements); + len -= misalignedElements; + } +#endif + + // Copy 64-bytes at a time until the remainder is less than 64. + // If remainder is greater than 16 bytes, then jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return. 
+ Debug.Assert(len > 64 && len <= MemmoveNativeThreshold); + nuint n = len >> 6; + + MCPY06: +#if HAS_CUSTOM_BLOCKS + Unsafe.As(ref dest) = Unsafe.As(ref src); +#elif TARGET_64BIT + Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); + Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); + Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); + Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); + Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); + Unsafe.As(ref Unsafe.Add(ref dest, 48)) = Unsafe.As(ref Unsafe.Add(ref src, 48)); + Unsafe.As(ref Unsafe.Add(ref dest, 56)) = Unsafe.As(ref Unsafe.Add(ref src, 56)); +#else + Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); + Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); + Unsafe.As(ref Unsafe.Add(ref dest, 12)) = Unsafe.As(ref Unsafe.Add(ref src, 12)); + Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); + Unsafe.As(ref Unsafe.Add(ref dest, 20)) = Unsafe.As(ref Unsafe.Add(ref src, 20)); + Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); + Unsafe.As(ref Unsafe.Add(ref dest, 28)) = Unsafe.As(ref Unsafe.Add(ref src, 28)); + Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); + Unsafe.As(ref Unsafe.Add(ref dest, 36)) = Unsafe.As(ref Unsafe.Add(ref src, 36)); + Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); + Unsafe.As(ref Unsafe.Add(ref dest, 44)) = Unsafe.As(ref Unsafe.Add(ref src, 44)); + Unsafe.As(ref Unsafe.Add(ref dest, 48)) = Unsafe.As(ref Unsafe.Add(ref src, 48)); + Unsafe.As(ref Unsafe.Add(ref dest, 52)) = Unsafe.As(ref Unsafe.Add(ref src, 52)); + Unsafe.As(ref Unsafe.Add(ref dest, 56)) = Unsafe.As(ref Unsafe.Add(ref src, 56)); + Unsafe.As(ref Unsafe.Add(ref dest, 60)) = Unsafe.As(ref Unsafe.Add(ref src, 60)); +#endif + dest = ref Unsafe.Add(ref dest, 64); + src = ref Unsafe.Add(ref src, 64); + n--; + if (n != 0) + goto MCPY06; + + len %= 64; + if (len > 16) + goto MCPY00; +#if HAS_CUSTOM_BLOCKS + Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); +#elif TARGET_64BIT + Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); +#else + Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -12)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); +#endif + return; + + BuffersOverlap: + Debug.Assert(len > 0); + // If the buffers overlap perfectly, there's no point to copying the data. + if (Unsafe.AreSame(ref dest, ref src)) + { + // Both could be null with a non-zero length, perform an implicit null check. 
+ _ = Unsafe.ReadUnaligned(ref dest); + return; + } + + PInvoke: + // Implicit nullchecks + Debug.Assert(len > 0); + _ = Unsafe.ReadUnaligned(ref dest); + _ = Unsafe.ReadUnaligned(ref src); + Buffer._Memmove(ref dest, ref src, len); + } + +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhSpanHelpers_MemZero")] +#endif + [Intrinsic] // Unrolled for small sizes + public static unsafe void ClearWithoutReferences(ref byte dest, nuint len) + { + if (len == 0) + return; + + ref byte destEnd = ref Unsafe.Add(ref dest, len); + + if (len <= 16) + goto MZER02; + if (len > 64) + goto MZER05; + + MZER00: + // Clear bytes which are multiples of 16 and leave the remainder for MZER01 to handle. + Debug.Assert(len > 16 && len <= 64); +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref dest, default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 8), 0); +#else + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 12), 0); +#endif + if (len <= 32) + goto MZER01; +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 24), 0); +#else + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 20), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 28), 0); +#endif + if (len <= 48) + goto MZER01; +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 40), 0); +#else + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 36), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 44), 0); +#endif + + MZER01: + // Unconditionally clear the last 16 bytes using destEnd and return. + Debug.Assert(len > 16 && len <= 64); +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); +#else + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -4), 0); +#endif + return; + + MZER02: + // Clear the first 8 bytes and then unconditionally clear the last 8 bytes and return. + if ((len & 24) == 0) + goto MZER03; + Debug.Assert(len >= 8 && len <= 16); +#if TARGET_64BIT + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); +#else + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -4), 0); +#endif + return; + + MZER03: + // Clear the first 4 bytes and then unconditionally clear the last 4 bytes and return. 
+ if ((len & 4) == 0) + goto MZER04; + Debug.Assert(len >= 4 && len < 8); + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -4), 0); + return; + + MZER04: + // Clear the first byte. For pending bytes, do an unconditionally clear of the last 2 bytes and return. + Debug.Assert(len < 4); + if (len == 0) + return; + dest = 0; + if ((len & 2) == 0) + return; + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -2), 0); + return; + + MZER05: + // PInvoke to the native version when the clear length exceeds the threshold. + if (len > ZeroMemoryNativeThreshold) + { + goto PInvoke; + } + +#if HAS_CUSTOM_BLOCKS + if (len >= 256) + { + // Try to opportunistically align the destination below. The input isn't pinned, so the GC + // is free to move the references. We're therefore assuming that reads may still be unaligned. + nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref dest) & 63; + Unsafe.WriteUnaligned(ref dest, default); + dest = ref Unsafe.Add(ref dest, misalignedElements); + len -= misalignedElements; + } +#endif + // Clear 64-bytes at a time until the remainder is less than 64. + // If remainder is greater than 16 bytes, then jump to MZER00. Otherwise, unconditionally clear the last 16 bytes and return. + Debug.Assert(len > 64 && len <= ZeroMemoryNativeThreshold); + nuint n = len >> 6; + + MZER06: +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref dest, default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 48), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 56), 0); +#else + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 20), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 28), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 36), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 44), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 48), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 52), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 56), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 60), 0); +#endif + dest = ref Unsafe.Add(ref dest, 64); + n--; + if (n != 0) + goto MZER06; + + len %= 64; + if (len > 16) + goto MZER00; +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); +#else + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -4), 0); +#endif + return; + + PInvoke: + // Implicit nullchecks + _ = Unsafe.ReadUnaligned(ref dest); + Buffer._ZeroMemory(ref dest, len); 
+ } + +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhSpanHelpers_MemSet")] +#endif + internal static void Fill(ref byte dest, byte value, nuint len) + { + if (!Vector.IsHardwareAccelerated) + { + goto CannotVectorize; + } + + if (len >= (nuint)Vector<byte>.Count) + { + // We have enough data for at least one vectorized write. + Vector<byte> vector = new(value); + nuint stopLoopAtOffset = len & (nuint)(nint)(2 * (int)-Vector<byte>.Count); // intentional sign extension carries the negative bit + nuint offset = 0; + + // Loop, writing 2 vectors at a time. + // Compare 'len' rather than 'stopLoopAtOffset' because we don't want a dependency + // on the very recently calculated 'stopLoopAtOffset' value. + if (len >= (uint)(2 * Vector<byte>.Count)) + { + do + { + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset + (nuint)Vector<byte>.Count), vector); + offset += (uint)(2 * Vector<byte>.Count); + } while (offset < stopLoopAtOffset); + } + + // At this point, if any data remains to be written, it's strictly less than + // 2 * sizeof(Vector<byte>) bytes. The loop above had us write an even number of vectors. + // If the total byte length instead involves us writing an odd number of vectors, write + // one additional vector now. The bit check below tells us if we're in an "odd vector + // count" situation. + if ((len & (nuint)Vector<byte>.Count) != 0) + { + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector); + } + + // It's possible that some small buffer remains to be populated - something that won't + // fit an entire vector's worth of data. Instead of falling back to a loop, we'll write + // a vector at the very end of the buffer. This may involve overwriting previously + // populated data, which is fine since we're splatting the same value for all entries. + // There's no need to perform a length check here because we already performed this + // check before entering the vectorized code path. + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, len - (nuint)Vector<byte>.Count), vector); + + // And we're done! + return; + } + + CannotVectorize: + + // If we reached this point, we cannot vectorize this T, or there are too few + // elements for us to vectorize. Fall back to an unrolled loop.
+ nuint i = 0; + + // Write 8 elements at a time + if (len >= 8) + { + nuint stopLoopAtOffset = len & ~(nuint)7; + do + { + Unsafe.Add(ref dest, (nint)i + 0) = value; + Unsafe.Add(ref dest, (nint)i + 1) = value; + Unsafe.Add(ref dest, (nint)i + 2) = value; + Unsafe.Add(ref dest, (nint)i + 3) = value; + Unsafe.Add(ref dest, (nint)i + 4) = value; + Unsafe.Add(ref dest, (nint)i + 5) = value; + Unsafe.Add(ref dest, (nint)i + 6) = value; + Unsafe.Add(ref dest, (nint)i + 7) = value; + } while ((i += 8) < stopLoopAtOffset); + } + + // Write next 4 elements if needed + if ((len & 4) != 0) + { + Unsafe.Add(ref dest, (nint)i + 0) = value; + Unsafe.Add(ref dest, (nint)i + 1) = value; + Unsafe.Add(ref dest, (nint)i + 2) = value; + Unsafe.Add(ref dest, (nint)i + 3) = value; + i += 4; + } + + // Write next 2 elements if needed + if ((len & 2) != 0) + { + Unsafe.Add(ref dest, (nint)i + 0) = value; + Unsafe.Add(ref dest, (nint)i + 1) = value; + i += 2; + } + + // Write final element if needed + if ((len & 1) != 0) + { + Unsafe.Add(ref dest, (nint)i) = value; + } + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index ecc1a5f3f3718b..aa7ed473d9feff 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -12,328 +12,6 @@ namespace System { internal static partial class SpanHelpers { - [Intrinsic] // Unrolled for small sizes - public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) - { - if (byteLength == 0) - return; - -#if TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 - // The exact matrix on when ZeroMemory is faster than InitBlockUnaligned is very complex. The factors to consider include - // type of hardware and memory alignment. This threshold was chosen as a good balance across different configurations. - if (byteLength > 768) - goto PInvoke; - Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength); - return; -#else - // TODO: Optimize other platforms to be on par with AMD64 CoreCLR - // Note: It's important that this switch handles lengths at least up to 22. - // See notes below near the main loop for why. - - // The switch will be very fast since it can be implemented using a jump - // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info. 
- - switch (byteLength) - { - case 1: - b = 0; - return; - case 2: - Unsafe.As(ref b) = 0; - return; - case 3: - Unsafe.As(ref b) = 0; - Unsafe.Add(ref b, 2) = 0; - return; - case 4: - Unsafe.As(ref b) = 0; - return; - case 5: - Unsafe.As(ref b) = 0; - Unsafe.Add(ref b, 4) = 0; - return; - case 6: - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - return; - case 7: - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.Add(ref b, 6) = 0; - return; - case 8: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - return; - case 9: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.Add(ref b, 8) = 0; - return; - case 10: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - return; - case 11: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.Add(ref b, 10) = 0; - return; - case 12: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - return; - case 13: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.Add(ref b, 12) = 0; - return; - case 14: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; - return; - case 15: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; - Unsafe.Add(ref b, 14) = 0; - return; - case 16: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - return; - case 17: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.Add(ref b, 16) = 0; - return; - case 18: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - return; - case 19: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.Add(ref b, 18) = 0; - return; - case 20: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref 
Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - return; - case 21: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.Add(ref b, 20) = 0; - return; - case 22: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 20)) = 0; - return; - } - - // P/Invoke into the native version for large lengths - if (byteLength >= 512) goto PInvoke; - - nuint i = 0; // byte offset at which we're copying - - if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0) - { - if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0) - { - b = 0; - i += 1; - if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0) - goto IntAligned; - } - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 2; - } - - IntAligned: - - // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If - // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1 - // bytes to the next aligned address (respectively), so do nothing. On the other hand, - // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until - // we're aligned. - // The thing 1, 2, 3, and 4 have in common that the others don't is that if you - // subtract one from them, their 3rd lsb will not be set. Hence, the below check. - - if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0) - { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 4; - } - - nuint end = byteLength - 16; - byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop - - // We know due to the above switch-case that this loop will always run 1 iteration; max - // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so - // the switch handles lengths 0-22. - Debug.Assert(end >= 7 && i <= end); - - // This is separated out into a different variable, so the i + 16 addition can be - // performed at the start of the pipeline and the loop condition does not have - // a dependency on the writes. - nuint counter; - - do - { - counter = i + 16; - - // This loop looks very costly since there appear to be a bunch of temporary values - // being created with the adds, but the jit (for x86 anyways) will convert each of - // these to use memory addressing operands. - - // So the only cost is a bit of code size, which is made up for by the fact that - // we save on writes to b. 
- -#if TARGET_64BIT - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0; -#else - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 12)) = 0; -#endif - - i = counter; - - // See notes above for why this wasn't used instead - // i += 16; - } - while (counter <= end); - - if ((byteLength & 8) != 0) - { -#if TARGET_64BIT - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; -#else - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0; -#endif - i += 8; - } - if ((byteLength & 4) != 0) - { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 4; - } - if ((byteLength & 2) != 0) - { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 2; - } - if ((byteLength & 1) != 0) - { - Unsafe.AddByteOffset(ref b, i) = 0; - // We're not using i after this, so not needed - // i += 1; - } - - return; -#endif - - PInvoke: - Buffer._ZeroMemory(ref b, byteLength); - } - public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength) { Debug.Assert((int)Unsafe.AsPointer(ref ip) % sizeof(IntPtr) == 0, "Should've been aligned on natural word boundary."); diff --git a/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs index 8f45f602e6fb11..bcb9b6b38f2e39 100644 --- a/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs +++ b/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs @@ -23,7 +23,7 @@ internal static unsafe void Memmove(ref T destination, ref T source, nuint el { #pragma warning disable 8500 // sizeof of managed types // Blittable memmove - Memmove( + SpanHelpers.Memmove( ref Unsafe.As(ref destination), ref Unsafe.As(ref source), elementCount * (nuint)sizeof(T)); diff --git a/src/mono/System.Private.CoreLib/src/System/String.Mono.cs b/src/mono/System.Private.CoreLib/src/System/String.Mono.cs index 7314504aff9a22..7dedf5a6e536d0 100644 --- a/src/mono/System.Private.CoreLib/src/System/String.Mono.cs +++ b/src/mono/System.Private.CoreLib/src/System/String.Mono.cs @@ -116,7 +116,7 @@ private static unsafe void memset(byte* dest, int val, int len) private static unsafe void memcpy(byte* dest, byte* src, int size) { - Buffer.Memmove(ref *dest, ref *src, (nuint)size); + SpanHelpers.Memmove(ref *dest, ref *src, (nuint)size); } /* Used by the runtime */ diff --git a/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs new file mode 100644 index 00000000000000..9d7e4816522c73 --- /dev/null +++ b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
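+ +// Regression test: the JIT's memset/memcpy expansions must preserve implicit null +// checks - a null destination or source raises NullReferenceException instead of +// crashing, and zero-length operations must not dereference the pointers at all.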
+ +using System; +using System.Runtime.CompilerServices; +using Xunit; + +public unsafe class MemsetMemcpyNullref +{ + [Fact] + public static void MemsetMemcpyThrowNullRefonNull() + { + Assert.Throws<NullReferenceException>(() => MemoryInit(null)); + Assert.Throws<NullReferenceException>(() => MemoryCopy(null, null)); + Assert.Throws<NullReferenceException>(() => + { + // Check when only src is null + HugeStruct hs = default; + MemoryCopy(&hs, null); + }); + Assert.Throws<NullReferenceException>(() => + { + // Check when only dst is null + HugeStruct hs = default; + MemoryCopy(null, &hs); + }); + + // Check various lengths + uint[] lengths = [1, 10, 100, 1000, 10000, 100000, 1000000]; + foreach (uint length in lengths) + { + Assert.Throws<NullReferenceException>(() => MemoryInitByref(ref Unsafe.NullRef<byte>(), length)); + Assert.Throws<NullReferenceException>(() => MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref Unsafe.NullRef<byte>(), length)); + } + + // These APIs are not expected to fail/throw on zero length, even if pointers are not valid + byte valid = 0; + MemoryInitByref(ref Unsafe.NullRef<byte>(), 0); + MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref valid, 0); + MemoryCopyByref(ref valid, ref Unsafe.NullRef<byte>(), 0); + MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref Unsafe.NullRef<byte>(), 0); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void MemoryCopy(HugeStruct* dst, HugeStruct* src) => + *dst = *src; + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void MemoryCopyByref(ref byte dst, ref byte src, uint len) => + Unsafe.CopyBlockUnaligned(ref dst, ref src, len); + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void MemoryInit(HugeStruct* dst) => + *dst = default; + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void MemoryInitByref(ref byte dst, uint len) => + Unsafe.InitBlockUnaligned(ref dst, 42, len); + + private struct HugeStruct + { + public fixed byte Data[20_000]; + } +} diff --git a/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj new file mode 100644 index 00000000000000..23d7b90be5361c --- /dev/null +++ b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj @@ -0,0 +1,10 @@ + <Project Sdk="Microsoft.NET.Sdk"> +  <PropertyGroup> +    <AllowUnsafeBlocks>true</AllowUnsafeBlocks> +    <DebugType>None</DebugType> +    <Optimize>True</Optimize> +  </PropertyGroup> +  <ItemGroup> +    <Compile Include="$(MSBuildProjectName).cs" /> +  </ItemGroup> + </Project> diff --git a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il b/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il deleted file mode 100644 index 068f11ad7b6176..00000000000000 --- a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il +++ /dev/null @@ -1,99 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license.
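(Context for the deletion below: this IL test exercised a slow helper-based tail call into the private System.Buffer::Memmove(uint8&, uint8&, native uint), which no longer exists now that the byte memmove lives on System.SpanHelpers, so the test and its project file are removed.)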
- -.assembly extern System.Runtime { .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) } -.assembly extern xunit.core {} -.assembly extern System.Runtime.Extensions {} -.assembly BufferMemmoveTailCall { - // Allow access to private members of System.Private.CoreLib - .custom instance void System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::.ctor(string) = ( - 01 00 16 53 79 73 74 65 6d 2e 50 72 69 76 61 74 - 65 2e 43 6f 72 65 4c 69 62 00 00 - ) -} - -.class public abstract auto ansi sealed beforefieldinit TailCallBufferMemmove - extends [System.Runtime]System.Object -{ - .method public hidebysig static int32 Main() cil managed - { - .custom instance void [xunit.core]Xunit.FactAttribute::.ctor() = ( - 01 00 00 00 - ) - .maxstack 8 - .entrypoint - - // C#: - // byte[] src = new byte[32]; - // Test(ref src[0]); - - ldc.i4.s 32 - newarr [System.Runtime]System.Byte - ldc.i4.0 - ldelema [System.Runtime]System.Byte - call void TailCallBufferMemmove::Test(uint8&) - - // return 100; - ldc.i4.s 100 - ret - } - - .method private hidebysig static void Test (uint8& src) cil managed noinlining - { - .maxstack 3 - - // C#: - // byte* data = stackalloc byte[64]; // to trigger slow helper-based tail calls - // Buffer.Memmove(ref Unsafe.AsRef<byte>(data), ref src, 64); - - ldc.i4.s 64 - conv.u - localloc - call !!0& [System.Runtime]System.Runtime.CompilerServices.Unsafe::AsRef<uint8>(void*) - ldarg.0 - ldc.i4.s 64 - conv.i - tail. call void [System.Runtime]System.Buffer::Memmove(uint8&, uint8&, native uint) - ret - } -} - -// C#: -// namespace System.Runtime.CompilerServices -// { -// public class IgnoresAccessChecksToAttribute : Attribute -// { -// public IgnoresAccessChecksToAttribute(string assemblyName) -// { -// AssemblyName = assemblyName; -// } -// public string AssemblyName { get; } -// } -// } -// -.class public auto ansi beforefieldinit System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute - extends [System.Runtime]System.Attribute -{ - .field private initonly string '<AssemblyName>k__BackingField' - .method public hidebysig specialname rtspecialname instance void .ctor (string assemblyName) cil managed - { - .maxstack 8 - ldarg.0 - call instance void [System.Runtime]System.Attribute::.ctor() - ldarg.0 - ldarg.1 - stfld string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::'<AssemblyName>k__BackingField' - ret - } - .method public hidebysig specialname instance string get_AssemblyName () cil managed - { - .maxstack 8 - ldarg.0 - ldfld string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::'<AssemblyName>k__BackingField' - ret - } - .property instance string AssemblyName() - { - .get instance string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::get_AssemblyName() - } -} diff --git a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj b/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj deleted file mode 100644 index 5fa250452852d2..00000000000000 --- a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj +++ /dev/null @@ -1,8 +0,0 @@ - <Project Sdk="Microsoft.NET.Sdk"> -  <PropertyGroup> -    <Optimize>True</Optimize> -  </PropertyGroup> -  <ItemGroup> -    <Compile Include="$(MSBuildProjectName).il" /> -  </ItemGroup> - </Project> diff --git a/src/tests/issues.targets b/src/tests/issues.targets index 1cdd995ac8fa89..18c6df9e56ab20 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -1880,6 +1880,9 @@ <Issue>https://github.com/dotnet/runtime/issues/90374</Issue> </ExcludeList> + <ExcludeList Include="…"> +     <Issue>https://github.com/dotnet/runtime/issues/98628</Issue> + </ExcludeList>
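To close, a minimal self-contained sketch (not part of the diff) of the user-visible contract these internal moves preserve at the public API level:

```csharp
using System;

class MemmoveSemanticsDemo
{
    static void Main()
    {
        byte[] data = { 1, 2, 3, 4, 5, 6, 7, 8 };

        // Overlapping copies keep memmove semantics: the source region is read
        // as if snapshotted before the destination is written.
        Array.Copy(data, 0, data, 2, 6);
        Console.WriteLine(string.Join(",", data)); // 1,2,1,2,3,4,5,6

        // Zero-length copies are no-ops and must not dereference anything.
        Array.Copy(data, 0, data, 0, 0);

        // Span<T>.Fill now routes every element type through SpanHelpers.Fill.
        Span<byte> buffer = stackalloc byte[16];
        buffer.Fill(0x2A);
        Console.WriteLine(buffer[15]); // 42
    }
}
```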