diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
index bbb2871fe815c..d06769d180565 100644
--- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
+++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
@@ -2526,7 +2526,4 @@
-
-
-
\ No newline at end of file
diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
index 44299c2238ed9..23c3e86ef0ad0 100644
--- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
@@ -1750,7 +1750,6 @@ public static unsafe int IndexOfAny(this ReadOnlySpan span, ReadOnlySpan(this ReadOnlySpan span, ReadOnlySpan(this ReadOnlySpan span, ReadOnlySp
                         Unsafe.Add(ref valueRef, 2),
                         span.Length);
-#if !MONO // We don't have a mono overload for 4 values
                 case 4:
                     return SpanHelpers.LastIndexOfAnyValueType(
                         ref spanRef,
@@ -2034,7 +2032,6 @@ public static unsafe int LastIndexOfAny(this ReadOnlySpan span, ReadOnlySp
                         Unsafe.Add(ref valueRef, 2),
                         Unsafe.Add(ref valueRef, 3),
                         span.Length);
-#endif

                 case 5:
                     return SpanHelpers.LastIndexOfAnyValueType(
@@ -2075,7 +2072,6 @@ public static unsafe int LastIndexOfAny(this ReadOnlySpan span, ReadOnlySp
                         Unsafe.Add(ref valueRef, 2),
                         span.Length);
-#if !MONO // We don't have a mono overload for 4 values
                 case 4:
                     return SpanHelpers.LastIndexOfAnyValueType(
                         ref spanRef,
@@ -2084,7 +2080,6 @@ public static unsafe int LastIndexOfAny(this ReadOnlySpan span, ReadOnlySp
                         Unsafe.Add(ref valueRef, 2),
                         Unsafe.Add(ref valueRef, 3),
                         span.Length);
-#endif

                 case 5:
                     return SpanHelpers.LastIndexOfAnyValueType(
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
index 507d27822d539..cc5e7d4d84f0c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
@@ -986,13 +986,21 @@ public static nuint CommonPrefixLength(ref byte first, ref byte second, nuint le
             for (; (nint)i <= (nint)length - 4; i += 4)
             {
-                if (Unsafe.Add(ref first, i + 0) != Unsafe.Add(ref second, i + 0)) return i + 0;
-                if (Unsafe.Add(ref first, i + 1) != Unsafe.Add(ref second, i + 1)) return i + 1;
-                if (Unsafe.Add(ref first, i + 2) != Unsafe.Add(ref second, i + 2)) return i + 2;
-                if (Unsafe.Add(ref first, i + 3) != Unsafe.Add(ref second, i + 3)) return i + 3;
+                if (Unsafe.Add(ref first, i + 0) != Unsafe.Add(ref second, i + 0)) goto Found0;
+                if (Unsafe.Add(ref first, i + 1) != Unsafe.Add(ref second, i + 1)) goto Found1;
+                if (Unsafe.Add(ref first, i + 2) != Unsafe.Add(ref second, i + 2)) goto Found2;
+                if (Unsafe.Add(ref first, i + 3) != Unsafe.Add(ref second, i + 3)) goto Found3;
             }

             return length;

+        Found0:
+            return i;
+        Found1:
+            return i + 1;
+        Found2:
+            return i + 2;
+        Found3:
+            return i + 3;
         }

         Debug.Assert(length >= (uint)Vector128.Count);
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs
deleted file mode 100644
index d6a7f09e7465b..0000000000000
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs
+++ /dev/null
@@ -1,2697 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
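The SpanHelpers.Byte.cs hunk above swaps the four early `return`s inside `CommonPrefixLength`'s unrolled loop for `goto`s targeting labels placed after the loop, keeping the hot loop body down to compares and jumps. A minimal standalone sketch of the resulting shape; the scalar tail here is an assumption added for completeness (the real method falls through to a vectorized path instead):

```csharp
using System.Runtime.CompilerServices;

// Sketch of the pattern only, not the runtime method: an unrolled-by-four
// scan where mismatches jump to labels *after* the loop instead of
// returning from inside it.
static nuint CommonPrefixLengthScalar(ref byte first, ref byte second, nuint length)
{
    nuint i = 0;
    for (; (nint)i <= (nint)length - 4; i += 4)
    {
        if (Unsafe.Add(ref first, i + 0) != Unsafe.Add(ref second, i + 0)) goto Found0;
        if (Unsafe.Add(ref first, i + 1) != Unsafe.Add(ref second, i + 1)) goto Found1;
        if (Unsafe.Add(ref first, i + 2) != Unsafe.Add(ref second, i + 2)) goto Found2;
        if (Unsafe.Add(ref first, i + 3) != Unsafe.Add(ref second, i + 3)) goto Found3;
    }

    // Illustrative scalar tail for the 0-3 leftover bytes (the real method
    // handles longer inputs with a vectorized path at this point).
    for (; i < length; i++)
    {
        if (Unsafe.Add(ref first, i) != Unsafe.Add(ref second, i)) break;
    }
    return i;

Found0:
    return i;
Found1:
    return i + 1;
Found2:
    return i + 2;
Found3:
    return i + 3;
}
```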
-// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.X86; - -namespace System -{ - internal static partial class SpanHelpers // helpers used by Mono - { - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfValueType(ref byte searchSpace, byte value, int length) - { - Debug.Assert(length >= 0); - - uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector128.IsHardwareAccelerated) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - if (length >= Vector128.Count * 2) - { - lengthToExamine = UnalignedCountVector128(ref searchSpace); - } - } - else if (Vector.IsHardwareAccelerated) - { - if (length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVector(ref searchSpace); - } - } - SequentialScan: - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) - goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) - goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) - goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - - offset += 1; - } - - // We get past SequentialScan only if IsHardwareAccelerated is true; and remain length is greater than Vector length. - // However, we still have the redundant check to allow the JIT to see that the code is unreachable and eliminate it when the platform does not - // have hardware accelerated. After processing Vector lengths we return to SequentialScan to finish any remaining. - if (Vector256.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - if ((((nuint)(uint)Unsafe.AsPointer(ref searchSpace) + offset) & (nuint)(Vector256.Count - 1)) != 0) - { - // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches - // with no upper bound e.g. String.strlen. - // Start with a check on Vector128 to align to Vector256, before moving to processing Vector256. - // This ensures we do not fault across memory pages while searching for an end of string. 
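`UnalignedCountVector128` / `UnalignedCountVector` are defined further down in the deleted file and are not visible in this excerpt. Their role is only to report how many leading elements the sequential scan should consume so that the vector loop afterwards reads from an aligned address. A hypothetical reconstruction of the byte variant (the name matches the call sites above; the exact rounding is assumed):

```csharp
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;

// Hypothetical reconstruction: number of leading bytes to scan sequentially
// so that subsequent Vector128 loads land on a 16-byte-aligned address.
static unsafe nuint UnalignedCountVector128(ref byte searchSpace)
{
    nint unaligned = (nint)Unsafe.AsPointer(ref searchSpace) & (Vector128<byte>.Count - 1);
    return (nuint)((Vector128<byte>.Count - unaligned) & (Vector128<byte>.Count - 1));
}
```

As the surrounding comments note, if the buffer is not pinned the GC may still relocate it afterwards, so the alignment is best-effort rather than guaranteed.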
- Vector128 values = Vector128.Create(value); - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as below - uint matches = Vector128.Equals(values, search).ExtractMostSignificantBits(); - if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - } - - lengthToExamine = GetByteVector256SpanLength(offset, length); - if (lengthToExamine > offset) - { - Vector256 values = Vector256.Create(value); - do - { - Vector256 search = Vector256.LoadUnsafe(ref searchSpace, offset); - uint matches = Vector256.Equals(values, search).ExtractMostSignificantBits(); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector256.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } while (lengthToExamine > offset); - } - - lengthToExamine = GetByteVector128SpanLength(offset, length); - if (lengthToExamine > offset) - { - Vector128 values = Vector128.Create(value); - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as above - uint matches = Vector128.Equals(values, search).ExtractMostSignificantBits(); - if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - else if (Vector128.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - lengthToExamine = GetByteVector128SpanLength(offset, length); - - Vector128 values = Vector128.Create(value); - while (lengthToExamine > offset) - { - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as above - Vector128 compareResult = Vector128.Equals(values, search); - if (compareResult == Vector128.Zero) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - uint matches = compareResult.ExtractMostSignificantBits(); - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - else if (Vector.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - lengthToExamine = GetByteVectorSpanLength(offset, length); - - Vector values = new Vector(value); - - while (lengthToExamine > offset) - { - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); - if (Vector.Zero.Equals(matches)) - { - offset += (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)offset + LocateFirstFoundByte(matches); - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - 
Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfValueType(ref short searchSpace, short value, int length) - => IndexOfChar(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfChar(ref char searchSpace, char value, int length) - { - Debug.Assert(length >= 0); - - nint offset = 0; - nint lengthToExamine = length; - - if (((int)Unsafe.AsPointer(ref searchSpace) & 1) != 0) - { - // Input isn't char aligned, we won't be able to align it to a Vector - } - else if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - // Needs to be double length to allow us to align the data first. - if (length >= Vector128.Count * 2) - { - lengthToExamine = UnalignedCountVector128(ref searchSpace); - } - } - else if (Vector.IsHardwareAccelerated) - { - // Needs to be double length to allow us to align the data first. - if (length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVector(ref searchSpace); - } - } - - SequentialScan: - // In the non-vector case lengthToExamine is the total length. - // In the vector case lengthToExamine first aligns to Vector, - // then in a second pass after the Vector lengths is the - // remaining data that is shorter than a Vector length. - while (lengthToExamine >= 4) - { - ref char current = ref Unsafe.Add(ref searchSpace, offset); - - if (value == current) - goto Found; - if (value == Unsafe.Add(ref current, 1)) - goto Found1; - if (value == Unsafe.Add(ref current, 2)) - goto Found2; - if (value == Unsafe.Add(ref current, 3)) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - if (value == Unsafe.Add(ref searchSpace, offset)) - goto Found; - - offset++; - lengthToExamine--; - } - - // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow - // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. - if (Avx2.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - if (((nint)Unsafe.AsPointer(ref Unsafe.Add(ref searchSpace, (nint)offset)) & (nint)(Vector256.Count - 1)) != 0) - { - // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches - // with no upper bound e.g. String.wcslen. Start with a check on Vector128 to align to Vector256, - // before moving to processing Vector256. - - // If the input searchSpan has been fixed or pinned, this ensures we do not fault across memory pages - // while searching for an end of string. Specifically that this assumes that the length is either correct - // or that the data is pinned otherwise it may cause an AccessViolation from crossing a page boundary into an - // unowned page. If the search is unbounded (e.g. null terminator in wcslen) and the search value is not found, - // again this will likely cause an AccessViolation. However, correctly bounded searches will return -1 rather - // than ever causing an AV. 
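Condensed to its 128-bit core, the SSE2 char path reads like the sketch below (a hypothetical standalone helper; the real method also aligns, tiers up through AVX2, and returns to the sequential scan for remainders). The detail worth keeping in mind is that `Sse2.MoveMask` yields one bit per *byte*, so the trailing-zero count is halved to convert a byte position into a char index:

```csharp
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static unsafe int IndexOfCharSse2(ReadOnlySpan<char> span, char value)
{
    if (!Sse2.IsSupported || span.Length < Vector128<ushort>.Count)
        return span.IndexOf(value); // scalar path

    fixed (char* p = span)
    {
        Vector128<ushort> values = Vector128.Create((ushort)value);
        int lastStart = span.Length - Vector128<ushort>.Count;
        int offset = 0;
        while (true)
        {
            Vector128<ushort> search = Sse2.LoadVector128((ushort*)p + offset);
            int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
            if (matches != 0)
            {
                // One mask bit per byte: shift right by 1 for a char index.
                return offset + (BitOperations.TrailingZeroCount(matches) >> 1);
            }
            if (offset == lastStart)
                return -1;
            // The final iteration re-reads from the end and may overlap
            // chars already examined, which is harmless.
            offset = Math.Min(offset + Vector128<ushort>.Count, lastStart);
        }
    }
}
```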
- - // If the searchSpan has not been fixed or pinned the GC can relocate it during the execution of this - // method, so the alignment only acts as best endeavour. The GC cost is likely to dominate over - // the misalignment that may occur after; to we default to giving the GC a free hand to relocate and - // its up to the caller whether they are operating over fixed data. - Vector128 values = Vector128.Create((ushort)value); - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as below - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } - } - - lengthToExamine = GetCharVector256SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector256 values = Vector256.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector256.Count); - - Vector256 search = LoadVector256(ref searchSpace, offset); - int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector256.Count; - lengthToExamine -= Vector256.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } while (lengthToExamine > 0); - } - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 values = Vector128.Create((ushort)value); - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as above - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - // Don't need to change lengthToExamine here as we don't use its current value again. 
- } - else - { - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (Sse2.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector128 values = Vector128.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as above - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - lengthToExamine -= Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (AdvSimd.Arm64.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector128 values = Vector128.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 search = LoadVector128(ref searchSpace, offset); - Vector128 compareResult = AdvSimd.CompareEqual(values, search); - - if (compareResult == Vector128.Zero) - { - offset += Vector128.Count; - lengthToExamine -= Vector128.Count; - continue; - } - - return (int)(offset + FindFirstMatchedLane(compareResult)); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (Vector.IsHardwareAccelerated) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector.Count); - - lengthToExamine = GetCharVectorSpanLength(offset, length); - - if (lengthToExamine > 0) - { - Vector values = new Vector((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector.Count); - - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); - if (Vector.Zero.Equals(matches)) - { - offset += Vector.Count; - lengthToExamine -= Vector.Count; - continue; - } - - // Find offset of first match - return (int)(offset + LocateFirstFoundChar(matches)); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)(offset); - } - - internal static unsafe int IndexOfValueType(ref T searchSpace, T value, int length) where T : struct, IEquatable - { - Debug.Assert(length >= 0); - - nint index = 0; // Use nint for arithmetic to avoid unnecessary 64->32->64 truncations - if (Vector.IsHardwareAccelerated && Vector.IsSupported && (Vector.Count * 2) <= length) - { - Vector valueVector = new Vector(value); - Vector compareVector; - Vector matchVector; - if ((uint)length % (uint)Vector.Count != 0) - { - // Number of elements is not a multiple of Vector.Count, so do one - // check and 
shift only enough for the remaining set to be a multiple - // of Vector.Count. - compareVector = Unsafe.As>(ref Unsafe.Add(ref searchSpace, index)); - matchVector = Vector.Equals(valueVector, compareVector); - if (matchVector != Vector.Zero) - { - goto VectorMatch; - } - index += length % Vector.Count; - length -= length % Vector.Count; - } - while (length > 0) - { - compareVector = Unsafe.As>(ref Unsafe.Add(ref searchSpace, index)); - matchVector = Vector.Equals(valueVector, compareVector); - if (matchVector != Vector.Zero) - { - goto VectorMatch; - } - index += Vector.Count; - length -= Vector.Count; - } - goto NotFound; - VectorMatch: - for (int i = 0; i < Vector.Count; i++) - if (compareVector[i].Equals(value)) - return (int)(index + i); - } - - while (length >= 8) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 1))) - goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 2))) - goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 3))) - goto Found3; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 4))) - goto Found4; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 5))) - goto Found5; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 6))) - goto Found6; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 7))) - goto Found7; - - length -= 8; - index += 8; - } - - while (length >= 4) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 1))) - goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 2))) - goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 3))) - goto Found3; - - length -= 4; - index += 4; - } - - while (length > 0) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - - index += 1; - length--; - } - NotFound: - return -1; - - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)index; - Found1: - return (int)(index + 1); - Found2: - return (int)(index + 2); - Found3: - return (int)(index + 3); - Found4: - return (int)(index + 4); - Found5: - return (int)(index + 5); - Found6: - return (int)(index + 6); - Found7: - return (int)(index + 7); - } - - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, int length) where T : struct, IEquatable - { - Debug.Assert(length >= 0, "Expected non-negative length"); - Debug.Assert(value0 is byte or short or int or long, "Expected caller to normalize to one of these types"); - - if (!Vector128.IsHardwareAccelerated || length < Vector128.Count) - { - for (int i = 0; i < length; i++) - { - if (!Unsafe.Add(ref searchSpace, i).Equals(value0)) - { - return i; - } - } - } - else - { - Vector128 notEquals, value0Vector = Vector128.Create(value0); - ref T current = ref searchSpace; - ref T oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector128.Count); - - // Loop until either we've finished all elements or there's less than a vector's-worth remaining. - do - { - notEquals = ~Vector128.Equals(value0Vector, Vector128.LoadUnsafe(ref current)); - if (notEquals != Vector128.Zero) - { - return ComputeIndex(ref searchSpace, ref current, notEquals); - } - - current = ref Unsafe.Add(ref current, Vector128.Count); - } - while (!Unsafe.IsAddressGreaterThan(ref current, ref oneVectorAwayFromEnd)); - - // If any elements remain, process the last vector in the search space. 
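Unlike the older goto-heavy methods, this one walks by `ref` and finishes by re-testing the last full vector, as the code just below does; the final load may overlap elements already examined, but any hit in the overlap computes the same index, so the overlap is harmless. A trimmed sketch of just that structure (assumes recent .NET with the public generic `Vector128` APIs, a supported primitive `T`, hardware acceleration, and `span.Length >= Vector128<T>.Count`):

```csharp
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

// Sketch of the tail-handling idea only, not the full runtime method.
static int IndexOfAnyExceptSketch<T>(ReadOnlySpan<T> span, T value)
    where T : struct, IEquatable<T>
{
    ref T searchSpace = ref MemoryMarshal.GetReference(span);
    Vector128<T> valueVector = Vector128.Create(value);
    ref T current = ref searchSpace;
    ref T lastVectorStart = ref Unsafe.Add(ref searchSpace, span.Length - Vector128<T>.Count);

    do
    {
        Vector128<T> notEquals = ~Vector128.Equals(valueVector, Vector128.LoadUnsafe(ref current));
        if (notEquals != Vector128<T>.Zero)
            return ComputeIndex(ref searchSpace, ref current, notEquals);
        current = ref Unsafe.Add(ref current, Vector128<T>.Count);
    }
    while (!Unsafe.IsAddressGreaterThan(ref current, ref lastVectorStart));

    // Re-test the last full vector; overlap with earlier work is harmless.
    Vector128<T> tail = ~Vector128.Equals(valueVector, Vector128.LoadUnsafe(ref lastVectorStart));
    return tail != Vector128<T>.Zero ? ComputeIndex(ref searchSpace, ref lastVectorStart, tail) : -1;

    // Mirrors the ComputeIndex helper in this method: first set mask bit,
    // plus how many elements 'current' is past the start of the span.
    static int ComputeIndex(ref T searchSpace, ref T current, Vector128<T> notEquals)
    {
        int index = BitOperations.TrailingZeroCount(notEquals.ExtractMostSignificantBits());
        return index + (int)(Unsafe.ByteOffset(ref searchSpace, ref current) / Unsafe.SizeOf<T>());
    }
}
```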
- if ((uint)length % Vector128.Count != 0) - { - notEquals = ~Vector128.Equals(value0Vector, Vector128.LoadUnsafe(ref oneVectorAwayFromEnd)); - if (notEquals != Vector128.Zero) - { - return ComputeIndex(ref searchSpace, ref oneVectorAwayFromEnd, notEquals); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static int ComputeIndex(ref T searchSpace, ref T current, Vector128 notEquals) - { - uint notEqualsElements = notEquals.ExtractMostSignificantBits(); - int index = BitOperations.TrailingZeroCount(notEqualsElements); - return index + (int)(Unsafe.ByteOffset(ref searchSpace, ref current) / Unsafe.SizeOf()); - } - } - - return -1; - } - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int LastIndexOfValueType(ref byte searchSpace, byte value, int length) - { - Debug.Assert(length >= 0); - - uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref searchSpace, length); - } - SequentialScan: - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) - goto Found7; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) - goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) - goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) - goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); - - Vector values = new Vector(value); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset - (nuint)Vector.Count)); - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - 
return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int LastIndexOfValueType(ref short searchSpace, short value, int length) - => LastIndexOfValueType(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int LastIndexOfValueType(ref char searchSpace, char value, int length) - { - Debug.Assert(length >= 0); - - fixed (char* pChars = &searchSpace) - { - char* pCh = pChars + length; - char* pEndCh = pChars; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - // Figure out how many characters to read sequentially from the end until we are vector aligned - // This is equivalent to: length = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte - const int elementsPerByte = sizeof(ushort) / sizeof(byte); - length = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; - } - - SequentialScan: - while (length >= 4) - { - length -= 4; - pCh -= 4; - - if (*(pCh + 3) == value) - goto Found3; - if (*(pCh + 2) == value) - goto Found2; - if (*(pCh + 1) == value) - goto Found1; - if (*pCh == value) - goto Found; - } - - while (length > 0) - { - length--; - pCh--; - - if (*pCh == value) - goto Found; - } - - // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow - // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. - if (Vector.IsHardwareAccelerated && pCh > pEndCh) - { - // Get the highest multiple of Vector.Count that is within the search space. - // That will be how many times we iterate in the loop below. - // This is equivalent to: length = Vector.Count * ((int)(pCh - pEndCh) / Vector.Count) - length = (int)((pCh - pEndCh) & ~(Vector.Count - 1)); - - // Get comparison Vector - Vector vComparison = new Vector(value); - - while (length > 0) - { - char* pStart = pCh - Vector.Count; - // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh (and hence pSart) is always vector aligned - Debug.Assert(((int)pStart & (Unsafe.SizeOf>() - 1)) == 0); - Vector vMatches = Vector.Equals(vComparison, Unsafe.Read>(pStart)); - if (Vector.Zero.Equals(vMatches)) - { - pCh -= Vector.Count; - length -= Vector.Count; - continue; - } - // Find offset of last match - return (int)(pStart - pEndCh) + LocateLastFoundChar(vMatches); - } - - if (pCh > pEndCh) - { - length = (int)(pCh - pEndCh); - goto SequentialScan; - } - } - - return -1; - Found: - return (int)(pCh - pEndCh); - Found1: - return (int)(pCh - pEndCh) + 1; - Found2: - return (int)(pCh - pEndCh) + 2; - Found3: - return (int)(pCh - pEndCh) + 3; - } - } - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int LastIndexOfValueType(ref T searchSpace, T value, int length) where T : IEquatable? 
- => LastIndexOf(ref searchSpace, value, length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int IndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported, and length is enough to use them so use that path. - // We jump forward to the intrinsics at the end of the method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, as it is used later - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return 
(int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create(value0); - Vector256 values1 = Vector256.Create(value1); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchSpace, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
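The scalar prelude tests both values at each position (`uValue0 == lookUp || uValue1 == lookUp`); the vector path fuses the same test into a single mask by OR-ing two `CompareEqual` results before `MoveMask`. A reduced SSE2-only sketch of that combine (hypothetical helper name; the alignment work and the AVX2 tier are omitted):

```csharp
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static unsafe int IndexOfAnyTwoBytesSse2(ReadOnlySpan<byte> span, byte value0, byte value1)
{
    if (!Sse2.IsSupported || span.Length < Vector128<byte>.Count)
        return span.IndexOfAny(value0, value1); // scalar path

    fixed (byte* p = span)
    {
        Vector128<byte> values0 = Vector128.Create(value0);
        Vector128<byte> values1 = Vector128.Create(value1);
        int lastStart = span.Length - Vector128<byte>.Count;
        int offset = 0;
        while (true)
        {
            Vector128<byte> search = Sse2.LoadVector128(p + offset);
            // One mask for both values: OR the two equality results,
            // then compress to bit flags with MoveMask.
            int matches = Sse2.MoveMask(
                Sse2.Or(
                    Sse2.CompareEqual(values0, search),
                    Sse2.CompareEqual(values1, search)));
            if (matches != 0)
                return offset + BitOperations.TrailingZeroCount(matches);
            if (offset == lastStart)
                return -1;
            offset = Math.Min(offset + Vector128<byte>.Count, lastStart);
        }
    }
}
```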
- if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset - offset += (nuint)BitOperations.TrailingZeroCount(matches); - goto Found; - } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128 search; - Vector128 matches; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128.Zero) - { - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128.Zero) - { - // None matched - goto NotFound; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - else if (Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
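`FindFirstMatchedLane` is defined later in the deleted file and is not visible here. ARM64 has no single-instruction movemask, so helpers of this kind typically encode each byte lane as a nibble and finish with a trailing-zero count; a reconstruction along those lines (treat as illustrative, not a quote from the diff):

```csharp
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

// Reconstruction of a FindFirstMatchedLane-style helper; requires
// AdvSimd.Arm64.IsSupported and a non-zero compareResult.
static int FindFirstMatchedLane(Vector128<byte> compareResult)
{
    // 0x01 in even byte lanes, 0x10 in odd ones.
    Vector128<byte> mask = Vector128.Create((ushort)0x1001).AsByte();
    Vector128<byte> masked = AdvSimd.And(compareResult, mask);
    // Pairwise add folds 16 byte lanes into 8 bytes: one nibble per lane.
    Vector128<byte> pairwise = AdvSimd.Arm64.AddPairwise(masked, masked);
    ulong lanes = pairwise.AsUInt64().ToScalar();
    return BitOperations.TrailingZeroCount(lanes) >> 2; // 4 bits per lane
}
```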
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchSpace, offset); - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)LocateFirstFoundByte(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, int length) - => IndexOfAnyChar(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value0), Unsafe.As(ref value1), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyChar(ref char searchStart, char value0, char value1, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. - // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. 
- nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int IndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, byte value2, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue2 = value2; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported, and length is enough to use them so use that path. 
- // We jump forward to the intrinsics at the end of the method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, as it is used later - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. 
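The three-value variants differ from the two-value ones only in the extra nested `Or` that folds a third `CompareEqual` into the match mask. The same combine expressed with the cross-platform `Vector128` API rather than raw SSE2 intrinsics (illustrative helper, not from the diff):

```csharp
using System.Numerics;
using System.Runtime.Intrinsics;

// Returns the lane index of the first byte in 'search' equal to any of
// v0/v1/v2, or -1 if no lane matches (illustrative helper).
static int FirstMatchOfThree(Vector128<byte> search, byte v0, byte v1, byte v2)
{
    Vector128<byte> eq = Vector128.Equals(search, Vector128.Create(v0))
                       | Vector128.Equals(search, Vector128.Create(v1))
                       | Vector128.Equals(search, Vector128.Create(v2));
    return eq == Vector128<byte>.Zero
        ? -1
        : BitOperations.TrailingZeroCount(eq.ExtractMostSignificantBits());
}
```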
- - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create(value0); - Vector256 values1 = Vector256.Create(value1); - Vector256 values2 = Vector256.Create(value2); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchSpace, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - Vector128 values2 = Vector128.Create(value2); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
- if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset - offset += (nuint)BitOperations.TrailingZeroCount(matches); - goto Found; - } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128 search; - Vector128 matches; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - Vector128 values2 = Vector128.Create(value2); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = AdvSimd.Or( - AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)), - AdvSimd.CompareEqual(values2, search)); - - if (matches == Vector128.Zero) - { - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = AdvSimd.Or( - AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)), - AdvSimd.CompareEqual(values2, search)); - - if (matches == Vector128.Zero) - { - // None matched - goto NotFound; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - else if (Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
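`LocateFirstFoundByte`, used by the variable-width `Vector<T>` fallbacks in these methods, is likewise defined outside this excerpt. The usual shape reinterprets the match vector as 64-bit lanes, finds the first non-zero lane, and finishes with a trailing-zero count; a reconstruction (assumes `match` is non-zero, as the call sites guarantee):

```csharp
using System.Numerics;

static int LocateFirstFoundByte(Vector<byte> match)
{
    var vector64 = Vector.AsVectorUInt64(match);
    ulong candidate = 0;
    int i = 0;
    // Find the first 64-bit lane containing any match bits.
    for (; i < Vector<ulong>.Count; i++)
    {
        candidate = vector64[i];
        if (candidate != 0)
            break;
    }
    // 8 bytes per ulong lane, then 8 bits per byte inside the lane.
    return i * 8 + (BitOperations.TrailingZeroCount(candidate) >> 3);
}
```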
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchSpace, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)LocateFirstFoundByte(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, int length) - => IndexOfAnyValueType( - ref Unsafe.As(ref searchSpace), - Unsafe.As(ref value0), - Unsafe.As(ref value1), - Unsafe.As(ref value2), - length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyValueType(ref char searchStart, char value0, char value1, char value2, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. - // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. 
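The `vectorDiff` computation that follows this comment does double duty: a negative result means the input is too short to vectorize at all, while a non-negative result is exactly the highest offset at which a full vector can still be loaded, which is why it is stored straight into `lengthToExamine`. A tiny worked example:

```csharp
using System;
using System.Runtime.Intrinsics;

// The 'vectorDiff' idiom from these methods, shown for a 20-char input.
int length = 20;
nint vectorDiff = (nint)length - Vector128<ushort>.Count; // 20 - 8 = 12
if (vectorDiff >= 0)
{
    // The vector loop runs while offset < lengthToExamine, then a final
    // (possibly overlapping) load is made at lengthToExamine itself.
    nuint lengthToExamine = (nuint)vectorDiff;
    Console.WriteLine($"last full 128-bit load starts at element {lengthToExamine}");
}
else
{
    Console.WriteLine("too short to vectorize; scalar scan only");
}
```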
- nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - Vector256 values2 = Vector256.Create((ushort)value2); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. 
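Note: the scalar blocks above unroll four probes per iteration and jump to shared Found* labels instead of returning inline — the same shape this PR introduces into SpanHelpers.T.cs, keeping the int conversions and returns out of the hot loop. A standalone sketch of the pattern on an illustrative span-based signature (the real code uses nuint offsets to avoid 64->32->64 truncations; int is used here for brevity):

using System;

static class ForwardScanSketch
{
    public static int IndexOf(ReadOnlySpan<byte> span, byte value)
    {
        int offset = 0;
        int length = span.Length;

        while (length >= 4)
        {
            length -= 4;
            if (span[offset] == value) goto Found;
            if (span[offset + 1] == value) goto Found1;
            if (span[offset + 2] == value) goto Found2;
            if (span[offset + 3] == value) goto Found3;
            offset += 4;
        }

        while (length > 0)
        {
            length -= 1;
            if (span[offset] == value) goto Found;
            offset += 1;
        }

        return -1;
    // Returns hoisted out of the loop, as in the diff:
    Found3: return offset + 3;
    Found2: return offset + 2;
    Found1: return offset + 1;
    Found: return offset;
    }
}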
- Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - Vector128 values2 = Vector128.Create((ushort)value2); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
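Note: IntrinsicsMatch above shifts the trailing-zero count right by one because MoveMask was taken over the comparison reinterpreted AsByte(): each 16-bit char lane contributes two adjacent mask bits. A small sketch of just that arithmetic (the mask value is assumed to come from such a byte-wise MoveMask):

using System.Numerics;

static class CharMaskSketch
{
    // 'byteMask' is assumed to be Sse2.MoveMask(cmp.AsByte()) over a
    // Vector128<ushort> comparison result, as in the hunk above.
    public static int FirstCharIndex(int byteMask)
        => BitOperations.TrailingZeroCount(byteMask) >> 1; // two mask bits per char

    // Example: a match in char lane 3 sets byte-mask bits 6 and 7
    // (0b1100_0000); TrailingZeroCount == 6, and 6 >> 1 == 3.
}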
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, value2, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, short value3, int length) - => IndexOfAnyValueType( - ref Unsafe.As(ref searchSpace), - Unsafe.As(ref value0), - Unsafe.As(ref value1), - Unsafe.As(ref value2), - Unsafe.As(ref value3), - length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyValueType(ref char searchStart, char value0, char value1, char value2, char value3, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. - // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. 
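Note: the Vector<T> fallback above combines one Equals per needle with BitwiseOr and tests the result against Vector.Zero, so the lane-extraction cost (LocateFirstFoundChar) is only paid once a window is known to match. A minimal sketch of that block shape for a single vector-sized window (the helper name and span-based signature are illustrative):

using System;
using System.Numerics;

static class VectorFallbackSketch
{
    // 'block' is assumed to be exactly Vector<short>.Count elements long.
    public static bool WindowContainsAny(ReadOnlySpan<short> block, short v0, short v1, short v2)
    {
        Vector<short> search = new Vector<short>(block);
        Vector<short> matches = Vector.BitwiseOr(
            Vector.BitwiseOr(
                Vector.Equals(search, new Vector<short>(v0)),
                Vector.Equals(search, new Vector<short>(v1))),
            Vector.Equals(search, new Vector<short>(v2)));

        // Only when this is true does the caller locate the exact lane.
        return !Vector<short>.Zero.Equals(matches);
    }
}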
- nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - Vector256 values2 = Vector256.Create((ushort)value2); - Vector256 values3 = Vector256.Create((ushort)value3); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // We preform the Or at non-Vector level as we are using the maximum number of non-preserved registers, - // and more causes them first to be pushed to stack and then popped on exit to preseve their values. - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
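Note: the comment above explains why the four-needle AVX2 path ORs scalar masks instead of OR-ing the comparison vectors first: each MoveMask retires its comparison register immediately, so with all non-preserved vector registers occupied nothing has to be pushed to the stack and popped on exit. A sketch of that shape, assuming Avx2.IsSupported (guard omitted) and illustrative names:

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static class ScalarOrSketch
{
    public static int MatchMask(Vector256<ushort> search,
                                Vector256<ushort> values0, Vector256<ushort> values1,
                                Vector256<ushort> values2, Vector256<ushort> values3)
    {
        // Each MoveMask frees its comparison register before the next
        // CompareEqual executes, so four needles fit without spills.
        int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte());
        matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte());
        matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte());
        matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte());
        return matches;
    }
}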
- if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - Vector128 values2 = Vector128.Create((ushort)value2); - Vector128 values3 = Vector128.Create((ushort)value3); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - Vector values3 = new Vector(value3); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
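Note: the Vector256 guard shown just above works because Vector128.Count was already subtracted from lengthToExamine at method entry, so testing the remainder against Vector128.Count again asks exactly "is there at least one full Vector256 of input?". As arithmetic, under that assumption:

using System.Runtime.Intrinsics;

static class SizingSketch
{
    public static bool HasFullVector256(int length)
    {
        // Done at method entry in the diff:
        int lengthToExamine = length - Vector128<ushort>.Count;
        // Equivalent to length >= Vector256<ushort>.Count, since
        // Vector256<ushort>.Count == 2 * Vector128<ushort>.Count.
        return lengthToExamine >= Vector128<ushort>.Count;
    }
}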
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)), - Vector.Equals(search, values3)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)), - Vector.Equals(search, values3)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, value2, value3, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value, int length) - => LastIndexOfAnyExcept(ref searchSpace, value, length); - - internal static int LastIndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref searchSpace, length); - } - SequentialScan: - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found7; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto 
Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); - - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - Vector search = LoadVector(ref searchSpace, offset - (nuint)Vector.Count); - var matches = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyValueType(ref short searchSpace, short value0, short value1, int length) - => LastIndexOfAny(ref searchSpace, value0, value1, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, length); - - internal static int LastIndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, byte value2, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; - uint uValue2 = value2; - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref searchSpace, length); - } - SequentialScan: - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found7; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = 
Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); - - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - Vector search = LoadVector(ref searchSpace, offset - (nuint)Vector.Count); - - var matches = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, int length) - => LastIndexOfAny(ref searchSpace, value0, value1, value2, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, value2, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, value2, value3, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 LoadVector128(ref char start, nint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 LoadVector128(ref char start, nuint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref 
start, (nint)offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 LoadVector256(ref char start, nint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 LoadVector256(ref char start, nuint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (nint)offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ref char Add(ref char start, nuint offset) => ref Unsafe.Add(ref start, (nint)offset); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint FindFirstMatchedLane(Vector128 compareResult) - { - Debug.Assert(AdvSimd.Arm64.IsSupported); - - // Mask to help find the first lane in compareResult that is set. - // MSB 0x10 corresponds to 1st lane, 0x01 corresponds to 0th lane and so forth. - Vector128 mask = Vector128.Create((ushort)0x1001).AsByte(); - - // Find the first lane that is set inside compareResult. - Vector128 maskedSelectedLanes = AdvSimd.And(compareResult, mask); - Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(maskedSelectedLanes, maskedSelectedLanes); - ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar(); - - // It should be handled by compareResult != Vector.Zero - Debug.Assert(selectedLanes != 0); - - // Find the first lane that is set inside compareResult. - return (uint)BitOperations.TrailingZeroCount(selectedLanes) >> 2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int FindFirstMatchedLane(Vector128 compareResult) - { - Debug.Assert(AdvSimd.Arm64.IsSupported); - - Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(compareResult.AsByte(), compareResult.AsByte()); - ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar(); - - // It should be handled by compareResult != Vector.Zero - Debug.Assert(selectedLanes != 0); - - return BitOperations.TrailingZeroCount(selectedLanes) >> 3; - } - - // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138 - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int LocateLastFoundChar(Vector match) - { - var vector64 = Vector.AsVectorUInt64(match); - ulong candidate = 0; - int i = Vector.Count - 1; - - // This pattern is only unrolled by the Jit if the limit is Vector.Count - // As such, we need a dummy iteration variable for that condition to be satisfied - for (int j = 0; j < Vector.Count; j++) - { - candidate = vector64[i]; - if (candidate != 0) - { - break; - } - - i--; - } - - // Single LEA instruction with jitted const (using function result) - return i * 4 + LocateLastFoundChar(candidate); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int LocateLastFoundChar(ulong match) - => BitOperations.Log2(match) >> 4; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe nuint UnalignedCountVectorFromEnd(ref byte searchSpace, int length) - { - nint unaligned = (nint)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - return (nuint)(uint)(((length & (Vector.Count - 1)) + unaligned) & (Vector.Count - 1)); - } - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 83a72eef3fbe7..66bf0618cab3a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -1446,7 
+1446,6 @@ internal static bool ContainsValueType(ref T searchSpace, T value, int length return false; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfChar(ref char searchSpace, char value, int length) => IndexOfValueType(ref Unsafe.As(ref searchSpace), (short)value, length); @@ -1458,7 +1457,6 @@ internal static int IndexOfValueType(ref T searchSpace, T value, int length) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value, int length) where T : struct, INumber => IndexOfValueType>(ref searchSpace, value, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfValueType(ref TValue searchSpace, TValue value, int length) @@ -1476,14 +1474,14 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 8; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) return (int)offset + 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) return (int)offset + 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) return (int)offset + 3; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 4) == value)) return (int)offset + 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 5) == value)) return (int)offset + 5; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 6) == value)) return (int)offset + 6; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 7) == value)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) goto Found1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) goto Found2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) goto Found3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 4) == value)) goto Found4; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 5) == value)) goto Found5; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 6) == value)) goto Found6; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 7) == value)) goto Found7; offset += 8; } @@ -1492,10 +1490,10 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) return (int)offset + 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) return (int)offset + 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) goto Found1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) goto Found2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) goto Found3; offset += 4; } @@ -1504,10 +1502,27 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == 
value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1573,7 +1588,6 @@ private static int IndexOfValueType(ref TValue searchSpace, TV return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyChar(ref char searchSpace, char value0, char value1, int length) => IndexOfAnyValueType(ref Unsafe.As(ref searchSpace), (short)value0, (short)value1, length); @@ -1585,7 +1599,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, length); -#endif // having INumber constraint here allows to use == operator and get better perf compared to .Equals [MethodImpl(MethodImplOptions.AggressiveOptimization)] @@ -1609,21 +1622,21 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found3; lookUp = Unsafe.Add(ref current, 4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 4; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found4; lookUp = Unsafe.Add(ref current, 5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found5; lookUp = Unsafe.Add(ref current, 6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found6; lookUp = Unsafe.Add(ref current, 7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found7; offset += 8; } @@ -1635,13 +1648,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = 
Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found3; offset += 4; } @@ -1651,10 +1664,27 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1724,7 +1754,6 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); @@ -1732,7 +1761,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, int length) @@ -1755,21 +1783,21 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found3; lookUp = Unsafe.Add(ref current, 4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 4; + if 
(TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found4; lookUp = Unsafe.Add(ref current, 5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found5; lookUp = Unsafe.Add(ref current, 6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found6; lookUp = Unsafe.Add(ref current, 7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found7; offset += 8; } @@ -1781,13 +1809,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found3; offset += 4; } @@ -1797,10 +1825,27 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1870,7 +1915,6 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); @@ -1878,7 +1922,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); -#endif 
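Note: the TNegator-parameterized methods these hunks route to let one scan body serve both the IndexOfAny and IndexOfAnyExcept families: a zero-size generic argument decides, at JIT specialization time, whether each comparison is negated, so no branch is paid at runtime. A minimal sketch of the pattern with static abstract interface members (interface and type names here are illustrative, not necessarily the runtime's exact shape):

using System;
using System.Runtime.CompilerServices;

interface INegator
{
    static abstract bool NegateIfNeeded(bool equals);
}

struct DontNegate : INegator
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool NegateIfNeeded(bool equals) => equals;
}

struct Negate : INegator
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool NegateIfNeeded(bool equals) => !equals;
}

static class NegatorSketch
{
    public static int IndexOfValueType<TNegator>(ReadOnlySpan<byte> span, byte value)
        where TNegator : struct, INegator
    {
        for (int i = 0; i < span.Length; i++)
            if (TNegator.NegateIfNeeded(span[i] == value))
                return i;
        return -1;
    }

    // Both entry points collapse to specialized, branch-free bodies:
    public static int IndexOf(ReadOnlySpan<byte> span, byte value)
        => IndexOfValueType<DontNegate>(span, value);
    public static int IndexOfExcept(ReadOnlySpan<byte> span, byte value)
        => IndexOfValueType<Negate>(span, value);
}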
[MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, TValue value3, int length) @@ -1899,13 +1942,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found3; offset += 4; } @@ -1915,10 +1958,19 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; offset += 1; } + return -1; + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2019,13 +2071,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == 
value3 || lookUp == value4)) goto Found3; offset += 4; } @@ -2035,10 +2087,20 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4)) goto Found; offset += 1; } + + return -1; + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2114,7 +2176,6 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfValueType(ref T searchSpace, T value, int length) where T : struct, INumber => LastIndexOfValueType>(ref searchSpace, value, length); @@ -2122,7 +2183,6 @@ internal static int LastIndexOfValueType(ref T searchSpace, T value, int leng [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value, int length) where T : struct, INumber => LastIndexOfValueType>(ref searchSpace, value, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfValueType(ref TValue searchSpace, TValue value, int length) @@ -2140,14 +2200,14 @@ private static int LastIndexOfValueType(ref TValue searchSpace { length -= 8; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) return (int)offset - 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) return (int)offset - 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) return (int)offset - 3; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 4) == value)) return (int)offset - 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 5) == value)) return (int)offset - 5; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 6) == value)) return (int)offset - 6; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 7) == value)) return (int)offset - 7; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) goto FoundM1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) goto FoundM2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) goto FoundM3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 4) == value)) goto FoundM4; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 5) == value)) goto FoundM5; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 6) == value)) goto FoundM6; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 7) == value)) goto FoundM7; offset -= 8; } @@ -2156,10 +2216,10 @@ private static int LastIndexOfValueType(ref TValue searchSpace { length -= 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) return (int)offset - 1; 
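Note: the LastIndexOf hunks mirror the forward pattern: offset starts at the last element, the unrolled probes walk downward, and the new FoundM* labels hold the offset - k returns outside the loop. A standalone sketch of the backward shape on an illustrative span-based signature:

using System;

static class BackwardScanSketch
{
    public static int LastIndexOf(ReadOnlySpan<byte> span, byte value)
    {
        int length = span.Length;
        int offset = length - 1;

        while (length >= 4)
        {
            length -= 4;
            if (span[offset] == value) goto Found;
            if (span[offset - 1] == value) goto FoundM1;
            if (span[offset - 2] == value) goto FoundM2;
            if (span[offset - 3] == value) goto FoundM3;
            offset -= 4;
        }

        while (length > 0)
        {
            length -= 1;
            if (span[offset] == value) goto Found;
            offset -= 1;
        }

        return -1;
    // Descending epilogue, as in the diff's FoundM* labels:
    FoundM3: return offset - 3;
    FoundM2: return offset - 2;
    FoundM1: return offset - 1;
    Found: return offset;
    }
}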
-            if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) return (int)offset - 2;
-            if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) return (int)offset - 3;
+            if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found;
+            if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) goto FoundM1;
+            if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) goto FoundM2;
+            if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) goto FoundM3;
 
             offset -= 4;
         }
@@ -2168,10 +2228,27 @@ private static int LastIndexOfValueType<TValue, TNegator>(ref TValue searchSpace
         {
             length -= 1;
 
-            if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset;
+            if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found;
 
             offset -= 1;
         }
+
+        return -1;
+    FoundM7:
+        return (int)(offset - 7);
+    FoundM6:
+        return (int)(offset - 6);
+    FoundM5:
+        return (int)(offset - 5);
+    FoundM4:
+        return (int)(offset - 4);
+    FoundM3:
+        return (int)(offset - 3);
+    FoundM2:
+        return (int)(offset - 2);
+    FoundM1:
+        return (int)(offset - 1);
+    Found:
+        return (int)(offset);
     }
     else if (Vector256.IsHardwareAccelerated && length >= Vector256<TValue>.Count)
     {
@@ -2234,7 +2311,6 @@ private static int LastIndexOfValueType<TValue, TNegator>(ref TValue searchSpace
         return -1;
     }
 
-#if !MONO
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     internal static int LastIndexOfAnyValueType<T>(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber<T>
         => LastIndexOfAnyValueType<T, DontNegate<T>>(ref searchSpace, value0, value1, length);
@@ -2242,7 +2318,6 @@ internal static int LastIndexOfAnyValueType<T>(ref T searchSpace, T value0, T va
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     internal static int LastIndexOfAnyExceptValueType<T>(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber<T>
         => LastIndexOfAnyValueType<T, Negate<T>>(ref searchSpace, value0, value1, length);
-#endif
 
     [MethodImpl(MethodImplOptions.AggressiveOptimization)]
     private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSpace, TValue value0, TValue value1, int length)
@@ -2265,21 +2340,21 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
             ref TValue current = ref Unsafe.Add(ref searchSpace, offset);
 
             lookUp = current;
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found;
             lookUp = Unsafe.Add(ref current, -1);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 1;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM1;
             lookUp = Unsafe.Add(ref current, -2);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 2;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM2;
             lookUp = Unsafe.Add(ref current, -3);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 3;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM3;
             lookUp = Unsafe.Add(ref current, -4);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 4;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM4;
             lookUp = Unsafe.Add(ref current, -5);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 5;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM5;
             lookUp = Unsafe.Add(ref current, -6);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 6;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM6;
             lookUp = Unsafe.Add(ref current, -7);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 7;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM7;
 
             offset -= 8;
         }
@@ -2291,13 +2366,13 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
             ref TValue current = ref Unsafe.Add(ref searchSpace, offset);
 
             lookUp = current;
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found;
             lookUp = Unsafe.Add(ref current, -1);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 1;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM1;
             lookUp = Unsafe.Add(ref current, -2);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 2;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM2;
             lookUp = Unsafe.Add(ref current, -3);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 3;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM3;
 
             offset -= 4;
         }
@@ -2307,10 +2382,27 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
             length -= 1;
 
             lookUp = Unsafe.Add(ref searchSpace, offset);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found;
 
             offset -= 1;
         }
+
+        return -1;
+    FoundM7:
+        return (int)(offset - 7);
+    FoundM6:
+        return (int)(offset - 6);
+    FoundM5:
+        return (int)(offset - 5);
+    FoundM4:
+        return (int)(offset - 4);
+    FoundM3:
+        return (int)(offset - 3);
+    FoundM2:
+        return (int)(offset - 2);
+    FoundM1:
+        return (int)(offset - 1);
+    Found:
+        return (int)(offset);
     }
     else if (Vector256.IsHardwareAccelerated && length >= Vector256<TValue>.Count)
     {
@@ -2375,7 +2467,6 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
         return -1;
     }
 
-#if !MONO
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     internal static int LastIndexOfAnyValueType<T>(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber<T>
         => LastIndexOfAnyValueType<T, DontNegate<T>>(ref searchSpace, value0, value1, value2, length);
@@ -2383,7 +2474,6 @@ internal static int LastIndexOfAnyValueType<T>(ref T searchSpace, T value0, T va
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     internal static int LastIndexOfAnyExceptValueType<T>(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber<T>
         => LastIndexOfAnyValueType<T, Negate<T>>(ref searchSpace, value0, value1, value2, length);
-#endif
 
     [MethodImpl(MethodImplOptions.AggressiveOptimization)]
     private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, int length)
@@ -2406,21 +2496,21 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
             ref TValue current = ref Unsafe.Add(ref searchSpace, offset);
 
             lookUp = current;
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found;
             lookUp = Unsafe.Add(ref current, -1);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 1;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM1;
             lookUp = Unsafe.Add(ref current, -2);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 2;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM2;
             lookUp = Unsafe.Add(ref current, -3);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 3;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM3;
             lookUp = Unsafe.Add(ref current, -4);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 4;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM4;
             lookUp = Unsafe.Add(ref current, -5);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 5;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM5;
             lookUp = Unsafe.Add(ref current, -6);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 6;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM6;
             lookUp = Unsafe.Add(ref current, -7);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 7;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM7;
 
             offset -= 8;
         }
@@ -2432,13 +2522,13 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
             ref TValue current = ref Unsafe.Add(ref searchSpace, offset);
 
             lookUp = current;
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found;
             lookUp = Unsafe.Add(ref current, -1);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 1;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM1;
             lookUp = Unsafe.Add(ref current, -2);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 2;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM2;
             lookUp = Unsafe.Add(ref current, -3);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 3;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM3;
 
             offset -= 4;
         }
@@ -2448,10 +2538,27 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
             length -= 1;
 
             lookUp = Unsafe.Add(ref searchSpace, offset);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found;
 
             offset -= 1;
         }
+
+        return -1;
+    FoundM7:
+        return (int)(offset - 7);
+    FoundM6:
+        return (int)(offset - 6);
+    FoundM5:
+        return (int)(offset - 5);
+    FoundM4:
+        return (int)(offset - 4);
+    FoundM3:
+        return (int)(offset - 3);
+    FoundM2:
+        return (int)(offset - 2);
+    FoundM1:
+        return (int)(offset - 1);
+    Found:
+        return (int)(offset);
     }
     else if (Vector256.IsHardwareAccelerated && length >= Vector256<TValue>.Count)
     {
@@ -2517,7 +2624,6 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
         return -1;
     }
 
-#if !MONO
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
     internal static int LastIndexOfAnyValueType<T>(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber<T>
         => LastIndexOfAnyValueType<T, DontNegate<T>>(ref searchSpace, value0, value1, value2, value3, length);
@@ -2525,7 +2631,6 @@ internal static int LastIndexOfAnyValueType<T>(ref T searchSpace, T value0, T va
     [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int LastIndexOfAnyExceptValueType<T>(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber<T>
         => LastIndexOfAnyValueType<T, Negate<T>>(ref searchSpace, value0, value1, value2, value3, length);
-#endif
 
     [MethodImpl(MethodImplOptions.AggressiveOptimization)]
     private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, TValue value3, int length)
@@ -2546,13 +2651,13 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
             ref TValue current = ref Unsafe.Add(ref searchSpace, offset);
 
             lookUp = current;
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found;
             lookUp = Unsafe.Add(ref current, -1);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 1;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM1;
             lookUp = Unsafe.Add(ref current, -2);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 2;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM2;
             lookUp = Unsafe.Add(ref current, -3);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 3;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM3;
 
             offset -= 4;
         }
@@ -2562,10 +2667,19 @@ private static int LastIndexOfAnyValueType<TValue, TNegator>(ref TValue searchSp
             length -= 1;
 
             lookUp = Unsafe.Add(ref searchSpace, offset);
-            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset;
+            if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found;
 
             offset -= 1;
         }
+
+        return -1;
+    FoundM3:
+        return (int)(offset - 3);
+    FoundM2:
+        return (int)(offset - 2);
+    FoundM1:
+        return (int)(offset - 1);
+    Found:
+        return (int)(offset);
     }
     else if (Vector256.IsHardwareAccelerated && length >= Vector256<TValue>.Count)
     {
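All of the SpanHelpers rewrites above apply one pattern: the unrolled backward-search loops no longer return from inside the loop body; each hit jumps to a ladder of labels placed after both loops, so the hot loop carries only compares and branches and every unroll slot shares a single return block. A minimal sketch of the pattern in plain C (a hypothetical find_last helper over int data, not code from this patch; offset is assumed to be the last valid index on entry):

    #include <stddef.h>

    /* Search backwards, unrolled by 4; exits jump past the loop so the
     * loop body contains only compares and branches. */
    static ptrdiff_t find_last (const int *data, ptrdiff_t offset, int value)
    {
        while (offset >= 3) {
            if (data [offset]     == value) goto found0;
            if (data [offset - 1] == value) goto found_m1;
            if (data [offset - 2] == value) goto found_m2;
            if (data [offset - 3] == value) goto found_m3;
            offset -= 4;
        }
        while (offset >= 0) {
            if (data [offset] == value) goto found0;
            offset -= 1;
        }
        return -1;
    found_m3:
        return offset - 3;
    found_m2:
        return offset - 2;
    found_m1:
        return offset - 1;
    found0:
        return offset;
    }

Keeping the returns out of the loop body shrinks the loop and tends to produce tighter codegen on both the JIT and the interpreter, which is presumably why the shared implementation can now replace the separate Mono overloads that the deleted #if !MONO guards used to work around.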
diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c
index 0976c9bf5e466..939799f3dc84b 100644
--- a/src/mono/mono/mini/interp/interp.c
+++ b/src/mono/mono/mini/interp/interp.c
@@ -4910,6 +4910,41 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 #endif
             MINT_IN_BREAK;
+#define LDIND_OFFSET_ADD_MUL(datatype,casttype,unaligned) do { \
+    MONO_DISABLE_WARNING(4127) \
+    gpointer ptr = LOCAL_VAR (ip [2], gpointer); \
+    NULL_CHECK (ptr); \
+    ptr = (char*)ptr + (LOCAL_VAR (ip [3], mono_i) + (gint16)ip [4]) * (gint16)ip [5]; \
+    if (unaligned && ((gsize)ptr % SIZEOF_VOID_P)) \
+        memcpy (locals + ip [1], ptr, sizeof (datatype)); \
+    else \
+        LOCAL_VAR (ip [1], datatype) = *(casttype*)ptr; \
+    ip += 6; \
+    MONO_RESTORE_WARNING \
+} while (0)
+        MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_I1)
+            LDIND_OFFSET_ADD_MUL(gint32, gint8, FALSE);
+            MINT_IN_BREAK;
+        MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_U1)
+            LDIND_OFFSET_ADD_MUL(gint32, guint8, FALSE);
+            MINT_IN_BREAK;
+        MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_I2)
+            LDIND_OFFSET_ADD_MUL(gint32, gint16, FALSE);
+            MINT_IN_BREAK;
+        MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_U2)
+            LDIND_OFFSET_ADD_MUL(gint32, guint16, FALSE);
+            MINT_IN_BREAK;
+        MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_I4)
+            LDIND_OFFSET_ADD_MUL(gint32, gint32, FALSE);
+            MINT_IN_BREAK;
+        MINT_IN_CASE(MINT_LDIND_OFFSET_ADD_MUL_IMM_I8)
+#ifdef NO_UNALIGNED_ACCESS
+            LDIND_OFFSET_ADD_MUL(gint64, gint64, TRUE);
+#else
+            LDIND_OFFSET_ADD_MUL(gint64, gint64, FALSE);
+#endif
+            MINT_IN_BREAK;
+
 #define LDIND_OFFSET_IMM(datatype,casttype,unaligned) do { \
     MONO_DISABLE_WARNING(4127) \
     gpointer ptr = LOCAL_VAR (ip [2], gpointer); \
@@ -5111,6 +5146,14 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
             LOCAL_VAR (ip [1], gint64) = LOCAL_VAR (ip [2], gint64) * (gint16)ip [3];
             ip += 4;
             MINT_IN_BREAK;
+        MINT_IN_CASE(MINT_ADD_MUL_I4_IMM)
+            LOCAL_VAR (ip [1], gint32) = (LOCAL_VAR (ip [2], gint32) + (gint16)ip [3]) * (gint16)ip [4];
+            ip += 5;
+            MINT_IN_BREAK;
+        MINT_IN_CASE(MINT_ADD_MUL_I8_IMM)
+            LOCAL_VAR (ip [1], gint64) = (LOCAL_VAR (ip [2], gint64) + (gint16)ip [3]) * (gint16)ip [4];
+            ip += 5;
+            MINT_IN_BREAK;
         MINT_IN_CASE(MINT_MUL_R4) BINOP(float, *); MINT_IN_BREAK;
diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def
index 56080ef6ca50d..edeac370a6172 100644
--- a/src/mono/mono/mini/interp/mintops.def
+++ b/src/mono/mono/mini/interp/mintops.def
@@ -156,6 +156,13 @@ OPDEF(MINT_LDIND_OFFSET_IMM_U2, "ldind_off_imm.u2", 4, 1, 1, MintOpShortInt)
 OPDEF(MINT_LDIND_OFFSET_IMM_I4, "ldind_off_imm.i4", 4, 1, 1, MintOpShortInt)
 OPDEF(MINT_LDIND_OFFSET_IMM_I8, "ldind_off_imm.i8", 4, 1, 1, MintOpShortInt)
 
+OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_I1, "ldind_off_add_mul_imm.i1", 6, 1, 2, MintOpTwoShorts)
+OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_U1, "ldind_off_add_mul_imm.u1", 6, 1, 2, MintOpTwoShorts)
+OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_I2, "ldind_off_add_mul_imm.i2", 6, 1, 2, MintOpTwoShorts)
+OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_U2, "ldind_off_add_mul_imm.u2", 6, 1, 2, MintOpTwoShorts)
+OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_I4, "ldind_off_add_mul_imm.i4", 6, 1, 2, MintOpTwoShorts)
+OPDEF(MINT_LDIND_OFFSET_ADD_MUL_IMM_I8, "ldind_off_add_mul_imm.i8", 6, 1, 2, MintOpTwoShorts)
+
 OPDEF(MINT_STIND_I1, "stind.i1", 3, 0, 2, MintOpNoArgs)
 OPDEF(MINT_STIND_I2, "stind.i2", 3, 0, 2, MintOpNoArgs)
 OPDEF(MINT_STIND_I4, "stind.i4", 3, 0, 2, MintOpNoArgs)
@@ -644,6 +651,9 @@ OPDEF(MINT_RET_I8_IMM, "ret.i8.imm", 2, 0, 0, MintOpShortInt)
 OPDEF(MINT_ADD_I4_IMM, "add.i4.imm", 4, 1, 1, MintOpShortInt)
 OPDEF(MINT_ADD_I8_IMM, "add.i8.imm", 4, 1, 1, MintOpShortInt)
 
+OPDEF(MINT_ADD_MUL_I4_IMM, "add.mul.i4.imm", 5, 1, 1, MintOpTwoShorts)
+OPDEF(MINT_ADD_MUL_I8_IMM, "add.mul.i8.imm", 5, 1, 1, MintOpTwoShorts)
+
 OPDEF(MINT_MUL_I4_IMM, "mul.i4.imm", 4, 1, 1, MintOpShortInt)
 OPDEF(MINT_MUL_I8_IMM, "mul.i8.imm", 4, 1, 1, MintOpShortInt)
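The opcode definitions above fix the sizes of the new fused instructions: ldind_off_add_mul_imm.* occupies 6 code slots (opcode, dreg, base sreg, index sreg, add immediate, mul immediate) and add.mul.*.imm occupies 5. The point of both is to collapse the (index + imm1) * imm2 address arithmetic, and for the ldind form the dependent load as well, into a single dispatch. A toy switch-threaded fragment showing the shape of the win (a hypothetical mini-interpreter, not the mono code; the code-stream layout only mirrors the patch):

    #include <stdint.h>

    /* Toy register-based interpreter. ip layout for the fused op mirrors the
     * patch: [0] opcode, [1] dreg, [2] base sreg, [3] index sreg,
     * [4] add imm, [5] mul imm. */
    enum { OP_ADD_IMM, OP_MUL_IMM, OP_LDIND_I4, OP_LDIND_OFF_ADD_MUL_IMM_I4, OP_HALT };

    static int32_t run (const int16_t *ip, intptr_t *regs)
    {
        for (;;) {
            switch (ip [0]) {
            case OP_ADD_IMM:  regs [ip [1]] = regs [ip [2]] + ip [3]; ip += 4; break;
            case OP_MUL_IMM:  regs [ip [1]] = regs [ip [2]] * ip [3]; ip += 4; break;
            case OP_LDIND_I4: regs [ip [1]] = *(int32_t *)(regs [ip [2]] + regs [ip [3]]); ip += 4; break;
            case OP_LDIND_OFF_ADD_MUL_IMM_I4:
                /* one dispatch instead of three (add, mul, load) */
                regs [ip [1]] = *(int32_t *)(regs [ip [2]] + (regs [ip [3]] + ip [4]) * ip [5]);
                ip += 6;
                break;
            case OP_HALT: return (int32_t)regs [ip [1]];
            }
        }
    }

On targets where NO_UNALIGNED_ACCESS is defined, the real LDIND_OFFSET_ADD_MUL macro falls back to memcpy for the 8-byte case instead of dereferencing a possibly misaligned pointer.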
diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h
index 974cd3c7f457f..caea1370af8a1 100644
--- a/src/mono/mono/mini/interp/mintops.h
+++ b/src/mono/mono/mini/interp/mintops.h
@@ -79,6 +79,10 @@ typedef enum {
 #define MINT_IS_LDIND(op) ((op) >= MINT_LDIND_I1 && (op) <= MINT_LDIND_R8)
 #define MINT_IS_STIND_INT(op) ((op) >= MINT_STIND_I1 && (op) <= MINT_STIND_I8)
 #define MINT_IS_STIND(op) ((op) >= MINT_STIND_I1 && (op) <= MINT_STIND_REF)
+#define MINT_IS_LDIND_OFFSET(op) ((op) >= MINT_LDIND_OFFSET_I1 && (op) <= MINT_LDIND_OFFSET_I8)
+
+// TODO Add more
+#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_MONO_LDPTR)
 
 #define MINT_CALL_ARGS 2
 #define MINT_CALL_ARGS_SREG -2
diff --git a/src/mono/mono/mini/interp/tiering.c b/src/mono/mono/mini/interp/tiering.c
index 7830c66c4c111..1aa02dd341efe 100644
--- a/src/mono/mono/mini/interp/tiering.c
+++ b/src/mono/mono/mini/interp/tiering.c
@@ -209,5 +209,7 @@ mono_interp_tier_up_frame_patchpoint (InterpFrame *frame, ThreadContext *context
     }
     context->stack_pointer = (guchar*)frame->stack + optimized_method->alloca_size;
     frame->imethod = optimized_method;
-    return optimized_method->code + lookup_patchpoint_data (optimized_method, bb_index);
+    int offset = lookup_patchpoint_data (optimized_method, bb_index);
+    g_assert (offset != G_MAXINT32);
+    return optimized_method->code + offset;
 }
diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c
index 831268c14b3d2..0716e6c240180 100644
--- a/src/mono/mono/mini/interp/transform.c
+++ b/src/mono/mono/mini/interp/transform.c
@@ -145,6 +145,8 @@ MonoInterpStats mono_interp_stats;
 #define MINT_STIND_I MINT_STIND_I8
 #define MINT_LDELEM_I MINT_LDELEM_I8
 #define MINT_STELEM_I MINT_STELEM_I8
+#define MINT_MUL_P_IMM MINT_MUL_I8_IMM
+#define MINT_ADD_MUL_P_IMM MINT_ADD_MUL_I8_IMM
 #else
 #define MINT_MOV_P MINT_MOV_4
 #define MINT_LDNULL MINT_LDC_I4_0
@@ -152,6 +154,8 @@ MonoInterpStats mono_interp_stats;
 #define MINT_STIND_I MINT_STIND_I4
 #define MINT_LDELEM_I MINT_LDELEM_I4
 #define MINT_STELEM_I MINT_STELEM_I4
+#define MINT_MUL_P_IMM MINT_MUL_I4_IMM
+#define MINT_ADD_MUL_P_IMM MINT_ADD_MUL_I4_IMM
 #endif
 
 static const char *stack_type_string [] = { "I4", "I8", "R4", "R8", "O ", "VT", "MP", "F " };
@@ -280,11 +284,17 @@ interp_clear_ins (InterpInst *ins)
     ins->opcode = MINT_NOP;
 }
 
+static gboolean
+interp_ins_is_nop (InterpInst *ins)
+{
+    return ins->opcode == MINT_NOP || ins->opcode == MINT_IL_SEQ_POINT;
+}
+
 static InterpInst*
 interp_prev_ins (InterpInst *ins)
 {
     ins = ins->prev;
-    while (ins && (ins->opcode == MINT_NOP || ins->opcode == MINT_IL_SEQ_POINT))
+    while (ins && interp_ins_is_nop (ins))
         ins = ins->prev;
     return ins;
 }
@@ -300,6 +310,26 @@ check_stack_helper (TransformData *td, int n)
     return TRUE;
 }
 
+static InterpInst*
+interp_first_ins (InterpBasicBlock *bb)
+{
+    InterpInst *ins = bb->first_ins;
+    if (!ins || !interp_ins_is_nop (ins))
+        return ins;
+    while (ins && interp_ins_is_nop (ins))
+        ins = ins->next;
+    return ins;
+}
+
+static InterpInst*
+interp_last_ins (InterpBasicBlock *bb)
+{
+    InterpInst *ins = bb->last_ins;
+    if (!ins || !interp_ins_is_nop (ins))
+        return ins;
+    return interp_prev_ins (ins);
+}
+
 #define CHECK_STACK(td, n) \
     do { \
         if (!check_stack_helper (td, n)) \
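MINT_NO_SIDE_EFFECTS centralizes the test "this opcode does nothing but write its dreg", which is exactly the precondition both interp_local_deadce and the new unused-definition clearing in cprop need before erasing an instruction. A condensed sketch of dead-store elimination driven by such a predicate (hypothetical Inst type and opcode set, not the mono structures):

    #include <stdbool.h>
    #include <stddef.h>

    enum { OP_NOP, OP_MOV, OP_LDC, OP_CALL, OP_STORE_GLOBAL };

    typedef struct Inst {
        int opcode;
        int dreg;                 /* register written, or -1 */
        struct Inst *next;
    } Inst;

    /* Only opcodes whose sole effect is writing dreg may be erased;
     * OP_CALL or OP_STORE_GLOBAL must survive even if dreg is dead. */
    static bool no_side_effects (int opcode)
    {
        return opcode == OP_MOV || opcode == OP_LDC;
    }

    /* ref_count[r] == 0 means no instruction reads register r. */
    static void dead_store_elim (Inst *first, const int *ref_count)
    {
        for (Inst *ins = first; ins != NULL; ins = ins->next) {
            if (ins->dreg >= 0 && no_side_effects (ins->opcode) && ref_count [ins->dreg] == 0)
                ins->opcode = OP_NOP;   /* same trick as interp_clear_ins */
        }
    }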
@@ -686,7 +716,6 @@ interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock
 {
     gboolean needs_cprop = FALSE;
 
-    g_assert (!bb->in_count);
     for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
         if (ins->opcode == MINT_LDLOCA_S) {
             td->locals [ins->sregs [0]].indirects--;
@@ -696,6 +725,8 @@ interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock
             }
         }
     }
+    while (bb->in_count)
+        interp_unlink_bblocks (bb->in_bb [0], bb);
     while (bb->out_count)
         interp_unlink_bblocks (bb, bb->out_bb [0]);
     prev_bb->next_bb = bb->next_bb;
@@ -1452,7 +1483,7 @@ dump_interp_ins_data (InterpInst *ins, gint32 ins_offset, const guint16 *data, i
         g_string_append_printf (str, " %u", *(guint16*)data);
         break;
     case MintOpTwoShorts:
-        g_string_append_printf (str, " %u,%u", *(guint16*)data, *(guint16 *)(data + 1));
+        g_string_append_printf (str, " %d,%d", *(gint16*)data, *(gint16 *)(data + 1));
         break;
     case MintOpTwoInts:
         g_string_append_printf (str, " %u,%u", (guint32)READ32(data), (guint32)READ32(data + 2));
@@ -1627,6 +1658,32 @@ dump_interp_inst (InterpInst *ins)
     g_string_free (str, TRUE);
 }
 
+static GString*
+get_interp_bb_links (InterpBasicBlock *bb)
+{
+    GString *str = g_string_new ("");
+
+    if (bb->in_count) {
+        g_string_append_printf (str, "IN (%d", bb->in_bb [0]->index);
+        for (int i = 1; i < bb->in_count; i++)
+            g_string_append_printf (str, " %d", bb->in_bb [i]->index);
+        g_string_append_printf (str, "), ");
+    } else {
+        g_string_append_printf (str, "IN (nil), ");
+    }
+
+    if (bb->out_count) {
+        g_string_append_printf (str, "OUT (%d", bb->out_bb [0]->index);
+        for (int i = 1; i < bb->out_count; i++)
+            g_string_append_printf (str, " %d", bb->out_bb [i]->index);
+        g_string_append_printf (str, ")");
+    } else {
+        g_string_append_printf (str, "OUT (nil)");
+    }
+
+    return str;
+}
+
 static void
 dump_interp_bb (InterpBasicBlock *bb)
 {
@@ -3590,6 +3647,18 @@ interp_field_from_token (MonoMethod *method, guint32 token, MonoClass **klass, M
     return field;
 }
 
+static InterpBasicBlock*
+alloc_bb (TransformData *td)
+{
+    InterpBasicBlock *bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock));
+    bb->il_offset = -1;
+    bb->native_offset = -1;
+    bb->stack_height = -1;
+    bb->index = td->bb_count++;
+
+    return bb;
+}
+
 static InterpBasicBlock*
 get_bb (TransformData *td, unsigned char *ip, gboolean make_list)
 {
@@ -3597,13 +3666,10 @@ get_bb (TransformData *td, unsigned char *ip, gboolean make_list)
     InterpBasicBlock *bb = td->offset_to_bb [offset];
 
     if (!bb) {
-        bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock));
+        bb = alloc_bb (td);
+
         bb->il_offset = offset;
-        bb->native_offset = -1;
-        bb->stack_height = -1;
-        bb->index = td->bb_count++;
         td->offset_to_bb [offset] = bb;
-
         /* Add the blocks in reverse order */
         if (make_list)
             td->basic_blocks = g_list_prepend_mempool (td->mempool, td->basic_blocks, bb);
@@ -8222,6 +8288,150 @@ generate_compacted_code (TransformData *td)
     g_ptr_array_free (td->relocs, TRUE);
 }
 
+static void
+interp_mark_reachable_bblocks (TransformData *td)
+{
+    InterpBasicBlock **queue = mono_mem_manager_alloc0 (td->mem_manager, td->bb_count * sizeof (InterpBasicBlock*));
+    InterpBasicBlock *current;
+    int cur_index = 0;
+    int next_position = 0;
+
+    // FIXME There is no need to force eh bblocks to remain alive
+    current = td->entry_bb;
+    while (current != NULL) {
+        if (current->eh_block || current->patchpoint_data) {
+            queue [next_position++] = current;
+            current->reachable = TRUE;
+        } else {
+            current->reachable = FALSE;
+        }
+        current = current->next_bb;
+    }
+
+    queue [next_position++] = td->entry_bb;
+    td->entry_bb->reachable = TRUE;
+
+    // We have the roots, traverse everything else
+    while (cur_index < next_position) {
+        current = queue [cur_index++];
+        for (int i = 0; i < current->out_count; i++) {
+            InterpBasicBlock *child = current->out_bb [i];
+            if (!child->reachable) {
+                queue [next_position++] = child;
+                child->reachable = TRUE;
+            }
+        }
+    }
+}
+
+static gboolean
+interp_prev_ins_defines_var (InterpInst *ins, int var1, int var2)
+{
+    // Check max of 5 instructions
+    for (int i = 0; i < 5; i++) {
+        ins = interp_prev_ins (ins);
+        if (!ins)
+            return FALSE;
+        if (mono_interp_op_dregs [ins->opcode] && (ins->dreg == var1 || ins->dreg == var2))
+            return TRUE;
+    }
+    return FALSE;
+}
+
+static void
+interp_reorder_bblocks (TransformData *td)
+{
+    InterpBasicBlock *bb;
+
+    for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
+        InterpInst *first = interp_first_ins (bb);
+        if (!first)
+            continue;
+        if (MINT_IS_CONDITIONAL_BRANCH (first->opcode)) {
+            // This means this bblock has a single instruction, the conditional branch
+            int i = 0;
+            int lookup_var2 = (mono_interp_op_sregs [first->opcode] > 1) ? first->sregs [1] : -1;
+            while (i < bb->in_count) {
+                InterpBasicBlock *in_bb = bb->in_bb [i];
+                InterpInst *last_ins = interp_last_ins (in_bb);
+                if (last_ins && last_ins->opcode == MINT_BR && interp_prev_ins_defines_var (last_ins, first->sregs [0], lookup_var2)) {
+                    // This bblock is reached unconditionally from one of its parents
+                    // Move the conditional branch inside the parent to facilitate propagation
+                    // of condition value.
+                    InterpBasicBlock *cond_true_bb = first->info.target_bb;
+                    InterpBasicBlock *next_bb = bb->next_bb;
+
+                    // parent bb will do the conditional branch
+                    interp_unlink_bblocks (in_bb, bb);
+                    last_ins->opcode = first->opcode;
+                    last_ins->sregs [0] = first->sregs [0];
+                    last_ins->sregs [1] = first->sregs [1];
+                    last_ins->info.target_bb = cond_true_bb;
+                    interp_link_bblocks (td, in_bb, cond_true_bb);
+
+                    // Create new fallthrough bb between in_bb and in_bb->next_bb
+                    InterpBasicBlock *new_bb = alloc_bb (td);
+                    new_bb->next_bb = in_bb->next_bb;
+                    in_bb->next_bb = new_bb;
+                    interp_link_bblocks (td, in_bb, new_bb);
+
+                    InterpInst *new_inst = interp_insert_ins_bb (td, new_bb, NULL, MINT_BR);
+                    new_inst->info.target_bb = next_bb;
+
+                    interp_link_bblocks (td, new_bb, next_bb);
+                    if (td->verbose_level) {
+                        GString* bb_info = get_interp_bb_links (bb);
+                        GString* in_bb_info = get_interp_bb_links (in_bb);
+                        GString* new_bb_info = get_interp_bb_links (new_bb);
+                        g_print ("Moved cond branch BB%d into BB%d, new BB%d\n", bb->index, in_bb->index, new_bb->index);
+                        g_print ("\tBB%d: %s\n", bb->index, bb_info->str);
+                        g_print ("\tBB%d: %s\n", in_bb->index, in_bb_info->str);
+                        g_print ("\tBB%d: %s\n", new_bb->index, new_bb_info->str);
+                        g_string_free (bb_info, TRUE);
+                        g_string_free (in_bb_info, TRUE);
+                        g_string_free (new_bb_info, TRUE);
+                    }
+                    // Since we changed links, in_bb might have changed, loop again from the start
+                    i = 0;
+                } else {
+                    i++;
+                }
+            }
+        } else if (first->opcode == MINT_BR) {
+            // All bblocks jumping into this bblock can jump directly into the br target
+            int i = 0;
+            while (i < bb->in_count) {
+                InterpBasicBlock *in_bb = bb->in_bb [i];
+                InterpInst *last_ins = interp_last_ins (in_bb);
+                if (last_ins && (MINT_IS_CONDITIONAL_BRANCH (last_ins->opcode) ||
+                        MINT_IS_UNCONDITIONAL_BRANCH (last_ins->opcode)) &&
+                        last_ins->info.target_bb == bb) {
+                    InterpBasicBlock *target_bb = first->info.target_bb;
+                    last_ins->info.target_bb = target_bb;
+                    interp_unlink_bblocks (in_bb, bb);
+                    interp_link_bblocks (td, in_bb, target_bb);
+                    if (td->verbose_level) {
+                        GString* bb_info = get_interp_bb_links (bb);
+                        GString* in_bb_info = get_interp_bb_links (in_bb);
+                        GString* target_bb_info = get_interp_bb_links (target_bb);
+                        g_print ("Propagated target bb BB%d into BB%d\n", target_bb->index, in_bb->index);
+                        g_print ("\tBB%d: %s\n", bb->index, bb_info->str);
+                        g_print ("\tBB%d: %s\n", in_bb->index, in_bb_info->str);
+                        g_print ("\tBB%d: %s\n", target_bb->index, target_bb_info->str);
+                        g_string_free (bb_info, TRUE);
+                        g_string_free (in_bb_info, TRUE);
+                        g_string_free (target_bb_info, TRUE);
+                    }
+                    i = 0;
+                } else {
+                    i++;
+                }
+            }
+        }
+    }
+}
+
 // Traverse the list of basic blocks and merge adjacent blocks
 static gboolean
 interp_optimize_bblocks (TransformData *td)
@@ -8229,16 +8439,20 @@ interp_optimize_bblocks (TransformData *td)
     InterpBasicBlock *bb = td->entry_bb;
     gboolean needs_cprop = FALSE;
 
+    interp_reorder_bblocks (td);
+
+    interp_mark_reachable_bblocks (td);
+
     while (TRUE) {
         InterpBasicBlock *next_bb = bb->next_bb;
         if (!next_bb)
             break;
-        if (next_bb->in_count == 0 && !next_bb->eh_block) {
+        if (!next_bb->reachable) {
             if (td->verbose_level)
                 g_print ("Removed BB%d\n", next_bb->index);
             needs_cprop |= interp_remove_bblock (td, next_bb, bb);
             continue;
-        } else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && !next_bb->eh_block) {
+        } else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && !next_bb->eh_block && !next_bb->patchpoint_data) {
             g_assert (next_bb->in_bb [0] == bb);
             interp_merge_bblocks (td, bb, next_bb);
             if (td->verbose_level)
@@ -8262,25 +8476,31 @@ interp_local_deadce (TransformData *td)
     for (unsigned int i = 0; i < td->locals_size; i++) {
         g_assert (local_ref_count [i] >= 0);
         g_assert (td->locals [i].indirects >= 0);
-        if (!local_ref_count [i] &&
-                !td->locals [i].indirects &&
-                (td->locals [i].flags & INTERP_LOCAL_FLAG_DEAD) == 0) {
+        if (td->locals [i].indirects || (td->locals [i].flags & INTERP_LOCAL_FLAG_DEAD))
+            continue;
+        if (!local_ref_count [i]) {
             needs_dce = TRUE;
             td->locals [i].flags |= INTERP_LOCAL_FLAG_DEAD;
+        } else if (!(td->locals [i].flags & INTERP_LOCAL_FLAG_UNKNOWN_USE)) {
+            if (!(td->locals [i].flags & INTERP_LOCAL_FLAG_LOCAL_ONLY)) {
+                // The value of this var is not passed between multiple basic blocks
+                td->locals [i].flags |= INTERP_LOCAL_FLAG_LOCAL_ONLY;
+                if (td->verbose_level)
+                    g_print ("Var %d is local only\n", i);
+                needs_cprop = TRUE;
+            }
         }
+        td->locals [i].flags &= ~INTERP_LOCAL_FLAG_UNKNOWN_USE;
     }
 
     // Return early if all locals are alive
     if (!needs_dce)
-        return FALSE;
+        return needs_cprop;
 
     // Kill instructions that don't use stack and are storing into dead locals
     for (InterpBasicBlock *bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
         for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
-            if (MINT_IS_MOV (ins->opcode) ||
-                    MINT_IS_LDC_I4 (ins->opcode) ||
-                    MINT_IS_LDC_I8 (ins->opcode) ||
-                    ins->opcode == MINT_MONO_LDPTR ||
+            if (MINT_NO_SIDE_EFFECTS (ins->opcode) ||
                     ins->opcode == MINT_LDLOCA_S) {
                 int dreg = ins->dreg;
                 if (td->locals [dreg].flags & INTERP_LOCAL_FLAG_DEAD) {
@@ -8669,6 +8889,7 @@ cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_de
     int sreg = *psreg;
 
     local_ref_count [sreg]++;
+    local_defs [sreg].ref_count++;
     if (local_defs [sreg].type == LOCAL_VALUE_LOCAL) {
         int cprop_local = local_defs [sreg].local;
 
@@ -8684,6 +8905,8 @@ cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_de
         local_ref_count [cprop_local]++;
         if (td->verbose_level)
             dump_interp_inst (ins);
+    } else if (!local_defs [sreg].ins) {
+        td->locals [sreg].flags |= INTERP_LOCAL_FLAG_UNKNOWN_USE;
     }
 }
 
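Together, interp_reorder_bblocks and interp_mark_reachable_bblocks replace the old in_count == 0 heuristic in interp_optimize_bblocks: branches are first threaded (lone conditional branches are hoisted into predecessors that just defined the condition, and jumps into br-only blocks are redirected to the final target), then a worklist walk from the entry block, with EH and patchpoint blocks as extra roots, marks what is still reachable so everything else can be deleted, including dead cycles whose blocks still point at each other. The traversal reduced to its essentials (hypothetical Block type; the queue is assumed preallocated with one slot per block, as in the patch):

    #include <stdbool.h>

    typedef struct Block {
        bool reachable;
        int out_count;
        struct Block **out_bb;
    } Block;

    /* Breadth-first marking: anything never enqueued stays unreachable,
     * even if unreachable blocks keep positive in-counts among themselves. */
    static void mark_reachable (Block **queue, Block *entry)
    {
        int head = 0, tail = 0;
        queue [tail++] = entry;
        entry->reachable = true;
        while (head < tail) {
            Block *cur = queue [head++];
            for (int i = 0; i < cur->out_count; i++) {
                Block *child = cur->out_bb [i];
                if (!child->reachable) {
                    child->reachable = true;
                    queue [tail++] = child;
                }
            }
        }
    }

Because removal is now reachability-based, interp_remove_bblock can no longer assert in_count == 0; it unlinks any remaining predecessor edges itself, which is what the new while (bb->in_count) loop above does.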
@@ -8693,6 +8916,35 @@ clear_local_defs (TransformData *td, int var, void *data)
     LocalValue *local_defs = (LocalValue*) data;
     local_defs [var].type = LOCAL_VALUE_NONE;
     local_defs [var].ins = NULL;
+    local_defs [var].ref_count = 0;
+}
+
+static void
+clear_unused_defs (TransformData *td, int var, void *data)
+{
+    if (!(td->locals [var].flags & INTERP_LOCAL_FLAG_LOCAL_ONLY))
+        return;
+    if (td->locals [var].indirects)
+        return;
+
+    LocalValue *local_def = &((LocalValue*) data) [var];
+    InterpInst *def_ins = local_def->ins;
+    if (!def_ins)
+        return;
+    if (local_def->ref_count)
+        return;
+
+    // This is a local only var that is defined in this bblock and its value is not used
+    // at all in this bblock. Clear the definition
+    if (MINT_NO_SIDE_EFFECTS (def_ins->opcode)) {
+        for (int i = 0; i < mono_interp_op_sregs [def_ins->opcode]; i++)
+            td->local_ref_count [def_ins->sregs [i]]--;
+        if (td->verbose_level) {
+            g_print ("kill unused local def:\n\t");
+            dump_interp_inst (def_ins);
+        }
+        interp_clear_ins (def_ins);
+    }
 }
 
 static void
@@ -8703,6 +8955,7 @@ interp_cprop (TransformData *td)
     InterpBasicBlock *bb;
     gboolean needs_retry;
     int ins_index;
+    int iteration_count = 0;
 
     td->local_ref_count = local_ref_count;
 retry:
@@ -8710,7 +8963,7 @@ interp_cprop (TransformData *td)
     memset (local_ref_count, 0, td->locals_size * sizeof (int));
 
     if (td->verbose_level)
-        g_print ("\ncprop iteration\n");
+        g_print ("\ncprop iteration %d\n", iteration_count++);
 
     for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) {
         InterpInst *ins;
@@ -8722,8 +8975,11 @@ interp_cprop (TransformData *td)
         for (ins = bb->first_ins; ins != NULL; ins = ins->next)
             foreach_local_var (td, ins, local_defs, clear_local_defs);
 
-        if (td->verbose_level)
-            g_print ("BB%d\n", bb->index);
+        if (td->verbose_level) {
+            GString* bb_info = get_interp_bb_links (bb);
+            g_print ("\nBB%d: %s\n", bb->index, bb_info->str);
+            g_string_free (bb_info, TRUE);
+        }
 
         for (ins = bb->first_ins; ins != NULL; ins = ins->next) {
             int opcode = ins->opcode;
@@ -8759,6 +9015,18 @@ interp_cprop (TransformData *td)
             }
 
             if (num_dregs) {
+                // Check if the previous definition of this var was used at all.
+                // If it wasn't we can just clear the instruction
+                if (local_defs [dreg].ins != NULL &&
+                        local_defs [dreg].ref_count == 0 &&
+                        !td->locals [dreg].indirects) {
+                    InterpInst *prev_def = local_defs [dreg].ins;
+                    if (MINT_NO_SIDE_EFFECTS (prev_def->opcode)) {
+                        for (int i = 0; i < mono_interp_op_sregs [prev_def->opcode]; i++)
+                            local_ref_count [prev_def->sregs [i]]--;
+                        interp_clear_ins (prev_def);
+                    }
+                }
                 local_defs [dreg].type = LOCAL_VALUE_NONE;
                 local_defs [dreg].ins = ins;
                 local_defs [dreg].def_index = ins_index;
@@ -9093,8 +9361,12 @@ interp_cprop (TransformData *td)
                     needs_retry = TRUE;
                 }
             }
+
             ins_index++;
         }
+
+        for (ins = bb->first_ins; ins != NULL; ins = ins->next)
+            foreach_local_var (td, ins, local_defs, clear_unused_defs);
     }
 
     needs_retry |= interp_local_deadce (td);
@@ -9318,6 +9590,27 @@ interp_super_instructions (TransformData *td)
                     dump_interp_inst (new_inst);
                 }
             }
+        } else if (opcode == MINT_MUL_I4_IMM || opcode == MINT_MUL_I8_IMM) {
+            int sreg = ins->sregs [0];
+            InterpInst *def = td->locals [sreg].def;
+            if (def != NULL && td->local_ref_count [sreg] == 1) {
+                gboolean is_i4 = opcode == MINT_MUL_I4_IMM;
+                if ((is_i4 && def->opcode == MINT_ADD_I4_IMM) ||
+                        (!is_i4 && def->opcode == MINT_ADD_I8_IMM)) {
+                    InterpInst *new_inst = interp_insert_ins (td, ins, is_i4 ? MINT_ADD_MUL_I4_IMM : MINT_ADD_MUL_I8_IMM);
+                    new_inst->dreg = ins->dreg;
+                    new_inst->sregs [0] = def->sregs [0];
+                    new_inst->data [0] = def->data [0];
+                    new_inst->data [1] = ins->data [0];
+                    interp_clear_ins (def);
+                    interp_clear_ins (ins);
+                    local_ref_count [sreg]--;
+                    if (td->verbose_level) {
+                        g_print ("superins: ");
+                        dump_interp_inst (new_inst);
+                    }
+                }
+            }
         } else if (MINT_IS_BINOP_SHIFT (opcode)) {
             // ldc + sh -> sh.imm
             gint16 imm;
@@ -9365,6 +9658,43 @@ interp_super_instructions (TransformData *td)
                 }
             }
+        } else if (MINT_IS_LDIND_OFFSET (opcode)) {
+            int sreg_off = ins->sregs [1];
+            InterpInst *def = td->locals [sreg_off].def;
+            if (def != NULL && td->local_ref_count [sreg_off] == 1) {
+                if (def->opcode == MINT_MUL_P_IMM || def->opcode == MINT_ADD_P_IMM || def->opcode == MINT_ADD_MUL_P_IMM) {
+                    int ldind_offset_op = MINT_LDIND_OFFSET_ADD_MUL_IMM_I1 + (opcode - MINT_LDIND_OFFSET_I1);
+                    InterpInst *new_inst = interp_insert_ins (td, ins, ldind_offset_op);
+                    new_inst->dreg = ins->dreg;
+                    new_inst->sregs [0] = ins->sregs [0]; // base
+                    new_inst->sregs [1] = def->sregs [0]; // off
+
+                    // set the add and mul immediates
+                    switch (def->opcode) {
+                        case MINT_ADD_P_IMM:
+                            new_inst->data [0] = def->data [0];
+                            new_inst->data [1] = 1;
+                            break;
+                        case MINT_MUL_P_IMM:
+                            new_inst->data [0] = 0;
+                            new_inst->data [1] = def->data [0];
+                            break;
+                        case MINT_ADD_MUL_P_IMM:
+                            new_inst->data [0] = def->data [0];
+                            new_inst->data [1] = def->data [1];
+                            break;
+                    }
+
+                    interp_clear_ins (def);
+                    interp_clear_ins (ins);
+                    local_ref_count [sreg_off]--;
+                    mono_interp_stats.super_instructions++;
+                    if (td->verbose_level) {
+                        g_print ("method %s:%s, superins: ", m_class_get_name (td->method->klass), td->method->name);
+                        dump_interp_inst (new_inst);
+                    }
+                }
+            }
         } else if (MINT_IS_STIND_INT (opcode)) {
             int sreg_base = ins->sregs [0];
             InterpInst *def = td->locals [sreg_base].def;
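The hunk that follows is the interpreter equivalent of compare-and-branch fusion: "t = (a == b); brtrue t" becomes "beq a, b", and the negated brfalse form maps to bne.un. Stepping ins back and continuing afterwards matters because the fused branch may itself feed a later pattern, such as the safepoint form handled just below it. The opcode mapping in isolation (hypothetical opcode set, not the MINT_ constants):

    /* Pick the fused compare-branch opcode, or -1 when the compare
     * producing the condition is not one we know how to fold. */
    enum { OP_CEQ_I4, OP_CEQ_I8, OP_BRTRUE, OP_BRFALSE,
           OP_BEQ_I4, OP_BEQ_I8, OP_BNE_UN_I4, OP_BNE_UN_I8 };

    static int fused_branch_opcode (int cmp_opcode, int branch_opcode)
    {
        int negate = branch_opcode == OP_BRFALSE;
        switch (cmp_opcode) {
        case OP_CEQ_I4: return negate ? OP_BNE_UN_I4 : OP_BEQ_I4;
        case OP_CEQ_I8: return negate ? OP_BNE_UN_I8 : OP_BEQ_I8;
        default:        return -1;
        }
    }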
@@ -9449,6 +9779,34 @@ interp_super_instructions (TransformData *td)
                 }
             }
         } else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode) && is_short_offset (noe, ins->info.target_bb->native_offset_estimate)) {
+            if (opcode == MINT_BRFALSE_I4 || opcode == MINT_BRTRUE_I4) {
+                gboolean negate = opcode == MINT_BRFALSE_I4;
+                int cond_sreg = ins->sregs [0];
+                InterpInst *def = td->locals [cond_sreg].def;
+                if (def != NULL) {
+                    int replace_opcode = -1;
+                    switch (def->opcode) {
+                        case MINT_CEQ_I4: replace_opcode = negate ? MINT_BNE_UN_I4 : MINT_BEQ_I4; break;
+                        case MINT_CEQ_I8: replace_opcode = negate ? MINT_BNE_UN_I8 : MINT_BEQ_I8; break;
+                        // Add more opcodes
+                        default:
+                            break;
+                    }
+                    if (replace_opcode != -1) {
+                        ins->opcode = replace_opcode;
+                        ins->sregs [0] = def->sregs [0];
+                        ins->sregs [1] = def->sregs [1];
+                        interp_clear_ins (def);
+                        if (td->verbose_level) {
+                            g_print ("superins: ");
+                            dump_interp_inst (ins);
+                        }
+                        // The newly added opcode could be part of further superinstructions. Retry
+                        ins = ins->prev;
+                        continue;
+                    }
+                }
+            }
             InterpInst *prev_ins = interp_prev_ins (ins);
             if (prev_ins && prev_ins->opcode == MINT_SAFEPOINT) {
                 int condbr_op = get_unop_condbr_sp (opcode);
@@ -9460,7 +9818,6 @@ interp_super_instructions (TransformData *td)
                     dump_interp_inst (ins);
                 }
             }
-            }
         } else if (opcode == MINT_STOBJ_VT_NOREF) {
             int sreg_src = ins->sregs [1];
diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h
index 9c78df4fc5dac..7c080406ab5c6 100644
--- a/src/mono/mono/mini/interp/transform.h
+++ b/src/mono/mono/mini/interp/transform.h
@@ -21,6 +21,9 @@
 #define INTERP_LOCAL_FLAG_GLOBAL 8
 #define INTERP_LOCAL_FLAG_NO_CALL_ARGS 16
 
+#define INTERP_LOCAL_FLAG_UNKNOWN_USE 32
+#define INTERP_LOCAL_FLAG_LOCAL_ONLY 64
+
 typedef struct _InterpInst InterpInst;
 typedef struct _InterpBasicBlock InterpBasicBlock;
 
@@ -59,6 +62,8 @@ typedef struct {
     // The instruction that writes this local.
     InterpInst *ins;
     int def_index;
+    // ref count for ins->dreg
+    int ref_count;
 } LocalValue;
 
 struct _InterpInst {
@@ -125,6 +130,7 @@ struct _InterpBasicBlock {
     SeqPoint **pred_seq_points;
     guint num_pred_seq_points;
 
+    int reachable : 1;
     // This block has special semantics and it shouldn't be optimized away
     int eh_block : 1;
     int dead: 1;
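Every fusion in the superinstruction hunks above follows the same recipe: fetch the operand's definition via td->locals [sreg].def, require local_ref_count [sreg] == 1 so the fused instruction is the only reader, rewrite into the combined opcode, and NOP out the producer. The ADD_IMM + MUL_IMM case restated over a hypothetical IR (a sketch, not the mono types):

    #include <stdbool.h>

    enum { OP_NOP, OP_ADD_IMM, OP_MUL_IMM, OP_ADD_MUL_IMM };

    typedef struct Inst {
        int opcode, dreg;
        int sregs [2];
        short data [2];
    } Inst;

    /* Fuse "t = x + a" followed by "r = t * m" into "r = (x + a) * m"
     * when t has exactly one reader (the multiply). */
    static bool try_fuse_add_mul (Inst *def, Inst *use, const int *ref_count)
    {
        if (def->opcode != OP_ADD_IMM || use->opcode != OP_MUL_IMM)
            return false;
        if (use->sregs [0] != def->dreg || ref_count [def->dreg] != 1)
            return false;
        use->opcode = OP_ADD_MUL_IMM;
        use->sregs [0] = def->sregs [0];
        use->data [1] = use->data [0];   /* mul immediate moves to slot 1 */
        use->data [0] = def->data [0];   /* add immediate goes in slot 0 */
        def->opcode = OP_NOP;            /* the definition is now dead */
        return true;
    }

The single-use requirement is what makes the rewrite safe without full dataflow analysis: if any other instruction still read the intermediate value, clearing its definition would change behavior.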