Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Vectorize IndexOf for OrdinalIgnoreCase #67758

Merged
merged 12 commits into from
Apr 9, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
using System.Diagnostics;
using System.Text.Unicode;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace System.Globalization
{
Expand Down Expand Up @@ -223,7 +225,7 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
// target strings can never be found inside small search spaces. This check also
// handles empty 'source' spans.

return -1;
goto NOT_FOUND;
}

if (GlobalizationMode.Invariant)
Expand All @@ -236,7 +238,46 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
return CompareInfo.NlsIndexOfOrdinalCore(source, value, ignoreCase: true, fromBeginning: true);
}

return OrdinalCasing.IndexOf(source, value);
// if value starts with an ASCII char we can use a vectorized path
ref char valueRef = ref MemoryMarshal.GetReference(value);
char valueChar = valueRef;

if (!char.IsAscii(valueChar))
{
// Fall back to a version that is friendlier to non-ASCII input
return OrdinalCasing.IndexOf(source, value);
}

ref char searchSpace = ref MemoryMarshal.GetReference(source);
int valueLength = value.Length;
int searchSpaceLength = source.Length;

do
{
// If valueChar is an ASCII letter ([a..z] or [A..Z]), search for both its lowercase and uppercase counterparts using IndexOfAny;
// otherwise, use a plain IndexOf.
int candidatePos = (uint)((valueChar | 0x20) - 'a') <= 'z' - 'a' ?
SpanHelpers.IndexOfAny(ref searchSpace, (char)(valueChar & ~0x20), (char)(valueChar | 0x20), searchSpaceLength) :
SpanHelpers.IndexOf(ref searchSpace, valueChar, searchSpaceLength);

if (candidatePos == -1)
{
// the whole input doesn't contain the first char
goto NOT_FOUND;
}

// Do ASCII and non-ASCII friendly compare for the current candidate
if (EqualsIgnoreCase(ref searchSpace, ref valueRef, valueLength))
{
return source.Length - searchSpaceLength;
}

searchSpace = Unsafe.Add(ref searchSpace, (nuint)(candidatePos + valueLength));
searchSpaceLength -= candidatePos + valueLength;
} while (searchSpaceLength >= valueLength);

NOT_FOUND:
return -1;
}

internal static int LastIndexOf(string source, string value, int startIndex, int count)
Expand Down