diff --git a/src/Common/src/CoreLib/System/Globalization/CompareInfo.Unix.cs b/src/Common/src/CoreLib/System/Globalization/CompareInfo.Unix.cs index f6ca669d6186..3388c09e1c23 100644 --- a/src/Common/src/CoreLib/System/Globalization/CompareInfo.Unix.cs +++ b/src/Common/src/CoreLib/System/Globalization/CompareInfo.Unix.cs @@ -238,26 +238,25 @@ internal unsafe int IndexOfCore(string source, string target, int startIndex, in Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0); Debug.Assert((options & CompareOptions.Ordinal) == 0); -#if CORECLR - if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && target.IsFastSort()) + int index; + + if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options)) { - int index = IndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options)); - if (index != -1) + if ((options & CompareOptions.IgnoreCase) != 0) + index = IndexOfOrdinalIgnoreCaseHelper(source.AsSpan(startIndex, count), target.AsSpan(), options, matchLengthPtr, fromBeginning: true); + else + index = IndexOfOrdinalHelper(source.AsSpan(startIndex, count), target.AsSpan(), options, matchLengthPtr, fromBeginning: true); + } + else + { + fixed (char* pSource = source) + fixed (char* pTarget = target) { - if (matchLengthPtr != null) - *matchLengthPtr = target.Length; + index = Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource + startIndex, count, options, matchLengthPtr); } - return index; } -#endif - - fixed (char* pSource = source) - fixed (char* pTarget = target) - { - int index = Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource + startIndex, count, options, matchLengthPtr); - return index != -1 ? index + startIndex : -1; - } + return index != -1 ? index + startIndex : -1; } // For now, this method is only called from Span APIs with either options == CompareOptions.None or CompareOptions.IgnoreCase @@ -269,7 +268,7 @@ internal unsafe int IndexOfCore(ReadOnlySpan source, ReadOnlySpan ta if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options)) { - if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase) + if ((options & CompareOptions.IgnoreCase) != 0) return IndexOfOrdinalIgnoreCaseHelper(source, target, options, matchLengthPtr, fromBeginning); else return IndexOfOrdinalHelper(source, target, options, matchLengthPtr, fromBeginning); @@ -296,7 +295,6 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan source, Rea { Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!source.IsEmpty); Debug.Assert(!target.IsEmpty); Debug.Assert(_isAsciiEqualityOrdinal); @@ -312,7 +310,7 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan source, Rea for (int j = 0; j < target.Length; j++) { char targetChar = *(b + j); - if (targetChar >= 0x80 || s_highCharTable[targetChar]) + if (targetChar >= 0x80 || HighCharTable[targetChar]) goto InteropCall; } @@ -344,7 +342,7 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan source, Rea char valueChar = *(a + sourceIndex); char targetChar = *(b + targetIndex); - if (valueChar == targetChar && valueChar < 0x80 && !s_highCharTable[valueChar]) + if (valueChar == targetChar && valueChar < 0x80 && !HighCharTable[valueChar]) { continue; } @@ -355,7 +353,7 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan source, Rea if ((uint)(targetChar - 'a') <= ('z' - 'a')) targetChar = (char)(targetChar - 0x20); - if (valueChar >= 0x80 || s_highCharTable[valueChar]) + if (valueChar >= 0x80 || HighCharTable[valueChar]) goto InteropCall; else if (valueChar != targetChar) break; @@ -370,7 +368,8 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan source, Rea } return -1; - InteropCall: + + InteropCall: if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); else @@ -382,7 +381,6 @@ private unsafe int IndexOfOrdinalHelper(ReadOnlySpan source, ReadOnlySpan< { Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(!source.IsEmpty); Debug.Assert(!target.IsEmpty); Debug.Assert(_isAsciiEqualityOrdinal); @@ -392,16 +390,24 @@ private unsafe int IndexOfOrdinalHelper(ReadOnlySpan source, ReadOnlySpan< char* a = ap; char* b = bp; - if (target.Length > source.Length) - goto InteropCall; - for (int j = 0; j < target.Length; j++) { char targetChar = *(b + j); - if (targetChar >= 0x80 || s_highCharTable[targetChar]) + if (targetChar >= 0x80 || HighCharTable[targetChar]) goto InteropCall; } + if (target.Length > source.Length) + { + for (int k = 0; k < source.Length; k++) + { + char targetChar = *(a + k); + if (targetChar >= 0x80 || HighCharTable[targetChar]) + goto InteropCall; + } + return -1; + } + int startIndex, endIndex, jump; if (fromBeginning) { @@ -430,7 +436,7 @@ private unsafe int IndexOfOrdinalHelper(ReadOnlySpan source, ReadOnlySpan< char valueChar = *(a + sourceIndex); char targetChar = *(b + targetIndex); - if (valueChar >= 0x80 || s_highCharTable[valueChar]) + if (valueChar >= 0x80 || HighCharTable[valueChar]) goto InteropCall; else if (valueChar != targetChar) break; @@ -445,6 +451,7 @@ private unsafe int IndexOfOrdinalHelper(ReadOnlySpan source, ReadOnlySpan< } return -1; + InteropCall: if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); @@ -471,42 +478,29 @@ private unsafe int LastIndexOfCore(string source, string target, int startIndex, return LastIndexOfOrdinalCore(source, target, startIndex, count, ignoreCase: false); } -#if CORECLR - if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && target.IsFastSort()) - { - return LastIndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options)); - } -#endif - // startIndex is the index into source where we start search backwards from. leftStartIndex is the index into source // of the start of the string that is count characters away from startIndex. int leftStartIndex = (startIndex - count + 1); - fixed (char* pSource = source) - fixed (char* pTarget = target) - { - int lastIndex = Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource + (startIndex - count + 1), count, options); + int lastIndex; - return lastIndex != -1 ? lastIndex + leftStartIndex : -1; + if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options)) + { + if ((options & CompareOptions.IgnoreCase) != 0) + lastIndex = IndexOfOrdinalIgnoreCaseHelper(source.AsSpan(leftStartIndex, count), target.AsSpan(), options, matchLengthPtr: null, fromBeginning: false); + else + lastIndex = IndexOfOrdinalHelper(source.AsSpan(leftStartIndex, count), target.AsSpan(), options, matchLengthPtr: null, fromBeginning: false); } - } - - private bool StartsWith(string source, string prefix, CompareOptions options) - { - Debug.Assert(!GlobalizationMode.Invariant); - - Debug.Assert(!string.IsNullOrEmpty(source)); - Debug.Assert(!string.IsNullOrEmpty(prefix)); - Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); - -#if CORECLR - if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && prefix.IsFastSort()) + else { - return IsPrefix(source, prefix, GetOrdinalCompareOptions(options)); + fixed (char* pSource = source) + fixed (char* pTarget = target) + { + lastIndex = Interop.Globalization.LastIndexOf(_sortHandle, pTarget, target.Length, pSource + (startIndex - count + 1), count, options); + } } -#endif - return Interop.Globalization.StartsWith(_sortHandle, prefix, prefix.Length, source, source.Length, options); + return lastIndex != -1 ? lastIndex + leftStartIndex : -1; } private unsafe bool StartsWith(ReadOnlySpan source, ReadOnlySpan prefix, CompareOptions options) @@ -519,19 +513,10 @@ private unsafe bool StartsWith(ReadOnlySpan source, ReadOnlySpan pre if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options)) { - if (source.Length < prefix.Length) - { - return false; - } - - if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase) - { + if ((options & CompareOptions.IgnoreCase) != 0) return StartsWithOrdinalIgnoreCaseHelper(source, prefix, options); - } else - { return StartsWithOrdinalHelper(source, prefix, options); - } } else { @@ -550,9 +535,8 @@ private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, Debug.Assert(!source.IsEmpty); Debug.Assert(!prefix.IsEmpty); Debug.Assert(_isAsciiEqualityOrdinal); - Debug.Assert(source.Length >= prefix.Length); - int length = prefix.Length; + int length = Math.Min(source.Length, prefix.Length); fixed (char* ap = &MemoryMarshal.GetReference(source)) fixed (char* bp = &MemoryMarshal.GetReference(prefix)) @@ -560,11 +544,14 @@ private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, char* a = ap; char* b = bp; - while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b])) + while (length != 0) { int charA = *a; int charB = *b; + if (charA >= 0x80 || charB >= 0x80 || HighCharTable[charA] || HighCharTable[charB]) + goto InteropCall; + if (charA == charB) { a++; b++; @@ -576,16 +563,39 @@ private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, if ((uint)(charA - 'a') <= (uint)('z' - 'a')) charA -= 0x20; if ((uint)(charB - 'a') <= (uint)('z' - 'a')) charB -= 0x20; - if (charA != charB) - return false; + if (charA == charB) + { + a++; b++; + length--; + continue; + } + + // The match may be affected by special character. Verify that the following character is regular ASCII. + if (a < ap + source.Length - 1 && *(a + 1) >= 0x80) + goto InteropCall; + if (b < bp + prefix.Length - 1 && *(b + 1) >= 0x80) + goto InteropCall; + return false; + } + + // The match may be affected by special character. Verify that the following character is regular ASCII. + + if (source.Length < prefix.Length) + { + if (*b >= 0x80) + goto InteropCall; + return false; + } - // Next char - a++; b++; - length--; + if (source.Length > prefix.Length) + { + if (*a >= 0x80) + goto InteropCall; } + return true; - if (length == 0) return true; - return Interop.Globalization.StartsWith(_sortHandle, b, length, a, length, options); + InteropCall: + return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options); } } @@ -596,9 +606,8 @@ private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan source, ReadOnlyS Debug.Assert(!source.IsEmpty); Debug.Assert(!prefix.IsEmpty); Debug.Assert(_isAsciiEqualityOrdinal); - Debug.Assert(source.Length >= prefix.Length); - int length = prefix.Length; + int length = Math.Min(source.Length, prefix.Length); fixed (char* ap = &MemoryMarshal.GetReference(source)) fixed (char* bp = &MemoryMarshal.GetReference(prefix)) @@ -606,40 +615,48 @@ private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan source, ReadOnlyS char* a = ap; char* b = bp; - while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b])) + while (length != 0) { int charA = *a; int charB = *b; - if (charA != charB) - return false; + if (charA >= 0x80 || charB >= 0x80 || HighCharTable[charA] || HighCharTable[charB]) + goto InteropCall; - // Next char - a++; b++; - length--; + if (charA == charB) + { + a++; b++; + length--; + continue; + } + + // The match may be affected by special character. Verify that the following character is regular ASCII. + if (a < ap + source.Length - 1 && *(a + 1) >= 0x80) + goto InteropCall; + if (b < bp + prefix.Length - 1 && *(b + 1) >= 0x80) + goto InteropCall; + return false; } - if (length == 0) return true; - return Interop.Globalization.StartsWith(_sortHandle, b, length, a, length, options); - } - } + // The match may be affected by special character. Verify that the following character is regular ASCII. - private bool EndsWith(string source, string suffix, CompareOptions options) - { - Debug.Assert(!GlobalizationMode.Invariant); + if (source.Length < prefix.Length) + { + if (*b >= 0x80) + goto InteropCall; + return false; + } - Debug.Assert(!string.IsNullOrEmpty(source)); - Debug.Assert(!string.IsNullOrEmpty(suffix)); - Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0); + if (source.Length > prefix.Length) + { + if (*a >= 0x80) + goto InteropCall; + } + return true; -#if CORECLR - if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && suffix.IsFastSort()) - { - return IsSuffix(source, suffix, GetOrdinalCompareOptions(options)); + InteropCall: + return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options); } -#endif - - return Interop.Globalization.EndsWith(_sortHandle, suffix, suffix.Length, source, source.Length, options); } private unsafe bool EndsWith(ReadOnlySpan source, ReadOnlySpan suffix, CompareOptions options) @@ -652,19 +669,10 @@ private unsafe bool EndsWith(ReadOnlySpan source, ReadOnlySpan suffi if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options)) { - if (source.Length < suffix.Length) - { - return false; - } - - if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase) - { + if ((options & CompareOptions.IgnoreCase) != 0) return EndsWithOrdinalIgnoreCaseHelper(source, suffix, options); - } else - { return EndsWithOrdinalHelper(source, suffix, options); - } } else { @@ -683,9 +691,8 @@ private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, R Debug.Assert(!source.IsEmpty); Debug.Assert(!suffix.IsEmpty); Debug.Assert(_isAsciiEqualityOrdinal); - Debug.Assert(source.Length >= suffix.Length); - int length = suffix.Length; + int length = Math.Min(source.Length, suffix.Length); fixed (char* ap = &MemoryMarshal.GetReference(source)) fixed (char* bp = &MemoryMarshal.GetReference(suffix)) @@ -693,11 +700,14 @@ private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, R char* a = ap + source.Length - 1; char* b = bp + suffix.Length - 1; - while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b])) + while (length != 0) { int charA = *a; int charB = *b; + if (charA >= 0x80 || charB >= 0x80 || HighCharTable[charA] || HighCharTable[charB]) + goto InteropCall; + if (charA == charB) { a--; b--; @@ -709,16 +719,20 @@ private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, R if ((uint)(charA - 'a') <= (uint)('z' - 'a')) charA -= 0x20; if ((uint)(charB - 'a') <= (uint)('z' - 'a')) charB -= 0x20; - if (charA != charB) - return false; + if (charA == charB) + { + a--; b--; + length--; + continue; + } - // Next char - a--; b--; - length--; + return false; } - if (length == 0) return true; - return Interop.Globalization.EndsWith(_sortHandle, b - length + 1, length, a - length + 1, length, options); + return (source.Length >= suffix.Length); + + InteropCall: + return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options); } } @@ -729,9 +743,8 @@ private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan source, ReadOnlySpa Debug.Assert(!source.IsEmpty); Debug.Assert(!suffix.IsEmpty); Debug.Assert(_isAsciiEqualityOrdinal); - Debug.Assert(source.Length >= suffix.Length); - int length = suffix.Length; + int length = Math.Min(source.Length, suffix.Length); fixed (char* ap = &MemoryMarshal.GetReference(source)) fixed (char* bp = &MemoryMarshal.GetReference(suffix)) @@ -739,21 +752,28 @@ private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan source, ReadOnlySpa char* a = ap + source.Length - 1; char* b = bp + suffix.Length - 1; - while (length != 0 && (*a < 0x80) && (*b < 0x80) && (!s_highCharTable[*a]) && (!s_highCharTable[*b])) + while (length != 0) { int charA = *a; int charB = *b; - if (charA != charB) - return false; + if (charA >= 0x80 || charB >= 0x80 || HighCharTable[charA] || HighCharTable[charB]) + goto InteropCall; - // Next char - a--; b--; - length--; + if (charA == charB) + { + a--; b--; + length--; + continue; + } + + return false; } - if (length == 0) return true; - return Interop.Globalization.EndsWith(_sortHandle, b - length + 1, length, a - length + 1, length, options); + return (source.Length >= suffix.Length); + + InteropCall: + return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options); } } @@ -896,7 +916,7 @@ internal unsafe int GetHashCodeOfStringCore(ReadOnlySpan source, CompareOp private static CompareOptions GetOrdinalCompareOptions(CompareOptions options) { - if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase) + if ((options & CompareOptions.IgnoreCase) != 0) { return CompareOptions.OrdinalIgnoreCase; } @@ -960,8 +980,7 @@ internal static IntPtr GetCachedSortHandle(string sortName) } } - // See https://github.com/dotnet/coreclr/blob/master/src/utilcode/util_nodependencies.cpp#L970 - private static readonly bool[] s_highCharTable = new bool[0x80] + private static ReadOnlySpan HighCharTable => new bool[0x80] { true, /* 0x0, 0x0 */ true, /* 0x1, .*/