From 7959c94a372128907ef249d47a3a9862730b5c31 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Tue, 7 Oct 2025 10:56:01 +0200 Subject: [PATCH 01/15] implement IgnoreSymbols in managed code for iOS --- .../System/Globalization/CompareInfo.Icu.cs | 37 ++- .../System/Globalization/CompareInfo.iOS.cs | 224 ++++++++++++++++-- .../CompareInfo/CompareInfoTests.Compare.cs | 27 +-- .../CompareInfo/CompareInfoTests.IndexOf.cs | 39 ++- .../CompareInfo/CompareInfoTests.IsPrefix.cs | 20 +- .../CompareInfo/CompareInfoTests.IsSuffix.cs | 7 +- .../CompareInfoTests.LastIndexOf.cs | 3 +- .../pal_collation.m | 18 +- 8 files changed, 286 insertions(+), 89 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs index aa7b91b4fdebf0..b1442d87b6a3a2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs @@ -78,16 +78,15 @@ private unsafe int IcuIndexOfCore(ReadOnlySpan source, ReadOnlySpan } else { +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return IndexOfCoreNative(target, source, options, fromBeginning, matchLengthPtr); +#endif // GetReference may return nullptr if the input span is defaulted. The native layer handles // this appropriately; no workaround is needed on the managed side. - fixed (char* pSource = &MemoryMarshal.GetReference(source)) fixed (char* pTarget = &MemoryMarshal.GetReference(target)) { -#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS - if (GlobalizationMode.Hybrid) - return IndexOfCoreNative(pTarget, target.Length, pSource, source.Length, options, fromBeginning, matchLengthPtr); -#endif if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr); else @@ -207,7 +206,7 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan source, Rea InteropCall: #if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) - return IndexOfCoreNative(b, target.Length, a, source.Length, options, fromBeginning, matchLengthPtr); + return IndexOfCoreNative(target, source, options, fromBeginning, matchLengthPtr); #endif if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); @@ -301,7 +300,7 @@ private unsafe int IndexOfOrdinalHelper(ReadOnlySpan source, ReadOnlySpan< InteropCall: #if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) - return IndexOfCoreNative(b, target.Length, a, source.Length, options, fromBeginning, matchLengthPtr); + return IndexOfCoreNative(target, source, options, fromBeginning, matchLengthPtr); #endif if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); @@ -328,13 +327,13 @@ private unsafe bool IcuStartsWith(ReadOnlySpan source, ReadOnlySpan } else { +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return NativeStartsWith(prefix, source, options); +#endif fixed (char* pSource = &MemoryMarshal.GetReference(source)) // could be null (or otherwise unable to be dereferenced) fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix)) { -#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS - if (GlobalizationMode.Hybrid) - return NativeStartsWith(pPrefix, prefix.Length, pSource, source.Length, options); -#endif return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options, matchLengthPtr); } } @@ -416,7 +415,7 @@ private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, InteropCall: #if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) - return NativeStartsWith(bp, prefix.Length, ap, source.Length, options); + return NativeStartsWith(prefix, source, options); #endif return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options, matchLengthPtr); } @@ -488,7 +487,7 @@ private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan source, ReadOnlyS InteropCall: #if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) - return NativeStartsWith(bp, prefix.Length, ap, source.Length, options); + return NativeStartsWith(prefix, source, options); #endif return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options, matchLengthPtr); } @@ -512,13 +511,13 @@ private unsafe bool IcuEndsWith(ReadOnlySpan source, ReadOnlySpan su } else { +#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return NativeEndsWith(suffix, source, options); +#endif fixed (char* pSource = &MemoryMarshal.GetReference(source)) // could be null (or otherwise unable to be dereferenced) fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix)) { -#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS - if (GlobalizationMode.Hybrid) - return NativeEndsWith(pSuffix, suffix.Length, pSource, source.Length, options); -#endif return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options, matchLengthPtr); } } @@ -601,7 +600,7 @@ private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, R InteropCall: #if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) - return NativeEndsWith(bp, suffix.Length, ap, source.Length, options); + return NativeEndsWith(suffix, source, options); #endif return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options, matchLengthPtr); } @@ -673,7 +672,7 @@ private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan source, ReadOnlySpa InteropCall: #if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) - return NativeEndsWith(bp, suffix.Length, ap, source.Length, options); + return NativeEndsWith(suffix, source, options); #endif return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options, matchLengthPtr); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 3ad9df0fcc25eb..336a7e1b59ae6d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -25,13 +25,27 @@ private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan< AssertComparisonSupported(options); + // Handle IgnoreSymbols preprocessing + bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; + ReadOnlySpan filteredString1 = string1; + ReadOnlySpan filteredString2 = string2; + + if (ignoreSymbols) + { + filteredString1 = FilterSymbolsFromSpan(string1, out _); + filteredString2 = FilterSymbolsFromSpan(string2, out _); + + // Remove the flag before passing to native since we handled it here + options &= ~CompareOptions.IgnoreSymbols; + } + // GetReference may return nullptr if the input span is defaulted. The native layer handles // this appropriately; no workaround is needed on the managed side. int result; - fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) - fixed (char* pString2 = &MemoryMarshal.GetReference(string2)) + fixed (char* pString1 = &MemoryMarshal.GetReference(filteredString1)) + fixed (char* pString2 = &MemoryMarshal.GetReference(filteredString2)) { - result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); + result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, filteredString1.Length, pString2, filteredString2.Length, options); } Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); @@ -39,38 +53,214 @@ private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan< return result; } - private unsafe int IndexOfCoreNative(char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, bool fromBeginning, int* matchLengthPtr) + private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan source, CompareOptions options, bool fromBeginning, int* matchLengthPtr) { AssertComparisonSupported(options); + // We only implement managed preprocessing for IgnoreSymbols. + bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; + ReadOnlySpan filteredTarget = target; + ReadOnlySpan filteredSource = source; + int[]? sourceIndexMap = null; // maps each char index in filteredSource to original source char index + + // If we are ignoring symbols, preprocess the strings by removing specified Unicode categories. + if (ignoreSymbols) + { + filteredTarget = FilterSymbolsFromSpan(target, out _); + filteredSource = FilterSymbolsFromSpan(source, out sourceIndexMap); + + // Remove the flag before passing to native since we handled it here. + options &= ~CompareOptions.IgnoreSymbols; + } + + int nativeLocation; + int nativeLength; + fixed (char* pTarget = &MemoryMarshal.GetReference(filteredTarget)) + fixed (char* pSource = &MemoryMarshal.GetReference(filteredSource)) + { + Interop.Range result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, filteredTarget.Length, pSource, filteredSource.Length, options, fromBeginning); + Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND) + throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions); + nativeLocation = result.Location; + nativeLength = result.Length; + } + + if (!ignoreSymbols) + { + if (matchLengthPtr != null) + *matchLengthPtr = nativeLength; + return nativeLocation; + } + + // If ignoring symbols and nothing found, or an error code, or no source index map, just propagate. + if (nativeLocation < 0 || sourceIndexMap == null) + { + if (matchLengthPtr != null) + *matchLengthPtr = nativeLength; + return nativeLocation; + } + + // Map filtered indices back to original indices, expanding match length to include removed symbol chars inside the span. + // nativeLocation is index into filteredSource; nativeLength is length in filteredSource UTF-16 code units. + int originalStart = sourceIndexMap[nativeLocation]; + int filteredEnd = nativeLocation + nativeLength - 1; + + Debug.Assert(filteredEnd < sourceIndexMap.Length, + $"Filtered end index {filteredEnd} should not exceed the length of the filtered string {sourceIndexMap.Length}. nativeLocation={nativeLocation}, nativeLength={nativeLength}"); + + int originalEnd = sourceIndexMap[filteredEnd]; + int originalLength = originalEnd - originalStart + 1; - Interop.Range result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, target, cwTargetLength, pSource, cwSourceLength, options, fromBeginning); - Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND) - throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions); if (matchLengthPtr != null) - *matchLengthPtr = result.Length; + *matchLengthPtr = originalLength; + return originalStart; + } + + /// + /// Determines whether the specified rune should be ignored when using CompareOptions.IgnoreSymbols. + /// + /// The rune to check. + /// + /// true if the rune should be ignored; otherwise, false. + /// + /// + /// This method returns true for: + /// - All separator categories (SpaceSeparator, LineSeparator, ParagraphSeparator) + /// - All punctuation categories (ConnectorPunctuation through OtherPunctuation) + /// - All symbol categories (MathSymbol through ModifierSymbol) + /// - Whitespace control characters (tab, line feed, vertical tab, form feed, carriage return, etc.) + /// + private static bool IsIgnorableSymbol(Rune rune) + { + UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(rune.Value); + + // Check for separator categories (11-13) + if (category >= UnicodeCategory.SpaceSeparator && category <= UnicodeCategory.ParagraphSeparator) + return true; - return result.Location; + // Check for punctuation/symbol categories (18-27) + if (category >= UnicodeCategory.ConnectorPunctuation && category <= UnicodeCategory.ModifierSymbol) + return true; + + // For Control (14) and Format (15) categories, only include whitespace characters + // This includes: tab (U+0009), LF (U+000A), VT (U+000B), FF (U+000C), CR (U+000D), NEL (U+0085) + if (category == UnicodeCategory.Control || category == UnicodeCategory.Format) + return Rune.IsWhiteSpace(rune); + + return false; } - private unsafe bool NativeStartsWith(char* pPrefix, int cwPrefixLength, char* pSource, int cwSourceLength, CompareOptions options) + private unsafe bool NativeStartsWith(ReadOnlySpan prefix, ReadOnlySpan source, CompareOptions options) { AssertComparisonSupported(options); - int result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, cwPrefixLength, pSource, cwSourceLength, options); + // Handle IgnoreSymbols preprocessing + bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; + ReadOnlySpan filteredPrefix = prefix; + ReadOnlySpan filteredSource = source; + + if (ignoreSymbols) + { + filteredPrefix = FilterSymbolsFromSpan(prefix, out _); + filteredSource = FilterSymbolsFromSpan(source, out _); + + // Remove the flag before passing to native since we handled it here + options &= ~CompareOptions.IgnoreSymbols; + } + + int result; + fixed (char* pPrefix = &MemoryMarshal.GetReference(filteredPrefix)) + fixed (char* pSource = &MemoryMarshal.GetReference(filteredSource)) + { + result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, filteredPrefix.Length, pSource, filteredSource.Length, options); + } Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - return result > 0 ? true : false; + return result > 0; } - private unsafe bool NativeEndsWith(char* pSuffix, int cwSuffixLength, char* pSource, int cwSourceLength, CompareOptions options) + private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan source, CompareOptions options) { AssertComparisonSupported(options); - int result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, cwSuffixLength, pSource, cwSourceLength, options); + // Handle IgnoreSymbols preprocessing + bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; + ReadOnlySpan filteredSuffix = suffix; + ReadOnlySpan filteredSource = source; + + if (ignoreSymbols) + { + filteredSuffix = FilterSymbolsFromSpan(suffix, out _); + filteredSource = FilterSymbolsFromSpan(source, out _); + + // Remove the flag before passing to native since we handled it here + options &= ~CompareOptions.IgnoreSymbols; + } + + int result; + fixed (char* pSuffix = &MemoryMarshal.GetReference(filteredSuffix)) + fixed (char* pSource = &MemoryMarshal.GetReference(filteredSource)) + { + result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, filteredSuffix.Length, pSource, filteredSource.Length, options); + } Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - return result > 0 ? true : false; + return result > 0; + } + + /// + /// Filters out ignorable symbol characters from the input span. + /// + /// The input span to filter. + /// + /// When this method returns, contains a mapping array where each index in the filtered output + /// maps to the corresponding character index in the original input span. This parameter is + /// passed uninitialized and will be null if no symbols were removed. + /// + /// + /// A read-only span with ignorable symbols removed. If no symbols were found, returns the + /// original input span unchanged. + /// + private static ReadOnlySpan FilterSymbolsFromSpan(ReadOnlySpan input, out int[]? indexMap) + { + int length = input.Length; + bool hasSymbols = false; + List keptChars = new List(length); + List mapping = new List(length); + // TODO: Use ArrayPool for keptChars and mapping to avoid allocations. + for (int i = 0; i < length;) + { + Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed); + bool remove = IsIgnorableSymbol(rune); + + if (!remove) + { + // Copy the UTF-16 units and map each filtered position to the start of the original character + for (int j = 0; j < consumed; j++) + { + keptChars.Add(input[i + j]); + mapping.Add(i); + } + } + else + { + hasSymbols = true; + } + + i += consumed; + } + + if (!hasSymbols) + { + // No symbols removed; return original span and no mapping. + indexMap = null; + return input; + } + else + { + indexMap = mapping.ToArray(); + return keptChars.ToArray(); + } } private static void AssertComparisonSupported(CompareOptions options) @@ -80,7 +270,7 @@ private static void AssertComparisonSupported(CompareOptions options) } private const CompareOptions SupportedCompareOptions = CompareOptions.None | CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace | - CompareOptions.IgnoreWidth | CompareOptions.StringSort | CompareOptions.IgnoreKanaType; + CompareOptions.IgnoreWidth | CompareOptions.StringSort | CompareOptions.IgnoreKanaType | CompareOptions.IgnoreSymbols; private static string GetPNSE(CompareOptions options) => SR.Format(SR.PlatformNotSupported_HybridGlobalizationWithCompareOptions, options); diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.Compare.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.Compare.cs index c3e5fd0e586a81..a8e955e9a6eaa3 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.Compare.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.Compare.cs @@ -225,11 +225,7 @@ public static IEnumerable Compare_TestData() yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, -1 }; yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, 0 }; - // In HybridGlobalization on Apple platforms IgnoreSymbols is not supported - if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) - { - yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.IgnoreSymbols, 0 }; - } + yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.IgnoreSymbols, 0 }; yield return new object[] { s_invariantCompare, "Test's", "Tests", CompareOptions.StringSort, -1 }; yield return new object[] { s_invariantCompare, null, "Tests", CompareOptions.None, -1 }; @@ -251,26 +247,15 @@ public static IEnumerable Compare_TestData() yield return new object[] { s_invariantCompare, "\uFF9E", "\u3099", CompareOptions.IgnoreCase, 0 }; yield return new object[] { s_invariantCompare, "\u20A9", "\uFFE6", CompareOptions.IgnoreCase, -1 }; yield return new object[] { s_invariantCompare, "\u20A9", "\uFFE6", CompareOptions.None, -1 }; - - // In HybridGlobalization mode on Apple platforms IgnoreSymbols is not supported - if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) - { - yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.IgnoreSymbols, 0 }; - yield return new object[] { s_invariantCompare, "\uFF65", "\u30FB", CompareOptions.IgnoreSymbols, 0 }; - yield return new object[] { s_invariantCompare, "\u00A2", "\uFFE0", CompareOptions.IgnoreSymbols, 0 }; - yield return new object[] { s_invariantCompare, "$", "&", CompareOptions.IgnoreSymbols, 0 }; - } + yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.IgnoreSymbols, 0 }; + yield return new object[] { s_invariantCompare, "\uFF65", "\u30FB", CompareOptions.IgnoreSymbols, 0 }; + yield return new object[] { s_invariantCompare, "\u00A2", "\uFFE0", CompareOptions.IgnoreSymbols, 0 }; + yield return new object[] { s_invariantCompare, "$", "&", CompareOptions.IgnoreSymbols, 0 }; yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "\u20A9", "\uFFE6", CompareOptions.IgnoreWidth, 0 }; yield return new object[] { s_invariantCompare, "\u0021", "\uFF01", CompareOptions.IgnoreWidth, 0 }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreWidth, 0 }; - - // In HybridGlobalization mode on Apple platforms IgnoreSymbols is not supported - if(PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) - { - yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreSymbols, s_expectedHalfToFullFormsComparison }; - } - + yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreSymbols, s_expectedHalfToFullFormsComparison }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreCase, s_expectedHalfToFullFormsComparison }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.IgnoreNonSpace, s_expectedHalfToFullFormsComparison }; yield return new object[] { s_invariantCompare, "\uFF66", "\u30F2", CompareOptions.None, s_expectedHalfToFullFormsComparison }; diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs index 9e6e46db401f4e..f1d9b522d2007e 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -82,10 +82,43 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_invariantCompare, "", "\u200d", 0, 0, CompareOptions.None, 0, 0 }; yield return new object[] { s_invariantCompare, "hello", "\u200d", 1, 3, CompareOptions.IgnoreCase, 1, 0 }; - // Ignore symbols - if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) // IgnoreSymbols are not supported - yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, 5, 6 }; + // Ignore symbols - punctuation characters + yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, 5, 6 }; + yield return new object[] { s_invariantCompare, "-Testing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + yield return new object[] { s_invariantCompare, "Test(ing)", "Testing", 0, 9, CompareOptions.IgnoreSymbols, 0, 8 }; + yield return new object[] { s_invariantCompare, "Testing]", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 7 }; + yield return new object[] { s_invariantCompare, "{Test}ing", "Testing", 0, 9, CompareOptions.IgnoreSymbols, 1, 8 }; + yield return new object[] { s_invariantCompare, "\"Testing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + + // Ignore symbols - currency and math symbols + yield return new object[] { s_invariantCompare, "$Testing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + yield return new object[] { s_invariantCompare, "Test€ing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 8 }; + yield return new object[] { s_invariantCompare, "Testing¢", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 7 }; + yield return new object[] { s_invariantCompare, "+Testing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + yield return new object[] { s_invariantCompare, "Test=ing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 8 }; + yield return new object[] { s_invariantCompare, "Testing%", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 7 }; + + // Ignore symbols - whitespace characters + yield return new object[] { s_invariantCompare, " Testing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + yield return new object[] { s_invariantCompare, "Testing ", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 7 }; + yield return new object[] { s_invariantCompare, "Test\u00A0ing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 8 }; // Non-breaking space + yield return new object[] { s_invariantCompare, "Testing\u2028", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 7 }; // Line separator + yield return new object[] { s_invariantCompare, "\u2029Testing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 1, 7 }; // Paragraph separator + yield return new object[] { s_invariantCompare, "Test\ting\n", "Testing", 0, 9, CompareOptions.IgnoreSymbols, 0, 8 }; // Tab and newline + + // Ignore symbols - multiple whitespace and punctuation + yield return new object[] { s_invariantCompare, " Testing, ", "Testing", 0, 11, CompareOptions.IgnoreSymbols, 2, 7 }; + yield return new object[] { s_invariantCompare, "'Te st i ng!", "Testing", 0, 13, CompareOptions.IgnoreSymbols, 1, 11 }; + + // Ignore symbols - mixed categories + yield return new object[] { s_invariantCompare, "$Te%s t&ing+", "Testing", 0, 12, CompareOptions.IgnoreSymbols, 1, 10 }; + yield return new object[] { s_invariantCompare, ", Hello World!", "HelloWorld", 0, 13, CompareOptions.IgnoreSymbols, 2, 11 }; + + // With symbols - should not match yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.None, -1, 0 }; + yield return new object[] { s_invariantCompare, "Tes ting", "Testing", 0, 8, CompareOptions.None, -1, 0 }; + yield return new object[] { s_invariantCompare, "'Te st i ng!", "Testing", 0, 11, CompareOptions.IgnoreSymbols, -1, 0 }; + yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 0, 13, CompareOptions.None, 2, 2 }; // Ordinal should be case-sensitive diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IsPrefix.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IsPrefix.cs index 3fba71b155c171..d645cf4a70913c 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IsPrefix.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IsPrefix.cs @@ -72,29 +72,23 @@ public static IEnumerable IsPrefix_TestData() yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 }; // Ignore symbols - if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) - { - yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 }; - yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 }; - } + yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 }; + yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 }; // Platform differences if (PlatformDetection.IsNlsGlobalization) { - if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) - { - yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 }; - yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 }; - yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 }; - } + yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 }; + yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 }; + yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 }; yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, true, 1 }; yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, true, 1 }; } else { yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, false, 0 }; - if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) - yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, false, 0 }; + // Bug in ICU (non-Apple) implementation, correct result should be true (https://github.com/dotnet/runtime/issues/118521) + yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, PlatformDetection.IsHybridGlobalizationOnApplePlatform ? true : false, 0 }; yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, false, 0 }; if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) { diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IsSuffix.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IsSuffix.cs index d545be88e67476..80c911a5a4a69b 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IsSuffix.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IsSuffix.cs @@ -80,11 +80,8 @@ public static IEnumerable IsSuffix_TestData() yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 }; // Ignore symbols - if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) - { - yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.IgnoreSymbols, true, 6 }; - yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.None, false, 0 }; - } + yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.IgnoreSymbols, true, 6 }; + yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.None, false, 0 }; // NULL character yield return new object[] { s_invariantCompare, "a\u0000b", "a\u0000b", CompareOptions.None, true, 3 }; diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.LastIndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.LastIndexOf.cs index 83556e601faec7..b7bc089581a9e3 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.LastIndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.LastIndexOf.cs @@ -101,8 +101,7 @@ public static IEnumerable LastIndexOf_TestData() yield return new object[] { s_invariantCompare, "AA\u200DA", "\u200d", 3, 4, CompareOptions.None, 4, 0}; // Ignore symbols - if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform) // IgnoreSymbols are not supported - yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.IgnoreSymbols, 5, 6 }; + yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.IgnoreSymbols, 5, 6 }; yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.None, -1, 0 }; yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 12, 13, CompareOptions.None, 10, 2 }; diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m index 56af941fb40033..4c6f54f8a27726 100644 --- a/src/native/libs/System.Globalization.Native/pal_collation.m +++ b/src/native/libs/System.Globalization.Native/pal_collation.m @@ -13,14 +13,14 @@ #if defined(APPLE_HYBRID_GLOBALIZATION) // Enum that corresponds to C# CompareOptions -typedef enum +typedef enum : int32_t { - None = 0, - IgnoreCase = 1, - IgnoreNonSpace = 2, - IgnoreKanaType = 8, - IgnoreWidth = 16, - StringSort = 536870912, + None = 0x00000000, + IgnoreCase = 0x00000001, + IgnoreNonSpace = 0x00000002, + IgnoreKanaType = 0x00000008, + IgnoreWidth = 0x00000010, + StringSort = 0x20000000, } CompareOptions; typedef enum @@ -45,7 +45,7 @@ return currentLocale; } -static bool IsComparisonOptionSupported(int32_t comparisonOptions) +static bool IsComparisonOptionSupported(CompareOptions comparisonOptions) { int32_t supportedOptions = None | IgnoreCase | IgnoreNonSpace | IgnoreWidth | StringSort | IgnoreKanaType; if ((comparisonOptions | supportedOptions) != supportedOptions) @@ -53,7 +53,7 @@ static bool IsComparisonOptionSupported(int32_t comparisonOptions) return true; } -static NSStringCompareOptions ConvertFromCompareOptionsToNSStringCompareOptions(int32_t comparisonOptions, bool isLiteralSearchSupported) +static NSStringCompareOptions ConvertFromCompareOptionsToNSStringCompareOptions(CompareOptions comparisonOptions, bool isLiteralSearchSupported) { // To achieve an equivalent search behavior to the default in ICU, // NSLiteralSearch is employed as the default search option. From 282fef6adaaab7aca3405d1e39839f534f69ff43 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Tue, 7 Oct 2025 11:19:20 +0200 Subject: [PATCH 02/15] simplify ignore symbols logic --- .../System/Globalization/CompareInfo.iOS.cs | 77 +++++++------------ 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 336a7e1b59ae6d..3ec23d66e792a9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -26,14 +26,10 @@ private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan< AssertComparisonSupported(options); // Handle IgnoreSymbols preprocessing - bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; - ReadOnlySpan filteredString1 = string1; - ReadOnlySpan filteredString2 = string2; - - if (ignoreSymbols) + if ((options & CompareOptions.IgnoreSymbols) != 0) { - filteredString1 = FilterSymbolsFromSpan(string1, out _); - filteredString2 = FilterSymbolsFromSpan(string2, out _); + string1 = FilterSymbolsFromSpan(string1, out _); + string2 = FilterSymbolsFromSpan(string2, out _); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; @@ -42,10 +38,10 @@ private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan< // GetReference may return nullptr if the input span is defaulted. The native layer handles // this appropriately; no workaround is needed on the managed side. int result; - fixed (char* pString1 = &MemoryMarshal.GetReference(filteredString1)) - fixed (char* pString2 = &MemoryMarshal.GetReference(filteredString2)) + fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) + fixed (char* pString2 = &MemoryMarshal.GetReference(string2)) { - result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, filteredString1.Length, pString2, filteredString2.Length, options); + result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); } Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); @@ -58,26 +54,24 @@ private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan filteredTarget = target; - ReadOnlySpan filteredSource = source; int[]? sourceIndexMap = null; // maps each char index in filteredSource to original source char index // If we are ignoring symbols, preprocess the strings by removing specified Unicode categories. if (ignoreSymbols) { - filteredTarget = FilterSymbolsFromSpan(target, out _); - filteredSource = FilterSymbolsFromSpan(source, out sourceIndexMap); + target = FilterSymbolsFromSpan(target, out _); + source = FilterSymbolsFromSpan(source, out sourceIndexMap); - // Remove the flag before passing to native since we handled it here. + // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } int nativeLocation; int nativeLength; - fixed (char* pTarget = &MemoryMarshal.GetReference(filteredTarget)) - fixed (char* pSource = &MemoryMarshal.GetReference(filteredSource)) + fixed (char* pTarget = &MemoryMarshal.GetReference(target)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) { - Interop.Range result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, filteredTarget.Length, pSource, filteredSource.Length, options, fromBeginning); + Interop.Range result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, target.Length, pSource, source.Length, options, fromBeginning); Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND) throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions); @@ -85,23 +79,16 @@ private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan prefix, ReadOnlySpan filteredPrefix = prefix; - ReadOnlySpan filteredSource = source; - - if (ignoreSymbols) + if ((options & CompareOptions.IgnoreSymbols) != 0) { - filteredPrefix = FilterSymbolsFromSpan(prefix, out _); - filteredSource = FilterSymbolsFromSpan(source, out _); + prefix = FilterSymbolsFromSpan(prefix, out _); + source = FilterSymbolsFromSpan(source, out _); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } int result; - fixed (char* pPrefix = &MemoryMarshal.GetReference(filteredPrefix)) - fixed (char* pSource = &MemoryMarshal.GetReference(filteredSource)) + fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) { - result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, filteredPrefix.Length, pSource, filteredSource.Length, options); + result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, prefix.Length, pSource, source.Length, options); } Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); @@ -184,24 +167,20 @@ private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan AssertComparisonSupported(options); // Handle IgnoreSymbols preprocessing - bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; - ReadOnlySpan filteredSuffix = suffix; - ReadOnlySpan filteredSource = source; - - if (ignoreSymbols) + if ((options & CompareOptions.IgnoreSymbols) != 0) { - filteredSuffix = FilterSymbolsFromSpan(suffix, out _); - filteredSource = FilterSymbolsFromSpan(source, out _); + suffix = FilterSymbolsFromSpan(suffix, out _); + source = FilterSymbolsFromSpan(source, out _); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } int result; - fixed (char* pSuffix = &MemoryMarshal.GetReference(filteredSuffix)) - fixed (char* pSource = &MemoryMarshal.GetReference(filteredSource)) + fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) { - result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, filteredSuffix.Length, pSource, filteredSource.Length, options); + result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, suffix.Length, pSource, source.Length, options); } Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); From bc4a72f0ed0d99ec0866b47f855fd9b1576a80e5 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Tue, 7 Oct 2025 18:53:46 +0200 Subject: [PATCH 03/15] surrogate tests and update mappings accordingly. --- .../System/Globalization/CompareInfo.iOS.cs | 22 +++++++++-- .../CompareInfo/CompareInfoTests.IndexOf.cs | 9 +++++ .../CompareInfoTests.LastIndexOf.cs | 39 ++++++++++++++++++- 3 files changed, 65 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 3ec23d66e792a9..5d496a9046cee1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -95,8 +95,22 @@ private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan 0 && sourceIndexMap[filteredEnd - 1] == endCharStartPos) + ? filteredEnd - 1 + : filteredEnd; + + // Check if the next position belongs to the same character (second unit of a surrogate pair) + int lastUnit = (filteredEnd + 1 < sourceIndexMap.Length && sourceIndexMap[filteredEnd + 1] == endCharStartPos) + ? filteredEnd + 1 + : filteredEnd; + + int endCharWidth = lastUnit - firstUnit + 1; + int originalEnd = endCharStartPos + endCharWidth; + int originalLength = originalEnd - originalStart; if (matchLengthPtr != null) *matchLengthPtr = originalLength; @@ -193,8 +207,8 @@ private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan /// The input span to filter. /// /// When this method returns, contains a mapping array where each index in the filtered output - /// maps to the corresponding character index in the original input span. This parameter is - /// passed uninitialized and will be null if no symbols were removed. + /// maps to the corresponding character start position in the original input span. + /// This parameter is passed uninitialized and will be null if no symbols were removed. /// /// /// A read-only span with ignorable symbols removed. If no symbols were found, returns the diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs index f1d9b522d2007e..d8100474403e1e 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -97,6 +97,7 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_invariantCompare, "+Testing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 1, 7 }; yield return new object[] { s_invariantCompare, "Test=ing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 8 }; yield return new object[] { s_invariantCompare, "Testing%", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 0, 7 }; + yield return new object[] { s_invariantCompare, "x$test$xtest$x", "test", 0, 14, CompareOptions.IgnoreSymbols, 2, 4 }; // Ignore symbols - whitespace characters yield return new object[] { s_invariantCompare, " Testing", "Testing", 0, 8, CompareOptions.IgnoreSymbols, 1, 7 }; @@ -114,6 +115,14 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_invariantCompare, "$Te%s t&ing+", "Testing", 0, 12, CompareOptions.IgnoreSymbols, 1, 10 }; yield return new object[] { s_invariantCompare, ", Hello World!", "HelloWorld", 0, 13, CompareOptions.IgnoreSymbols, 2, 11 }; + // Ignore symbols - source contains surrogates (to match) + symbols (to ignore) + yield return new object[] { s_invariantCompare, "$\U0001D7D8Testing!", "\U0001D7D8Testing", 0, 11, CompareOptions.IgnoreSymbols, 1, 9 }; + yield return new object[] { s_invariantCompare, "Test$\U0001D7D8ing", "Test\U0001D7D8ing", 0, 10, CompareOptions.IgnoreSymbols, 0, 10 }; + yield return new object[] { s_invariantCompare, "$Testing\U0001D7DA", "Testing\U0001D7DA", 0, 10, CompareOptions.IgnoreSymbols, 1, 9 }; + yield return new object[] { s_invariantCompare, "$\U0001D7D8Test!\U0001D7D9ing", "\U0001D7D8Test\U0001D7D9ing", 0, 13, CompareOptions.IgnoreSymbols, 1, 12 }; + yield return new object[] { s_invariantCompare, "\U0001D7D8 Test$ \U0001D7D9 ing!", "\U0001D7D8 Test \U0001D7D9 ing", 0, 15, CompareOptions.IgnoreSymbols, 0, 15 }; + yield return new object[] { s_invariantCompare, "!$\U0001D7D8Test\U0001D7D9ing\U0001D7DA!", "\U0001D7D8Test\U0001D7D9ing\U0001D7DA", 0, 15, CompareOptions.IgnoreSymbols, 2, 13 }; + // With symbols - should not match yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.None, -1, 0 }; yield return new object[] { s_invariantCompare, "Tes ting", "Testing", 0, 8, CompareOptions.None, -1, 0 }; diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.LastIndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.LastIndexOf.cs index b7bc089581a9e3..b1e0e4c10b9bd2 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.LastIndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.LastIndexOf.cs @@ -100,9 +100,46 @@ public static IEnumerable LastIndexOf_TestData() yield return new object[] { s_invariantCompare, "\u0001F601", "\u200d", 1, 2, CompareOptions.None, 2, 0}; // \u0001F601 is GRINNING FACE WITH SMILING EYES surrogate character yield return new object[] { s_invariantCompare, "AA\u200DA", "\u200d", 3, 4, CompareOptions.None, 4, 0}; - // Ignore symbols + // Ignore symbols - punctuation characters yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.IgnoreSymbols, 5, 6 }; + yield return new object[] { s_invariantCompare, "-Testing", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + yield return new object[] { s_invariantCompare, "Testing]", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 0, 7 }; + yield return new object[] { s_invariantCompare, "\"Testing", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + + // Ignore symbols - currency and math symbols + yield return new object[] { s_invariantCompare, "$Testing", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + yield return new object[] { s_invariantCompare, "Test€ing", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 0, 8 }; + yield return new object[] { s_invariantCompare, "Testing¢", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 0, 7 }; + yield return new object[] { s_invariantCompare, "Test=ing", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 0, 8 }; + + // Ignore symbols - whitespace characters + yield return new object[] { s_invariantCompare, " Testing", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 1, 7 }; + yield return new object[] { s_invariantCompare, "Testing ", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 0, 7 }; + yield return new object[] { s_invariantCompare, "Test\u00A0ing", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 0, 8 }; // Non-breaking space + yield return new object[] { s_invariantCompare, "Testing\u2028", "Testing", 7, 8, CompareOptions.IgnoreSymbols, 0, 7 }; // Line separator + yield return new object[] { s_invariantCompare, "Test\ting\n", "Testing", 8, 9, CompareOptions.IgnoreSymbols, 0, 8 }; // Tab and newline + + // Ignore symbols - multiple whitespace and punctuation + yield return new object[] { s_invariantCompare, " Testing, ", "Testing", 10, 11, CompareOptions.IgnoreSymbols, 2, 7 }; + yield return new object[] { s_invariantCompare, "'Te st i ng!", "Testing", 12, 13, CompareOptions.IgnoreSymbols, 1, 11 }; + + // Ignore symbols - mixed categories + yield return new object[] { s_invariantCompare, "$Te%s t&ing+", "Testing", 11, 12, CompareOptions.IgnoreSymbols, 1, 10 }; + yield return new object[] { s_invariantCompare, ", Hello World!", "HelloWorld", 13, 14, CompareOptions.IgnoreSymbols, 2, 11 }; + yield return new object[] { s_invariantCompare, "x$test$xtest$x", "test", 13, 14, CompareOptions.IgnoreSymbols, 8, 4 }; + + // Ignore symbols - source contains surrogates (to match) + symbols (to ignore) + yield return new object[] { s_invariantCompare, "$\U0001D7D8Testing!", "\U0001D7D8Testing", 10, 11, CompareOptions.IgnoreSymbols, 1, 9 }; + yield return new object[] { s_invariantCompare, "Test$\U0001D7D8ing", "Test\U0001D7D8ing", 9, 10, CompareOptions.IgnoreSymbols, 0, 10 }; + yield return new object[] { s_invariantCompare, "$Testing\U0001D7DA", "Testing\U0001D7DA", 9, 10, CompareOptions.IgnoreSymbols, 1, 9 }; + yield return new object[] { s_invariantCompare, "$\U0001D7D8Test!\U0001D7D9ing", "\U0001D7D8Test\U0001D7D9ing", 12, 13, CompareOptions.IgnoreSymbols, 1, 12 }; + yield return new object[] { s_invariantCompare, "\U0001D7D8 Test$ \U0001D7D9 ing!", "\U0001D7D8 Test \U0001D7D9 ing", 14, 15, CompareOptions.IgnoreSymbols, 0, 15 }; + yield return new object[] { s_invariantCompare, "!$\U0001D7D8Test\U0001D7D9ing\U0001D7DA!", "\U0001D7D8Test\U0001D7D9ing\U0001D7DA", 14, 15, CompareOptions.IgnoreSymbols, 2, 13 }; + + // With symbols - should not match yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.None, -1, 0 }; + yield return new object[] { s_invariantCompare, "Tes ting", "Testing", 7, 8, CompareOptions.None, -1, 0 }; + yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 12, 13, CompareOptions.None, 10, 2 }; // Platform differences From d9e3c9a525199b66d83dcf22dc4e4f77e313559f Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Wed, 8 Oct 2025 11:49:46 +0200 Subject: [PATCH 04/15] switch to ArrayPool for bigger buffers --- .../System/Globalization/CompareInfo.iOS.cs | 262 +++++++++++------- 1 file changed, 166 insertions(+), 96 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 5d496a9046cee1..6403ab9c8fd7bd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -28,93 +28,108 @@ private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan< // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - string1 = FilterSymbolsFromSpan(string1, out _); - string2 = FilterSymbolsFromSpan(string2, out _); + using SymbolFilterHelper filter1 = new SymbolFilterHelper(string1, needsIndexMap: false); + using SymbolFilterHelper filter2 = new SymbolFilterHelper(string2, needsIndexMap: false); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; + + string1 = filter1.FilteredSpan; + string2 = filter2.FilteredSpan; } // GetReference may return nullptr if the input span is defaulted. The native layer handles // this appropriately; no workaround is needed on the managed side. - int result; fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) fixed (char* pString2 = &MemoryMarshal.GetReference(string2)) { - result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); + int result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); + Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + return result; } - - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - - return result; } private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan source, CompareOptions options, bool fromBeginning, int* matchLengthPtr) { AssertComparisonSupported(options); - // We only implement managed preprocessing for IgnoreSymbols. + + SymbolFilterHelper targetFilter = default; + SymbolFilterHelper sourceFilter = default; bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; - int[]? sourceIndexMap = null; // maps each char index in filteredSource to original source char index // If we are ignoring symbols, preprocess the strings by removing specified Unicode categories. if (ignoreSymbols) { - target = FilterSymbolsFromSpan(target, out _); - source = FilterSymbolsFromSpan(source, out sourceIndexMap); + targetFilter = new SymbolFilterHelper(target, needsIndexMap: false); + sourceFilter = new SymbolFilterHelper(source, needsIndexMap: true); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; - } - int nativeLocation; - int nativeLength; - fixed (char* pTarget = &MemoryMarshal.GetReference(target)) - fixed (char* pSource = &MemoryMarshal.GetReference(source)) - { - Interop.Range result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, target.Length, pSource, source.Length, options, fromBeginning); - Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND) - throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions); - nativeLocation = result.Location; - nativeLength = result.Length; + target = targetFilter.FilteredSpan; + source = sourceFilter.FilteredSpan; } - // If not ignoring symbols / nothing found / an error code / no source index map (no ignorable symbols in source string), just propagate. - if (!ignoreSymbols || nativeLocation < 0 || sourceIndexMap == null) + try { - if (matchLengthPtr != null) - *matchLengthPtr = nativeLength; - return nativeLocation; - } + Interop.Range result; + fixed (char* pTarget = &MemoryMarshal.GetReference(target)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + { + result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, target.Length, pSource, source.Length, options, fromBeginning); + Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND) + throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions); + } - // If ignoring symbols, map filtered indices back to original indices, expanding match length to include removed symbol chars inside the span. - // nativeLocation is index into filtered source; nativeLength is length in filtered source UTF-16 code units. - int originalStart = sourceIndexMap[nativeLocation]; - int filteredEnd = nativeLocation + nativeLength - 1; + int nativeLocation = result.Location; + int nativeLength = result.Length; + + // If not ignoring symbols / nothing found / an error code / filtered length is same as original (no ignorable symbols in source string), just propagate. + if (!ignoreSymbols || nativeLocation < 0 || sourceFilter.FilteredLength == sourceFilter.OriginalLength) + { + if (matchLengthPtr != null) + *matchLengthPtr = nativeLength; + return nativeLocation; + } - Debug.Assert(filteredEnd < sourceIndexMap.Length, - $"Filtered end index {filteredEnd} should not exceed the length of the filtered string {sourceIndexMap.Length}. nativeLocation={nativeLocation}, nativeLength={nativeLength}"); + // If ignoring symbols, map filtered indices back to original indices, expanding match length to include removed symbol chars inside the span. + ReadOnlySpan sourceIndexMap = sourceFilter.IndexMap; + int originalStart = sourceIndexMap[nativeLocation]; + int filteredEnd = nativeLocation + nativeLength - 1; - // Find the end position of the character at filteredEnd in the original string. - int endCharStartPos = sourceIndexMap[filteredEnd]; + Debug.Assert(filteredEnd < sourceFilter.FilteredLength, + $"Filtered end index {filteredEnd} should not exceed the length of the filtered string {sourceFilter.FilteredLength}. nativeLocation={nativeLocation}, nativeLength={nativeLength}"); - // Check if the previous position belongs to the same character (first unit of a surrogate pair) - int firstUnit = (filteredEnd > 0 && sourceIndexMap[filteredEnd - 1] == endCharStartPos) - ? filteredEnd - 1 - : filteredEnd; + // Find the end position of the character at filteredEnd in the original string. + int endCharStartPos = sourceIndexMap[filteredEnd]; - // Check if the next position belongs to the same character (second unit of a surrogate pair) - int lastUnit = (filteredEnd + 1 < sourceIndexMap.Length && sourceIndexMap[filteredEnd + 1] == endCharStartPos) - ? filteredEnd + 1 - : filteredEnd; + // Check if the previous position belongs to the same character (first unit of a surrogate pair) + int firstUnit = (filteredEnd > 0 && sourceIndexMap[filteredEnd - 1] == endCharStartPos) + ? filteredEnd - 1 + : filteredEnd; - int endCharWidth = lastUnit - firstUnit + 1; - int originalEnd = endCharStartPos + endCharWidth; - int originalLength = originalEnd - originalStart; + // Check if the next position belongs to the same character (second unit of a surrogate pair) + int lastUnit = (filteredEnd + 1 < sourceFilter.FilteredLength && sourceIndexMap[filteredEnd + 1] == endCharStartPos) + ? filteredEnd + 1 + : filteredEnd; - if (matchLengthPtr != null) - *matchLengthPtr = originalLength; - return originalStart; + int endCharWidth = lastUnit - firstUnit + 1; + int originalEnd = endCharStartPos + endCharWidth; + int originalLength = originalEnd - originalStart; + + if (matchLengthPtr != null) + *matchLengthPtr = originalLength; + return originalStart; + } + finally + { + if (ignoreSymbols) + { + targetFilter.Dispose(); + sourceFilter.Dispose(); + } + } } /// @@ -158,22 +173,23 @@ private unsafe bool NativeStartsWith(ReadOnlySpan prefix, ReadOnlySpan 0; } - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - - return result > 0; } private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan source, CompareOptions options) @@ -183,76 +199,130 @@ private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - suffix = FilterSymbolsFromSpan(suffix, out _); - source = FilterSymbolsFromSpan(source, out _); + using SymbolFilterHelper suffixFilter = new SymbolFilterHelper(suffix, needsIndexMap: false); + using SymbolFilterHelper sourceFilter = new SymbolFilterHelper(source, needsIndexMap: false); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; + + suffix = suffixFilter.FilteredSpan; + source = sourceFilter.FilteredSpan; } - int result; fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix)) fixed (char* pSource = &MemoryMarshal.GetReference(source)) { - result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, suffix.Length, pSource, source.Length, options); + int result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, suffix.Length, pSource, source.Length, options); + Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + return result > 0; } - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - - return result > 0; } /// /// Filters out ignorable symbol characters from the input span. /// /// The input span to filter. + /// The destination span to write filtered characters to. Must be at least as large as input. /// - /// When this method returns, contains a mapping array where each index in the filtered output - /// maps to the corresponding character start position in the original input span. - /// This parameter is passed uninitialized and will be null if no symbols were removed. + /// Optional array to store the mapping where each index in the filtered output maps to the corresponding + /// character start position in the original input span. Pass null if mapping is not needed. + /// Must be at least as large as input if provided. /// /// - /// A read-only span with ignorable symbols removed. If no symbols were found, returns the - /// original input span unchanged. + /// The number of characters written to the destination span after filtering. /// - private static ReadOnlySpan FilterSymbolsFromSpan(ReadOnlySpan input, out int[]? indexMap) + private static int FilterSymbolsFromSpan(ReadOnlySpan input, Span destination, int[]? indexMap) { + Debug.Assert(destination.Length >= input.Length, "Destination buffer must be at least as large as input"); + Debug.Assert(indexMap is null || indexMap.Length >= input.Length, "Index map buffer must be at least as large as input if provided"); + int length = input.Length; - bool hasSymbols = false; - List keptChars = new List(length); - List mapping = new List(length); - // TODO: Use ArrayPool for keptChars and mapping to avoid allocations. - for (int i = 0; i < length;) - { - Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed); - bool remove = IsIgnorableSymbol(rune); + int writeIndex = 0; - if (!remove) + if (indexMap is null) + { + // Fast path when no index mapping is needed + for (int i = 0; i < length;) { - // Copy the UTF-16 units and map each filtered position to the start of the original character - for (int j = 0; j < consumed; j++) + Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed); + + if (!IsIgnorableSymbol(rune)) { - keptChars.Add(input[i + j]); - mapping.Add(i); + // Copy the UTF-16 units + for (int j = 0; j < consumed; j++) + { + destination[writeIndex++] = input[i + j]; + } } + + i += consumed; } - else + } + else + { + // Path with index mapping + for (int i = 0; i < length;) { - hasSymbols = true; - } + Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed); - i += consumed; + if (!IsIgnorableSymbol(rune)) + { + // Copy the UTF-16 units and map each filtered position to the start of the original character + for (int j = 0; j < consumed; j++) + { + destination[writeIndex] = input[i + j]; + indexMap[writeIndex] = i; + writeIndex++; + } + } + + i += consumed; + } } - if (!hasSymbols) + return writeIndex; + } + + /// + /// Helper struct that manages buffers for filtering symbols from a span. + /// Uses ArrayPool to rent temporary buffers. + /// Implements IDisposable to return rented arrays. + /// + private ref struct SymbolFilterHelper + { + private char[]? _rentedCharArray; + private int[]? _rentedIntArray; + + public ReadOnlySpan FilteredSpan { get; } + public ReadOnlySpan IndexMap { get; } + public int FilteredLength { get; } + public int OriginalLength { get; } + + public SymbolFilterHelper(ReadOnlySpan input, bool needsIndexMap) { - // No symbols removed; return original span and no mapping. - indexMap = null; - return input; + OriginalLength = input.Length; + _rentedCharArray = ArrayPool.Shared.Rent(input.Length); + _rentedIntArray = needsIndexMap ? ArrayPool.Shared.Rent(input.Length) : null; + + Span charBuffer = _rentedCharArray.AsSpan(0, input.Length); + + FilteredLength = FilterSymbolsFromSpan(input, charBuffer, _rentedIntArray); + FilteredSpan = charBuffer.Slice(0, FilteredLength); + IndexMap = _rentedIntArray is not null ? _rentedIntArray.AsSpan(0, FilteredLength) : default; } - else + + public void Dispose() { - indexMap = mapping.ToArray(); - return keptChars.ToArray(); + if (_rentedCharArray is not null) + { + ArrayPool.Shared.Return(_rentedCharArray); + _rentedCharArray = null; + } + if (_rentedIntArray is not null) + { + ArrayPool.Shared.Return(_rentedIntArray); + _rentedIntArray = null; + } } } From 407a1c44556d857f2073adb26f2fcbd474c61b59 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Wed, 8 Oct 2025 12:01:43 +0200 Subject: [PATCH 05/15] simplify FilterSymbolsFromSpan --- .../System/Globalization/CompareInfo.iOS.cs | 40 +++++-------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 6403ab9c8fd7bd..fcb5796c33905c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -239,45 +239,25 @@ private static int FilterSymbolsFromSpan(ReadOnlySpan input, Span de int length = input.Length; int writeIndex = 0; - if (indexMap is null) + for (int i = 0; i < length;) { - // Fast path when no index mapping is needed - for (int i = 0; i < length;) - { - Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed); - - if (!IsIgnorableSymbol(rune)) - { - // Copy the UTF-16 units - for (int j = 0; j < consumed; j++) - { - destination[writeIndex++] = input[i + j]; - } - } + Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed); - i += consumed; - } - } - else - { - // Path with index mapping - for (int i = 0; i < length;) + if (!IsIgnorableSymbol(rune)) { - Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed); - - if (!IsIgnorableSymbol(rune)) + // Copy the UTF-16 units and map each filtered position to the start of the original character + for (int j = 0; j < consumed; j++) { - // Copy the UTF-16 units and map each filtered position to the start of the original character - for (int j = 0; j < consumed; j++) + destination[writeIndex] = input[i + j]; + if (indexMap is not null) { - destination[writeIndex] = input[i + j]; indexMap[writeIndex] = i; - writeIndex++; } + writeIndex++; } - - i += consumed; } + + i += consumed; } return writeIndex; From 46590ff14fd9f25949d2302a37311a6d968e462c Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Wed, 8 Oct 2025 12:23:52 +0200 Subject: [PATCH 06/15] simplify null check --- .../src/System/Globalization/CompareInfo.iOS.cs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index fcb5796c33905c..bdb55e832ce8d0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -249,10 +249,7 @@ private static int FilterSymbolsFromSpan(ReadOnlySpan input, Span de for (int j = 0; j < consumed; j++) { destination[writeIndex] = input[i + j]; - if (indexMap is not null) - { - indexMap[writeIndex] = i; - } + indexMap?[writeIndex] = i; writeIndex++; } } From ea7a8dadf8d83f2121e65a23f048470f71521097 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Thu, 9 Oct 2025 09:29:28 +0200 Subject: [PATCH 07/15] fix bug with early dispose --- .../System/Globalization/CompareInfo.iOS.cs | 373 ++++++++++-------- 1 file changed, 210 insertions(+), 163 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index bdb55e832ce8d0..553dc2cef1b4fa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -25,17 +25,17 @@ private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan< AssertComparisonSupported(options); + using SymbolFilterHelper helper1 = new SymbolFilterHelper(); + using SymbolFilterHelper helper2 = new SymbolFilterHelper(); + // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - using SymbolFilterHelper filter1 = new SymbolFilterHelper(string1, needsIndexMap: false); - using SymbolFilterHelper filter2 = new SymbolFilterHelper(string2, needsIndexMap: false); + string1 = helper1.GetFilteredString(string1, generateIndexMap: false); + string2 = helper2.GetFilteredString(string2, generateIndexMap: false); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; - - string1 = filter1.FilteredSpan; - string2 = filter2.FilteredSpan; } // GetReference may return nullptr if the input span is defaulted. The native layer handles @@ -53,134 +53,87 @@ private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan sourceIndexMap = sourceFilter.IndexMap; - int originalStart = sourceIndexMap[nativeLocation]; - int filteredEnd = nativeLocation + nativeLength - 1; - - Debug.Assert(filteredEnd < sourceFilter.FilteredLength, - $"Filtered end index {filteredEnd} should not exceed the length of the filtered string {sourceFilter.FilteredLength}. nativeLocation={nativeLocation}, nativeLength={nativeLength}"); - - // Find the end position of the character at filteredEnd in the original string. - int endCharStartPos = sourceIndexMap[filteredEnd]; - - // Check if the previous position belongs to the same character (first unit of a surrogate pair) - int firstUnit = (filteredEnd > 0 && sourceIndexMap[filteredEnd - 1] == endCharStartPos) - ? filteredEnd - 1 - : filteredEnd; + result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, target.Length, pSource, source.Length, options, fromBeginning); + Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND) + throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions); + } - // Check if the next position belongs to the same character (second unit of a surrogate pair) - int lastUnit = (filteredEnd + 1 < sourceFilter.FilteredLength && sourceIndexMap[filteredEnd + 1] == endCharStartPos) - ? filteredEnd + 1 - : filteredEnd; + int nativeLocation = result.Location; + int nativeLength = result.Length; - int endCharWidth = lastUnit - firstUnit + 1; - int originalEnd = endCharStartPos + endCharWidth; - int originalLength = originalEnd - originalStart; + ReadOnlySpan sourceIndexMap = sourceHelper.GetIndexMap(); - if (matchLengthPtr != null) - *matchLengthPtr = originalLength; - return originalStart; - } - finally + // If not ignoring symbols / nothing found / an error code / no index map (no symbols found in source), just propagate. + if (!ignoreSymbols || nativeLocation < 0 || sourceIndexMap.IsEmpty) { - if (ignoreSymbols) - { - targetFilter.Dispose(); - sourceFilter.Dispose(); - } + if (matchLengthPtr != null) + *matchLengthPtr = nativeLength; + return nativeLocation; } - } - /// - /// Determines whether the specified rune should be ignored when using CompareOptions.IgnoreSymbols. - /// - /// The rune to check. - /// - /// true if the rune should be ignored; otherwise, false. - /// - /// - /// This method returns true for: - /// - All separator categories (SpaceSeparator, LineSeparator, ParagraphSeparator) - /// - All punctuation categories (ConnectorPunctuation through OtherPunctuation) - /// - All symbol categories (MathSymbol through ModifierSymbol) - /// - Whitespace control characters (tab, line feed, vertical tab, form feed, carriage return, etc.) - /// - private static bool IsIgnorableSymbol(Rune rune) - { - UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(rune.Value); + // If ignoring symbols, map filtered indices back to original indices, expanding match length to include removed symbol chars inside the span. + int originalStart = sourceIndexMap[nativeLocation]; + int filteredEnd = nativeLocation + nativeLength - 1; + + Debug.Assert(filteredEnd < source.Length, + $"Filtered end index {filteredEnd} should not exceed the length of the filtered string {source.Length}. nativeLocation={nativeLocation}, nativeLength={nativeLength}"); - // Check for separator categories (11-13) - if (category >= UnicodeCategory.SpaceSeparator && category <= UnicodeCategory.ParagraphSeparator) - return true; + // Find the end position of the character at filteredEnd in the original string. + int endCharStartPos = sourceIndexMap[filteredEnd]; - // Check for punctuation/symbol categories (18-27) - if (category >= UnicodeCategory.ConnectorPunctuation && category <= UnicodeCategory.ModifierSymbol) - return true; + // Check if the previous position belongs to the same character (first unit of a surrogate pair) + int firstUnit = (filteredEnd > 0 && sourceIndexMap[filteredEnd - 1] == endCharStartPos) + ? filteredEnd - 1 + : filteredEnd; - // For Control (14) and Format (15) categories, only include whitespace characters - // This includes: tab (U+0009), LF (U+000A), VT (U+000B), FF (U+000C), CR (U+000D), NEL (U+0085) - if (category == UnicodeCategory.Control || category == UnicodeCategory.Format) - return Rune.IsWhiteSpace(rune); + // Check if the next position belongs to the same character (second unit of a surrogate pair) + int lastUnit = (filteredEnd + 1 < source.Length && sourceIndexMap[filteredEnd + 1] == endCharStartPos) + ? filteredEnd + 1 + : filteredEnd; - return false; + int endCharWidth = lastUnit - firstUnit + 1; + int originalEnd = endCharStartPos + endCharWidth; + int originalLength = originalEnd - originalStart; + + if (matchLengthPtr != null) + *matchLengthPtr = originalLength; + return originalStart; } private unsafe bool NativeStartsWith(ReadOnlySpan prefix, ReadOnlySpan source, CompareOptions options) { AssertComparisonSupported(options); + using SymbolFilterHelper prefixHelper = new SymbolFilterHelper(); + using SymbolFilterHelper sourceHelper = new SymbolFilterHelper(); + // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - using SymbolFilterHelper prefixFilter = new SymbolFilterHelper(prefix, needsIndexMap: false); - using SymbolFilterHelper sourceFilter = new SymbolFilterHelper(source, needsIndexMap: false); + prefix = prefixHelper.GetFilteredString(prefix, generateIndexMap: false); + source = sourceHelper.GetFilteredString(source, generateIndexMap: false); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; - - prefix = prefixFilter.FilteredSpan; - source = sourceFilter.FilteredSpan; } fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix)) @@ -196,17 +149,17 @@ private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan { AssertComparisonSupported(options); + using SymbolFilterHelper suffixHelper = new SymbolFilterHelper(); + using SymbolFilterHelper sourceHelper = new SymbolFilterHelper(); + // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - using SymbolFilterHelper suffixFilter = new SymbolFilterHelper(suffix, needsIndexMap: false); - using SymbolFilterHelper sourceFilter = new SymbolFilterHelper(source, needsIndexMap: false); + suffix = suffixHelper.GetFilteredString(suffix, generateIndexMap: false); + source = sourceHelper.GetFilteredString(source, generateIndexMap: false); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; - - suffix = suffixFilter.FilteredSpan; - source = sourceFilter.FilteredSpan; } fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix)) @@ -219,88 +172,182 @@ private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan } /// - /// Filters out ignorable symbol characters from the input span. + /// Helper struct that manages buffers for filtering symbols from a span. + /// Uses stack allocation for short strings (up to 256 characters) and ArrayPool for longer strings. + /// Implements IDisposable to return rented arrays when heap allocation was used. /// - /// The input span to filter. - /// The destination span to write filtered characters to. Must be at least as large as input. - /// - /// Optional array to store the mapping where each index in the filtered output maps to the corresponding - /// character start position in the original input span. Pass null if mapping is not needed. - /// Must be at least as large as input if provided. - /// - /// - /// The number of characters written to the destination span after filtering. - /// - private static int FilterSymbolsFromSpan(ReadOnlySpan input, Span destination, int[]? indexMap) + private ref struct SymbolFilterHelper { - Debug.Assert(destination.Length >= input.Length, "Destination buffer must be at least as large as input"); - Debug.Assert(indexMap is null || indexMap.Length >= input.Length, "Index map buffer must be at least as large as input if provided"); - - int length = input.Length; - int writeIndex = 0; - - for (int i = 0; i < length;) + private const int StackAllocThreshold = 256; + private char[]? _arrayToReturnToPool; + private int[]? _indexMapArrayToReturnToPool; + private bool _usedStackAllocation; + private Span _indexMapSpan; + private bool _hasIndexMap; + + public SymbolFilterHelper() { } + + /// + /// Gets the index map as a ReadOnlySpan, or an empty span if no index map was generated. + /// + public ReadOnlySpan GetIndexMap() => _hasIndexMap ? _indexMapSpan : ReadOnlySpan.Empty; + + /// + /// Filters ignorable symbols from the input span and optionally generates an index map. + /// + /// The input span to filter. + /// + /// If true, generates an index map that tracks the position of each character in the filtered + /// output back to its original position in the input. The map is accessible via . + /// + /// + /// A span containing the filtered string with ignorable symbols removed. If no symbols are found, + /// returns the original input span without allocating. For short strings (up to 256 characters), + /// uses stack allocation. For longer strings, uses a rented array from + /// that will be returned when is called. + /// + public ReadOnlySpan GetFilteredString(ReadOnlySpan input, bool generateIndexMap = false) { - Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed); + if (_arrayToReturnToPool is not null) + { + Dispose(); + } - if (!IsIgnorableSymbol(rune)) + int i = 0; + int consumed = 0; + // Fast path: scan through the input until we find the first ignorable symbol + for (; i < input.Length;) { - // Copy the UTF-16 units and map each filtered position to the start of the original character - for (int j = 0; j < consumed; j++) + Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out consumed); + if (IsIgnorableSymbol(rune)) { - destination[writeIndex] = input[i + j]; - indexMap?[writeIndex] = i; - writeIndex++; + break; } + i += consumed; } - i += consumed; - } + // If we scanned the entire input without finding any ignorable symbols, return original input + if (i >= input.Length) + { + return input; + } - return writeIndex; - } + // Decide whether to use stack or heap allocation based on input length + Span outSpan; + if (input.Length <= StackAllocThreshold) + { + outSpan = stackalloc char[input.Length]; + if (generateIndexMap) + { + _indexMapSpan = stackalloc int[input.Length]; + _hasIndexMap = true; + } + _usedStackAllocation = true; + } + else + { + _arrayToReturnToPool = ArrayPool.Shared.Rent(input.Length); + outSpan = _arrayToReturnToPool; + if (generateIndexMap) + { + _indexMapArrayToReturnToPool = ArrayPool.Shared.Rent(input.Length); + _indexMapSpan = _indexMapArrayToReturnToPool; + _hasIndexMap = true; + } + _usedStackAllocation = false; + } - /// - /// Helper struct that manages buffers for filtering symbols from a span. - /// Uses ArrayPool to rent temporary buffers. - /// Implements IDisposable to return rented arrays. - /// - private ref struct SymbolFilterHelper - { - private char[]? _rentedCharArray; - private int[]? _rentedIntArray; + // Copy the initial segment that contains no ignorable symbols (positions 0 to i-1) + input.Slice(0, i).CopyTo(outSpan); + if (generateIndexMap) + { + // Initialize the index map for the initial segment with identity mapping + for (int j = 0; j < i; j++) + { + _indexMapSpan[j] = j; + } + } - public ReadOnlySpan FilteredSpan { get; } - public ReadOnlySpan IndexMap { get; } - public int FilteredLength { get; } - public int OriginalLength { get; } + int writeIndex = i; + i += consumed; // skip the ignorable symbol we just found - public SymbolFilterHelper(ReadOnlySpan input, bool needsIndexMap) - { - OriginalLength = input.Length; - _rentedCharArray = ArrayPool.Shared.Rent(input.Length); - _rentedIntArray = needsIndexMap ? ArrayPool.Shared.Rent(input.Length) : null; + for (; i < input.Length;) + { + Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out consumed); + if (!IsIgnorableSymbol(rune)) + { + // Copy the UTF-16 units and map each filtered position to the start of the original character + outSpan[writeIndex] = input[i]; + if (generateIndexMap) + { + _indexMapSpan[writeIndex] = i; + } + writeIndex++; - Span charBuffer = _rentedCharArray.AsSpan(0, input.Length); + if (consumed > 1) + { + outSpan[writeIndex] = input[i + 1]; + if (generateIndexMap) + { + _indexMapSpan[writeIndex] = i + 1; + } + writeIndex++; + } + } + i += consumed; + } - FilteredLength = FilterSymbolsFromSpan(input, charBuffer, _rentedIntArray); - FilteredSpan = charBuffer.Slice(0, FilteredLength); - IndexMap = _rentedIntArray is not null ? _rentedIntArray.AsSpan(0, FilteredLength) : default; + return outSpan.Slice(0, writeIndex); } public void Dispose() { - if (_rentedCharArray is not null) + // Only return the arrays if we used heap allocation (not stack allocation) + if (!_usedStackAllocation && _arrayToReturnToPool is not null) { - ArrayPool.Shared.Return(_rentedCharArray); - _rentedCharArray = null; + ArrayPool.Shared.Return(_arrayToReturnToPool); + _arrayToReturnToPool = null; } - if (_rentedIntArray is not null) + if (!_usedStackAllocation && _indexMapArrayToReturnToPool is not null) { - ArrayPool.Shared.Return(_rentedIntArray); - _rentedIntArray = null; + ArrayPool.Shared.Return(_indexMapArrayToReturnToPool); + _indexMapArrayToReturnToPool = null; } } + + /// + /// Determines whether the specified rune should be ignored when using CompareOptions.IgnoreSymbols. + /// + /// The rune to check. + /// + /// true if the rune should be ignored; otherwise, false. + /// + /// + /// This method returns true for: + /// - All separator categories (SpaceSeparator, LineSeparator, ParagraphSeparator) + /// - All punctuation categories (ConnectorPunctuation through OtherPunctuation) + /// - All symbol categories (MathSymbol through ModifierSymbol) + /// - Whitespace control characters (tab, line feed, vertical tab, form feed, carriage return, etc.) + /// + private static bool IsIgnorableSymbol(Rune rune) + { + UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(rune.Value); + + // Check for separator categories (11-13) + if (category >= UnicodeCategory.SpaceSeparator && category <= UnicodeCategory.ParagraphSeparator) + return true; + + // Check for punctuation/symbol categories (18-27) + if (category >= UnicodeCategory.ConnectorPunctuation && category <= UnicodeCategory.ModifierSymbol) + return true; + + // For Control (14) and Format (15) categories, only include whitespace characters + // This includes: tab (U+0009), LF (U+000A), VT (U+000B), FF (U+000C), CR (U+000D), NEL (U+0085) + if (category == UnicodeCategory.Control || category == UnicodeCategory.Format) + return Rune.IsWhiteSpace(rune); + + return false; + } } private static void AssertComparisonSupported(CompareOptions options) From 55c38eeb5903a29b584ccd71f291e02aac9905f3 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Thu, 9 Oct 2025 09:35:13 +0200 Subject: [PATCH 08/15] update comment and null check --- .../src/System/Globalization/CompareInfo.iOS.cs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 553dc2cef1b4fa..cb16b8af1a4d79 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -198,17 +198,16 @@ public SymbolFilterHelper() { } /// The input span to filter. /// /// If true, generates an index map that tracks the position of each character in the filtered - /// output back to its original position in the input. The map is accessible via . + /// output back to its original position in the input. The map is accessible via . /// /// /// A span containing the filtered string with ignorable symbols removed. If no symbols are found, - /// returns the original input span without allocating. For short strings (up to 256 characters), - /// uses stack allocation. For longer strings, uses a rented array from - /// that will be returned when is called. + /// returns the original input span without allocating. Otherwise, returns a span backed by either + /// stack-allocated or pooled memory that will be returned when is called. /// public ReadOnlySpan GetFilteredString(ReadOnlySpan input, bool generateIndexMap = false) { - if (_arrayToReturnToPool is not null) + if (_arrayToReturnToPool is not null || _indexMapArrayToReturnToPool is not null) { Dispose(); } From 5361926b61d559d95535508eadb17f113e03e6d0 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Thu, 9 Oct 2025 10:18:57 +0200 Subject: [PATCH 09/15] go back to always array pool for symbols filters --- .../System/Globalization/CompareInfo.iOS.cs | 81 ++++++------------- 1 file changed, 24 insertions(+), 57 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index cb16b8af1a4d79..ab308c7f47292a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -80,10 +80,9 @@ private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan sourceIndexMap = sourceHelper.GetIndexMap(); // If not ignoring symbols / nothing found / an error code / no index map (no symbols found in source), just propagate. - if (!ignoreSymbols || nativeLocation < 0 || sourceIndexMap.IsEmpty) + if (!ignoreSymbols || nativeLocation < 0 || sourceHelper.IndexMap is null) { if (matchLengthPtr != null) *matchLengthPtr = nativeLength; @@ -91,22 +90,22 @@ private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan 0 && sourceIndexMap[filteredEnd - 1] == endCharStartPos) + int firstUnit = (filteredEnd > 0 && sourceHelper.IndexMap[filteredEnd - 1] == endCharStartPos) ? filteredEnd - 1 : filteredEnd; // Check if the next position belongs to the same character (second unit of a surrogate pair) - int lastUnit = (filteredEnd + 1 < source.Length && sourceIndexMap[filteredEnd + 1] == endCharStartPos) + int lastUnit = (filteredEnd + 1 < source.Length && sourceHelper.IndexMap[filteredEnd + 1] == endCharStartPos) ? filteredEnd + 1 : filteredEnd; @@ -173,24 +172,19 @@ private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan /// /// Helper struct that manages buffers for filtering symbols from a span. - /// Uses stack allocation for short strings (up to 256 characters) and ArrayPool for longer strings. - /// Implements IDisposable to return rented arrays when heap allocation was used. + /// Uses ArrayPool for memory allocation. + /// Implements IDisposable to return rented arrays. /// private ref struct SymbolFilterHelper { - private const int StackAllocThreshold = 256; private char[]? _arrayToReturnToPool; - private int[]? _indexMapArrayToReturnToPool; - private bool _usedStackAllocation; - private Span _indexMapSpan; - private bool _hasIndexMap; public SymbolFilterHelper() { } /// - /// Gets the index map as a ReadOnlySpan, or an empty span if no index map was generated. + /// Gets the index map array, or null if no index map was generated. /// - public ReadOnlySpan GetIndexMap() => _hasIndexMap ? _indexMapSpan : ReadOnlySpan.Empty; + public int[]? IndexMap { get; private set; } /// /// Filters ignorable symbols from the input span and optionally generates an index map. @@ -198,16 +192,16 @@ public SymbolFilterHelper() { } /// The input span to filter. /// /// If true, generates an index map that tracks the position of each character in the filtered - /// output back to its original position in the input. The map is accessible via . + /// output back to its original position in the input. The map is accessible via . /// /// /// A span containing the filtered string with ignorable symbols removed. If no symbols are found, - /// returns the original input span without allocating. Otherwise, returns a span backed by either - /// stack-allocated or pooled memory that will be returned when is called. + /// returns the original input span without allocating. Otherwise, returns a span backed by + /// pooled memory that will be returned when is called. /// public ReadOnlySpan GetFilteredString(ReadOnlySpan input, bool generateIndexMap = false) { - if (_arrayToReturnToPool is not null || _indexMapArrayToReturnToPool is not null) + if (_arrayToReturnToPool is not null || IndexMap is not null) { Dispose(); } @@ -231,39 +225,19 @@ public ReadOnlySpan GetFilteredString(ReadOnlySpan input, bool gener return input; } - // Decide whether to use stack or heap allocation based on input length - Span outSpan; - if (input.Length <= StackAllocThreshold) - { - outSpan = stackalloc char[input.Length]; - if (generateIndexMap) - { - _indexMapSpan = stackalloc int[input.Length]; - _hasIndexMap = true; - } - _usedStackAllocation = true; - } - else - { - _arrayToReturnToPool = ArrayPool.Shared.Rent(input.Length); - outSpan = _arrayToReturnToPool; - if (generateIndexMap) - { - _indexMapArrayToReturnToPool = ArrayPool.Shared.Rent(input.Length); - _indexMapSpan = _indexMapArrayToReturnToPool; - _hasIndexMap = true; - } - _usedStackAllocation = false; - } + // Rent buffers from ArrayPool + _arrayToReturnToPool = ArrayPool.Shared.Rent(input.Length); + Span outSpan = _arrayToReturnToPool; // Copy the initial segment that contains no ignorable symbols (positions 0 to i-1) input.Slice(0, i).CopyTo(outSpan); if (generateIndexMap) { // Initialize the index map for the initial segment with identity mapping + IndexMap = ArrayPool.Shared.Rent(input.Length); for (int j = 0; j < i; j++) { - _indexMapSpan[j] = j; + IndexMap![j] = j; } } @@ -277,19 +251,13 @@ public ReadOnlySpan GetFilteredString(ReadOnlySpan input, bool gener { // Copy the UTF-16 units and map each filtered position to the start of the original character outSpan[writeIndex] = input[i]; - if (generateIndexMap) - { - _indexMapSpan[writeIndex] = i; - } + IndexMap?[writeIndex] = i; writeIndex++; if (consumed > 1) { outSpan[writeIndex] = input[i + 1]; - if (generateIndexMap) - { - _indexMapSpan[writeIndex] = i + 1; - } + IndexMap?[writeIndex] = i + 1; writeIndex++; } } @@ -301,16 +269,15 @@ public ReadOnlySpan GetFilteredString(ReadOnlySpan input, bool gener public void Dispose() { - // Only return the arrays if we used heap allocation (not stack allocation) - if (!_usedStackAllocation && _arrayToReturnToPool is not null) + if (_arrayToReturnToPool is not null) { ArrayPool.Shared.Return(_arrayToReturnToPool); _arrayToReturnToPool = null; } - if (!_usedStackAllocation && _indexMapArrayToReturnToPool is not null) + if (IndexMap is not null) { - ArrayPool.Shared.Return(_indexMapArrayToReturnToPool); - _indexMapArrayToReturnToPool = null; + ArrayPool.Shared.Return(IndexMap); + IndexMap = null; } } From d72dba2406cbf9bd48816b270268b9f7c42b0063 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 10 Oct 2025 13:15:26 +0200 Subject: [PATCH 10/15] [WIP] IgnoreSymbols with ArrayPools and stackalloc --- .../System/Globalization/CompareInfo.iOS.cs | 399 +++++++++--------- 1 file changed, 208 insertions(+), 191 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index ab308c7f47292a..045a6621a1042b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -4,6 +4,8 @@ using System.Buffers; using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; @@ -18,6 +20,8 @@ private enum ErrorCodes ERROR_MIXED_COMPOSITION_NOT_FOUND = -3, } + private const int StackAllocThreshold = 256; + private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan string2, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); @@ -25,27 +29,34 @@ private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan< AssertComparisonSupported(options); - using SymbolFilterHelper helper1 = new SymbolFilterHelper(); - using SymbolFilterHelper helper2 = new SymbolFilterHelper(); + char[]? rentedArray1 = null; + char[]? rentedArray2 = null; // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - string1 = helper1.GetFilteredString(string1, generateIndexMap: false); - string2 = helper2.GetFilteredString(string2, generateIndexMap: false); + string1 = GetFilteredString(string1, stackalloc char[StackAllocThreshold], out rentedArray1, out _, generateIndexMap: false); + string2 = GetFilteredString(string2, stackalloc char[StackAllocThreshold], out rentedArray2, out _, generateIndexMap: false); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } - // GetReference may return nullptr if the input span is defaulted. The native layer handles - // this appropriately; no workaround is needed on the managed side. - fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) - fixed (char* pString2 = &MemoryMarshal.GetReference(string2)) + try + { + // GetReference may return nullptr if the input span is defaulted. The native layer handles + // this appropriately; no workaround is needed on the managed side. + fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) + fixed (char* pString2 = &MemoryMarshal.GetReference(string2)) + { + int result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); + Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + return result; + } + } + finally { - int result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - return result; + ReturnBuffers(rentedArray1, rentedArray2); } } @@ -53,94 +64,108 @@ private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan 0 && sourceHelper.IndexMap[filteredEnd - 1] == endCharStartPos) - ? filteredEnd - 1 - : filteredEnd; + // Check if the previous position belongs to the same character (first unit of a surrogate pair) + int firstUnit = (filteredEnd > 0 && rentedIndexMap[filteredEnd - 1] == endCharStartPos) + ? filteredEnd - 1 + : filteredEnd; - // Check if the next position belongs to the same character (second unit of a surrogate pair) - int lastUnit = (filteredEnd + 1 < source.Length && sourceHelper.IndexMap[filteredEnd + 1] == endCharStartPos) - ? filteredEnd + 1 - : filteredEnd; + // Check if the next position belongs to the same character (second unit of a surrogate pair) + int lastUnit = (filteredEnd + 1 < source.Length && rentedIndexMap[filteredEnd + 1] == endCharStartPos) + ? filteredEnd + 1 + : filteredEnd; - int endCharWidth = lastUnit - firstUnit + 1; - int originalEnd = endCharStartPos + endCharWidth; - int originalLength = originalEnd - originalStart; + int endCharWidth = lastUnit - firstUnit + 1; + int originalEnd = endCharStartPos + endCharWidth; + int originalLength = originalEnd - originalStart; - if (matchLengthPtr != null) - *matchLengthPtr = originalLength; - return originalStart; + if (matchLengthPtr != null) + *matchLengthPtr = originalLength; + return originalStart; + } + finally + { + ReturnBuffers(rentedTargetArray, rentedSourceArray, rentedIndexMap); + } } private unsafe bool NativeStartsWith(ReadOnlySpan prefix, ReadOnlySpan source, CompareOptions options) { AssertComparisonSupported(options); - using SymbolFilterHelper prefixHelper = new SymbolFilterHelper(); - using SymbolFilterHelper sourceHelper = new SymbolFilterHelper(); + char[]? rentedPrefixArray = null; + char[]? rentedSourceArray = null; // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - prefix = prefixHelper.GetFilteredString(prefix, generateIndexMap: false); - source = sourceHelper.GetFilteredString(source, generateIndexMap: false); + prefix = GetFilteredString(prefix, stackalloc char[StackAllocThreshold], out rentedPrefixArray, out _, generateIndexMap: false); + source = GetFilteredString(source, stackalloc char[StackAllocThreshold], out rentedSourceArray, out _, generateIndexMap: false); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } - fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix)) - fixed (char* pSource = &MemoryMarshal.GetReference(source)) + try + { + fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + { + int result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, prefix.Length, pSource, source.Length, options); + Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + return result > 0; + } + } + finally { - int result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, prefix.Length, pSource, source.Length, options); - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - return result > 0; + ReturnBuffers(rentedPrefixArray, rentedSourceArray); } } @@ -148,172 +173,164 @@ private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan { AssertComparisonSupported(options); - using SymbolFilterHelper suffixHelper = new SymbolFilterHelper(); - using SymbolFilterHelper sourceHelper = new SymbolFilterHelper(); + char[]? rentedSuffixArray = null; + char[]? rentedSourceArray = null; // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - suffix = suffixHelper.GetFilteredString(suffix, generateIndexMap: false); - source = sourceHelper.GetFilteredString(source, generateIndexMap: false); + suffix = GetFilteredString(suffix, stackalloc char[StackAllocThreshold], out rentedSuffixArray, out _, generateIndexMap: false); + source = GetFilteredString(source, stackalloc char[StackAllocThreshold], out rentedSourceArray, out _, generateIndexMap: false); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } - fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix)) - fixed (char* pSource = &MemoryMarshal.GetReference(source)) + try + { + fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + { + int result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, suffix.Length, pSource, source.Length, options); + Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + return result > 0; + } + } + finally { - int result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, suffix.Length, pSource, source.Length, options); - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - return result > 0; + ReturnBuffers(rentedSuffixArray, rentedSourceArray); } } /// - /// Helper struct that manages buffers for filtering symbols from a span. - /// Uses ArrayPool for memory allocation. - /// Implements IDisposable to return rented arrays. + /// Returns multiple rented buffers to their respective pools if they were allocated. /// - private ref struct SymbolFilterHelper + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void ReturnBuffers(char[]? charBuffer1 = null, char[]? charBuffer2 = null, int[]? indexMapBuffer = null) { - private char[]? _arrayToReturnToPool; - - public SymbolFilterHelper() { } - - /// - /// Gets the index map array, or null if no index map was generated. - /// - public int[]? IndexMap { get; private set; } - - /// - /// Filters ignorable symbols from the input span and optionally generates an index map. - /// - /// The input span to filter. - /// - /// If true, generates an index map that tracks the position of each character in the filtered - /// output back to its original position in the input. The map is accessible via . - /// - /// - /// A span containing the filtered string with ignorable symbols removed. If no symbols are found, - /// returns the original input span without allocating. Otherwise, returns a span backed by - /// pooled memory that will be returned when is called. - /// - public ReadOnlySpan GetFilteredString(ReadOnlySpan input, bool generateIndexMap = false) - { - if (_arrayToReturnToPool is not null || IndexMap is not null) - { - Dispose(); - } + if (charBuffer1 is not null) + ArrayPool.Shared.Return(charBuffer1); + if (charBuffer2 is not null) + ArrayPool.Shared.Return(charBuffer2); + if (indexMapBuffer is not null) + ArrayPool.Shared.Return(indexMapBuffer); + } - int i = 0; - int consumed = 0; - // Fast path: scan through the input until we find the first ignorable symbol - for (; i < input.Length;) - { - Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out consumed); - if (IsIgnorableSymbol(rune)) - { - break; - } - i += consumed; - } + /// + /// Filters ignorable symbols from the input span. + /// + /// The input span to filter. + /// A stack-allocated buffer to use for small inputs or if no symbols are found. + /// Outputs a rented char array if input is too large for stackBuffer and contains symbols. Null if stackBuffer was sufficient. + /// Outputs a rented int array for the index map if generateIndexMap is true and symbols were found. Null otherwise. + /// If true, generates an index map tracking each character's original position. + /// + /// A span containing the filtered string with ignorable symbols removed. If no symbols are found, + /// returns the original input span without allocating. + /// + private static ReadOnlySpan GetFilteredString( + ReadOnlySpan input, + scoped Span stackBuffer, + out char[]? rentedCharArray, + out int[]? rentedIndexMap, + bool generateIndexMap) + { + rentedCharArray = null; + rentedIndexMap = null; + + int i = 0; + int consumed = 0; - // If we scanned the entire input without finding any ignorable symbols, return original input - if (i >= input.Length) + // Fast path: scan through the input until we find the first ignorable symbol + for (; i < input.Length;) + { + Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out consumed); + if (IsIgnorableSymbol(rune)) { - return input; + break; } + i += consumed; + } - // Rent buffers from ArrayPool - _arrayToReturnToPool = ArrayPool.Shared.Rent(input.Length); - Span outSpan = _arrayToReturnToPool; + // If we scanned the entire input without finding any ignorable symbols, return original input + if (i >= input.Length) + { + return input; + } - // Copy the initial segment that contains no ignorable symbols (positions 0 to i-1) - input.Slice(0, i).CopyTo(outSpan); - if (generateIndexMap) + // Found symbols - decide whether to use stack or heap allocation + Span outSpan = input.Length <= stackBuffer.Length ? stackBuffer : (rentedCharArray = ArrayPool.Shared.Rent(input.Length)); + // Copy the initial segment that contains no ignorable symbols (positions 0 to i-1) + input.Slice(0, i).CopyTo(outSpan); + + // Initialize the index map for the initial segment with identity mapping + if (generateIndexMap) + { + rentedIndexMap = ArrayPool.Shared.Rent(input.Length); + for (int j = 0; j < i; j++) { - // Initialize the index map for the initial segment with identity mapping - IndexMap = ArrayPool.Shared.Rent(input.Length); - for (int j = 0; j < i; j++) - { - IndexMap![j] = j; - } + rentedIndexMap[j] = j; } + } - int writeIndex = i; - i += consumed; // skip the ignorable symbol we just found + int writeIndex = i; + i += consumed; // skip the ignorable symbol we just found - for (; i < input.Length;) + for (; i < input.Length;) + { + Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out consumed); + if (!IsIgnorableSymbol(rune)) { - Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out consumed); - if (!IsIgnorableSymbol(rune)) + // Copy the UTF-16 units and map each filtered position to the start of the original character + outSpan[writeIndex] = input[i]; + rentedIndexMap?[writeIndex] = i; + writeIndex++; + + if (consumed > 1) { - // Copy the UTF-16 units and map each filtered position to the start of the original character - outSpan[writeIndex] = input[i]; - IndexMap?[writeIndex] = i; + outSpan[writeIndex] = input[i + 1]; + rentedIndexMap?[writeIndex] = i; writeIndex++; - - if (consumed > 1) - { - outSpan[writeIndex] = input[i + 1]; - IndexMap?[writeIndex] = i + 1; - writeIndex++; - } } - i += consumed; } - - return outSpan.Slice(0, writeIndex); + i += consumed; } - public void Dispose() - { - if (_arrayToReturnToPool is not null) - { - ArrayPool.Shared.Return(_arrayToReturnToPool); - _arrayToReturnToPool = null; - } - if (IndexMap is not null) - { - ArrayPool.Shared.Return(IndexMap); - IndexMap = null; - } - } + return outSpan.Slice(0, writeIndex); + } - /// - /// Determines whether the specified rune should be ignored when using CompareOptions.IgnoreSymbols. - /// - /// The rune to check. - /// - /// true if the rune should be ignored; otherwise, false. - /// - /// - /// This method returns true for: - /// - All separator categories (SpaceSeparator, LineSeparator, ParagraphSeparator) - /// - All punctuation categories (ConnectorPunctuation through OtherPunctuation) - /// - All symbol categories (MathSymbol through ModifierSymbol) - /// - Whitespace control characters (tab, line feed, vertical tab, form feed, carriage return, etc.) - /// - private static bool IsIgnorableSymbol(Rune rune) - { - UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(rune.Value); + /// + /// Determines whether the specified rune should be ignored when using CompareOptions.IgnoreSymbols. + /// + /// The rune to check. + /// + /// true if the rune should be ignored; otherwise, false. + /// + /// + /// This method returns true for: + /// - All separator categories (SpaceSeparator, LineSeparator, ParagraphSeparator) + /// - All punctuation categories (ConnectorPunctuation through OtherPunctuation) + /// - All symbol categories (MathSymbol through ModifierSymbol) + /// - Whitespace control characters (tab, line feed, vertical tab, form feed, carriage return, etc.) + /// + private static bool IsIgnorableSymbol(Rune rune) + { + UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(rune.Value); - // Check for separator categories (11-13) - if (category >= UnicodeCategory.SpaceSeparator && category <= UnicodeCategory.ParagraphSeparator) - return true; + // Check for separator categories (11-13) + if (category >= UnicodeCategory.SpaceSeparator && category <= UnicodeCategory.ParagraphSeparator) + return true; - // Check for punctuation/symbol categories (18-27) - if (category >= UnicodeCategory.ConnectorPunctuation && category <= UnicodeCategory.ModifierSymbol) - return true; + // Check for punctuation/symbol categories (18-27) + if (category >= UnicodeCategory.ConnectorPunctuation && category <= UnicodeCategory.ModifierSymbol) + return true; - // For Control (14) and Format (15) categories, only include whitespace characters - // This includes: tab (U+0009), LF (U+000A), VT (U+000B), FF (U+000C), CR (U+000D), NEL (U+0085) - if (category == UnicodeCategory.Control || category == UnicodeCategory.Format) - return Rune.IsWhiteSpace(rune); + // For Control (14) and Format (15) categories, only include whitespace characters + // This includes: tab (U+0009), LF (U+000A), VT (U+000B), FF (U+000C), CR (U+000D), NEL (U+0085) + if (category == UnicodeCategory.Control || category == UnicodeCategory.Format) + return Rune.IsWhiteSpace(rune); - return false; - } + return false; } private static void AssertComparisonSupported(CompareOptions options) From d86d2789c74dce6c7bd49407f641fd4982184d81 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Fri, 10 Oct 2025 13:17:30 +0200 Subject: [PATCH 11/15] add temp tests for ArrayPool allocation on iOS --- .../CompareInfo/CompareInfoTests.IndexOf.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs index d8100474403e1e..44d8fb00369d4f 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -128,6 +128,13 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_invariantCompare, "Tes ting", "Testing", 0, 8, CompareOptions.None, -1, 0 }; yield return new object[] { s_invariantCompare, "'Te st i ng!", "Testing", 0, 11, CompareOptions.IgnoreSymbols, -1, 0 }; + // Ignore symbols - long strings (over 256 chars) to test ArrayPool buffer allocation on iOS + if (PlatformDetection.IsHybridGlobalizationOnApplePlatform) + { + yield return new object[] { s_invariantCompare, new string('a', 100) + new string('b', 50) + "$" + new string('b', 50) + "!" + new string('c', 100), new string('b', 100), 0, 303, CompareOptions.IgnoreSymbols, 100, 102 }; + yield return new object[] { s_invariantCompare, new string('a', 100) + new string('b', 100) + new string('c', 100), new string('b', 100), 0, 300, CompareOptions.IgnoreSymbols, 100, 100 }; + } + yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 0, 13, CompareOptions.None, 2, 2 }; // Ordinal should be case-sensitive From bcb340ea76f43021395eb7d94229b22bd3e8c42b Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Sat, 11 Oct 2025 17:09:53 -0700 Subject: [PATCH 12/15] Fix build errors --- .../System/Globalization/CompareInfo.iOS.cs | 357 ++++++++++-------- 1 file changed, 201 insertions(+), 156 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 045a6621a1042b..80f101fd1e64f8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -20,223 +20,255 @@ private enum ErrorCodes ERROR_MIXED_COMPOSITION_NOT_FOUND = -3, } - private const int StackAllocThreshold = 256; + private const int StackAllocThreshold = 150; - private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan string2, CompareOptions options) + private unsafe int CompareStringNative(scoped ReadOnlySpan string1, scoped ReadOnlySpan string2, CompareOptions options) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); AssertComparisonSupported(options); - char[]? rentedArray1 = null; - char[]? rentedArray2 = null; + using SymbolFilteringBuffer buffer1 = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer buffer2 = new SymbolFilteringBuffer(); + + Span stackBuffer1 = stackalloc char[StackAllocThreshold]; + Span stackBuffer2 = stackalloc char[StackAllocThreshold]; // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - string1 = GetFilteredString(string1, stackalloc char[StackAllocThreshold], out rentedArray1, out _, generateIndexMap: false); - string2 = GetFilteredString(string2, stackalloc char[StackAllocThreshold], out rentedArray2, out _, generateIndexMap: false); + string1 = !buffer1.TryFilterString(string1, stackBuffer1, Span.Empty) ? string1 : + buffer1.RentedFilteredBuffer is not null ? + buffer1.RentedFilteredBuffer.AsSpan(0, buffer1.FilteredLength) : + stackBuffer1.Slice(0, buffer1.FilteredLength); + + string2 = !buffer2.TryFilterString(string2, stackBuffer2, Span.Empty) ? string2 : + buffer2.RentedFilteredBuffer is not null ? + buffer2.RentedFilteredBuffer.AsSpan(0, buffer2.FilteredLength) : + stackBuffer2.Slice(0, buffer2.FilteredLength); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } - try - { - // GetReference may return nullptr if the input span is defaulted. The native layer handles - // this appropriately; no workaround is needed on the managed side. - fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) - fixed (char* pString2 = &MemoryMarshal.GetReference(string2)) - { - int result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - return result; - } - } - finally + // GetReference may return nullptr if the input span is defaulted. The native layer handles + // this appropriately; no workaround is needed on the managed side. + fixed (char* pString1 = &MemoryMarshal.GetReference(string1)) + fixed (char* pString2 = &MemoryMarshal.GetReference(string2)) { - ReturnBuffers(rentedArray1, rentedArray2); + int result = Interop.Globalization.CompareStringNative(m_name, m_name.Length, pString1, string1.Length, pString2, string2.Length, options); + Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + return result; } } - private unsafe int IndexOfCoreNative(ReadOnlySpan target, ReadOnlySpan source, CompareOptions options, bool fromBeginning, int* matchLengthPtr) + private unsafe int IndexOfCoreNative(scoped ReadOnlySpan target, scoped ReadOnlySpan source, CompareOptions options, bool fromBeginning, int* matchLengthPtr) { AssertComparisonSupported(options); - char[]? rentedTargetArray = null; - char[]? rentedSourceArray = null; - int[]? rentedIndexMap = null; bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; + using SymbolFilteringBuffer buffer1 = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer buffer2 = new SymbolFilteringBuffer(); + + Span stackBuffer1 = stackalloc char[StackAllocThreshold]; + Span stackBuffer2 = stackalloc char[StackAllocThreshold]; + Span stackIndexMap = stackalloc int[StackAllocThreshold]; + // If we are ignoring symbols, preprocess the strings by removing specified Unicode categories. if (ignoreSymbols) { - target = GetFilteredString(target, stackalloc char[StackAllocThreshold], out rentedTargetArray, out _, generateIndexMap: false); - source = GetFilteredString(source, stackalloc char[StackAllocThreshold], out rentedSourceArray, out rentedIndexMap, generateIndexMap: true); + target = !buffer1.TryFilterString(target, stackBuffer1, Span.Empty) ? target : + buffer1.RentedFilteredBuffer is not null ? + buffer1.RentedFilteredBuffer.AsSpan(0, buffer1.FilteredLength) : + stackBuffer1.Slice(0, buffer1.FilteredLength); + + source = !buffer2.TryFilterString(source, stackBuffer2, stackIndexMap) ? source : + buffer2.RentedFilteredBuffer is not null ? + buffer2.RentedFilteredBuffer.AsSpan(0, buffer2.FilteredLength) : + stackBuffer2.Slice(0, buffer2.FilteredLength); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } - try + Interop.Range result; + fixed (char* pTarget = &MemoryMarshal.GetReference(target)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) { - Interop.Range result; - fixed (char* pTarget = &MemoryMarshal.GetReference(target)) - fixed (char* pSource = &MemoryMarshal.GetReference(source)) - { - result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, target.Length, pSource, source.Length, options, fromBeginning); - Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND) - throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions); - } + result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, target.Length, pSource, source.Length, options, fromBeginning); + Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND) + throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions); + } - int nativeLocation = result.Location; - int nativeLength = result.Length; + int nativeLocation = result.Location; + int nativeLength = result.Length; - // If not ignoring symbols / nothing found / an error code / no index map (no symbols found in source), just propagate. - if (!ignoreSymbols || nativeLocation < 0 || rentedIndexMap is null) - { - if (matchLengthPtr != null) - *matchLengthPtr = nativeLength; - return nativeLocation; - } + // If not ignoring symbols / nothing found / an error code / no index map (no symbols found in source), just propagate. + // buffer2.FilteredLength == 0 means no symbols were found in source, so no index map was created. + if (!ignoreSymbols || buffer2.FilteredLength == 0 || nativeLocation < 0) + { + if (matchLengthPtr != null) + *matchLengthPtr = nativeLength; + return nativeLocation; + } - // If ignoring symbols, map filtered indices back to original indices, expanding match length to include removed symbol chars inside the span. - int originalStart = rentedIndexMap[nativeLocation]; - int filteredEnd = nativeLocation + nativeLength - 1; + Span rentedIndexMap = buffer2.RentedIndexMapBuffer is not null ? + buffer2.RentedIndexMapBuffer.AsSpan(0, buffer2.FilteredLength) : + stackIndexMap.Slice(0, buffer2.FilteredLength); - Debug.Assert(filteredEnd < source.Length, - $"Filtered end index {filteredEnd} should not exceed the length of the filtered string {source.Length}. nativeLocation={nativeLocation}, nativeLength={nativeLength}"); + // If ignoring symbols, map filtered indices back to original indices, expanding match length to include removed symbol chars inside the span. + int originalStart = rentedIndexMap[nativeLocation]; + int filteredEnd = nativeLocation + nativeLength - 1; - // Find the end position of the character at filteredEnd in the original string. - int endCharStartPos = rentedIndexMap[filteredEnd]; + Debug.Assert(filteredEnd < source.Length, + $"Filtered end index {filteredEnd} should not exceed the length of the filtered string {source.Length}. nativeLocation={nativeLocation}, nativeLength={nativeLength}"); - // Check if the previous position belongs to the same character (first unit of a surrogate pair) - int firstUnit = (filteredEnd > 0 && rentedIndexMap[filteredEnd - 1] == endCharStartPos) - ? filteredEnd - 1 - : filteredEnd; + // Find the end position of the character at filteredEnd in the original string. + int endCharStartPos = rentedIndexMap[filteredEnd]; - // Check if the next position belongs to the same character (second unit of a surrogate pair) - int lastUnit = (filteredEnd + 1 < source.Length && rentedIndexMap[filteredEnd + 1] == endCharStartPos) - ? filteredEnd + 1 - : filteredEnd; + // Check if the previous position belongs to the same character (first unit of a surrogate pair) + int firstUnit = (filteredEnd > 0 && rentedIndexMap[filteredEnd - 1] == endCharStartPos) + ? filteredEnd - 1 + : filteredEnd; - int endCharWidth = lastUnit - firstUnit + 1; - int originalEnd = endCharStartPos + endCharWidth; - int originalLength = originalEnd - originalStart; + // Check if the next position belongs to the same character (second unit of a surrogate pair) + int lastUnit = (filteredEnd + 1 < source.Length && rentedIndexMap[filteredEnd + 1] == endCharStartPos) + ? filteredEnd + 1 + : filteredEnd; - if (matchLengthPtr != null) - *matchLengthPtr = originalLength; - return originalStart; - } - finally - { - ReturnBuffers(rentedTargetArray, rentedSourceArray, rentedIndexMap); - } + int endCharWidth = lastUnit - firstUnit + 1; + int originalEnd = endCharStartPos + endCharWidth; + int originalLength = originalEnd - originalStart; + + if (matchLengthPtr != null) + *matchLengthPtr = originalLength; + + return originalStart; } - private unsafe bool NativeStartsWith(ReadOnlySpan prefix, ReadOnlySpan source, CompareOptions options) + private unsafe bool NativeStartsWith(scoped ReadOnlySpan prefix, scoped ReadOnlySpan source, CompareOptions options) { AssertComparisonSupported(options); - char[]? rentedPrefixArray = null; - char[]? rentedSourceArray = null; + using SymbolFilteringBuffer buffer1 = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer buffer2 = new SymbolFilteringBuffer(); + + Span stackBuffer1 = stackalloc char[StackAllocThreshold]; + Span stackBuffer2 = stackalloc char[StackAllocThreshold]; // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - prefix = GetFilteredString(prefix, stackalloc char[StackAllocThreshold], out rentedPrefixArray, out _, generateIndexMap: false); - source = GetFilteredString(source, stackalloc char[StackAllocThreshold], out rentedSourceArray, out _, generateIndexMap: false); + prefix = !buffer1.TryFilterString(prefix, stackBuffer1, Span.Empty) ? prefix : + buffer1.RentedFilteredBuffer is not null ? + buffer1.RentedFilteredBuffer.AsSpan(0, buffer1.FilteredLength) : + stackBuffer1.Slice(0, buffer1.FilteredLength); + + source = !buffer2.TryFilterString(source, stackBuffer2, Span.Empty) ? source : + buffer2.RentedFilteredBuffer is not null ? + buffer2.RentedFilteredBuffer.AsSpan(0, buffer2.FilteredLength) : + stackBuffer2.Slice(0, buffer2.FilteredLength); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } - try - { - fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix)) - fixed (char* pSource = &MemoryMarshal.GetReference(source)) - { - int result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, prefix.Length, pSource, source.Length, options); - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - return result > 0; - } - } - finally + fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) { - ReturnBuffers(rentedPrefixArray, rentedSourceArray); + int result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, prefix.Length, pSource, source.Length, options); + Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + return result > 0; } } - private unsafe bool NativeEndsWith(ReadOnlySpan suffix, ReadOnlySpan source, CompareOptions options) + private unsafe bool NativeEndsWith(scoped ReadOnlySpan suffix, scoped ReadOnlySpan source, CompareOptions options) { AssertComparisonSupported(options); - char[]? rentedSuffixArray = null; - char[]? rentedSourceArray = null; + using SymbolFilteringBuffer buffer1 = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer buffer2 = new SymbolFilteringBuffer(); + + Span stackBuffer1 = stackalloc char[StackAllocThreshold]; + Span stackBuffer2 = stackalloc char[StackAllocThreshold]; // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - suffix = GetFilteredString(suffix, stackalloc char[StackAllocThreshold], out rentedSuffixArray, out _, generateIndexMap: false); - source = GetFilteredString(source, stackalloc char[StackAllocThreshold], out rentedSourceArray, out _, generateIndexMap: false); + suffix = !buffer1.TryFilterString(suffix, stackBuffer1, Span.Empty) ? suffix : + buffer1.RentedFilteredBuffer is not null ? + buffer1.RentedFilteredBuffer.AsSpan(0, buffer1.FilteredLength) : + stackBuffer1.Slice(0, buffer1.FilteredLength); + + source = !buffer2.TryFilterString(source, stackBuffer2, Span.Empty) ? source : + buffer2.RentedFilteredBuffer is not null ? + buffer2.RentedFilteredBuffer.AsSpan(0, buffer2.FilteredLength) : + stackBuffer2.Slice(0, buffer2.FilteredLength); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; } - try + fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix)) + fixed (char* pSource = &MemoryMarshal.GetReference(source)) { - fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix)) - fixed (char* pSource = &MemoryMarshal.GetReference(source)) - { - int result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, suffix.Length, pSource, source.Length, options); - Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); - return result > 0; - } - } - finally - { - ReturnBuffers(rentedSuffixArray, rentedSourceArray); + int result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, suffix.Length, pSource, source.Length, options); + Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND); + return result > 0; } } - /// - /// Returns multiple rented buffers to their respective pools if they were allocated. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void ReturnBuffers(char[]? charBuffer1 = null, char[]? charBuffer2 = null, int[]? indexMapBuffer = null) + private static void AssertComparisonSupported(CompareOptions options) + { + if ((options | SupportedCompareOptions) != SupportedCompareOptions) + throw new PlatformNotSupportedException(GetPNSE(options)); + } + + private const CompareOptions SupportedCompareOptions = CompareOptions.None | CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace | + CompareOptions.IgnoreWidth | CompareOptions.StringSort | CompareOptions.IgnoreKanaType | CompareOptions.IgnoreSymbols; + + private static string GetPNSE(CompareOptions options) => + SR.Format(SR.PlatformNotSupported_HybridGlobalizationWithCompareOptions, options); + } + + internal struct SymbolFilteringBuffer : IDisposable + { + public SymbolFilteringBuffer() { - if (charBuffer1 is not null) - ArrayPool.Shared.Return(charBuffer1); - if (charBuffer2 is not null) - ArrayPool.Shared.Return(charBuffer2); - if (indexMapBuffer is not null) - ArrayPool.Shared.Return(indexMapBuffer); + RentedFilteredBuffer = null; + RentedIndexMapBuffer = null; + FilteredLength = 0; } + public char[]? RentedFilteredBuffer { get; set; } + public int[]? RentedIndexMapBuffer { get; set; } + public int FilteredLength { get; set; } + /// - /// Filters ignorable symbols from the input span. + /// Attempt to filter ignorable symbols from the input string. /// - /// The input span to filter. - /// A stack-allocated buffer to use for small inputs or if no symbols are found. - /// Outputs a rented char array if input is too large for stackBuffer and contains symbols. Null if stackBuffer was sufficient. - /// Outputs a rented int array for the index map if generateIndexMap is true and symbols were found. Null otherwise. - /// If true, generates an index map tracking each character's original position. - /// - /// A span containing the filtered string with ignorable symbols removed. If no symbols are found, - /// returns the original input span without allocating. - /// - private static ReadOnlySpan GetFilteredString( + /// If the input contains ignorable symbols and the stack buffers are not large enough to + /// hold the filtered result, the method may fall back to heap allocation. The index mapping buffer is only + /// populated if index mapping is needed. This method does not modify the original input. + /// The input string to be filtered, represented as a read-only span of UTF-16 characters. + /// A stack-allocated buffer to receive the filtered characters. If this buffer is not long enough to hold the filtered result, + /// the method may fall to use the ArrayPool. + /// + /// A stack-allocated buffer to receive the mapping from filtered character positions to their original indices + /// in the input. if this span is empty, means ni need to create the index mapping. + /// true if the string is filtered removing symbols from there. false if there is no symbols found in the input. + internal bool TryFilterString( ReadOnlySpan input, - scoped Span stackBuffer, - out char[]? rentedCharArray, - out int[]? rentedIndexMap, - bool generateIndexMap) + Span filteredStackBuffer, + Span indexMapStackBuffer) { - rentedCharArray = null; - rentedIndexMap = null; + if (RentedFilteredBuffer is not null || RentedIndexMapBuffer is not null) + { + Dispose(); + } int i = 0; int consumed = 0; @@ -252,28 +284,31 @@ private static ReadOnlySpan GetFilteredString( i += consumed; } - // If we scanned the entire input without finding any ignorable symbols, return original input + // If we scanned the entire input without finding any ignorable symbols, return false if (i >= input.Length) { - return input; + return false; } // Found symbols - decide whether to use stack or heap allocation - Span outSpan = input.Length <= stackBuffer.Length ? stackBuffer : (rentedCharArray = ArrayPool.Shared.Rent(input.Length)); + Span outSpan = input.Length <= filteredStackBuffer.Length ? filteredStackBuffer : (RentedFilteredBuffer = ArrayPool.Shared.Rent(input.Length)); // Copy the initial segment that contains no ignorable symbols (positions 0 to i-1) input.Slice(0, i).CopyTo(outSpan); + Span indexMap = indexMapStackBuffer.IsEmpty ? + Span.Empty : + (indexMapStackBuffer.Length >= input.Length ? indexMapStackBuffer : (RentedIndexMapBuffer = ArrayPool.Shared.Rent(input.Length))); + // Initialize the index map for the initial segment with identity mapping - if (generateIndexMap) + if (!indexMap.IsEmpty) { - rentedIndexMap = ArrayPool.Shared.Rent(input.Length); for (int j = 0; j < i; j++) { - rentedIndexMap[j] = j; + indexMap[j] = j; } } - int writeIndex = i; + FilteredLength = i; i += consumed; // skip the ignorable symbol we just found for (; i < input.Length;) @@ -282,21 +317,27 @@ private static ReadOnlySpan GetFilteredString( if (!IsIgnorableSymbol(rune)) { // Copy the UTF-16 units and map each filtered position to the start of the original character - outSpan[writeIndex] = input[i]; - rentedIndexMap?[writeIndex] = i; - writeIndex++; + outSpan[FilteredLength] = input[i]; + if (!indexMap.IsEmpty) + { + indexMap[FilteredLength] = i; + } + FilteredLength++; if (consumed > 1) { - outSpan[writeIndex] = input[i + 1]; - rentedIndexMap?[writeIndex] = i; - writeIndex++; + outSpan[FilteredLength] = input[i + 1]; + if (!indexMap.IsEmpty) + { + indexMap[FilteredLength] = i + 1; + } + FilteredLength++; } } i += consumed; } - return outSpan.Slice(0, writeIndex); + return true; } /// @@ -333,16 +374,20 @@ private static bool IsIgnorableSymbol(Rune rune) return false; } - private static void AssertComparisonSupported(CompareOptions options) + public void Dispose() { - if ((options | SupportedCompareOptions) != SupportedCompareOptions) - throw new PlatformNotSupportedException(GetPNSE(options)); - } - - private const CompareOptions SupportedCompareOptions = CompareOptions.None | CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace | - CompareOptions.IgnoreWidth | CompareOptions.StringSort | CompareOptions.IgnoreKanaType | CompareOptions.IgnoreSymbols; + if (RentedFilteredBuffer is not null) + { + ArrayPool.Shared.Return(RentedFilteredBuffer); + RentedFilteredBuffer = null; + } + if (RentedIndexMapBuffer is not null) + { + ArrayPool.Shared.Return(RentedIndexMapBuffer); + RentedIndexMapBuffer = null; + } - private static string GetPNSE(CompareOptions options) => - SR.Format(SR.PlatformNotSupported_HybridGlobalizationWithCompareOptions, options); + FilteredLength = 0; + } } } From 24a5570a82eec3ba1e59e49652d8674e0125d065 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed <10833894+tarekgh@users.noreply.github.com> Date: Sun, 12 Oct 2025 17:32:50 -0700 Subject: [PATCH 13/15] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../src/System/Globalization/CompareInfo.iOS.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 80f101fd1e64f8..860ff0e2a08e20 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -255,10 +255,10 @@ public SymbolFilteringBuffer() /// populated if index mapping is needed. This method does not modify the original input. /// The input string to be filtered, represented as a read-only span of UTF-16 characters. /// A stack-allocated buffer to receive the filtered characters. If this buffer is not long enough to hold the filtered result, - /// the method may fall to use the ArrayPool. + /// the method may fall back to using the ArrayPool. /// /// A stack-allocated buffer to receive the mapping from filtered character positions to their original indices - /// in the input. if this span is empty, means ni need to create the index mapping. + /// in the input. if this span is empty, means no need to create the index mapping. /// true if the string is filtered removing symbols from there. false if there is no symbols found in the input. internal bool TryFilterString( ReadOnlySpan input, From 3f257a7a0e9abb5eb4e2cec3c6ed7292eefc0c90 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 13 Oct 2025 09:57:55 +0200 Subject: [PATCH 14/15] refactor naming to be more descriptive --- .../System/Globalization/CompareInfo.iOS.cs | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs index 860ff0e2a08e20..f2976bc073c731 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.iOS.cs @@ -69,25 +69,25 @@ private unsafe int IndexOfCoreNative(scoped ReadOnlySpan target, scoped Re bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0; - using SymbolFilteringBuffer buffer1 = new SymbolFilteringBuffer(); - using SymbolFilteringBuffer buffer2 = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer targetBuffer = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer sourceBuffer = new SymbolFilteringBuffer(); - Span stackBuffer1 = stackalloc char[StackAllocThreshold]; - Span stackBuffer2 = stackalloc char[StackAllocThreshold]; - Span stackIndexMap = stackalloc int[StackAllocThreshold]; + Span stackTargetBuffer = stackalloc char[StackAllocThreshold]; + Span stackSourceBuffer = stackalloc char[StackAllocThreshold]; + Span stackSourceIndexMap = stackalloc int[StackAllocThreshold]; // If we are ignoring symbols, preprocess the strings by removing specified Unicode categories. if (ignoreSymbols) { - target = !buffer1.TryFilterString(target, stackBuffer1, Span.Empty) ? target : - buffer1.RentedFilteredBuffer is not null ? - buffer1.RentedFilteredBuffer.AsSpan(0, buffer1.FilteredLength) : - stackBuffer1.Slice(0, buffer1.FilteredLength); + target = !targetBuffer.TryFilterString(target, stackTargetBuffer, Span.Empty) ? target : + targetBuffer.RentedFilteredBuffer is not null ? + targetBuffer.RentedFilteredBuffer.AsSpan(0, targetBuffer.FilteredLength) : + stackTargetBuffer.Slice(0, targetBuffer.FilteredLength); - source = !buffer2.TryFilterString(source, stackBuffer2, stackIndexMap) ? source : - buffer2.RentedFilteredBuffer is not null ? - buffer2.RentedFilteredBuffer.AsSpan(0, buffer2.FilteredLength) : - stackBuffer2.Slice(0, buffer2.FilteredLength); + source = !sourceBuffer.TryFilterString(source, stackSourceBuffer, stackSourceIndexMap) ? source : + sourceBuffer.RentedFilteredBuffer is not null ? + sourceBuffer.RentedFilteredBuffer.AsSpan(0, sourceBuffer.FilteredLength) : + stackSourceBuffer.Slice(0, sourceBuffer.FilteredLength); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; @@ -107,17 +107,17 @@ buffer2.RentedFilteredBuffer is not null ? int nativeLength = result.Length; // If not ignoring symbols / nothing found / an error code / no index map (no symbols found in source), just propagate. - // buffer2.FilteredLength == 0 means no symbols were found in source, so no index map was created. - if (!ignoreSymbols || buffer2.FilteredLength == 0 || nativeLocation < 0) + // sourceBuffer.FilteredLength == 0 means no symbols were found in source, so no index map was created. + if (!ignoreSymbols || sourceBuffer.FilteredLength == 0 || nativeLocation < 0) { if (matchLengthPtr != null) *matchLengthPtr = nativeLength; return nativeLocation; } - Span rentedIndexMap = buffer2.RentedIndexMapBuffer is not null ? - buffer2.RentedIndexMapBuffer.AsSpan(0, buffer2.FilteredLength) : - stackIndexMap.Slice(0, buffer2.FilteredLength); + Span rentedIndexMap = sourceBuffer.RentedIndexMapBuffer is not null ? + sourceBuffer.RentedIndexMapBuffer.AsSpan(0, sourceBuffer.FilteredLength) : + stackSourceIndexMap.Slice(0, sourceBuffer.FilteredLength); // If ignoring symbols, map filtered indices back to original indices, expanding match length to include removed symbol chars inside the span. int originalStart = rentedIndexMap[nativeLocation]; @@ -153,24 +153,24 @@ private unsafe bool NativeStartsWith(scoped ReadOnlySpan prefix, scoped Re { AssertComparisonSupported(options); - using SymbolFilteringBuffer buffer1 = new SymbolFilteringBuffer(); - using SymbolFilteringBuffer buffer2 = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer prefixBuffer = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer sourceBuffer = new SymbolFilteringBuffer(); - Span stackBuffer1 = stackalloc char[StackAllocThreshold]; - Span stackBuffer2 = stackalloc char[StackAllocThreshold]; + Span stackPrefixBuffer = stackalloc char[StackAllocThreshold]; + Span stackSourceBuffer = stackalloc char[StackAllocThreshold]; // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - prefix = !buffer1.TryFilterString(prefix, stackBuffer1, Span.Empty) ? prefix : - buffer1.RentedFilteredBuffer is not null ? - buffer1.RentedFilteredBuffer.AsSpan(0, buffer1.FilteredLength) : - stackBuffer1.Slice(0, buffer1.FilteredLength); + prefix = !prefixBuffer.TryFilterString(prefix, stackPrefixBuffer, Span.Empty) ? prefix : + prefixBuffer.RentedFilteredBuffer is not null ? + prefixBuffer.RentedFilteredBuffer.AsSpan(0, prefixBuffer.FilteredLength) : + stackPrefixBuffer.Slice(0, prefixBuffer.FilteredLength); - source = !buffer2.TryFilterString(source, stackBuffer2, Span.Empty) ? source : - buffer2.RentedFilteredBuffer is not null ? - buffer2.RentedFilteredBuffer.AsSpan(0, buffer2.FilteredLength) : - stackBuffer2.Slice(0, buffer2.FilteredLength); + source = !sourceBuffer.TryFilterString(source, stackSourceBuffer, Span.Empty) ? source : + sourceBuffer.RentedFilteredBuffer is not null ? + sourceBuffer.RentedFilteredBuffer.AsSpan(0, sourceBuffer.FilteredLength) : + stackSourceBuffer.Slice(0, sourceBuffer.FilteredLength); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; @@ -189,24 +189,24 @@ private unsafe bool NativeEndsWith(scoped ReadOnlySpan suffix, scoped Read { AssertComparisonSupported(options); - using SymbolFilteringBuffer buffer1 = new SymbolFilteringBuffer(); - using SymbolFilteringBuffer buffer2 = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer suffixBuffer = new SymbolFilteringBuffer(); + using SymbolFilteringBuffer sourceBuffer = new SymbolFilteringBuffer(); - Span stackBuffer1 = stackalloc char[StackAllocThreshold]; - Span stackBuffer2 = stackalloc char[StackAllocThreshold]; + Span stackSuffixBuffer = stackalloc char[StackAllocThreshold]; + Span stackSourceBuffer = stackalloc char[StackAllocThreshold]; // Handle IgnoreSymbols preprocessing if ((options & CompareOptions.IgnoreSymbols) != 0) { - suffix = !buffer1.TryFilterString(suffix, stackBuffer1, Span.Empty) ? suffix : - buffer1.RentedFilteredBuffer is not null ? - buffer1.RentedFilteredBuffer.AsSpan(0, buffer1.FilteredLength) : - stackBuffer1.Slice(0, buffer1.FilteredLength); - - source = !buffer2.TryFilterString(source, stackBuffer2, Span.Empty) ? source : - buffer2.RentedFilteredBuffer is not null ? - buffer2.RentedFilteredBuffer.AsSpan(0, buffer2.FilteredLength) : - stackBuffer2.Slice(0, buffer2.FilteredLength); + suffix = !suffixBuffer.TryFilterString(suffix, stackSuffixBuffer, Span.Empty) ? suffix : + suffixBuffer.RentedFilteredBuffer is not null ? + suffixBuffer.RentedFilteredBuffer.AsSpan(0, suffixBuffer.FilteredLength) : + stackSuffixBuffer.Slice(0, suffixBuffer.FilteredLength); + + source = !sourceBuffer.TryFilterString(source, stackSourceBuffer, Span.Empty) ? source : + sourceBuffer.RentedFilteredBuffer is not null ? + sourceBuffer.RentedFilteredBuffer.AsSpan(0, sourceBuffer.FilteredLength) : + stackSourceBuffer.Slice(0, sourceBuffer.FilteredLength); // Remove the flag before passing to native since we handled it here options &= ~CompareOptions.IgnoreSymbols; From 75e79a5f1e8806a582e703a5a8a2f63f7709a634 Mon Sep 17 00:00:00 2001 From: Matous Kozak Date: Mon, 13 Oct 2025 10:58:16 +0200 Subject: [PATCH 15/15] fix test cases and update documentation --- docs/design/features/globalization-hybrid-mode.md | 10 +++------- .../CompareInfo/CompareInfoTests.IndexOf.cs | 12 ++++++------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/docs/design/features/globalization-hybrid-mode.md b/docs/design/features/globalization-hybrid-mode.md index 9840c30fed0bba..7ac8a86411a1dd 100644 --- a/docs/design/features/globalization-hybrid-mode.md +++ b/docs/design/features/globalization-hybrid-mode.md @@ -30,7 +30,7 @@ Due to these differences, the exact result of string compariso on Apple platform The number of `CompareOptions` and `NSStringCompareOptions` combinations are limited. Originally supported combinations can be found [here for CompareOptions](https://learn.microsoft.com/dotnet/api/system.globalization.compareoptions) and [here for NSStringCompareOptions](https://developer.apple.com/documentation/foundation/nsstringcompareoptions). -- `IgnoreSymbols` is not supported because there is no equivalent in native api. Throws `PlatformNotSupportedException`. +- `IgnoreSymbols` is supported by filtering ignorable symbols on the managed side before invoking the native API. - `IgnoreKanaType` is implemented using [`kCFStringTransformHiraganaKatakana`](https://developer.apple.com/documentation/corefoundation/kcfstringtransformhiraganakatakana?language=objc) then comparing strings. @@ -71,10 +71,6 @@ The number of `CompareOptions` and `NSStringCompareOptions` combinations are lim `CompareOptions.IgnoreWidth` is mapped to `NSStringCompareOptions.NSWidthInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch` -- All combinations that contain below `CompareOptions` always throw `PlatformNotSupportedException`: - - `IgnoreSymbols` - ## String starts with / ends with Affected public APIs: @@ -91,7 +87,7 @@ Apple Native API does not expose locale-sensitive endsWith/startsWith function. - `IgnoreSymbols` - As there is no IgnoreSymbols equivalent in NSStringCompareOptions all `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException` + Supported by filtering ignorable symbols on the managed side prior to comparison using native API. ## String indexing @@ -129,7 +125,7 @@ Not covered case: - `IgnoreSymbols` - As there is no IgnoreSymbols equivalent in NSStringCompareOptions all `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException` + Supported by filtering ignorable symbols on the managed side prior to comparison using native API. - Some letters consist of more than one grapheme. diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs index 44d8fb00369d4f..25c44d4f0921e8 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -108,30 +108,30 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_invariantCompare, "Test\ting\n", "Testing", 0, 9, CompareOptions.IgnoreSymbols, 0, 8 }; // Tab and newline // Ignore symbols - multiple whitespace and punctuation - yield return new object[] { s_invariantCompare, " Testing, ", "Testing", 0, 11, CompareOptions.IgnoreSymbols, 2, 7 }; + yield return new object[] { s_invariantCompare, " Testing, ", "Testing", 0, 12, CompareOptions.IgnoreSymbols, 2, 7 }; yield return new object[] { s_invariantCompare, "'Te st i ng!", "Testing", 0, 13, CompareOptions.IgnoreSymbols, 1, 11 }; // Ignore symbols - mixed categories yield return new object[] { s_invariantCompare, "$Te%s t&ing+", "Testing", 0, 12, CompareOptions.IgnoreSymbols, 1, 10 }; - yield return new object[] { s_invariantCompare, ", Hello World!", "HelloWorld", 0, 13, CompareOptions.IgnoreSymbols, 2, 11 }; + yield return new object[] { s_invariantCompare, ", Hello World!", "HelloWorld", 0, 14, CompareOptions.IgnoreSymbols, 2, 11 }; // Ignore symbols - source contains surrogates (to match) + symbols (to ignore) yield return new object[] { s_invariantCompare, "$\U0001D7D8Testing!", "\U0001D7D8Testing", 0, 11, CompareOptions.IgnoreSymbols, 1, 9 }; yield return new object[] { s_invariantCompare, "Test$\U0001D7D8ing", "Test\U0001D7D8ing", 0, 10, CompareOptions.IgnoreSymbols, 0, 10 }; yield return new object[] { s_invariantCompare, "$Testing\U0001D7DA", "Testing\U0001D7DA", 0, 10, CompareOptions.IgnoreSymbols, 1, 9 }; yield return new object[] { s_invariantCompare, "$\U0001D7D8Test!\U0001D7D9ing", "\U0001D7D8Test\U0001D7D9ing", 0, 13, CompareOptions.IgnoreSymbols, 1, 12 }; - yield return new object[] { s_invariantCompare, "\U0001D7D8 Test$ \U0001D7D9 ing!", "\U0001D7D8 Test \U0001D7D9 ing", 0, 15, CompareOptions.IgnoreSymbols, 0, 15 }; - yield return new object[] { s_invariantCompare, "!$\U0001D7D8Test\U0001D7D9ing\U0001D7DA!", "\U0001D7D8Test\U0001D7D9ing\U0001D7DA", 0, 15, CompareOptions.IgnoreSymbols, 2, 13 }; + yield return new object[] { s_invariantCompare, "\U0001D7D8 Test$ \U0001D7D9 ing!", "\U0001D7D8 Test \U0001D7D9 ing", 0, 16, CompareOptions.IgnoreSymbols, 0, 15 }; + yield return new object[] { s_invariantCompare, "!$\U0001D7D8Test\U0001D7D9ing\U0001D7DA!", "\U0001D7D8Test\U0001D7D9ing\U0001D7DA", 0, 16, CompareOptions.IgnoreSymbols, 2, 13 }; // With symbols - should not match yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.None, -1, 0 }; yield return new object[] { s_invariantCompare, "Tes ting", "Testing", 0, 8, CompareOptions.None, -1, 0 }; - yield return new object[] { s_invariantCompare, "'Te st i ng!", "Testing", 0, 11, CompareOptions.IgnoreSymbols, -1, 0 }; + yield return new object[] { s_invariantCompare, "'Te st i ng!", "Testing", 0, 11, CompareOptions.IgnoreSymbols, -1, 0 }; // Not enough characters to match // Ignore symbols - long strings (over 256 chars) to test ArrayPool buffer allocation on iOS if (PlatformDetection.IsHybridGlobalizationOnApplePlatform) { - yield return new object[] { s_invariantCompare, new string('a', 100) + new string('b', 50) + "$" + new string('b', 50) + "!" + new string('c', 100), new string('b', 100), 0, 303, CompareOptions.IgnoreSymbols, 100, 102 }; + yield return new object[] { s_invariantCompare, new string('a', 100) + new string('b', 50) + "$" + new string('b', 50) + "!" + new string('c', 100), new string('b', 100), 0, 302, CompareOptions.IgnoreSymbols, 100, 101 }; yield return new object[] { s_invariantCompare, new string('a', 100) + new string('b', 100) + new string('c', 100), new string('b', 100), 0, 300, CompareOptions.IgnoreSymbols, 100, 100 }; }