From f5e0842280b09d2f6ec4ceeeac144dbdc031ce57 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 13:44:22 +0000 Subject: [PATCH 01/21] Initial plan From 81ddd696bc93cd90e44ed1e7bd70c96b00f1fff1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 14:18:41 +0000 Subject: [PATCH 02/21] Add TryGetAscii and TryGetUnicode span-based APIs to IdnMapping Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../System/Globalization/CharUnicodeInfo.cs | 27 +++ .../System/Globalization/IdnMapping.Icu.cs | 55 ++++- .../System/Globalization/IdnMapping.Nls.cs | 74 ++++++- .../src/System/Globalization/IdnMapping.cs | 204 ++++++++++++++---- .../System.Runtime/ref/System.Runtime.cs | 2 + 5 files changed, 309 insertions(+), 53 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs index 1b13711ff1e59c..b93949ed5b0fed 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs @@ -91,6 +91,33 @@ private static StrongBidiCategory GetBidiCategoryNoBoundsChecks(uint codePoint) return bidiCategory; } + internal static StrongBidiCategory GetBidiCategory(ReadOnlySpan s, int index) + { + Debug.Assert(index >= 0 && index < s.Length, "index < s.Length"); + + // The logic below follows Table 3-5 in the Unicode Standard, Sec. 3.9. 
+ // First char (high surrogate) = 110110wwwwxxxxxx + // Second char (low surrogate) = 110111xxxxxxxxxx + + int c = (int)s[index]; + if (index < s.Length - 1) + { + int temp1 = c - HIGH_SURROGATE_START; // temp1 = 000000wwwwxxxxxx + if ((uint)temp1 <= HIGH_SURROGATE_RANGE) + { + int temp2 = (int)s[index + 1] - LOW_SURROGATE_START; // temp2 = 000000xxxxxxxxxx + if ((uint)temp2 <= HIGH_SURROGATE_RANGE) + { + // |--------temp1--||-temp2--| + // 00000uuuuuuxxxxxxxxxxxxxxxx (where uuuuu = wwww + 1) + c = (temp1 << 10) + temp2 + UNICODE_PLANE01_START; + } + } + } + + return GetBidiCategoryNoBoundsChecks((uint)c); + } + /* * GetDecimalDigitValue * ==================== diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs index 142c2b01be8032..745d57a967af86 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs @@ -7,11 +7,10 @@ namespace System.Globalization { public sealed partial class IdnMapping { - private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int count) + private unsafe string IcuGetAsciiCore(string? 
unicodeString, char* unicode, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(unicodeString != null && unicodeString.Length >= count); uint flags = IcuFlags; CheckInvalidIdnCharacters(unicode, count, flags, nameof(unicode)); @@ -51,11 +50,33 @@ private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int c } } - private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int count) + private unsafe bool IcuTryGetAsciiCore(char* unicode, int count, char* destination, int destinationLength, out int charsWritten) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + + uint flags = IcuFlags; + CheckInvalidIdnCharacters(unicode, count, flags, nameof(unicode)); + + int actualLength = Interop.Globalization.ToAscii(flags, unicode, count, destination, destinationLength); + if (actualLength == 0) + { + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); + } + if (actualLength <= destinationLength) + { + charsWritten = actualLength; + return true; + } + + charsWritten = 0; + return false; + } + + private unsafe string IcuGetUnicodeCore(string? asciiString, char* ascii, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(asciiString != null && asciiString.Length >= count); uint flags = IcuFlags; CheckInvalidIdnCharacters(ascii, count, flags, nameof(ascii)); @@ -76,11 +97,10 @@ private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int cou } } - private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int count, uint flags, char* output, int outputLength, bool reattempt) + private unsafe string IcuGetUnicodeCore(string? 
asciiString, char* ascii, int count, uint flags, char* output, int outputLength, bool reattempt) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(asciiString != null && asciiString.Length >= count); int realLen = Interop.Globalization.ToUnicode(flags, ascii, count, output, outputLength); @@ -104,6 +124,29 @@ private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int cou throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); } + private unsafe bool IcuTryGetUnicodeCore(char* ascii, int count, char* destination, int destinationLength, out int charsWritten) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + + uint flags = IcuFlags; + CheckInvalidIdnCharacters(ascii, count, flags, nameof(ascii)); + + int actualLength = Interop.Globalization.ToUnicode(flags, ascii, count, destination, destinationLength); + if (actualLength == 0) + { + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); + } + if (actualLength <= destinationLength) + { + charsWritten = actualLength; + return true; + } + + charsWritten = 0; + return false; + } + private uint IcuFlags { get diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs index f4569191426fa2..697b642bcfde05 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs @@ -9,11 +9,10 @@ namespace System.Globalization { public sealed partial class IdnMapping { - private unsafe string NlsGetAsciiCore(string unicodeString, char* unicode, int count) + private unsafe string NlsGetAsciiCore(string? 
unicodeString, char* unicode, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(unicodeString != null && unicodeString.Length >= count); uint flags = NlsFlags; @@ -41,11 +40,10 @@ private unsafe string NlsGetAsciiCore(string unicodeString, char* unicode, int c } } - private static unsafe string NlsGetAsciiCore(string unicodeString, char* unicode, int count, uint flags, char* output, int outputLength) + private static unsafe string NlsGetAsciiCore(string? unicodeString, char* unicode, int count, uint flags, char* output, int outputLength) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(unicodeString != null && unicodeString.Length >= count); int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, output, outputLength); if (length == 0) @@ -56,11 +54,41 @@ private static unsafe string NlsGetAsciiCore(string unicodeString, char* unicode return GetStringForOutput(unicodeString, unicode, count, output, length); } - private unsafe string NlsGetUnicodeCore(string asciiString, char* ascii, int count) + private unsafe bool NlsTryGetAsciiCore(char* unicode, int count, char* destination, int destinationLength, out int charsWritten) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(GlobalizationMode.UseNls); + + uint flags = NlsFlags; + + // Determine the required length + int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, null, 0); + if (length == 0) + { + ThrowForZeroLength(unicode: true); + } + + if (length > destinationLength) + { + charsWritten = 0; + return false; + } + + // Do the conversion + int actualLength = Interop.Normaliz.IdnToAscii(flags, unicode, count, destination, destinationLength); + if (actualLength == 0) + { + ThrowForZeroLength(unicode: true); + } + + charsWritten = actualLength; + return true; + } + + private unsafe string NlsGetUnicodeCore(string? 
asciiString, char* ascii, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(asciiString != null && asciiString.Length >= count); uint flags = NlsFlags; @@ -88,11 +116,10 @@ private unsafe string NlsGetUnicodeCore(string asciiString, char* ascii, int cou } } - private static unsafe string NlsGetUnicodeCore(string asciiString, char* ascii, int count, uint flags, char* output, int outputLength) + private static unsafe string NlsGetUnicodeCore(string? asciiString, char* ascii, int count, uint flags, char* output, int outputLength) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(asciiString != null && asciiString.Length >= count); int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, output, outputLength); if (length == 0) @@ -103,6 +130,37 @@ private static unsafe string NlsGetUnicodeCore(string asciiString, char* ascii, return GetStringForOutput(asciiString, ascii, count, output, length); } + private unsafe bool NlsTryGetUnicodeCore(char* ascii, int count, char* destination, int destinationLength, out int charsWritten) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(GlobalizationMode.UseNls); + + uint flags = NlsFlags; + + // Determine the required length + int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, null, 0); + if (length == 0) + { + ThrowForZeroLength(unicode: false); + } + + if (length > destinationLength) + { + charsWritten = 0; + return false; + } + + // Do the conversion + int actualLength = Interop.Normaliz.IdnToUnicode(flags, ascii, count, destination, destinationLength); + if (actualLength == 0) + { + ThrowForZeroLength(unicode: false); + } + + charsWritten = actualLength; + return true; + } + private uint NlsFlags { get diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 
9d664c583648fa..473336d365572d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -26,6 +26,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Text; namespace System.Globalization @@ -74,27 +75,73 @@ public string GetAscii(string unicode, int index, int count) if (index > unicode.Length - count) throw new ArgumentOutOfRangeException(nameof(unicode), SR.ArgumentOutOfRange_IndexCountBuffer); - if (count == 0) + return GetAsciiCore(unicode, unicode.AsSpan(index, count)); + } + + /// + /// Encodes a substring of domain name labels that include Unicode characters outside the ASCII character range (U+0000 to U+007F) to a displayable Unicode string. + /// + /// The Unicode domain name to convert. + /// The buffer to write the ASCII result to. + /// When this method returns, contains the number of characters that were written to . + /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. + /// is invalid based on the and properties, and the IDNA standard. + public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out int charsWritten) + { + return TryGetAsciiCore(unicode, destination, out charsWritten); + } + + private string GetAsciiCore(string? originalUnicode, ReadOnlySpan unicode) + { + if (unicode.Length == 0) + { + throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode)); + } + if (unicode[^1] == 0) + { + throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); + } + + if (GlobalizationMode.Invariant) + { + return GetAsciiInvariant(unicode); + } + + unsafe + { + fixed (char* pUnicode = &MemoryMarshal.GetReference(unicode)) + { + return GlobalizationMode.UseNls ? 
+ NlsGetAsciiCore(originalUnicode, pUnicode, unicode.Length) : + IcuGetAsciiCore(originalUnicode, pUnicode, unicode.Length); + } + } + } + + private bool TryGetAsciiCore(ReadOnlySpan unicode, Span destination, out int charsWritten) + { + if (unicode.Length == 0) { throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode)); } - if (unicode[index + count - 1] == 0) + if (unicode[^1] == 0) { - throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, index + count - 1), nameof(unicode)); + throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); } if (GlobalizationMode.Invariant) { - return GetAsciiInvariant(unicode, index, count); + return TryGetAsciiInvariant(unicode, destination, out charsWritten); } unsafe { - fixed (char* pUnicode = unicode) + fixed (char* pUnicode = &MemoryMarshal.GetReference(unicode)) + fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) { return GlobalizationMode.UseNls ? - NlsGetAsciiCore(unicode, pUnicode + index, count) : - IcuGetAsciiCore(unicode, pUnicode + index, count); + NlsTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten) : + IcuTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten); } } } @@ -124,21 +171,67 @@ public string GetUnicode(string ascii, int index, int count) // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. // The Win32 APIs fail on an embedded null, but not on a terminating null. - if (count > 0 && ascii[index + count - 1] == (char)0) + return GetUnicodeCore(ascii, ascii.AsSpan(index, count)); + } + + /// + /// Decodes one or more encoded domain name labels to a string of Unicode characters. + /// + /// The ASCII domain name to convert. 
The string may contain one or more labels, where each label is prefixed by "xn--". + /// The buffer to write the Unicode result to. + /// When this method returns, contains the number of characters that were written to . + /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. + /// is invalid based on the and properties, and the IDNA standard. + public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out int charsWritten) + { + return TryGetUnicodeCore(ascii, destination, out charsWritten); + } + + private string GetUnicodeCore(string? originalAscii, ReadOnlySpan ascii) + { + // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. + // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. + // The Win32 APIs fail on an embedded null, but not on a terminating null. + if (ascii.Length > 0 && ascii[^1] == (char)0) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + + if (GlobalizationMode.Invariant) + { + return GetUnicodeInvariant(ascii); + } + + unsafe + { + fixed (char* pAscii = &MemoryMarshal.GetReference(ascii)) + { + return GlobalizationMode.UseNls ? + NlsGetUnicodeCore(originalAscii, pAscii, ascii.Length) : + IcuGetUnicodeCore(originalAscii, pAscii, ascii.Length); + } + } + } + + private bool TryGetUnicodeCore(ReadOnlySpan ascii, Span destination, out int charsWritten) + { + // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. + // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. + // The Win32 APIs fail on an embedded null, but not on a terminating null. 
+ if (ascii.Length > 0 && ascii[^1] == (char)0) throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); if (GlobalizationMode.Invariant) { - return GetUnicodeInvariant(ascii, index, count); + return TryGetUnicodeInvariant(ascii, destination, out charsWritten); } unsafe { - fixed (char* pAscii = ascii) + fixed (char* pAscii = &MemoryMarshal.GetReference(ascii)) + fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) { return GlobalizationMode.UseNls ? - NlsGetUnicodeCore(ascii, pAscii + index, count) : - IcuGetUnicodeCore(ascii, pAscii + index, count); + NlsTryGetUnicodeCore(pAscii, ascii.Length, pDestination, destination.Length, out charsWritten) : + IcuTryGetUnicodeCore(pAscii, ascii.Length, pDestination, destination.Length, out charsWritten); } } } @@ -152,11 +245,12 @@ public override int GetHashCode() => (_allowUnassigned ? 100 : 200) + (_useStd3AsciiRules ? 1000 : 2000); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe string GetStringForOutput(string originalString, char* input, int inputLength, char* output, int outputLength) + private static unsafe string GetStringForOutput(string? 
originalString, char* input, int inputLength, char* output, int outputLength) { Debug.Assert(inputLength > 0); - if (originalString.Length == inputLength && + if (originalString is not null && + originalString.Length == inputLength && inputLength == outputLength && Ordinal.EqualsIgnoreCase(ref *input, ref *output, inputLength)) { @@ -183,22 +277,17 @@ private static unsafe string GetStringForOutput(string originalString, char* inp private const int c_skew = 38; private const int c_damp = 700; - private string GetAsciiInvariant(string unicode, int index, int count) + private string GetAsciiInvariant(ReadOnlySpan unicode) { - if (index > 0 || count < unicode.Length) - { - unicode = unicode.Substring(index, count); - } - // Check for ASCII only string, which will be unchanged if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true)) { - return unicode; + return unicode.ToString(); } // Cannot be null terminated (normalization won't help us with this one, and // may have returned false before checking the whole string above) - Debug.Assert(count >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now."); + Debug.Assert(unicode.Length >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now."); if (unicode[^1] <= 0x1f) { throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); @@ -214,8 +303,22 @@ private string GetAsciiInvariant(string unicode, int index, int count) return PunycodeEncode(unicode); } + private bool TryGetAsciiInvariant(ReadOnlySpan unicode, Span destination, out int charsWritten) + { + string result = GetAsciiInvariant(unicode); + if (result.Length <= destination.Length) + { + result.CopyTo(destination); + charsWritten = result.Length; + return true; + } + + charsWritten = 0; + return false; + } + // See if we're only ASCII - private static bool ValidateStd3AndAscii(string unicode, bool bUseStd3, bool bCheckAscii) + private static bool 
ValidateStd3AndAscii(ReadOnlySpan unicode, bool bUseStd3, bool bCheckAscii) { // If its empty, then its too small if (unicode.Length == 0) @@ -304,7 +407,7 @@ private static bool ValidateStd3AndAscii(string unicode, bool bUseStd3, bool bCh /* value can be any of the punycode_status values defined above */ /* except punycode_bad_input; if not punycode_success, then */ /* output_size and output might contain garbage. */ - private static string PunycodeEncode(string unicode) + private static string PunycodeEncode(ReadOnlySpan unicode) { // 0 length strings aren't allowed if (unicode.Length == 0) @@ -322,7 +425,7 @@ private static string PunycodeEncode(string unicode) const string DotSeparators = ".\u3002\uFF0E\uFF61"; // Find end of this segment - iNextDot = unicode.AsSpan(iAfterLastDot).IndexOfAny(DotSeparators); + iNextDot = unicode.Slice(iAfterLastDot).IndexOfAny(DotSeparators); iNextDot = iNextDot < 0 ? unicode.Length : iNextDot + iAfterLastDot; // Only allowed to have empty . section at end (www.microsoft.com.) @@ -350,7 +453,7 @@ private static string PunycodeEncode(string unicode) // Check last char int iTest = iNextDot - 1; - if (char.IsLowSurrogate(unicode, iTest)) + if (char.IsLowSurrogate(unicode[iTest])) { iTest--; } @@ -369,7 +472,7 @@ private static string PunycodeEncode(string unicode) for (basicCount = iAfterLastDot; basicCount < iNextDot; basicCount++) { // Can't be lonely surrogate because it would've thrown in normalization - Debug.Assert(!char.IsLowSurrogate(unicode, basicCount), "[IdnMapping.punycode_encode]Unexpected low surrogate"); + Debug.Assert(!char.IsLowSurrogate(unicode[basicCount]), "[IdnMapping.punycode_encode]Unexpected low surrogate"); // Double check our bidi rules StrongBidiCategory testBidi = CharUnicodeInfo.GetBidiCategory(unicode, basicCount); @@ -395,7 +498,7 @@ private static string PunycodeEncode(string unicode) numProcessed++; } // If its a surrogate, skip the next since our bidi category tester doesn't handle it. 
- else if (char.IsSurrogatePair(unicode, basicCount)) + else if (char.IsSurrogatePair(unicode[basicCount], unicode[basicCount + 1])) basicCount++; } @@ -410,7 +513,7 @@ private static string PunycodeEncode(string unicode) else { // If it has some non-basic code points the input cannot start with xn-- - if (unicode.AsSpan(iAfterLastDot).StartsWith(c_strAcePrefix, StringComparison.OrdinalIgnoreCase)) + if (unicode.Slice(iAfterLastDot).StartsWith(c_strAcePrefix, StringComparison.OrdinalIgnoreCase)) throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(unicode)); // Need to do ACE encoding @@ -439,7 +542,7 @@ private static string PunycodeEncode(string unicode) j < iNextDot; j += IsSupplementary(test) ? 2 : 1) { - test = char.ConvertToUtf32(unicode, j); + test = GetCodePoint(unicode, j); if (test >= n && test < m) m = test; } @@ -452,7 +555,7 @@ private static string PunycodeEncode(string unicode) for (j = iAfterLastDot; j < iNextDot; j += IsSupplementary(test) ? 2 : 1) { // Make sure we're aware of surrogates - test = char.ConvertToUtf32(unicode, j); + test = GetCodePoint(unicode, j); // Adjust for character position (only the chars in our string already, some // haven't been processed. @@ -528,6 +631,17 @@ private static bool Basic(uint cp) => // Is it in ASCII range? cp < 0x80; + private static int GetCodePoint(ReadOnlySpan s, int index) + { + // Check if the character at index is a high surrogate. 
+ if (char.IsHighSurrogate(s[index]) && index + 1 < s.Length && char.IsLowSurrogate(s[index + 1])) + { + return char.ConvertToUtf32(s[index], s[index + 1]); + } + + return s[index]; + } + // Validate Std3 rules for a character private static void ValidateStd3(char c, bool bNextToDot) { @@ -538,23 +652,35 @@ private static void ValidateStd3(char c, bool bNextToDot) throw new ArgumentException(SR.Format(SR.Argument_IdnBadStd3, c), nameof(c)); } - private string GetUnicodeInvariant(string ascii, int index, int count) + private string GetUnicodeInvariant(ReadOnlySpan ascii) { - if (index > 0 || count < ascii.Length) - { - // We're only using part of the string - ascii = ascii.Substring(index, count); - } + // Convert the span to a string for PunycodeDecode since it uses string operations extensively + string asciiString = ascii.ToString(); + // Convert Punycode to Unicode - string strUnicode = PunycodeDecode(ascii); + string strUnicode = PunycodeDecode(asciiString); // Output name MUST obey IDNA rules & round trip (casing differences are allowed) - if (!ascii.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase)) + if (!asciiString.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase)) throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); return strUnicode; } + private bool TryGetUnicodeInvariant(ReadOnlySpan ascii, Span destination, out int charsWritten) + { + string result = GetUnicodeInvariant(ascii); + if (result.Length <= destination.Length) + { + result.CopyTo(destination); + charsWritten = result.Length; + return true; + } + + charsWritten = 0; + return false; + } + /* PunycodeDecode() converts Punycode to Unicode. The input is */ /* represented as an array of ASCII code points, and the output */ /* will be represented as an array of Unicode code points. 
The */ diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 841e4e04c88fc0..5634545a383125 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -9626,6 +9626,8 @@ public IdnMapping() { } public string GetUnicode(string ascii) { throw null; } public string GetUnicode(string ascii, int index) { throw null; } public string GetUnicode(string ascii, int index, int count) { throw null; } + public bool TryGetAscii(System.ReadOnlySpan unicode, System.Span destination, out int charsWritten) { throw null; } + public bool TryGetUnicode(System.ReadOnlySpan ascii, System.Span destination, out int charsWritten) { throw null; } } public static partial class ISOWeek { From a4f93e4cc91bd48fd2b7078e3528dbe11bf29a28 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 14:21:01 +0000 Subject: [PATCH 03/21] Add tests for TryGetAscii and TryGetUnicode methods Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../IdnMapping/IdnMappingGetAsciiTests.cs | 68 +++++++++++++++++++ .../IdnMapping/IdnMappingGetUnicodeTests.cs | 59 ++++++++++++++++ 2 files changed, 127 insertions(+) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs index 595b005c96d918..1bc69c9bec7b84 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs @@ -176,5 +176,73 @@ public void TestStringWithHyphenIn3rdAnd4thPlace() // Ensure we are not throwing on Linux because of the 3rd and 4th hyphens in the string. 
Assert.Equal(unicode, new IdnMapping().GetAscii(unicode)); } + + [Theory] + [MemberData(nameof(GetAscii_TestData))] + public void TryGetAscii(string unicode, int index, int count, string expected) + { + var idn = new IdnMapping(); + ReadOnlySpan unicodeSpan = unicode.AsSpan(index, count); + + // Test with exact size buffer + char[] destination = new char[expected.Length]; + Assert.True(idn.TryGetAscii(unicodeSpan, destination, out int charsWritten)); + Assert.Equal(expected.Length, charsWritten); + // IDN names are case-insensitive; the underlying API may lowercase the output + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + + // Test with larger buffer + destination = new char[expected.Length + 10]; + Assert.True(idn.TryGetAscii(unicodeSpan, destination, out charsWritten)); + Assert.Equal(expected.Length, charsWritten); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + } + + [Theory] + [MemberData(nameof(GetAscii_TestData))] + public void TryGetAscii_BufferTooSmall(string unicode, int index, int count, string expected) + { + if (expected.Length == 0) + { + return; + } + + var idn = new IdnMapping(); + ReadOnlySpan unicodeSpan = unicode.AsSpan(index, count); + + // Test with buffer that is too small + char[] destination = new char[expected.Length - 1]; + Assert.False(idn.TryGetAscii(unicodeSpan, destination, out int charsWritten)); + Assert.Equal(0, charsWritten); + } + + [Fact] + public void TryGetAscii_EmptyBuffer() + { + var idn = new IdnMapping(); + + // Test with empty destination when result would be non-empty + Assert.False(idn.TryGetAscii("abc", Span.Empty, out int charsWritten)); + Assert.Equal(0, charsWritten); + } + + [Theory] + [InlineData("")] + public void TryGetAscii_Empty_ThrowsArgumentException(string unicode) + { + var idn = new IdnMapping(); + char[] destination = new char[100]; + Assert.Throws(() => idn.TryGetAscii(unicode, destination, 
out _)); + } + + [Theory] + [InlineData("\u0101\u0000")] + [InlineData("\u0101\u0000\u0101")] + public void TryGetAscii_NullContaining_ThrowsArgumentException(string unicode) + { + var idn = new IdnMapping(); + char[] destination = new char[100]; + Assert.Throws(() => idn.TryGetAscii(unicode, destination, out _)); + } } } diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs index 35a04512c1595b..16c984ef206cd7 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs @@ -121,5 +121,64 @@ static void getUnicode_Invalid(IdnMapping idnMapping, string ascii, int index, i getUnicode_Invalid(new IdnMapping() { UseStd3AsciiRules = false }, ascii, index, count, exceptionType); getUnicode_Invalid(new IdnMapping() { UseStd3AsciiRules = true }, ascii, index, count, exceptionType); } + + [Theory] + [MemberData(nameof(GetUnicode_TestData))] + public void TryGetUnicode(string ascii, int index, int count, string expected) + { + var idn = new IdnMapping(); + ReadOnlySpan asciiSpan = ascii.AsSpan(index, count); + + // Test with exact size buffer + char[] destination = new char[expected.Length]; + Assert.True(idn.TryGetUnicode(asciiSpan, destination, out int charsWritten)); + Assert.Equal(expected.Length, charsWritten); + // IDN names are case-insensitive; the underlying API may lowercase the output + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + + // Test with larger buffer + destination = new char[expected.Length + 10]; + Assert.True(idn.TryGetUnicode(asciiSpan, destination, out charsWritten)); + Assert.Equal(expected.Length, charsWritten); + 
Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + } + + [Theory] + [MemberData(nameof(GetUnicode_TestData))] + public void TryGetUnicode_BufferTooSmall(string ascii, int index, int count, string expected) + { + if (expected.Length == 0) + { + return; + } + + var idn = new IdnMapping(); + ReadOnlySpan asciiSpan = ascii.AsSpan(index, count); + + // Test with buffer that is too small + char[] destination = new char[expected.Length - 1]; + Assert.False(idn.TryGetUnicode(asciiSpan, destination, out int charsWritten)); + Assert.Equal(0, charsWritten); + } + + [Fact] + public void TryGetUnicode_EmptyBuffer() + { + var idn = new IdnMapping(); + + // Test with empty destination when result would be non-empty + Assert.False(idn.TryGetUnicode("abc", Span.Empty, out int charsWritten)); + Assert.Equal(0, charsWritten); + } + + [Theory] + [InlineData("abc\u0000")] + [InlineData("ab\u0000c")] + public void TryGetUnicode_NullContaining_ThrowsArgumentException(string ascii) + { + var idn = new IdnMapping(); + char[] destination = new char[100]; + Assert.Throws(() => idn.TryGetUnicode(ascii, destination, out _)); + } } } From 511b2c5318a73be45527b48bbc26f46e869e769b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 19:44:11 +0000 Subject: [PATCH 04/21] Remove unnecessary TryGetAsciiCore/TryGetUnicodeCore indirection Co-authored-by: jkotas <6668460+jkotas@users.noreply.github.com> --- .../src/System/Globalization/IdnMapping.cs | 42 +++++++------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 473336d365572d..e093c4889ac5e8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -87,11 +87,6 @@ public string GetAscii(string unicode, int index, int count) /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. /// is invalid based on the and properties, and the IDNA standard. public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out int charsWritten) - { - return TryGetAsciiCore(unicode, destination, out charsWritten); - } - - private string GetAsciiCore(string? originalUnicode, ReadOnlySpan unicode) { if (unicode.Length == 0) { @@ -104,21 +99,22 @@ private string GetAsciiCore(string? originalUnicode, ReadOnlySpan unicode) if (GlobalizationMode.Invariant) { - return GetAsciiInvariant(unicode); + return TryGetAsciiInvariant(unicode, destination, out charsWritten); } unsafe { fixed (char* pUnicode = &MemoryMarshal.GetReference(unicode)) + fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) { return GlobalizationMode.UseNls ? - NlsGetAsciiCore(originalUnicode, pUnicode, unicode.Length) : - IcuGetAsciiCore(originalUnicode, pUnicode, unicode.Length); + NlsTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten) : + IcuTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten); } } } - private bool TryGetAsciiCore(ReadOnlySpan unicode, Span destination, out int charsWritten) + private string GetAsciiCore(string? originalUnicode, ReadOnlySpan unicode) { if (unicode.Length == 0) { @@ -131,17 +127,16 @@ private bool TryGetAsciiCore(ReadOnlySpan unicode, Span destination, if (GlobalizationMode.Invariant) { - return TryGetAsciiInvariant(unicode, destination, out charsWritten); + return GetAsciiInvariant(unicode); } unsafe { fixed (char* pUnicode = &MemoryMarshal.GetReference(unicode)) - fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) { return GlobalizationMode.UseNls ? 
- NlsTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten) : - IcuTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten); + NlsGetAsciiCore(originalUnicode, pUnicode, unicode.Length) : + IcuGetAsciiCore(originalUnicode, pUnicode, unicode.Length); } } } @@ -183,11 +178,6 @@ public string GetUnicode(string ascii, int index, int count) /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. /// is invalid based on the and properties, and the IDNA standard. public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out int charsWritten) - { - return TryGetUnicodeCore(ascii, destination, out charsWritten); - } - - private string GetUnicodeCore(string? originalAscii, ReadOnlySpan ascii) { // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. @@ -197,21 +187,22 @@ private string GetUnicodeCore(string? originalAscii, ReadOnlySpan ascii) if (GlobalizationMode.Invariant) { - return GetUnicodeInvariant(ascii); + return TryGetUnicodeInvariant(ascii, destination, out charsWritten); } unsafe { fixed (char* pAscii = &MemoryMarshal.GetReference(ascii)) + fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) { return GlobalizationMode.UseNls ? - NlsGetUnicodeCore(originalAscii, pAscii, ascii.Length) : - IcuGetUnicodeCore(originalAscii, pAscii, ascii.Length); + NlsTryGetUnicodeCore(pAscii, ascii.Length, pDestination, destination.Length, out charsWritten) : + IcuTryGetUnicodeCore(pAscii, ascii.Length, pDestination, destination.Length, out charsWritten); } } } - private bool TryGetUnicodeCore(ReadOnlySpan ascii, Span destination, out int charsWritten) + private string GetUnicodeCore(string? 
originalAscii, ReadOnlySpan ascii) { // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. @@ -221,17 +212,16 @@ private bool TryGetUnicodeCore(ReadOnlySpan ascii, Span destination, if (GlobalizationMode.Invariant) { - return TryGetUnicodeInvariant(ascii, destination, out charsWritten); + return GetUnicodeInvariant(ascii); } unsafe { fixed (char* pAscii = &MemoryMarshal.GetReference(ascii)) - fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) { return GlobalizationMode.UseNls ? - NlsTryGetUnicodeCore(pAscii, ascii.Length, pDestination, destination.Length, out charsWritten) : - IcuTryGetUnicodeCore(pAscii, ascii.Length, pDestination, destination.Length, out charsWritten); + NlsGetUnicodeCore(originalAscii, pAscii, ascii.Length) : + IcuGetUnicodeCore(originalAscii, pAscii, ascii.Length); } } } From 5e3facf400431b6c5cab7c6d0c68cef2022ca6b7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 21:19:20 +0000 Subject: [PATCH 05/21] Remove GetAsciiCore/GetUnicodeCore indirection, inline directly into callers Co-authored-by: jkotas <6668460+jkotas@users.noreply.github.com> --- .../src/System/Globalization/IdnMapping.cs | 87 ++++++++----------- 1 file changed, 37 insertions(+), 50 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index e093c4889ac5e8..6117ce9fb408e6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -75,46 +75,40 @@ public string GetAscii(string unicode, int index, int count) if (index > unicode.Length - count) throw new 
ArgumentOutOfRangeException(nameof(unicode), SR.ArgumentOutOfRange_IndexCountBuffer); - return GetAsciiCore(unicode, unicode.AsSpan(index, count)); - } - - /// - /// Encodes a substring of domain name labels that include Unicode characters outside the ASCII character range (U+0000 to U+007F) to a displayable Unicode string. - /// - /// The Unicode domain name to convert. - /// The buffer to write the ASCII result to. - /// When this method returns, contains the number of characters that were written to . - /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. - /// is invalid based on the and properties, and the IDNA standard. - public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out int charsWritten) - { - if (unicode.Length == 0) + if (count == 0) { throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode)); } - if (unicode[^1] == 0) + if (unicode[index + count - 1] == 0) { - throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); + throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, index + count - 1), nameof(unicode)); } if (GlobalizationMode.Invariant) { - return TryGetAsciiInvariant(unicode, destination, out charsWritten); + return GetAsciiInvariant(unicode.AsSpan(index, count)); } unsafe { - fixed (char* pUnicode = &MemoryMarshal.GetReference(unicode)) - fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) + fixed (char* pUnicode = unicode) { return GlobalizationMode.UseNls ? - NlsTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten) : - IcuTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten); + NlsGetAsciiCore(unicode, pUnicode + index, count) : + IcuGetAsciiCore(unicode, pUnicode + index, count); } } } - private string GetAsciiCore(string? 
originalUnicode, ReadOnlySpan unicode) + /// + /// Encodes a substring of domain name labels that include Unicode characters outside the ASCII character range (U+0000 to U+007F) to a displayable Unicode string. + /// + /// The Unicode domain name to convert. + /// The buffer to write the ASCII result to. + /// When this method returns, contains the number of characters that were written to . + /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. + /// is invalid based on the and properties, and the IDNA standard. + public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out int charsWritten) { if (unicode.Length == 0) { @@ -127,16 +121,17 @@ private string GetAsciiCore(string? originalUnicode, ReadOnlySpan unicode) if (GlobalizationMode.Invariant) { - return GetAsciiInvariant(unicode); + return TryGetAsciiInvariant(unicode, destination, out charsWritten); } unsafe { fixed (char* pUnicode = &MemoryMarshal.GetReference(unicode)) + fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) { return GlobalizationMode.UseNls ? - NlsGetAsciiCore(originalUnicode, pUnicode, unicode.Length) : - IcuGetAsciiCore(originalUnicode, pUnicode, unicode.Length); + NlsTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten) : + IcuTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten); } } } @@ -166,7 +161,23 @@ public string GetUnicode(string ascii, int index, int count) // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. // The Win32 APIs fail on an embedded null, but not on a terminating null. 
- return GetUnicodeCore(ascii, ascii.AsSpan(index, count)); + if (count > 0 && ascii[index + count - 1] == (char)0) + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + + if (GlobalizationMode.Invariant) + { + return GetUnicodeInvariant(ascii.AsSpan(index, count)); + } + + unsafe + { + fixed (char* pAscii = ascii) + { + return GlobalizationMode.UseNls ? + NlsGetUnicodeCore(ascii, pAscii + index, count) : + IcuGetUnicodeCore(ascii, pAscii + index, count); + } + } } /// @@ -202,30 +213,6 @@ public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out } } - private string GetUnicodeCore(string? originalAscii, ReadOnlySpan ascii) - { - // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. - // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. - // The Win32 APIs fail on an embedded null, but not on a terminating null. - if (ascii.Length > 0 && ascii[^1] == (char)0) - throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); - - if (GlobalizationMode.Invariant) - { - return GetUnicodeInvariant(ascii); - } - - unsafe - { - fixed (char* pAscii = &MemoryMarshal.GetReference(ascii)) - { - return GlobalizationMode.UseNls ? - NlsGetUnicodeCore(originalAscii, pAscii, ascii.Length) : - IcuGetUnicodeCore(originalAscii, pAscii, ascii.Length); - } - } - } - public override bool Equals([NotNullWhen(true)] object? 
obj) => obj is IdnMapping that && _allowUnassigned == that._allowUnassigned && From d626cb28078e27ec833135f70a0f7990dc607a9d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 22:30:17 +0000 Subject: [PATCH 06/21] Remove unsafe code from TryGetAscii/TryGetUnicode, use span-based interop Co-authored-by: tarekgh <10833894+tarekgh@users.noreply.github.com> --- .../Common/src/Interop/Interop.Idna.cs | 7 ++++ .../Interop/Windows/Normaliz/Interop.Idna.cs | 17 ++++++++ .../System/Globalization/IdnMapping.Icu.cs | 41 +++++++++++++++---- .../System/Globalization/IdnMapping.Nls.cs | 16 ++++---- .../src/System/Globalization/IdnMapping.cs | 26 +++--------- 5 files changed, 71 insertions(+), 36 deletions(-) diff --git a/src/libraries/Common/src/Interop/Interop.Idna.cs b/src/libraries/Common/src/Interop/Interop.Idna.cs index f5a8e90adcbeb0..49e89afd8333fd 100644 --- a/src/libraries/Common/src/Interop/Interop.Idna.cs +++ b/src/libraries/Common/src/Interop/Interop.Idna.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System; using System.Runtime.InteropServices; internal static partial class Interop @@ -13,7 +14,13 @@ internal static partial class Globalization [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToAscii", StringMarshalling = StringMarshalling.Utf16)] internal static unsafe partial int ToAscii(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToAscii", StringMarshalling = StringMarshalling.Utf16)] + internal static partial int ToAscii(uint flags, ReadOnlySpan src, int srcLen, Span dstBuffer, int dstBufferCapacity); + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToUnicode", StringMarshalling = StringMarshalling.Utf16)] internal static unsafe partial int ToUnicode(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); + + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToUnicode", StringMarshalling = StringMarshalling.Utf16)] + internal static partial int ToUnicode(uint flags, ReadOnlySpan src, int srcLen, Span dstBuffer, int dstBufferCapacity); } } diff --git a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs index c5f56fbfb44fe7..e6a955df5d1cdd 100644 --- a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs +++ b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System; using System.Runtime.InteropServices; internal static partial class Interop @@ -19,6 +20,14 @@ internal static unsafe partial int IdnToAscii( char* lpASCIICharStr, int cchASCIIChar); + [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] + internal static partial int IdnToAscii( + uint dwFlags, + ReadOnlySpan lpUnicodeCharStr, + int cchUnicodeChar, + Span lpASCIICharStr, + int cchASCIIChar); + [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] internal static unsafe partial int IdnToUnicode( uint dwFlags, @@ -27,6 +36,14 @@ internal static unsafe partial int IdnToUnicode( char* lpUnicodeCharStr, int cchUnicodeChar); + [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] + internal static partial int IdnToUnicode( + uint dwFlags, + ReadOnlySpan lpASCIICharStr, + int cchASCIIChar, + Span lpUnicodeCharStr, + int cchUnicodeChar); + internal const int IDN_ALLOW_UNASSIGNED = 0x1; internal const int IDN_USE_STD3_ASCII_RULES = 0x2; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs index 745d57a967af86..e0a7612f7977ff 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs @@ -50,20 +50,20 @@ private unsafe string IcuGetAsciiCore(string? 
unicodeString, char* unicode, int } } - private unsafe bool IcuTryGetAsciiCore(char* unicode, int count, char* destination, int destinationLength, out int charsWritten) + private bool IcuTryGetAsciiCore(ReadOnlySpan unicode, Span destination, out int charsWritten) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); uint flags = IcuFlags; - CheckInvalidIdnCharacters(unicode, count, flags, nameof(unicode)); + CheckInvalidIdnCharacters(unicode, flags, nameof(unicode)); - int actualLength = Interop.Globalization.ToAscii(flags, unicode, count, destination, destinationLength); + int actualLength = Interop.Globalization.ToAscii(flags, unicode, unicode.Length, destination, destination.Length); if (actualLength == 0) { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); } - if (actualLength <= destinationLength) + if (actualLength <= destination.Length) { charsWritten = actualLength; return true; @@ -124,20 +124,20 @@ private unsafe string IcuGetUnicodeCore(string? 
asciiString, char* ascii, int co throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); } - private unsafe bool IcuTryGetUnicodeCore(char* ascii, int count, char* destination, int destinationLength, out int charsWritten) + private bool IcuTryGetUnicodeCore(ReadOnlySpan ascii, Span destination, out int charsWritten) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); uint flags = IcuFlags; - CheckInvalidIdnCharacters(ascii, count, flags, nameof(ascii)); + CheckInvalidIdnCharacters(ascii, flags, nameof(ascii)); - int actualLength = Interop.Globalization.ToUnicode(flags, ascii, count, destination, destinationLength); + int actualLength = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, destination, destination.Length); if (actualLength == 0) { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); } - if (actualLength <= destinationLength) + if (actualLength <= destination.Length) { charsWritten = actualLength; return true; @@ -182,5 +182,30 @@ private static unsafe void CheckInvalidIdnCharacters(char* s, int count, uint fl } } } + + /// + /// ICU doesn't check for invalid characters unless the STD3 rules option + /// is enabled. + /// + /// To match Windows behavior, we walk the string ourselves looking for these + /// bad characters so we can continue to throw ArgumentException in these cases. + /// + private static void CheckInvalidIdnCharacters(ReadOnlySpan s, uint flags, string paramName) + { + if ((flags & Interop.Globalization.UseStd3AsciiRules) == 0) + { + for (int i = 0; i < s.Length; i++) + { + char c = s[i]; + + // These characters are prohibited regardless of the UseStd3AsciiRules property. 
+ // See https://msdn.microsoft.com/en-us/library/system.globalization.idnmapping.usestd3asciirules(v=vs.110).aspx + if (c <= 0x1F || c == 0x7F) + { + throw new ArgumentException(SR.Argument_IdnIllegalName, paramName); + } + } + } + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs index 697b642bcfde05..64b120415c3404 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs @@ -54,7 +54,7 @@ private static unsafe string NlsGetAsciiCore(string? unicodeString, char* unicod return GetStringForOutput(unicodeString, unicode, count, output, length); } - private unsafe bool NlsTryGetAsciiCore(char* unicode, int count, char* destination, int destinationLength, out int charsWritten) + private bool NlsTryGetAsciiCore(ReadOnlySpan unicode, Span destination, out int charsWritten) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); @@ -62,20 +62,20 @@ private unsafe bool NlsTryGetAsciiCore(char* unicode, int count, char* destinati uint flags = NlsFlags; // Determine the required length - int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, null, 0); + int length = Interop.Normaliz.IdnToAscii(flags, unicode, unicode.Length, Span.Empty, 0); if (length == 0) { ThrowForZeroLength(unicode: true); } - if (length > destinationLength) + if (length > destination.Length) { charsWritten = 0; return false; } // Do the conversion - int actualLength = Interop.Normaliz.IdnToAscii(flags, unicode, count, destination, destinationLength); + int actualLength = Interop.Normaliz.IdnToAscii(flags, unicode, unicode.Length, destination, destination.Length); if (actualLength == 0) { ThrowForZeroLength(unicode: true); @@ -130,7 +130,7 @@ private static unsafe string NlsGetUnicodeCore(string? 
asciiString, char* ascii, return GetStringForOutput(asciiString, ascii, count, output, length); } - private unsafe bool NlsTryGetUnicodeCore(char* ascii, int count, char* destination, int destinationLength, out int charsWritten) + private bool NlsTryGetUnicodeCore(ReadOnlySpan ascii, Span destination, out int charsWritten) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); @@ -138,20 +138,20 @@ private unsafe bool NlsTryGetUnicodeCore(char* ascii, int count, char* destinati uint flags = NlsFlags; // Determine the required length - int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, null, 0); + int length = Interop.Normaliz.IdnToUnicode(flags, ascii, ascii.Length, Span.Empty, 0); if (length == 0) { ThrowForZeroLength(unicode: false); } - if (length > destinationLength) + if (length > destination.Length) { charsWritten = 0; return false; } // Do the conversion - int actualLength = Interop.Normaliz.IdnToUnicode(flags, ascii, count, destination, destinationLength); + int actualLength = Interop.Normaliz.IdnToUnicode(flags, ascii, ascii.Length, destination, destination.Length); if (actualLength == 0) { ThrowForZeroLength(unicode: false); diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 6117ce9fb408e6..ec5eeda93eab4c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -124,16 +124,9 @@ public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out return TryGetAsciiInvariant(unicode, destination, out charsWritten); } - unsafe - { - fixed (char* pUnicode = &MemoryMarshal.GetReference(unicode)) - fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) - { - return GlobalizationMode.UseNls ? 
- NlsTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten) : - IcuTryGetAsciiCore(pUnicode, unicode.Length, pDestination, destination.Length, out charsWritten); - } - } + return GlobalizationMode.UseNls ? + NlsTryGetAsciiCore(unicode, destination, out charsWritten) : + IcuTryGetAsciiCore(unicode, destination, out charsWritten); } // Gets Unicode version of the string. Normalized and limited to IDNA characters. @@ -201,16 +194,9 @@ public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out return TryGetUnicodeInvariant(ascii, destination, out charsWritten); } - unsafe - { - fixed (char* pAscii = &MemoryMarshal.GetReference(ascii)) - fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) - { - return GlobalizationMode.UseNls ? - NlsTryGetUnicodeCore(pAscii, ascii.Length, pDestination, destination.Length, out charsWritten) : - IcuTryGetUnicodeCore(pAscii, ascii.Length, pDestination, destination.Length, out charsWritten); - } - } + return GlobalizationMode.UseNls ? + NlsTryGetUnicodeCore(ascii, destination, out charsWritten) : + IcuTryGetUnicodeCore(ascii, destination, out charsWritten); } public override bool Equals([NotNullWhen(true)] object? 
obj) => From a9095290c1eb9ec15d4775c7da89bddc889b44de Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 22:48:16 +0000 Subject: [PATCH 07/21] Remove unsafe code, use span-based APIs throughout, fix allocation issue Co-authored-by: tarekgh <10833894+tarekgh@users.noreply.github.com> --- .../System/Globalization/CharUnicodeInfo.cs | 7 ++ .../System/Globalization/IdnMapping.Icu.cs | 74 ++++-------- .../System/Globalization/IdnMapping.Nls.cs | 44 ++++--- .../src/System/Globalization/IdnMapping.cs | 112 ++++++++++++------ 4 files changed, 122 insertions(+), 115 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs index b93949ed5b0fed..c24d90100877d9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs @@ -438,6 +438,13 @@ private static int GetCodePointFromString(string s, int index) Debug.Assert(s != null); Debug.Assert((uint)index < (uint)s.Length, "index < s.Length"); + return GetCodePoint(s.AsSpan(), index); + } + + private static int GetCodePoint(ReadOnlySpan s, int index) + { + Debug.Assert((uint)index < (uint)s.Length, "index < s.Length"); + int codePoint = 0; // We know the 'if' block below will always succeed, but it allows the diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs index e0a7612f7977ff..d040c410578019 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs @@ -7,13 +7,14 @@ namespace System.Globalization { public sealed partial class IdnMapping { - private unsafe string 
IcuGetAsciiCore(string? unicodeString, char* unicode, int count) + private string IcuGetAsciiCore(string unicodeString, int index, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); + ReadOnlySpan unicode = unicodeString.AsSpan(index, count); uint flags = IcuFlags; - CheckInvalidIdnCharacters(unicode, count, flags, nameof(unicode)); + CheckInvalidIdnCharacters(unicode, flags, nameof(unicode)); const int StackallocThreshold = 512; // Each unicode character is represented by up to 3 ASCII chars @@ -22,16 +23,16 @@ private unsafe string IcuGetAsciiCore(string? unicodeString, char* unicode, int int actualLength; if ((uint)estimatedLength < StackallocThreshold) { - char* outputStack = stackalloc char[estimatedLength]; + Span outputStack = stackalloc char[estimatedLength]; actualLength = Interop.Globalization.ToAscii(flags, unicode, count, outputStack, estimatedLength); if (actualLength > 0 && actualLength <= estimatedLength) { - return GetStringForOutput(unicodeString, unicode, count, outputStack, actualLength); + return GetStringForOutput(unicodeString, unicode, outputStack.Slice(0, actualLength)); } } else { - actualLength = Interop.Globalization.ToAscii(flags, unicode, count, null, 0); + actualLength = Interop.Globalization.ToAscii(flags, unicode, count, Span.Empty, 0); } if (actualLength == 0) { @@ -39,15 +40,12 @@ private unsafe string IcuGetAsciiCore(string? 
unicodeString, char* unicode, int } char[] outputHeap = new char[actualLength]; - fixed (char* pOutputHeap = &outputHeap[0]) + actualLength = Interop.Globalization.ToAscii(flags, unicode, count, outputHeap, actualLength); + if (actualLength == 0 || actualLength > outputHeap.Length) { - actualLength = Interop.Globalization.ToAscii(flags, unicode, count, pOutputHeap, actualLength); - if (actualLength == 0 || actualLength > outputHeap.Length) - { - throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); - } - return GetStringForOutput(unicodeString, unicode, count, pOutputHeap, actualLength); + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); } + return GetStringForOutput(unicodeString, unicode, outputHeap.AsSpan(0, actualLength)); } private bool IcuTryGetAsciiCore(ReadOnlySpan unicode, Span destination, out int charsWritten) @@ -73,52 +71,47 @@ private bool IcuTryGetAsciiCore(ReadOnlySpan unicode, Span destinati return false; } - private unsafe string IcuGetUnicodeCore(string? 
asciiString, char* ascii, int count) + private string IcuGetUnicodeCore(string asciiString, int index, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); + ReadOnlySpan ascii = asciiString.AsSpan(index, count); uint flags = IcuFlags; - CheckInvalidIdnCharacters(ascii, count, flags, nameof(ascii)); + CheckInvalidIdnCharacters(ascii, flags, nameof(ascii)); const int StackAllocThreshold = 512; if ((uint)count < StackAllocThreshold) { - char* output = stackalloc char[count]; - return IcuGetUnicodeCore(asciiString, ascii, count, flags, output, count, reattempt: true); + Span output = stackalloc char[count]; + return IcuGetUnicodeCore(asciiString, ascii, flags, output, reattempt: true); } else { char[] output = new char[count]; - fixed (char* pOutput = &output[0]) - { - return IcuGetUnicodeCore(asciiString, ascii, count, flags, pOutput, count, reattempt: true); - } + return IcuGetUnicodeCore(asciiString, ascii, flags, output, reattempt: true); } } - private unsafe string IcuGetUnicodeCore(string? 
asciiString, char* ascii, int count, uint flags, char* output, int outputLength, bool reattempt) + private static string IcuGetUnicodeCore(string asciiString, ReadOnlySpan ascii, uint flags, Span output, bool reattempt) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); - int realLen = Interop.Globalization.ToUnicode(flags, ascii, count, output, outputLength); + int realLen = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, output, output.Length); if (realLen == 0) { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); } - else if (realLen <= outputLength) + else if (realLen <= output.Length) { - return GetStringForOutput(asciiString, ascii, count, output, realLen); + return GetStringForOutput(asciiString, ascii, output.Slice(0, realLen)); } else if (reattempt) { char[] newOutput = new char[realLen]; - fixed (char* pNewOutput = newOutput) - { - return IcuGetUnicodeCore(asciiString, ascii, count, flags, pNewOutput, realLen, reattempt: false); - } + return IcuGetUnicodeCore(asciiString, ascii, flags, newOutput, reattempt: false); } throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); @@ -158,31 +151,6 @@ private uint IcuFlags } } - /// - /// ICU doesn't check for invalid characters unless the STD3 rules option - /// is enabled. - /// - /// To match Windows behavior, we walk the string ourselves looking for these - /// bad characters so we can continue to throw ArgumentException in these cases. - /// - private static unsafe void CheckInvalidIdnCharacters(char* s, int count, uint flags, string paramName) - { - if ((flags & Interop.Globalization.UseStd3AsciiRules) == 0) - { - for (int i = 0; i < count; i++) - { - char c = s[i]; - - // These characters are prohibited regardless of the UseStd3AsciiRules property. 
- // See https://msdn.microsoft.com/en-us/library/system.globalization.idnmapping.usestd3asciirules(v=vs.110).aspx - if (c <= 0x1F || c == 0x7F) - { - throw new ArgumentException(SR.Argument_IdnIllegalName, paramName); - } - } - } - } - /// /// ICU doesn't check for invalid characters unless the STD3 rules option /// is enabled. diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs index 64b120415c3404..6117295a8abc2d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs @@ -9,15 +9,16 @@ namespace System.Globalization { public sealed partial class IdnMapping { - private unsafe string NlsGetAsciiCore(string? unicodeString, char* unicode, int count) + private string NlsGetAsciiCore(string unicodeString, int index, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); + ReadOnlySpan unicode = unicodeString.AsSpan(index, count); uint flags = NlsFlags; // Determine the required length - int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, null, 0); + int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, Span.Empty, 0); if (length == 0) { ThrowForZeroLength(unicode: true); @@ -27,31 +28,28 @@ private unsafe string NlsGetAsciiCore(string? 
unicodeString, char* unicode, int const int StackAllocThreshold = 512; // arbitrary limit to switch from stack to heap allocation if ((uint)length < StackAllocThreshold) { - char* output = stackalloc char[length]; - return NlsGetAsciiCore(unicodeString, unicode, count, flags, output, length); + Span output = stackalloc char[length]; + return NlsGetAsciiCore(unicodeString, unicode, flags, output); } else { char[] output = new char[length]; - fixed (char* pOutput = &output[0]) - { - return NlsGetAsciiCore(unicodeString, unicode, count, flags, pOutput, length); - } + return NlsGetAsciiCore(unicodeString, unicode, flags, output); } } - private static unsafe string NlsGetAsciiCore(string? unicodeString, char* unicode, int count, uint flags, char* output, int outputLength) + private static string NlsGetAsciiCore(string unicodeString, ReadOnlySpan unicode, uint flags, Span output) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, output, outputLength); + int length = Interop.Normaliz.IdnToAscii(flags, unicode, unicode.Length, output, output.Length); if (length == 0) { ThrowForZeroLength(unicode: true); } - Debug.Assert(length == outputLength); - return GetStringForOutput(unicodeString, unicode, count, output, length); + Debug.Assert(length == output.Length); + return GetStringForOutput(unicodeString, unicode, output.Slice(0, length)); } private bool NlsTryGetAsciiCore(ReadOnlySpan unicode, Span destination, out int charsWritten) @@ -85,15 +83,16 @@ private bool NlsTryGetAsciiCore(ReadOnlySpan unicode, Span destinati return true; } - private unsafe string NlsGetUnicodeCore(string? 
asciiString, char* ascii, int count) + private string NlsGetUnicodeCore(string asciiString, int index, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); + ReadOnlySpan ascii = asciiString.AsSpan(index, count); uint flags = NlsFlags; // Determine the required length - int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, null, 0); + int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, Span.Empty, 0); if (length == 0) { ThrowForZeroLength(unicode: false); @@ -103,31 +102,28 @@ private unsafe string NlsGetUnicodeCore(string? asciiString, char* ascii, int co const int StackAllocThreshold = 512; // arbitrary limit to switch from stack to heap allocation if ((uint)length < StackAllocThreshold) { - char* output = stackalloc char[length]; - return NlsGetUnicodeCore(asciiString, ascii, count, flags, output, length); + Span output = stackalloc char[length]; + return NlsGetUnicodeCore(asciiString, ascii, flags, output); } else { char[] output = new char[length]; - fixed (char* pOutput = &output[0]) - { - return NlsGetUnicodeCore(asciiString, ascii, count, flags, pOutput, length); - } + return NlsGetUnicodeCore(asciiString, ascii, flags, output); } } - private static unsafe string NlsGetUnicodeCore(string? 
asciiString, char* ascii, int count, uint flags, char* output, int outputLength) + private static string NlsGetUnicodeCore(string asciiString, ReadOnlySpan ascii, uint flags, Span output) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, output, outputLength); + int length = Interop.Normaliz.IdnToUnicode(flags, ascii, ascii.Length, output, output.Length); if (length == 0) { ThrowForZeroLength(unicode: false); } - Debug.Assert(length == outputLength); - return GetStringForOutput(asciiString, ascii, count, output, length); + Debug.Assert(length == output.Length); + return GetStringForOutput(asciiString, ascii, output.Slice(0, length)); } private bool NlsTryGetUnicodeCore(ReadOnlySpan ascii, Span destination, out int charsWritten) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index ec5eeda93eab4c..3cef9f3200cd21 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -86,18 +86,12 @@ public string GetAscii(string unicode, int index, int count) if (GlobalizationMode.Invariant) { - return GetAsciiInvariant(unicode.AsSpan(index, count)); + return GetAsciiInvariant(unicode, index, count); } - unsafe - { - fixed (char* pUnicode = unicode) - { - return GlobalizationMode.UseNls ? - NlsGetAsciiCore(unicode, pUnicode + index, count) : - IcuGetAsciiCore(unicode, pUnicode + index, count); - } - } + return GlobalizationMode.UseNls ? 
+ NlsGetAsciiCore(unicode, index, count) : + IcuGetAsciiCore(unicode, index, count); } /// @@ -159,18 +153,12 @@ public string GetUnicode(string ascii, int index, int count) if (GlobalizationMode.Invariant) { - return GetUnicodeInvariant(ascii.AsSpan(index, count)); + return GetUnicodeInvariant(ascii, index, count); } - unsafe - { - fixed (char* pAscii = ascii) - { - return GlobalizationMode.UseNls ? - NlsGetUnicodeCore(ascii, pAscii + index, count) : - IcuGetUnicodeCore(ascii, pAscii + index, count); - } - } + return GlobalizationMode.UseNls ? + NlsGetUnicodeCore(ascii, index, count) : + IcuGetUnicodeCore(ascii, index, count); } /// @@ -208,19 +196,19 @@ public override int GetHashCode() => (_allowUnassigned ? 100 : 200) + (_useStd3AsciiRules ? 1000 : 2000); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe string GetStringForOutput(string? originalString, char* input, int inputLength, char* output, int outputLength) + private static string GetStringForOutput(string? originalString, ReadOnlySpan input, ReadOnlySpan output) { - Debug.Assert(inputLength > 0); + Debug.Assert(input.Length > 0); if (originalString is not null && - originalString.Length == inputLength && - inputLength == outputLength && - Ordinal.EqualsIgnoreCase(ref *input, ref *output, inputLength)) + originalString.Length == input.Length && + input.Length == output.Length && + Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(input), ref MemoryMarshal.GetReference(output), input.Length)) { return originalString; } - return new string(output, 0, outputLength); + return output.ToString(); } // @@ -240,11 +228,18 @@ private static unsafe string GetStringForOutput(string? 
originalString, char* in private const int c_skew = 38; private const int c_damp = 700; - private string GetAsciiInvariant(ReadOnlySpan unicode) + private string GetAsciiInvariant(string unicodeString, int index, int count) { + ReadOnlySpan unicode = unicodeString.AsSpan(index, count); + // Check for ASCII only string, which will be unchanged if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true)) { + // Return original string if the entire string was requested and it doesn't need modification + if (index == 0 && count == unicodeString.Length) + { + return unicodeString; + } return unicode.ToString(); } @@ -268,7 +263,35 @@ private string GetAsciiInvariant(ReadOnlySpan unicode) private bool TryGetAsciiInvariant(ReadOnlySpan unicode, Span destination, out int charsWritten) { - string result = GetAsciiInvariant(unicode); + // Check for ASCII only string, which will be unchanged + if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true)) + { + if (unicode.Length <= destination.Length) + { + unicode.CopyTo(destination); + charsWritten = unicode.Length; + return true; + } + charsWritten = 0; + return false; + } + + // Cannot be null terminated (normalization won't help us with this one, and + // may have returned false before checking the whole string above) + Debug.Assert(unicode.Length >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now."); + if (unicode[^1] <= 0x1f) + { + throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); + } + + // May need to check Std3 rules again for non-ascii + if (UseStd3AsciiRules) + { + ValidateStd3AndAscii(unicode, true, false); + } + + // Go ahead and encode it + string result = PunycodeEncode(unicode); if (result.Length <= destination.Length) { result.CopyTo(destination); @@ -615,7 +638,26 @@ private static void ValidateStd3(char c, bool bNextToDot) throw new ArgumentException(SR.Format(SR.Argument_IdnBadStd3, c), nameof(c)); } - private string 
GetUnicodeInvariant(ReadOnlySpan ascii) + private string GetUnicodeInvariant(string asciiString, int index, int count) + { + // Convert Punycode to Unicode + string strUnicode = PunycodeDecode(asciiString.Substring(index, count)); + + // Output name MUST obey IDNA rules & round trip (casing differences are allowed) + string ascii = GetAscii(strUnicode); + if (!ascii.Equals(asciiString.Substring(index, count), StringComparison.OrdinalIgnoreCase)) + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(asciiString)); + + // If the ASCII round-trip equals the original string, return it as-is (no allocation) + if (index == 0 && count == asciiString.Length && strUnicode.Equals(asciiString, StringComparison.OrdinalIgnoreCase)) + { + return asciiString; + } + + return strUnicode; + } + + private bool TryGetUnicodeInvariant(ReadOnlySpan ascii, Span destination, out int charsWritten) { // Convert the span to a string for PunycodeDecode since it uses string operations extensively string asciiString = ascii.ToString(); @@ -627,16 +669,10 @@ private string GetUnicodeInvariant(ReadOnlySpan ascii) if (!asciiString.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase)) throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); - return strUnicode; - } - - private bool TryGetUnicodeInvariant(ReadOnlySpan ascii, Span destination, out int charsWritten) - { - string result = GetUnicodeInvariant(ascii); - if (result.Length <= destination.Length) + if (strUnicode.Length <= destination.Length) { - result.CopyTo(destination); - charsWritten = result.Length; + strUnicode.CopyTo(destination); + charsWritten = strUnicode.Length; return true; } From ad001628ecd22a19e254f0f616f8ab5eb05ae76b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 23:01:41 +0000 Subject: [PATCH 08/21] Fix potential out-of-bounds access in IsSurrogatePair check Co-authored-by: tarekgh 
<10833894+tarekgh@users.noreply.github.com> --- .../src/System/Globalization/IdnMapping.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 3cef9f3200cd21..0e1ea9da14f7e8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -484,7 +484,7 @@ private static string PunycodeEncode(ReadOnlySpan unicode) numProcessed++; } // If its a surrogate, skip the next since our bidi category tester doesn't handle it. - else if (char.IsSurrogatePair(unicode[basicCount], unicode[basicCount + 1])) + else if (basicCount + 1 < iNextDot && char.IsSurrogatePair(unicode[basicCount], unicode[basicCount + 1])) basicCount++; } From 1b4eaec278c0dc55ed79f40e58f23b9fea9c7af3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 25 Jan 2026 23:14:22 +0000 Subject: [PATCH 09/21] Simplify GetBidiCategory(string) to call span-based overload Co-authored-by: tarekgh <10833894+tarekgh@users.noreply.github.com> --- .../src/System/Globalization/CharUnicodeInfo.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs index c24d90100877d9..53d1a413f11fda 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs @@ -48,7 +48,7 @@ internal static StrongBidiCategory GetBidiCategory(string s, int index) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); } - return GetBidiCategoryNoBoundsChecks((uint)GetCodePointFromString(s, index)); + return 
GetBidiCategory(s.AsSpan(), index); } internal static StrongBidiCategory GetBidiCategory(StringBuilder s, int index) From 30f67ebee6b3b73d3e4c6e1c840ca626a403fa2f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 00:21:46 +0000 Subject: [PATCH 10/21] Remove unused pointer-based interop method definitions Co-authored-by: jkotas <6668460+jkotas@users.noreply.github.com> --- src/libraries/Common/src/Interop/Interop.Idna.cs | 6 ------ .../src/Interop/Windows/Normaliz/Interop.Idna.cs | 16 ---------------- 2 files changed, 22 deletions(-) diff --git a/src/libraries/Common/src/Interop/Interop.Idna.cs b/src/libraries/Common/src/Interop/Interop.Idna.cs index 49e89afd8333fd..e62e82005389f7 100644 --- a/src/libraries/Common/src/Interop/Interop.Idna.cs +++ b/src/libraries/Common/src/Interop/Interop.Idna.cs @@ -11,15 +11,9 @@ internal static partial class Globalization internal const int AllowUnassigned = 0x1; internal const int UseStd3AsciiRules = 0x2; - [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToAscii", StringMarshalling = StringMarshalling.Utf16)] - internal static unsafe partial int ToAscii(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); - [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToAscii", StringMarshalling = StringMarshalling.Utf16)] internal static partial int ToAscii(uint flags, ReadOnlySpan src, int srcLen, Span dstBuffer, int dstBufferCapacity); - [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToUnicode", StringMarshalling = StringMarshalling.Utf16)] - internal static unsafe partial int ToUnicode(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); - [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToUnicode", StringMarshalling = StringMarshalling.Utf16)] internal static partial int ToUnicode(uint flags, 
ReadOnlySpan src, int srcLen, Span dstBuffer, int dstBufferCapacity); } diff --git a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs index e6a955df5d1cdd..ea4187ad357766 100644 --- a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs +++ b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs @@ -12,14 +12,6 @@ internal static partial class Normaliz // Idn APIs // - [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] - internal static unsafe partial int IdnToAscii( - uint dwFlags, - char* lpUnicodeCharStr, - int cchUnicodeChar, - char* lpASCIICharStr, - int cchASCIIChar); - [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] internal static partial int IdnToAscii( uint dwFlags, @@ -28,14 +20,6 @@ internal static partial int IdnToAscii( Span lpASCIICharStr, int cchASCIIChar); - [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] - internal static unsafe partial int IdnToUnicode( - uint dwFlags, - char* lpASCIICharStr, - int cchASCIIChar, - char* lpUnicodeCharStr, - int cchUnicodeChar); - [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] internal static partial int IdnToUnicode( uint dwFlags, From 1488db9763656d09fde22bca198acca1af3a6503 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 00:45:37 +0000 Subject: [PATCH 11/21] Address review feedback: simplify code and fix docs Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../System/Globalization/CharUnicodeInfo.cs | 20 ++++++------------- .../System/Globalization/IdnMapping.Icu.cs | 4 ++++ .../src/System/Globalization/IdnMapping.cs | 17 ++++++++-------- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs index 53d1a413f11fda..fced8e03130913 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs @@ -48,7 +48,7 @@ internal static StrongBidiCategory GetBidiCategory(string s, int index) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); } - return GetBidiCategory(s.AsSpan(), index); + return GetBidiCategory((ReadOnlySpan)s, index); } internal static StrongBidiCategory GetBidiCategory(StringBuilder s, int index) @@ -142,7 +142,7 @@ public static int GetDecimalDigitValue(string s, int index) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); } - return GetDecimalDigitValueInternalNoBoundsCheck((uint)GetCodePointFromString(s, index)); + return GetDecimalDigitValueInternalNoBoundsCheck((uint)GetCodePoint(s, index)); } private static int GetDecimalDigitValueInternalNoBoundsCheck(uint codePoint) @@ -176,7 +176,7 @@ public static int GetDigitValue(string s, int index) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); } - return GetDigitValueInternalNoBoundsCheck((uint)GetCodePointFromString(s, index)); + return GetDigitValueInternalNoBoundsCheck((uint)GetCodePoint(s, index)); } private static int GetDigitValueInternalNoBoundsCheck(uint codePoint) @@ -255,7 +255,7 @@ public static double GetNumericValue(string s, int index) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static double GetNumericValueInternal(string s, int index) => GetNumericValueNoBoundsCheck((uint)GetCodePointFromString(s, index)); + internal static double GetNumericValueInternal(string s, int index) => GetNumericValueNoBoundsCheck((uint)GetCodePoint(s, index)); private static double GetNumericValueNoBoundsCheck(uint codePoint) { @@ -392,7 +392,7 
@@ internal static UnicodeCategory GetUnicodeCategoryInternal(string value, int ind Debug.Assert(value != null, "value can not be null"); Debug.Assert(index < value.Length); - return GetUnicodeCategoryNoBoundsChecks((uint)GetCodePointFromString(value, index)); + return GetUnicodeCategoryNoBoundsChecks((uint)GetCodePoint(value, index)); } /// @@ -405,7 +405,7 @@ internal static UnicodeCategory GetUnicodeCategoryInternal(string str, int index Debug.Assert(str.Length > 0); Debug.Assert(index >= 0 && index < str.Length); - uint codePoint = (uint)GetCodePointFromString(str, index); + uint codePoint = (uint)GetCodePoint(str, index); UnicodeDebug.AssertIsValidCodePoint(codePoint); charLength = (codePoint >= UNICODE_PLANE01_START) ? 2 /* surrogate pair */ : 1 /* BMP char */; @@ -433,14 +433,6 @@ private static UnicodeCategory GetUnicodeCategoryNoBoundsChecks(uint codePoint) /// WARNING: since it doesn't throw an exception it CAN return a value /// in the surrogate range D800-DFFF, which is not a legal scalar value. 
/// - private static int GetCodePointFromString(string s, int index) - { - Debug.Assert(s != null); - Debug.Assert((uint)index < (uint)s.Length, "index < s.Length"); - - return GetCodePoint(s.AsSpan(), index); - } - private static int GetCodePoint(ReadOnlySpan s, int index) { Debug.Assert((uint)index < (uint)s.Length, "index < s.Length"); diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs index d040c410578019..2a260dcad3e0b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs @@ -57,10 +57,12 @@ private bool IcuTryGetAsciiCore(ReadOnlySpan unicode, Span destinati CheckInvalidIdnCharacters(unicode, flags, nameof(unicode)); int actualLength = Interop.Globalization.ToAscii(flags, unicode, unicode.Length, destination, destination.Length); + if (actualLength == 0) { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); } + if (actualLength <= destination.Length) { charsWritten = actualLength; @@ -126,10 +128,12 @@ private bool IcuTryGetUnicodeCore(ReadOnlySpan ascii, Span destinati CheckInvalidIdnCharacters(ascii, flags, nameof(ascii)); int actualLength = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, destination, destination.Length); + if (actualLength == 0) { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); } + if (actualLength <= destination.Length) { charsWritten = actualLength; diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 0e1ea9da14f7e8..ed5aedc5a72e01 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -95,7 +95,7 @@ 
public string GetAscii(string unicode, int index, int count) } /// - /// Encodes a substring of domain name labels that include Unicode characters outside the ASCII character range (U+0000 to U+007F) to a displayable Unicode string. + /// Encodes a Unicode domain name to its ASCII (Punycode) equivalent. /// /// The Unicode domain name to convert. /// The buffer to write the ASCII result to. @@ -638,20 +638,21 @@ private static void ValidateStd3(char c, bool bNextToDot) throw new ArgumentException(SR.Format(SR.Argument_IdnBadStd3, c), nameof(c)); } - private string GetUnicodeInvariant(string asciiString, int index, int count) + private string GetUnicodeInvariant(string ascii, int index, int count) { // Convert Punycode to Unicode - string strUnicode = PunycodeDecode(asciiString.Substring(index, count)); + string asciiSlice = ascii.Substring(index, count); + string strUnicode = PunycodeDecode(asciiSlice); // Output name MUST obey IDNA rules & round trip (casing differences are allowed) - string ascii = GetAscii(strUnicode); - if (!ascii.Equals(asciiString.Substring(index, count), StringComparison.OrdinalIgnoreCase)) - throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(asciiString)); + string asciiRoundtrip = GetAscii(strUnicode); + if (!asciiRoundtrip.Equals(asciiSlice, StringComparison.OrdinalIgnoreCase)) + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); // If the ASCII round-trip equals the original string, return it as-is (no allocation) - if (index == 0 && count == asciiString.Length && strUnicode.Equals(asciiString, StringComparison.OrdinalIgnoreCase)) + if (index == 0 && count == ascii.Length && strUnicode.Equals(ascii, StringComparison.OrdinalIgnoreCase)) { - return asciiString; + return ascii; } return strUnicode; From 5d344969b5852d1c03e7620908d75a8bfdecaf46 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 02:08:20 +0000 Subject: [PATCH 12/21] Use 
span-based TryGetAscii/TryGetUnicode in DomainNameHelper to avoid allocations Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../src/System/DomainNameHelper.cs | 54 +++++++++++++++++-- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs index 8ab493e0e83543..a8e268a233e547 100644 --- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs @@ -224,6 +224,11 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild // b) if label is ascii and ace and not valid idn then just lowercase it // c) if label is ascii and ace and is valid idn then get its unicode eqvl // d) if label is unicode then clean it by running it through idnmapping + + // Buffer for intermediate ASCII form when processing non-ASCII labels + // Max label length is 63 chars, but punycode can expand - 256 is a safe upper bound + Span asciiBuffer = stackalloc char[256]; + for (int i = 0; i < hostname.Length; i++) { if (i != 0) @@ -241,11 +246,20 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild if (!Ascii.IsValid(label)) { + // For non-ASCII labels, first convert to ASCII (punycode), then to normalized Unicode + // Use span-based APIs to avoid intermediate string allocations try { - string asciiForm = s_idnMapping.GetAscii(hostname, i, label.Length); + if (!s_idnMapping.TryGetAscii(label, asciiBuffer, out int asciiWritten)) + { + return false; + } - dest.Append(s_idnMapping.GetUnicode(asciiForm)); + // Now convert the ASCII form to Unicode and append directly to dest + if (!TryAppendIdnUnicode(asciiBuffer.Slice(0, asciiWritten), ref dest)) + { + return false; + } } catch (ArgumentException) { @@ -258,11 +272,13 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild if 
(label.StartsWith("xn--", StringComparison.Ordinal)) { - // check ace validity + // check ace validity - use span-based API to avoid string allocation try { - dest.Append(s_idnMapping.GetUnicode(hostname, i, label.Length)); - aceValid = true; + if (TryAppendIdnUnicode(label, ref dest)) + { + aceValid = true; + } } catch (ArgumentException) { @@ -283,5 +299,33 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild return true; } + + /// + /// Attempts to convert ASCII (punycode) to Unicode and append directly to the ValueStringBuilder. + /// + private static bool TryAppendIdnUnicode(scoped ReadOnlySpan ascii, ref ValueStringBuilder dest) + { + // Unicode output is typically similar length to ASCII input for IDN labels + // Start with a reasonable estimate and grow if needed + const int InitialBufferSize = 64; + + // First try with the initial buffer size + Span buffer = dest.AppendSpan(InitialBufferSize); + + if (s_idnMapping.TryGetUnicode(ascii, buffer, out int charsWritten)) + { + // Shrink the buffer to actual size + dest.Length -= InitialBufferSize - charsWritten; + return true; + } + + // Buffer was too small - undo the append and try with a larger buffer + dest.Length -= InitialBufferSize; + + // For longer results, fall back to string-based API + // This is rare since domain labels are limited to 63 chars + dest.Append(s_idnMapping.GetUnicode(ascii.ToString())); + return true; + } } } From b9f72ff88099cfb5a31af6eaa08eeea15d187bfc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 04:01:17 +0000 Subject: [PATCH 13/21] Address copilot-pull-request-reviewer feedback: add tests with UseStd3AsciiRules/AllowUnassigned flags Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../IdnMapping/IdnMappingGetAsciiTests.cs | 38 +++++++++++++++++++ .../IdnMapping/IdnMappingGetUnicodeTests.cs | 38 +++++++++++++++++++ 2 files changed, 76 
insertions(+) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs index 1bc69c9bec7b84..ad565880506cf1 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs @@ -244,5 +244,43 @@ public void TryGetAscii_NullContaining_ThrowsArgumentException(string unicode) char[] destination = new char[100]; Assert.Throws(() => idn.TryGetAscii(unicode, destination, out _)); } + + [Theory] + [MemberData(nameof(GetAscii_TestData))] + public void TryGetAscii_WithFlags(string unicode, int index, int count, string expected) + { + // Test with UseStd3AsciiRules = true and AllowUnassigned = true + var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true, AllowUnassigned = true }; + ReadOnlySpan unicodeSpan = unicode.AsSpan(index, count); + char[] destination = new char[expected.Length + 10]; + + Assert.True(idnStd3.TryGetAscii(unicodeSpan, destination, out int charsWritten)); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + + // Test with AllowUnassigned = false (default) + var idnNoUnassigned = new IdnMapping() { AllowUnassigned = false }; + Assert.True(idnNoUnassigned.TryGetAscii(unicodeSpan, destination, out charsWritten)); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + } + + [Theory] + [MemberData(nameof(GetAscii_Invalid_TestData))] + public void TryGetAscii_Invalid(string unicode, int index, int count, Type exceptionType) + { + if (unicode is null) + { + return; // TryGetAscii takes ReadOnlySpan, which can't be null + } + + static void tryGetAscii_Invalid(IdnMapping idnMapping, string unicode, int index, int 
count, Type exceptionType) + { + ReadOnlySpan unicodeSpan = unicode.AsSpan(index, count); + char[] destination = new char[100]; + Assert.Throws(exceptionType, () => idnMapping.TryGetAscii(unicodeSpan, destination, out _)); + } + + tryGetAscii_Invalid(new IdnMapping() { UseStd3AsciiRules = false }, unicode, index, count, exceptionType); + tryGetAscii_Invalid(new IdnMapping() { UseStd3AsciiRules = true }, unicode, index, count, exceptionType); + } } } diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs index 16c984ef206cd7..86b8c1e2e38f98 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs @@ -180,5 +180,43 @@ public void TryGetUnicode_NullContaining_ThrowsArgumentException(string ascii) char[] destination = new char[100]; Assert.Throws(() => idn.TryGetUnicode(ascii, destination, out _)); } + + [Theory] + [MemberData(nameof(GetUnicode_TestData))] + public void TryGetUnicode_WithFlags(string ascii, int index, int count, string expected) + { + // Test with UseStd3AsciiRules = true and AllowUnassigned = true + var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true, AllowUnassigned = true }; + ReadOnlySpan asciiSpan = ascii.AsSpan(index, count); + char[] destination = new char[expected.Length + 10]; + + Assert.True(idnStd3.TryGetUnicode(asciiSpan, destination, out int charsWritten)); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + + // Test with AllowUnassigned = false (default) + var idnNoUnassigned = new IdnMapping() { AllowUnassigned = false }; + Assert.True(idnNoUnassigned.TryGetUnicode(asciiSpan, destination, out charsWritten)); + 
Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + } + + [Theory] + [MemberData(nameof(GetUnicode_Invalid_TestData))] + public void TryGetUnicode_Invalid(string ascii, int index, int count, Type exceptionType) + { + if (ascii is null) + { + return; // TryGetUnicode takes ReadOnlySpan, which can't be null + } + + static void tryGetUnicode_Invalid(IdnMapping idnMapping, string ascii, int index, int count, Type exceptionType) + { + ReadOnlySpan asciiSpan = ascii.AsSpan(index, count); + char[] destination = new char[100]; + Assert.Throws(exceptionType, () => idnMapping.TryGetUnicode(asciiSpan, destination, out _)); + } + + tryGetUnicode_Invalid(new IdnMapping() { UseStd3AsciiRules = false }, ascii, index, count, exceptionType); + tryGetUnicode_Invalid(new IdnMapping() { UseStd3AsciiRules = true }, ascii, index, count, exceptionType); + } } } From bbcb6491910233eb7561e2401a4733d3d61b1c64 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 13:51:05 +0000 Subject: [PATCH 14/21] Address MihaZupan review feedback: fix test build errors, simplify AppendIdnUnicode, add overlapping buffer tests Co-authored-by: MihaZupan <25307628+MihaZupan@users.noreply.github.com> --- .../src/System/DomainNameHelper.cs | 36 ++++------------ .../IdnMapping/IdnMappingGetAsciiTests.cs | 41 +++++++++++++++---- .../IdnMapping/IdnMappingGetUnicodeTests.cs | 38 +++++++++++++---- 3 files changed, 72 insertions(+), 43 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs index a8e268a233e547..7896e935582461 100644 --- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs @@ -256,10 +256,7 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild } // Now convert the ASCII 
form to Unicode and append directly to dest - if (!TryAppendIdnUnicode(asciiBuffer.Slice(0, asciiWritten), ref dest)) - { - return false; - } + AppendIdnUnicode(asciiBuffer.Slice(0, asciiWritten), ref dest); } catch (ArgumentException) { @@ -275,10 +272,8 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild // check ace validity - use span-based API to avoid string allocation try { - if (TryAppendIdnUnicode(label, ref dest)) - { - aceValid = true; - } + AppendIdnUnicode(label, ref dest); + aceValid = true; } catch (ArgumentException) { @@ -301,31 +296,18 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild } /// - /// Attempts to convert ASCII (punycode) to Unicode and append directly to the ValueStringBuilder. + /// Converts ASCII (punycode) to Unicode and appends directly to the ValueStringBuilder. /// - private static bool TryAppendIdnUnicode(scoped ReadOnlySpan ascii, ref ValueStringBuilder dest) + private static void AppendIdnUnicode(scoped ReadOnlySpan ascii, ref ValueStringBuilder dest) { - // Unicode output is typically similar length to ASCII input for IDN labels - // Start with a reasonable estimate and grow if needed - const int InitialBufferSize = 64; - - // First try with the initial buffer size - Span buffer = dest.AppendSpan(InitialBufferSize); + int charsWritten; - if (s_idnMapping.TryGetUnicode(ascii, buffer, out int charsWritten)) + while (!s_idnMapping.TryGetUnicode(ascii, dest.RawChars.Slice(dest.Length), out charsWritten)) { - // Shrink the buffer to actual size - dest.Length -= InitialBufferSize - charsWritten; - return true; + dest.EnsureCapacity(dest.Capacity + 1); } - // Buffer was too small - undo the append and try with a larger buffer - dest.Length -= InitialBufferSize; - - // For longer results, fall back to string-based API - // This is rare since domain labels are limited to 63 chars - dest.Append(s_idnMapping.GetUnicode(ascii.ToString())); - return true; + dest.Length += 
charsWritten; } } } diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs index ad565880506cf1..ab63ace432c7bf 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs @@ -272,15 +272,40 @@ public void TryGetAscii_Invalid(string unicode, int index, int count, Type excep return; // TryGetAscii takes ReadOnlySpan, which can't be null } - static void tryGetAscii_Invalid(IdnMapping idnMapping, string unicode, int index, int count, Type exceptionType) - { - ReadOnlySpan unicodeSpan = unicode.AsSpan(index, count); - char[] destination = new char[100]; - Assert.Throws(exceptionType, () => idnMapping.TryGetAscii(unicodeSpan, destination, out _)); - } + string slice = unicode.Substring(index, count); + char[] destination = new char[100]; + + var idnNoStd3 = new IdnMapping() { UseStd3AsciiRules = false }; + Assert.Throws(exceptionType, () => idnNoStd3.TryGetAscii(slice, destination, out _)); + + var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true }; + Assert.Throws(exceptionType, () => idnStd3.TryGetAscii(slice, destination, out _)); + } + + [Fact] + public void TryGetAscii_OverlappingBuffers() + { + // Test with overlapping input and destination buffers + // The native functions should handle this correctly by copying to destination + var idn = new IdnMapping(); + + // Test case: input and destination start at same location + char[] buffer = "example.com\0\0\0\0\0\0\0\0\0\0".ToCharArray(); + ReadOnlySpan input = buffer.AsSpan(0, 11); // "example.com" + Span destination = buffer.AsSpan(0, buffer.Length); + + Assert.True(idn.TryGetAscii(input, destination, out int charsWritten)); + Assert.Equal(11, 
charsWritten); + Assert.Equal("example.com", new string(buffer, 0, charsWritten)); + + // Test case: destination offset but overlapping + buffer = "example.com\0\0\0\0\0\0\0\0\0\0".ToCharArray(); + input = buffer.AsSpan(0, 11); + destination = buffer.AsSpan(5, buffer.Length - 5); - tryGetAscii_Invalid(new IdnMapping() { UseStd3AsciiRules = false }, unicode, index, count, exceptionType); - tryGetAscii_Invalid(new IdnMapping() { UseStd3AsciiRules = true }, unicode, index, count, exceptionType); + Assert.True(idn.TryGetAscii(input, destination, out charsWritten)); + Assert.Equal(11, charsWritten); + Assert.Equal("example.com", new string(buffer, 5, charsWritten)); } } } diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs index 86b8c1e2e38f98..88e815914ca3a6 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs @@ -208,15 +208,37 @@ public void TryGetUnicode_Invalid(string ascii, int index, int count, Type excep return; // TryGetUnicode takes ReadOnlySpan, which can't be null } - static void tryGetUnicode_Invalid(IdnMapping idnMapping, string ascii, int index, int count, Type exceptionType) - { - ReadOnlySpan asciiSpan = ascii.AsSpan(index, count); - char[] destination = new char[100]; - Assert.Throws(exceptionType, () => idnMapping.TryGetUnicode(asciiSpan, destination, out _)); - } + string slice = ascii.Substring(index, count); + char[] destination = new char[100]; + + var idnNoStd3 = new IdnMapping() { UseStd3AsciiRules = false }; + Assert.Throws(exceptionType, () => idnNoStd3.TryGetUnicode(slice, destination, out _)); + + var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true }; + 
Assert.Throws(exceptionType, () => idnStd3.TryGetUnicode(slice, destination, out _)); + } + + [Fact] + public void TryGetUnicode_OverlappingBuffers() + { + // Test with overlapping input and destination buffers + // The native functions should handle this correctly by copying to destination + var idn = new IdnMapping(); + + // Test case: input and destination start at same location + char[] buffer = "xn--nxasmq5b.com\0\0\0\0\0\0\0\0\0\0".ToCharArray(); + ReadOnlySpan input = buffer.AsSpan(0, 16); // "xn--nxasmq5b.com" + Span destination = buffer.AsSpan(0, buffer.Length); + + Assert.True(idn.TryGetUnicode(input, destination, out int charsWritten)); + // The expected output is the Unicode equivalent + + // Test case: destination offset but overlapping + buffer = "xn--nxasmq5b.com\0\0\0\0\0\0\0\0\0\0".ToCharArray(); + input = buffer.AsSpan(0, 16); + destination = buffer.AsSpan(5, buffer.Length - 5); - tryGetUnicode_Invalid(new IdnMapping() { UseStd3AsciiRules = false }, ascii, index, count, exceptionType); - tryGetUnicode_Invalid(new IdnMapping() { UseStd3AsciiRules = true }, ascii, index, count, exceptionType); + Assert.True(idn.TryGetUnicode(input, destination, out charsWritten)); } } } From 165ef820020ba5d24648ec34e3bee0bf745b649d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:20:30 +0000 Subject: [PATCH 15/21] Address MihaZupan feedback: fix comment casing, add debug assert, use non-ASCII data in overlapping buffer tests Co-authored-by: MihaZupan <25307628+MihaZupan@users.noreply.github.com> --- .../src/System/DomainNameHelper.cs | 8 +++----- .../IdnMapping/IdnMappingGetAsciiTests.cs | 19 ++++++++++--------- .../IdnMapping/IdnMappingGetUnicodeTests.cs | 18 +++++++++++------- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs index 
7896e935582461..3df336ffbf6f0a 100644 --- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs @@ -250,10 +250,8 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild // Use span-based APIs to avoid intermediate string allocations try { - if (!s_idnMapping.TryGetAscii(label, asciiBuffer, out int asciiWritten)) - { - return false; - } + bool asciiSuccess = s_idnMapping.TryGetAscii(label, asciiBuffer, out int asciiWritten); + Debug.Assert(asciiSuccess, "TryGetAscii should always succeed with a 255-char buffer for valid IDN labels"); // Now convert the ASCII form to Unicode and append directly to dest AppendIdnUnicode(asciiBuffer.Slice(0, asciiWritten), ref dest); @@ -269,7 +267,7 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild if (label.StartsWith("xn--", StringComparison.Ordinal)) { - // check ace validity - use span-based API to avoid string allocation + // Check ace validity - use span-based API to avoid string allocation try { AppendIdnUnicode(label, ref dest); diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs index ab63ace432c7bf..9b317cb296de59 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs @@ -285,27 +285,28 @@ public void TryGetAscii_Invalid(string unicode, int index, int count, Type excep [Fact] public void TryGetAscii_OverlappingBuffers() { - // Test with overlapping input and destination buffers - // The native functions should handle this correctly by copying to destination + // Test with overlapping input and destination buffers using 
non-ASCII inputs + // that require actual native API calls for conversion var idn = new IdnMapping(); // Test case: input and destination start at same location - char[] buffer = "example.com\0\0\0\0\0\0\0\0\0\0".ToCharArray(); - ReadOnlySpan input = buffer.AsSpan(0, 11); // "example.com" + // Using Japanese characters that convert to "xn--r8jz45g" (11 chars) + char[] buffer = "\u4F8B\u3048\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0".ToCharArray(); + ReadOnlySpan input = buffer.AsSpan(0, 2); // "例え" (2 Japanese chars) Span destination = buffer.AsSpan(0, buffer.Length); Assert.True(idn.TryGetAscii(input, destination, out int charsWritten)); Assert.Equal(11, charsWritten); - Assert.Equal("example.com", new string(buffer, 0, charsWritten)); + Assert.Equal("xn--r8jz45g", new string(buffer, 0, charsWritten)); // Test case: destination offset but overlapping - buffer = "example.com\0\0\0\0\0\0\0\0\0\0".ToCharArray(); - input = buffer.AsSpan(0, 11); - destination = buffer.AsSpan(5, buffer.Length - 5); + buffer = "\u4F8B\u3048\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0".ToCharArray(); + input = buffer.AsSpan(0, 2); + destination = buffer.AsSpan(1, buffer.Length - 1); Assert.True(idn.TryGetAscii(input, destination, out charsWritten)); Assert.Equal(11, charsWritten); - Assert.Equal("example.com", new string(buffer, 5, charsWritten)); + Assert.Equal("xn--r8jz45g", new string(buffer, 1, charsWritten)); } } } diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs index 88e815914ca3a6..5be8ad1976cf62 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs @@ -221,24 +221,28 @@ public void TryGetUnicode_Invalid(string ascii, 
int index, int count, Type excep [Fact] public void TryGetUnicode_OverlappingBuffers() { - // Test with overlapping input and destination buffers - // The native functions should handle this correctly by copying to destination + // Test with overlapping input and destination buffers using ACE-encoded inputs + // that require actual native API calls for conversion var idn = new IdnMapping(); // Test case: input and destination start at same location - char[] buffer = "xn--nxasmq5b.com\0\0\0\0\0\0\0\0\0\0".ToCharArray(); - ReadOnlySpan input = buffer.AsSpan(0, 16); // "xn--nxasmq5b.com" + // "xn--r8jz45g" converts to "例え" (2 Japanese chars) + char[] buffer = "xn--r8jz45g\0\0\0\0\0\0\0\0\0\0".ToCharArray(); + ReadOnlySpan input = buffer.AsSpan(0, 11); // "xn--r8jz45g" Span destination = buffer.AsSpan(0, buffer.Length); Assert.True(idn.TryGetUnicode(input, destination, out int charsWritten)); - // The expected output is the Unicode equivalent + Assert.Equal(2, charsWritten); + Assert.Equal("\u4F8B\u3048", new string(buffer, 0, charsWritten)); // "例え" // Test case: destination offset but overlapping - buffer = "xn--nxasmq5b.com\0\0\0\0\0\0\0\0\0\0".ToCharArray(); - input = buffer.AsSpan(0, 16); + buffer = "xn--r8jz45g\0\0\0\0\0\0\0\0\0\0".ToCharArray(); + input = buffer.AsSpan(0, 11); destination = buffer.AsSpan(5, buffer.Length - 5); Assert.True(idn.TryGetUnicode(input, destination, out charsWritten)); + Assert.Equal(2, charsWritten); + Assert.Equal("\u4F8B\u3048", new string(buffer, 5, charsWritten)); // "例え" } } } From c753f6b0a8f79055d9e0d1f548c04a93b13b7e3b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 15:02:28 +0000 Subject: [PATCH 16/21] Remove overlapping buffer tests and document that overlapping is NOT supported Co-authored-by: jkotas <6668460+jkotas@users.noreply.github.com> --- .../src/System/Globalization/IdnMapping.cs | 4 +-- .../IdnMapping/IdnMappingGetAsciiTests.cs | 27 
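------------------- .../IdnMapping/IdnMappingGetUnicodeTests.cs | 27 ------------------- 3 files changed, 2 insertions(+), 56 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index ed5aedc5a72e01..3739982747abee 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -98,7 +98,7 @@ public string GetAscii(string unicode, int index, int count) /// Encodes a Unicode domain name to its ASCII (Punycode) equivalent. /// </summary> /// <param name="unicode">The Unicode domain name to convert.</param> - /// <param name="destination">The buffer to write the ASCII result to.</param> + /// <param name="destination">The buffer to write the ASCII result to. This buffer must not overlap with <paramref name="unicode"/>.</param> /// <param name="charsWritten">When this method returns, contains the number of characters that were written to <paramref name="destination"/>.</param> /// <returns><see langword="true"/> if the conversion was successful and the result was written to <paramref name="destination"/>; otherwise, <see langword="false"/> if <paramref name="destination"/> is too small to contain the result.</returns> /// <exception cref="ArgumentException"><paramref name="unicode"/> is invalid based on the <see cref="AllowUnassigned"/> and <see cref="UseStd3AsciiRules"/> properties, and the IDNA standard.</exception> @@ -165,7 +165,7 @@ public string GetUnicode(string ascii, int index, int count) /// Decodes one or more encoded domain name labels to a string of Unicode characters. /// </summary> /// <param name="ascii">The ASCII domain name to convert. The string may contain one or more labels, where each label is prefixed by "xn--".</param> - /// <param name="destination">The buffer to write the Unicode result to.</param> + /// <param name="destination">The buffer to write the Unicode result to. This buffer must not overlap with <paramref name="ascii"/>.</param> /// <param name="charsWritten">When this method returns, contains the number of characters that were written to <paramref name="destination"/>.</param> /// <returns><see langword="true"/> if the conversion was successful and the result was written to <paramref name="destination"/>; otherwise, <see langword="false"/> if <paramref name="destination"/> is too small to contain the result.</returns> /// <exception cref="ArgumentException"><paramref name="ascii"/> is invalid based on the <see cref="AllowUnassigned"/> and <see cref="UseStd3AsciiRules"/> properties, and the IDNA standard.</exception>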
diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs index 9b317cb296de59..b9aee8b0714640 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs @@ -281,32 +281,5 @@ public void TryGetAscii_Invalid(string unicode, int index, int count, Type excep var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true }; Assert.Throws(exceptionType, () => idnStd3.TryGetAscii(slice, destination, out _)); } - - [Fact] - public void TryGetAscii_OverlappingBuffers() - { - // Test with overlapping input and destination buffers using non-ASCII inputs - // that require actual native API calls for conversion - var idn = new IdnMapping(); - - // Test case: input and destination start at same location - // Using Japanese characters that convert to "xn--r8jz45g" (11 chars) - char[] buffer = "\u4F8B\u3048\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0".ToCharArray(); - ReadOnlySpan input = buffer.AsSpan(0, 2); // "例え" (2 Japanese chars) - Span destination = buffer.AsSpan(0, buffer.Length); - - Assert.True(idn.TryGetAscii(input, destination, out int charsWritten)); - Assert.Equal(11, charsWritten); - Assert.Equal("xn--r8jz45g", new string(buffer, 0, charsWritten)); - - // Test case: destination offset but overlapping - buffer = "\u4F8B\u3048\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0".ToCharArray(); - input = buffer.AsSpan(0, 2); - destination = buffer.AsSpan(1, buffer.Length - 1); - - Assert.True(idn.TryGetAscii(input, destination, out charsWritten)); - Assert.Equal(11, charsWritten); - Assert.Equal("xn--r8jz45g", new string(buffer, 1, charsWritten)); - } } } diff --git 
a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs index 5be8ad1976cf62..3c3295e5329fce 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs @@ -217,32 +217,5 @@ public void TryGetUnicode_Invalid(string ascii, int index, int count, Type excep var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true }; Assert.Throws(exceptionType, () => idnStd3.TryGetUnicode(slice, destination, out _)); } - - [Fact] - public void TryGetUnicode_OverlappingBuffers() - { - // Test with overlapping input and destination buffers using ACE-encoded inputs - // that require actual native API calls for conversion - var idn = new IdnMapping(); - - // Test case: input and destination start at same location - // "xn--r8jz45g" converts to "例え" (2 Japanese chars) - char[] buffer = "xn--r8jz45g\0\0\0\0\0\0\0\0\0\0".ToCharArray(); - ReadOnlySpan input = buffer.AsSpan(0, 11); // "xn--r8jz45g" - Span destination = buffer.AsSpan(0, buffer.Length); - - Assert.True(idn.TryGetUnicode(input, destination, out int charsWritten)); - Assert.Equal(2, charsWritten); - Assert.Equal("\u4F8B\u3048", new string(buffer, 0, charsWritten)); // "例え" - - // Test case: destination offset but overlapping - buffer = "xn--r8jz45g\0\0\0\0\0\0\0\0\0\0".ToCharArray(); - input = buffer.AsSpan(0, 11); - destination = buffer.AsSpan(5, buffer.Length - 5); - - Assert.True(idn.TryGetUnicode(input, destination, out charsWritten)); - Assert.Equal(2, charsWritten); - Assert.Equal("\u4F8B\u3048", new string(buffer, 5, charsWritten)); // "例え" - } } } From 39fd8aee5c648e290fe1a7975396aeaa5e9663c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" 
<198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 18:54:10 +0000 Subject: [PATCH 17/21] Add overlapping buffer check for TryGetAscii and TryGetUnicode, throw ArgumentException when buffers overlap Co-authored-by: MihaZupan <25307628+MihaZupan@users.noreply.github.com> --- .../src/System/Globalization/IdnMapping.cs | 14 +++++++++++-- .../IdnMapping/IdnMappingGetAsciiTests.cs | 21 +++++++++++++++++++ .../IdnMapping/IdnMappingGetUnicodeTests.cs | 21 +++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 3739982747abee..8a7cfff32638d5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -101,7 +101,7 @@ public string GetAscii(string unicode, int index, int count) /// <param name="destination">The buffer to write the ASCII result to. This buffer must not overlap with <paramref name="unicode"/>.</param> /// <param name="charsWritten">When this method returns, contains the number of characters that were written to <paramref name="destination"/>.</param> /// <returns><see langword="true"/> if the conversion was successful and the result was written to <paramref name="destination"/>; otherwise, <see langword="false"/> if <paramref name="destination"/> is too small to contain the result.</returns> - /// <exception cref="ArgumentException"><paramref name="unicode"/> is invalid based on the <see cref="AllowUnassigned"/> and <see cref="UseStd3AsciiRules"/> properties, and the IDNA standard.</exception> + /// <exception cref="ArgumentException"><paramref name="unicode"/> is invalid based on the <see cref="AllowUnassigned"/> and <see cref="UseStd3AsciiRules"/> properties, and the IDNA standard, or the source and destination buffers overlap.</exception>
public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out int charsWritten) { if (unicode.Length == 0) @@ -113,6 +113,11 @@ public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); } + if (MemoryMarshal.AsBytes(unicode).Overlaps(MemoryMarshal.AsBytes(destination))) + { + ThrowHelper.ThrowArgumentException(ExceptionResource.InvalidOperation_SpanOverlappedOperation); + } + if (GlobalizationMode.Invariant) { return TryGetAsciiInvariant(unicode, destination, out charsWritten); @@ -168,7 +173,7 @@ public string GetUnicode(string ascii, int index, int count) /// The buffer to write the Unicode result to. This buffer must not overlap with . /// When this method returns, contains the number of characters that were written to . /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. - /// is invalid based on the and properties, and the IDNA standard. + /// is invalid based on the and properties, and the IDNA standard, or the source and destination buffers overlap. public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out int charsWritten) { // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. 
@@ -177,6 +182,11 @@ public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out if (ascii.Length > 0 && ascii[^1] == (char)0) throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + if (MemoryMarshal.AsBytes(ascii).Overlaps(MemoryMarshal.AsBytes(destination))) + { + ThrowHelper.ThrowArgumentException(ExceptionResource.InvalidOperation_SpanOverlappedOperation); + } + if (GlobalizationMode.Invariant) { return TryGetUnicodeInvariant(ascii, destination, out charsWritten); diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs index b9aee8b0714640..027892ba742557 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs @@ -281,5 +281,26 @@ public void TryGetAscii_Invalid(string unicode, int index, int count, Type excep var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true }; Assert.Throws(exceptionType, () => idnStd3.TryGetAscii(slice, destination, out _)); } + + [Fact] + public void TryGetAscii_OverlappingBuffers_ThrowsArgumentException() + { + var idn = new IdnMapping(); + char[] buffer = new char[100]; + + // Write unicode input to the buffer + string unicode = "\u0101\u0062\u0063"; // "ābc" + unicode.AsSpan().CopyTo(buffer); + + // Test overlapping: input and destination start at same location + Assert.Throws(() => idn.TryGetAscii(buffer.AsSpan(0, unicode.Length), buffer.AsSpan(0, buffer.Length), out _)); + + // Test overlapping: destination starts inside input + Assert.Throws(() => idn.TryGetAscii(buffer.AsSpan(0, unicode.Length), buffer.AsSpan(1, buffer.Length - 1), out _)); + + // Test overlapping: input starts inside destination + 
unicode.AsSpan().CopyTo(buffer.AsSpan(10)); + Assert.Throws(() => idn.TryGetAscii(buffer.AsSpan(10, unicode.Length), buffer.AsSpan(0, buffer.Length), out _)); + } } } diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs index 3c3295e5329fce..49d1ba9c5437f6 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs @@ -217,5 +217,26 @@ public void TryGetUnicode_Invalid(string ascii, int index, int count, Type excep var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true }; Assert.Throws(exceptionType, () => idnStd3.TryGetUnicode(slice, destination, out _)); } + + [Fact] + public void TryGetUnicode_OverlappingBuffers_ThrowsArgumentException() + { + var idn = new IdnMapping(); + char[] buffer = new char[100]; + + // Write ASCII input to the buffer + string ascii = "xn--ab-dla"; // represents "aāb" + ascii.AsSpan().CopyTo(buffer); + + // Test overlapping: input and destination start at same location + Assert.Throws(() => idn.TryGetUnicode(buffer.AsSpan(0, ascii.Length), buffer.AsSpan(0, buffer.Length), out _)); + + // Test overlapping: destination starts inside input + Assert.Throws(() => idn.TryGetUnicode(buffer.AsSpan(0, ascii.Length), buffer.AsSpan(1, buffer.Length - 1), out _)); + + // Test overlapping: input starts inside destination + ascii.AsSpan().CopyTo(buffer.AsSpan(10)); + Assert.Throws(() => idn.TryGetUnicode(buffer.AsSpan(10, ascii.Length), buffer.AsSpan(0, buffer.Length), out _)); + } } } From 354d564931751e693aafd32ffcbc5c2d09c693ea Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 19:05:31 +0000 Subject: [PATCH 
18/21] Simplify overlapping buffer check to use source.Overlaps(destination) like Normalization.cs Co-authored-by: MihaZupan <25307628+MihaZupan@users.noreply.github.com> --- .../src/System/Globalization/IdnMapping.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 8a7cfff32638d5..f60f706171d291 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -113,7 +113,7 @@ public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); } - if (MemoryMarshal.AsBytes(unicode).Overlaps(MemoryMarshal.AsBytes(destination))) + if (unicode.Overlaps(destination)) { ThrowHelper.ThrowArgumentException(ExceptionResource.InvalidOperation_SpanOverlappedOperation); } @@ -182,7 +182,7 @@ public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out if (ascii.Length > 0 && ascii[^1] == (char)0) throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); - if (MemoryMarshal.AsBytes(ascii).Overlaps(MemoryMarshal.AsBytes(destination))) + if (ascii.Overlaps(destination)) { ThrowHelper.ThrowArgumentException(ExceptionResource.InvalidOperation_SpanOverlappedOperation); } From b7952d89c6bd1e06d816b1aa1b904cb8f80953ef Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 27 Jan 2026 02:27:46 +0000 Subject: [PATCH 19/21] Apply formatting suggestions: add blank lines and braces to if statements Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../src/System/Globalization/IdnMapping.cs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index f60f706171d291..f6d3822bb07ca5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -108,6 +108,7 @@ public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out { throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode)); } + if (unicode[^1] == 0) { throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); @@ -180,7 +181,9 @@ public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. // The Win32 APIs fail on an embedded null, but not on a terminating null. if (ascii.Length > 0 && ascii[^1] == (char)0) + { throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + } if (ascii.Overlaps(destination)) { @@ -250,6 +253,7 @@ private string GetAsciiInvariant(string unicodeString, int index, int count) { return unicodeString; } + return unicode.ToString(); } @@ -282,6 +286,7 @@ private bool TryGetAsciiInvariant(ReadOnlySpan unicode, Span destina charsWritten = unicode.Length; return true; } + charsWritten = 0; return false; } @@ -657,7 +662,9 @@ private string GetUnicodeInvariant(string ascii, int index, int count) // Output name MUST obey IDNA rules & round trip (casing differences are allowed) string asciiRoundtrip = GetAscii(strUnicode); if (!asciiRoundtrip.Equals(asciiSlice, StringComparison.OrdinalIgnoreCase)) + { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); + } // If the ASCII round-trip equals the original string, return it as-is (no allocation) if (index == 0 && count == ascii.Length && strUnicode.Equals(ascii, 
StringComparison.OrdinalIgnoreCase)) @@ -678,7 +685,9 @@ private bool TryGetUnicodeInvariant(ReadOnlySpan ascii, Span destina // Output name MUST obey IDNA rules & round trip (casing differences are allowed) if (!asciiString.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase)) + { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); + } if (strUnicode.Length <= destination.Length) { From e3397b343b0243bcc003b0bc6ac85e5a415b5087 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:32:53 +0000 Subject: [PATCH 20/21] Fix test failures: skip invalid index/count entries in TryGetAscii_Invalid and TryGetUnicode_Invalid, use Std3-compatible test data for TryGetAscii_WithFlags Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../IdnMapping/IdnMappingGetAsciiTests.cs | 32 ++++++++++++++++++- .../IdnMapping/IdnMappingGetUnicodeTests.cs | 13 ++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs index 027892ba742557..2aa4cd07a21da9 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs @@ -246,7 +246,7 @@ public void TryGetAscii_NullContaining_ThrowsArgumentException(string unicode) } [Theory] - [MemberData(nameof(GetAscii_TestData))] + [MemberData(nameof(GetAscii_Std3Compatible_TestData))] public void TryGetAscii_WithFlags(string unicode, int index, int count, string expected) { // Test with UseStd3AsciiRules = true and AllowUnassigned = true @@ -263,6 +263,23 @@ public void TryGetAscii_WithFlags(string unicode, int index, 
int count, string e Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); } + /// + /// Test data compatible with UseStd3AsciiRules=true (excludes special ASCII characters). + /// + public static IEnumerable GetAscii_Std3Compatible_TestData() + { + // Only include alphanumeric ASCII and non-ASCII test data that works with Std3 rules + yield return new object[] { "\u0101", 0, 1, "xn--yda" }; + yield return new object[] { "\u0101\u0061\u0041", 0, 3, "xn--aa-cla" }; + yield return new object[] { "\u0061\u0101\u0062", 0, 3, "xn--ab-dla" }; + yield return new object[] { "\u0061\u0062\u0101", 0, 3, "xn--ab-ela" }; + yield return new object[] { "\uD800\uDF00\uD800\uDF01\uD800\uDF02", 0, 6, "xn--097ccd" }; // Surrogate pairs + yield return new object[] { "\u0061\u0062\u0063", 0, 3, "\u0061\u0062\u0063" }; // ASCII only code points + yield return new object[] { "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067", 0, 7, "xn--d9juau41awczczp" }; // Non-ASCII only code points + yield return new object[] { "\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0", 0, 9, "xn--de-jg4avhby1noc0d" }; // ASCII and non-ASCII code points + yield return new object[] { "\u0061\u0062\u0063.\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067.\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0", 0, 21, "abc.xn--d9juau41awczczp.xn--de-jg4avhby1noc0d" }; // Fully qualified domain name + } + [Theory] [MemberData(nameof(GetAscii_Invalid_TestData))] public void TryGetAscii_Invalid(string unicode, int index, int count, Type exceptionType) @@ -272,6 +289,19 @@ public void TryGetAscii_Invalid(string unicode, int index, int count, Type excep return; // TryGetAscii takes ReadOnlySpan, which can't be null } + // Skip entries with invalid index/count (those test the GetAscii(string, int, int) validation, not the span content validation) + if (index < 0 || count < 0 || index > unicode.Length || index + count > unicode.Length) + { + return; + } + + // Also skip empty 
count tests - they test ArgumentException for empty string validation + // but TryGetAscii span-based API doesn't have index/count overloads + if (count == 0) + { + return; + } + string slice = unicode.Substring(index, count); char[] destination = new char[100]; diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs index 49d1ba9c5437f6..4111ceb4bb5724 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs @@ -208,6 +208,19 @@ public void TryGetUnicode_Invalid(string ascii, int index, int count, Type excep return; // TryGetUnicode takes ReadOnlySpan, which can't be null } + // Skip entries with invalid index/count (those test the GetUnicode(string, int, int) validation, not the span content validation) + if (index < 0 || count < 0 || index > ascii.Length || index + count > ascii.Length) + { + return; + } + + // Also skip empty count tests - they test ArgumentException for empty string validation + // but TryGetUnicode span-based API doesn't have index/count overloads + if (count == 0) + { + return; + } + string slice = ascii.Substring(index, count); char[] destination = new char[100]; From 15aaef5cd0cd6958b7eef18fa162b2a1cf2a3773 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:12:08 +0000 Subject: [PATCH 21/21] Address feedback: add blank line before return, move == 0 check inside <= destination.Length block Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../System/Globalization/IdnMapping.Icu.cs | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs index 2a260dcad3e0b2..406ee60d6143e7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs @@ -45,6 +45,7 @@ private string IcuGetAsciiCore(string unicodeString, int index, int count) { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); } + return GetStringForOutput(unicodeString, unicode, outputHeap.AsSpan(0, actualLength)); } @@ -58,13 +59,13 @@ private bool IcuTryGetAsciiCore(ReadOnlySpan unicode, Span destinati int actualLength = Interop.Globalization.ToAscii(flags, unicode, unicode.Length, destination, destination.Length); - if (actualLength == 0) - { - throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); - } - if (actualLength <= destination.Length) { + if (actualLength == 0) + { + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); + } + charsWritten = actualLength; return true; } @@ -129,13 +130,13 @@ private bool IcuTryGetUnicodeCore(ReadOnlySpan ascii, Span destinati int actualLength = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, destination, destination.Length); - if (actualLength == 0) - { - throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); - } - if (actualLength <= destination.Length) { + if (actualLength == 0) + { + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); + } + charsWritten = actualLength; return true; }