diff --git a/src/libraries/Common/src/Interop/Interop.Idna.cs b/src/libraries/Common/src/Interop/Interop.Idna.cs index f5a8e90adcbeb0..e62e82005389f7 100644 --- a/src/libraries/Common/src/Interop/Interop.Idna.cs +++ b/src/libraries/Common/src/Interop/Interop.Idna.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System; using System.Runtime.InteropServices; internal static partial class Interop @@ -11,9 +12,9 @@ internal static partial class Globalization internal const int UseStd3AsciiRules = 0x2; [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToAscii", StringMarshalling = StringMarshalling.Utf16)] - internal static unsafe partial int ToAscii(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); + internal static partial int ToAscii(uint flags, ReadOnlySpan src, int srcLen, Span dstBuffer, int dstBufferCapacity); [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToUnicode", StringMarshalling = StringMarshalling.Utf16)] - internal static unsafe partial int ToUnicode(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); + internal static partial int ToUnicode(uint flags, ReadOnlySpan src, int srcLen, Span dstBuffer, int dstBufferCapacity); } } diff --git a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs index c5f56fbfb44fe7..ea4187ad357766 100644 --- a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs +++ b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Idna.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System; using System.Runtime.InteropServices; internal static partial class Interop @@ -12,19 +13,19 @@ internal static partial class Normaliz // [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] - internal static unsafe partial int IdnToAscii( + internal static partial int IdnToAscii( uint dwFlags, - char* lpUnicodeCharStr, + ReadOnlySpan lpUnicodeCharStr, int cchUnicodeChar, - char* lpASCIICharStr, + Span lpASCIICharStr, int cchASCIIChar); [LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] - internal static unsafe partial int IdnToUnicode( + internal static partial int IdnToUnicode( uint dwFlags, - char* lpASCIICharStr, + ReadOnlySpan lpASCIICharStr, int cchASCIIChar, - char* lpUnicodeCharStr, + Span lpUnicodeCharStr, int cchUnicodeChar); internal const int IDN_ALLOW_UNASSIGNED = 0x1; diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs index 1b13711ff1e59c..fced8e03130913 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CharUnicodeInfo.cs @@ -48,7 +48,7 @@ internal static StrongBidiCategory GetBidiCategory(string s, int index) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); } - return GetBidiCategoryNoBoundsChecks((uint)GetCodePointFromString(s, index)); + return GetBidiCategory((ReadOnlySpan)s, index); } internal static StrongBidiCategory GetBidiCategory(StringBuilder s, int index) @@ -91,6 +91,33 @@ private static StrongBidiCategory GetBidiCategoryNoBoundsChecks(uint codePoint) return bidiCategory; } + internal static StrongBidiCategory GetBidiCategory(ReadOnlySpan s, int index) + { + Debug.Assert(index >= 0 && index < s.Length, "index < s.Length"); + + // The logic below follows Table 3-5 in the Unicode 
Standard, Sec. 3.9. + // First char (high surrogate) = 110110wwwwxxxxxx + // Second char (low surrogate) = 110111xxxxxxxxxx + + int c = (int)s[index]; + if (index < s.Length - 1) + { + int temp1 = c - HIGH_SURROGATE_START; // temp1 = 000000wwwwxxxxxx + if ((uint)temp1 <= HIGH_SURROGATE_RANGE) + { + int temp2 = (int)s[index + 1] - LOW_SURROGATE_START; // temp2 = 000000xxxxxxxxxx + if ((uint)temp2 <= HIGH_SURROGATE_RANGE) + { + // |--------temp1--||-temp2--| + // 00000uuuuuuxxxxxxxxxxxxxxxx (where uuuuu = wwww + 1) + c = (temp1 << 10) + temp2 + UNICODE_PLANE01_START; + } + } + } + + return GetBidiCategoryNoBoundsChecks((uint)c); + } + /* * GetDecimalDigitValue * ==================== @@ -115,7 +142,7 @@ public static int GetDecimalDigitValue(string s, int index) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); } - return GetDecimalDigitValueInternalNoBoundsCheck((uint)GetCodePointFromString(s, index)); + return GetDecimalDigitValueInternalNoBoundsCheck((uint)GetCodePoint(s, index)); } private static int GetDecimalDigitValueInternalNoBoundsCheck(uint codePoint) @@ -149,7 +176,7 @@ public static int GetDigitValue(string s, int index) ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); } - return GetDigitValueInternalNoBoundsCheck((uint)GetCodePointFromString(s, index)); + return GetDigitValueInternalNoBoundsCheck((uint)GetCodePoint(s, index)); } private static int GetDigitValueInternalNoBoundsCheck(uint codePoint) @@ -228,7 +255,7 @@ public static double GetNumericValue(string s, int index) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static double GetNumericValueInternal(string s, int index) => GetNumericValueNoBoundsCheck((uint)GetCodePointFromString(s, index)); + internal static double GetNumericValueInternal(string s, int index) => GetNumericValueNoBoundsCheck((uint)GetCodePoint(s, index)); private static double GetNumericValueNoBoundsCheck(uint codePoint) { @@ -365,7 +392,7 @@ internal static 
UnicodeCategory GetUnicodeCategoryInternal(string value, int ind Debug.Assert(value != null, "value can not be null"); Debug.Assert(index < value.Length); - return GetUnicodeCategoryNoBoundsChecks((uint)GetCodePointFromString(value, index)); + return GetUnicodeCategoryNoBoundsChecks((uint)GetCodePoint(value, index)); } /// @@ -378,7 +405,7 @@ internal static UnicodeCategory GetUnicodeCategoryInternal(string str, int index Debug.Assert(str.Length > 0); Debug.Assert(index >= 0 && index < str.Length); - uint codePoint = (uint)GetCodePointFromString(str, index); + uint codePoint = (uint)GetCodePoint(str, index); UnicodeDebug.AssertIsValidCodePoint(codePoint); charLength = (codePoint >= UNICODE_PLANE01_START) ? 2 /* surrogate pair */ : 1 /* BMP char */; @@ -406,9 +433,8 @@ private static UnicodeCategory GetUnicodeCategoryNoBoundsChecks(uint codePoint) /// WARNING: since it doesn't throw an exception it CAN return a value /// in the surrogate range D800-DFFF, which is not a legal scalar value. 
/// - private static int GetCodePointFromString(string s, int index) + private static int GetCodePoint(ReadOnlySpan s, int index) { - Debug.Assert(s != null); Debug.Assert((uint)index < (uint)s.Length, "index < s.Length"); int codePoint = 0; diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs index 142c2b01be8032..406ee60d6143e7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Icu.cs @@ -7,14 +7,14 @@ namespace System.Globalization { public sealed partial class IdnMapping { - private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int count) + private string IcuGetAsciiCore(string unicodeString, int index, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(unicodeString != null && unicodeString.Length >= count); + ReadOnlySpan unicode = unicodeString.AsSpan(index, count); uint flags = IcuFlags; - CheckInvalidIdnCharacters(unicode, count, flags, nameof(unicode)); + CheckInvalidIdnCharacters(unicode, flags, nameof(unicode)); const int StackallocThreshold = 512; // Each unicode character is represented by up to 3 ASCII chars @@ -23,16 +23,16 @@ private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int c int actualLength; if ((uint)estimatedLength < StackallocThreshold) { - char* outputStack = stackalloc char[estimatedLength]; + Span outputStack = stackalloc char[estimatedLength]; actualLength = Interop.Globalization.ToAscii(flags, unicode, count, outputStack, estimatedLength); if (actualLength > 0 && actualLength <= estimatedLength) { - return GetStringForOutput(unicodeString, unicode, count, outputStack, actualLength); + return GetStringForOutput(unicodeString, unicode, outputStack.Slice(0, actualLength)); } } else { - 
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, null, 0); + actualLength = Interop.Globalization.ToAscii(flags, unicode, count, Span.Empty, 0); } if (actualLength == 0) { @@ -40,68 +40,109 @@ private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int c } char[] outputHeap = new char[actualLength]; - fixed (char* pOutputHeap = &outputHeap[0]) + actualLength = Interop.Globalization.ToAscii(flags, unicode, count, outputHeap, actualLength); + if (actualLength == 0 || actualLength > outputHeap.Length) { - actualLength = Interop.Globalization.ToAscii(flags, unicode, count, pOutputHeap, actualLength); - if (actualLength == 0 || actualLength > outputHeap.Length) + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); + } + + return GetStringForOutput(unicodeString, unicode, outputHeap.AsSpan(0, actualLength)); + } + + private bool IcuTryGetAsciiCore(ReadOnlySpan unicode, Span destination, out int charsWritten) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + + uint flags = IcuFlags; + CheckInvalidIdnCharacters(unicode, flags, nameof(unicode)); + + int actualLength = Interop.Globalization.ToAscii(flags, unicode, unicode.Length, destination, destination.Length); + + if (actualLength <= destination.Length) + { + if (actualLength == 0) { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode)); } - return GetStringForOutput(unicodeString, unicode, count, pOutputHeap, actualLength); + + charsWritten = actualLength; + return true; } + + charsWritten = 0; + return false; } - private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int count) + private string IcuGetUnicodeCore(string asciiString, int index, int count) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(asciiString != null && asciiString.Length >= count); + ReadOnlySpan ascii = asciiString.AsSpan(index, count); uint flags = 
IcuFlags; - CheckInvalidIdnCharacters(ascii, count, flags, nameof(ascii)); + CheckInvalidIdnCharacters(ascii, flags, nameof(ascii)); const int StackAllocThreshold = 512; if ((uint)count < StackAllocThreshold) { - char* output = stackalloc char[count]; - return IcuGetUnicodeCore(asciiString, ascii, count, flags, output, count, reattempt: true); + Span output = stackalloc char[count]; + return IcuGetUnicodeCore(asciiString, ascii, flags, output, reattempt: true); } else { char[] output = new char[count]; - fixed (char* pOutput = &output[0]) - { - return IcuGetUnicodeCore(asciiString, ascii, count, flags, pOutput, count, reattempt: true); - } + return IcuGetUnicodeCore(asciiString, ascii, flags, output, reattempt: true); } } - private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int count, uint flags, char* output, int outputLength, bool reattempt) + private static string IcuGetUnicodeCore(string asciiString, ReadOnlySpan ascii, uint flags, Span output, bool reattempt) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(!GlobalizationMode.UseNls); - Debug.Assert(asciiString != null && asciiString.Length >= count); - int realLen = Interop.Globalization.ToUnicode(flags, ascii, count, output, outputLength); + int realLen = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, output, output.Length); if (realLen == 0) { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); } - else if (realLen <= outputLength) + else if (realLen <= output.Length) { - return GetStringForOutput(asciiString, ascii, count, output, realLen); + return GetStringForOutput(asciiString, ascii, output.Slice(0, realLen)); } else if (reattempt) { char[] newOutput = new char[realLen]; - fixed (char* pNewOutput = newOutput) + return IcuGetUnicodeCore(asciiString, ascii, flags, newOutput, reattempt: false); + } + + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); + } + + private bool IcuTryGetUnicodeCore(ReadOnlySpan ascii, Span 
destination, out int charsWritten) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(!GlobalizationMode.UseNls); + + uint flags = IcuFlags; + CheckInvalidIdnCharacters(ascii, flags, nameof(ascii)); + + int actualLength = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, destination, destination.Length); + + if (actualLength <= destination.Length) + { + if (actualLength == 0) { - return IcuGetUnicodeCore(asciiString, ascii, count, flags, pNewOutput, realLen, reattempt: false); + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); } + + charsWritten = actualLength; + return true; } - throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); + charsWritten = 0; + return false; } private uint IcuFlags @@ -122,11 +163,11 @@ private uint IcuFlags /// To match Windows behavior, we walk the string ourselves looking for these /// bad characters so we can continue to throw ArgumentException in these cases. /// - private static unsafe void CheckInvalidIdnCharacters(char* s, int count, uint flags, string paramName) + private static void CheckInvalidIdnCharacters(ReadOnlySpan s, uint flags, string paramName) { if ((flags & Interop.Globalization.UseStd3AsciiRules) == 0) { - for (int i = 0; i < count; i++) + for (int i = 0; i < s.Length; i++) { char c = s[i]; diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs index f4569191426fa2..6117295a8abc2d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.Nls.cs @@ -9,16 +9,16 @@ namespace System.Globalization { public sealed partial class IdnMapping { - private unsafe string NlsGetAsciiCore(string unicodeString, char* unicode, int count) + private string NlsGetAsciiCore(string unicodeString, int index, int count) { 
Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(unicodeString != null && unicodeString.Length >= count); + ReadOnlySpan unicode = unicodeString.AsSpan(index, count); uint flags = NlsFlags; // Determine the required length - int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, null, 0); + int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, Span.Empty, 0); if (length == 0) { ThrowForZeroLength(unicode: true); @@ -28,44 +28,71 @@ private unsafe string NlsGetAsciiCore(string unicodeString, char* unicode, int c const int StackAllocThreshold = 512; // arbitrary limit to switch from stack to heap allocation if ((uint)length < StackAllocThreshold) { - char* output = stackalloc char[length]; - return NlsGetAsciiCore(unicodeString, unicode, count, flags, output, length); + Span output = stackalloc char[length]; + return NlsGetAsciiCore(unicodeString, unicode, flags, output); } else { char[] output = new char[length]; - fixed (char* pOutput = &output[0]) - { - return NlsGetAsciiCore(unicodeString, unicode, count, flags, pOutput, length); - } + return NlsGetAsciiCore(unicodeString, unicode, flags, output); } } - private static unsafe string NlsGetAsciiCore(string unicodeString, char* unicode, int count, uint flags, char* output, int outputLength) + private static string NlsGetAsciiCore(string unicodeString, ReadOnlySpan unicode, uint flags, Span output) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(unicodeString != null && unicodeString.Length >= count); - int length = Interop.Normaliz.IdnToAscii(flags, unicode, count, output, outputLength); + int length = Interop.Normaliz.IdnToAscii(flags, unicode, unicode.Length, output, output.Length); if (length == 0) { ThrowForZeroLength(unicode: true); } - Debug.Assert(length == outputLength); - return GetStringForOutput(unicodeString, unicode, count, output, length); + Debug.Assert(length == output.Length); + 
return GetStringForOutput(unicodeString, unicode, output.Slice(0, length)); } - private unsafe string NlsGetUnicodeCore(string asciiString, char* ascii, int count) + private bool NlsTryGetAsciiCore(ReadOnlySpan unicode, Span destination, out int charsWritten) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(asciiString != null && asciiString.Length >= count); uint flags = NlsFlags; // Determine the required length - int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, null, 0); + int length = Interop.Normaliz.IdnToAscii(flags, unicode, unicode.Length, Span.Empty, 0); + if (length == 0) + { + ThrowForZeroLength(unicode: true); + } + + if (length > destination.Length) + { + charsWritten = 0; + return false; + } + + // Do the conversion + int actualLength = Interop.Normaliz.IdnToAscii(flags, unicode, unicode.Length, destination, destination.Length); + if (actualLength == 0) + { + ThrowForZeroLength(unicode: true); + } + + charsWritten = actualLength; + return true; + } + + private string NlsGetUnicodeCore(string asciiString, int index, int count) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(GlobalizationMode.UseNls); + + ReadOnlySpan ascii = asciiString.AsSpan(index, count); + uint flags = NlsFlags; + + // Determine the required length + int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, Span.Empty, 0); if (length == 0) { ThrowForZeroLength(unicode: false); @@ -75,32 +102,59 @@ private unsafe string NlsGetUnicodeCore(string asciiString, char* ascii, int cou const int StackAllocThreshold = 512; // arbitrary limit to switch from stack to heap allocation if ((uint)length < StackAllocThreshold) { - char* output = stackalloc char[length]; - return NlsGetUnicodeCore(asciiString, ascii, count, flags, output, length); + Span output = stackalloc char[length]; + return NlsGetUnicodeCore(asciiString, ascii, flags, output); } else { char[] output = new char[length]; - fixed (char* 
pOutput = &output[0]) - { - return NlsGetUnicodeCore(asciiString, ascii, count, flags, pOutput, length); - } + return NlsGetUnicodeCore(asciiString, ascii, flags, output); } } - private static unsafe string NlsGetUnicodeCore(string asciiString, char* ascii, int count, uint flags, char* output, int outputLength) + private static string NlsGetUnicodeCore(string asciiString, ReadOnlySpan ascii, uint flags, Span output) { Debug.Assert(!GlobalizationMode.Invariant); Debug.Assert(GlobalizationMode.UseNls); - Debug.Assert(asciiString != null && asciiString.Length >= count); - int length = Interop.Normaliz.IdnToUnicode(flags, ascii, count, output, outputLength); + int length = Interop.Normaliz.IdnToUnicode(flags, ascii, ascii.Length, output, output.Length); if (length == 0) { ThrowForZeroLength(unicode: false); } - Debug.Assert(length == outputLength); - return GetStringForOutput(asciiString, ascii, count, output, length); + Debug.Assert(length == output.Length); + return GetStringForOutput(asciiString, ascii, output.Slice(0, length)); + } + + private bool NlsTryGetUnicodeCore(ReadOnlySpan ascii, Span destination, out int charsWritten) + { + Debug.Assert(!GlobalizationMode.Invariant); + Debug.Assert(GlobalizationMode.UseNls); + + uint flags = NlsFlags; + + // Determine the required length + int length = Interop.Normaliz.IdnToUnicode(flags, ascii, ascii.Length, Span.Empty, 0); + if (length == 0) + { + ThrowForZeroLength(unicode: false); + } + + if (length > destination.Length) + { + charsWritten = 0; + return false; + } + + // Do the conversion + int actualLength = Interop.Normaliz.IdnToUnicode(flags, ascii, ascii.Length, destination, destination.Length); + if (actualLength == 0) + { + ThrowForZeroLength(unicode: false); + } + + charsWritten = actualLength; + return true; } private uint NlsFlags diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs index 
9d664c583648fa..f6d3822bb07ca5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/IdnMapping.cs @@ -26,6 +26,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Text; namespace System.Globalization @@ -88,15 +89,44 @@ public string GetAscii(string unicode, int index, int count) return GetAsciiInvariant(unicode, index, count); } - unsafe + return GlobalizationMode.UseNls ? + NlsGetAsciiCore(unicode, index, count) : + IcuGetAsciiCore(unicode, index, count); + } + + /// + /// Encodes a Unicode domain name to its ASCII (Punycode) equivalent. + /// + /// The Unicode domain name to convert. + /// The buffer to write the ASCII result to. This buffer must not overlap with . + /// When this method returns, contains the number of characters that were written to . + /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. + /// is invalid based on the and properties, and the IDNA standard, or the source and destination buffers overlap. + public bool TryGetAscii(ReadOnlySpan unicode, Span destination, out int charsWritten) + { + if (unicode.Length == 0) { - fixed (char* pUnicode = unicode) - { - return GlobalizationMode.UseNls ? 
- NlsGetAsciiCore(unicode, pUnicode + index, count) : - IcuGetAsciiCore(unicode, pUnicode + index, count); - } + throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode)); + } + + if (unicode[^1] == 0) + { + throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); + } + + if (unicode.Overlaps(destination)) + { + ThrowHelper.ThrowArgumentException(ExceptionResource.InvalidOperation_SpanOverlappedOperation); + } + + if (GlobalizationMode.Invariant) + { + return TryGetAsciiInvariant(unicode, destination, out charsWritten); } + + return GlobalizationMode.UseNls ? + NlsTryGetAsciiCore(unicode, destination, out charsWritten) : + IcuTryGetAsciiCore(unicode, destination, out charsWritten); } // Gets Unicode version of the string. Normalized and limited to IDNA characters. @@ -132,15 +162,42 @@ public string GetUnicode(string ascii, int index, int count) return GetUnicodeInvariant(ascii, index, count); } - unsafe + return GlobalizationMode.UseNls ? + NlsGetUnicodeCore(ascii, index, count) : + IcuGetUnicodeCore(ascii, index, count); + } + + /// + /// Decodes one or more encoded domain name labels to a string of Unicode characters. + /// + /// The ASCII domain name to convert. The string may contain one or more labels, where each label is prefixed by "xn--". + /// The buffer to write the Unicode result to. This buffer must not overlap with . + /// When this method returns, contains the number of characters that were written to . + /// if the conversion was successful and the result was written to ; otherwise, if is too small to contain the result. + /// is invalid based on the and properties, and the IDNA standard, or the source and destination buffers overlap. + public bool TryGetUnicode(ReadOnlySpan ascii, Span destination, out int charsWritten) + { + // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ. 
+ // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null. + // The Win32 APIs fail on an embedded null, but not on a terminating null. + if (ascii.Length > 0 && ascii[^1] == (char)0) { - fixed (char* pAscii = ascii) - { - return GlobalizationMode.UseNls ? - NlsGetUnicodeCore(ascii, pAscii + index, count) : - IcuGetUnicodeCore(ascii, pAscii + index, count); - } + throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii)); + } + + if (ascii.Overlaps(destination)) + { + ThrowHelper.ThrowArgumentException(ExceptionResource.InvalidOperation_SpanOverlappedOperation); + } + + if (GlobalizationMode.Invariant) + { + return TryGetUnicodeInvariant(ascii, destination, out charsWritten); } + + return GlobalizationMode.UseNls ? + NlsTryGetUnicodeCore(ascii, destination, out charsWritten) : + IcuTryGetUnicodeCore(ascii, destination, out charsWritten); } public override bool Equals([NotNullWhen(true)] object? obj) => @@ -152,18 +209,19 @@ public override int GetHashCode() => (_allowUnassigned ? 100 : 200) + (_useStd3AsciiRules ? 1000 : 2000); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe string GetStringForOutput(string originalString, char* input, int inputLength, char* output, int outputLength) + private static string GetStringForOutput(string? 
originalString, ReadOnlySpan input, ReadOnlySpan output) { - Debug.Assert(inputLength > 0); + Debug.Assert(input.Length > 0); - if (originalString.Length == inputLength && - inputLength == outputLength && - Ordinal.EqualsIgnoreCase(ref *input, ref *output, inputLength)) + if (originalString is not null && + originalString.Length == input.Length && + input.Length == output.Length && + Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(input), ref MemoryMarshal.GetReference(output), input.Length)) { return originalString; } - return new string(output, 0, outputLength); + return output.ToString(); } // @@ -183,22 +241,59 @@ private static unsafe string GetStringForOutput(string originalString, char* inp private const int c_skew = 38; private const int c_damp = 700; - private string GetAsciiInvariant(string unicode, int index, int count) + private string GetAsciiInvariant(string unicodeString, int index, int count) { - if (index > 0 || count < unicode.Length) + ReadOnlySpan unicode = unicodeString.AsSpan(index, count); + + // Check for ASCII only string, which will be unchanged + if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true)) { - unicode = unicode.Substring(index, count); + // Return original string if the entire string was requested and it doesn't need modification + if (index == 0 && count == unicodeString.Length) + { + return unicodeString; + } + + return unicode.ToString(); } + // Cannot be null terminated (normalization won't help us with this one, and + // may have returned false before checking the whole string above) + Debug.Assert(unicode.Length >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now."); + if (unicode[^1] <= 0x1f) + { + throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); + } + + // May need to check Std3 rules again for non-ascii + if (UseStd3AsciiRules) + { + ValidateStd3AndAscii(unicode, true, false); + } + + // Go ahead and encode it + return 
PunycodeEncode(unicode); + } + + private bool TryGetAsciiInvariant(ReadOnlySpan unicode, Span destination, out int charsWritten) + { // Check for ASCII only string, which will be unchanged if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true)) { - return unicode; + if (unicode.Length <= destination.Length) + { + unicode.CopyTo(destination); + charsWritten = unicode.Length; + return true; + } + + charsWritten = 0; + return false; } // Cannot be null terminated (normalization won't help us with this one, and // may have returned false before checking the whole string above) - Debug.Assert(count >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now."); + Debug.Assert(unicode.Length >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now."); if (unicode[^1] <= 0x1f) { throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode)); @@ -211,11 +306,20 @@ private string GetAsciiInvariant(string unicode, int index, int count) } // Go ahead and encode it - return PunycodeEncode(unicode); + string result = PunycodeEncode(unicode); + if (result.Length <= destination.Length) + { + result.CopyTo(destination); + charsWritten = result.Length; + return true; + } + + charsWritten = 0; + return false; } // See if we're only ASCII - private static bool ValidateStd3AndAscii(string unicode, bool bUseStd3, bool bCheckAscii) + private static bool ValidateStd3AndAscii(ReadOnlySpan unicode, bool bUseStd3, bool bCheckAscii) { // If its empty, then its too small if (unicode.Length == 0) @@ -304,7 +408,7 @@ private static bool ValidateStd3AndAscii(string unicode, bool bUseStd3, bool bCh /* value can be any of the punycode_status values defined above */ /* except punycode_bad_input; if not punycode_success, then */ /* output_size and output might contain garbage. 
*/ - private static string PunycodeEncode(string unicode) + private static string PunycodeEncode(ReadOnlySpan unicode) { // 0 length strings aren't allowed if (unicode.Length == 0) @@ -322,7 +426,7 @@ private static string PunycodeEncode(string unicode) const string DotSeparators = ".\u3002\uFF0E\uFF61"; // Find end of this segment - iNextDot = unicode.AsSpan(iAfterLastDot).IndexOfAny(DotSeparators); + iNextDot = unicode.Slice(iAfterLastDot).IndexOfAny(DotSeparators); iNextDot = iNextDot < 0 ? unicode.Length : iNextDot + iAfterLastDot; // Only allowed to have empty . section at end (www.microsoft.com.) @@ -350,7 +454,7 @@ private static string PunycodeEncode(string unicode) // Check last char int iTest = iNextDot - 1; - if (char.IsLowSurrogate(unicode, iTest)) + if (char.IsLowSurrogate(unicode[iTest])) { iTest--; } @@ -369,7 +473,7 @@ private static string PunycodeEncode(string unicode) for (basicCount = iAfterLastDot; basicCount < iNextDot; basicCount++) { // Can't be lonely surrogate because it would've thrown in normalization - Debug.Assert(!char.IsLowSurrogate(unicode, basicCount), "[IdnMapping.punycode_encode]Unexpected low surrogate"); + Debug.Assert(!char.IsLowSurrogate(unicode[basicCount]), "[IdnMapping.punycode_encode]Unexpected low surrogate"); // Double check our bidi rules StrongBidiCategory testBidi = CharUnicodeInfo.GetBidiCategory(unicode, basicCount); @@ -395,7 +499,7 @@ private static string PunycodeEncode(string unicode) numProcessed++; } // If its a surrogate, skip the next since our bidi category tester doesn't handle it. 
- else if (char.IsSurrogatePair(unicode, basicCount)) + else if (basicCount + 1 < iNextDot && char.IsSurrogatePair(unicode[basicCount], unicode[basicCount + 1])) basicCount++; } @@ -410,7 +514,7 @@ private static string PunycodeEncode(string unicode) else { // If it has some non-basic code points the input cannot start with xn-- - if (unicode.AsSpan(iAfterLastDot).StartsWith(c_strAcePrefix, StringComparison.OrdinalIgnoreCase)) + if (unicode.Slice(iAfterLastDot).StartsWith(c_strAcePrefix, StringComparison.OrdinalIgnoreCase)) throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(unicode)); // Need to do ACE encoding @@ -439,7 +543,7 @@ private static string PunycodeEncode(string unicode) j < iNextDot; j += IsSupplementary(test) ? 2 : 1) { - test = char.ConvertToUtf32(unicode, j); + test = GetCodePoint(unicode, j); if (test >= n && test < m) m = test; } @@ -452,7 +556,7 @@ private static string PunycodeEncode(string unicode) for (j = iAfterLastDot; j < iNextDot; j += IsSupplementary(test) ? 2 : 1) { // Make sure we're aware of surrogates - test = char.ConvertToUtf32(unicode, j); + test = GetCodePoint(unicode, j); // Adjust for character position (only the chars in our string already, some // haven't been processed. @@ -528,6 +632,17 @@ private static bool Basic(uint cp) => // Is it in ASCII range? cp < 0x80; + private static int GetCodePoint(ReadOnlySpan s, int index) + { + // Check if the character at index is a high surrogate. 
+ if (char.IsHighSurrogate(s[index]) && index + 1 < s.Length && char.IsLowSurrogate(s[index + 1])) + { + return char.ConvertToUtf32(s[index], s[index + 1]); + } + + return s[index]; + } + // Validate Std3 rules for a character private static void ValidateStd3(char c, bool bNextToDot) { @@ -540,19 +655,49 @@ private static void ValidateStd3(char c, bool bNextToDot) private string GetUnicodeInvariant(string ascii, int index, int count) { - if (index > 0 || count < ascii.Length) + // Convert Punycode to Unicode + string asciiSlice = ascii.Substring(index, count); + string strUnicode = PunycodeDecode(asciiSlice); + + // Output name MUST obey IDNA rules & round trip (casing differences are allowed) + string asciiRoundtrip = GetAscii(strUnicode); + if (!asciiRoundtrip.Equals(asciiSlice, StringComparison.OrdinalIgnoreCase)) + { + throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); + } + + // If the decoded Unicode equals the entire input string (ignoring case), return the original string instance instead of the decoded copy + if (index == 0 && count == ascii.Length && strUnicode.Equals(ascii, StringComparison.OrdinalIgnoreCase)) { - // We're only using part of the string - ascii = ascii.Substring(index, count); + return ascii; } + + return strUnicode; + } + + private bool TryGetUnicodeInvariant(ReadOnlySpan ascii, Span destination, out int charsWritten) + { + // Convert the span to a string for PunycodeDecode since it uses string operations extensively + string asciiString = ascii.ToString(); + // Convert Punycode to Unicode - string strUnicode = PunycodeDecode(ascii); + string strUnicode = PunycodeDecode(asciiString); // Output name MUST obey IDNA rules & round trip (casing differences are allowed) - if (!ascii.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase)) + if (!asciiString.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase)) + { throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii)); + } - return strUnicode; + if (strUnicode.Length <=
destination.Length) + { + strUnicode.CopyTo(destination); + charsWritten = strUnicode.Length; + return true; + } + + charsWritten = 0; + return false; } /* PunycodeDecode() converts Punycode to Unicode. The input is */ diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs index 8ab493e0e83543..3df336ffbf6f0a 100644 --- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs @@ -224,6 +224,11 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild // b) if label is ascii and ace and not valid idn then just lowercase it // c) if label is ascii and ace and is valid idn then get its unicode eqvl // d) if label is unicode then clean it by running it through idnmapping + + // Buffer for intermediate ASCII form when processing non-ASCII labels + // Max label length is 63 chars, but punycode can expand - 256 is a safe upper bound + Span asciiBuffer = stackalloc char[256]; + for (int i = 0; i < hostname.Length; i++) { if (i != 0) @@ -241,11 +246,15 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild if (!Ascii.IsValid(label)) { + // For non-ASCII labels, first convert to ASCII (punycode), then to normalized Unicode + // Use span-based APIs to avoid intermediate string allocations try { - string asciiForm = s_idnMapping.GetAscii(hostname, i, label.Length); + bool asciiSuccess = s_idnMapping.TryGetAscii(label, asciiBuffer, out int asciiWritten); + Debug.Assert(asciiSuccess, "TryGetAscii should always succeed with a 255-char buffer for valid IDN labels"); - dest.Append(s_idnMapping.GetUnicode(asciiForm)); + // Now convert the ASCII form to Unicode and append directly to dest + AppendIdnUnicode(asciiBuffer.Slice(0, asciiWritten), ref dest); } catch (ArgumentException) { @@ -258,10 +267,10 @@ public static bool TryGetUnicodeEquivalent(string hostname, 
ref ValueStringBuild if (label.StartsWith("xn--", StringComparison.Ordinal)) { - // check ace validity + // Check ace validity - use span-based API to avoid string allocation try { - dest.Append(s_idnMapping.GetUnicode(hostname, i, label.Length)); + AppendIdnUnicode(label, ref dest); aceValid = true; } catch (ArgumentException) @@ -283,5 +292,20 @@ public static bool TryGetUnicodeEquivalent(string hostname, ref ValueStringBuild return true; } + + /// <summary> + /// Converts ASCII (punycode) to Unicode and appends directly to the ValueStringBuilder. + /// </summary> + private static void AppendIdnUnicode(scoped ReadOnlySpan ascii, ref ValueStringBuilder dest) + { + int charsWritten; + + while (!s_idnMapping.TryGetUnicode(ascii, dest.RawChars.Slice(dest.Length), out charsWritten)) + { + dest.EnsureCapacity(dest.Capacity + 1); + } + + dest.Length += charsWritten; + } } } diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 841e4e04c88fc0..5634545a383125 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -9626,6 +9626,8 @@ public IdnMapping() { } public string GetUnicode(string ascii) { throw null; } public string GetUnicode(string ascii, int index) { throw null; } public string GetUnicode(string ascii, int index, int count) { throw null; } + public bool TryGetAscii(System.ReadOnlySpan unicode, System.Span destination, out int charsWritten) { throw null; } + public bool TryGetUnicode(System.ReadOnlySpan ascii, System.Span destination, out int charsWritten) { throw null; } } public static partial class ISOWeek { diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs index 595b005c96d918..2aa4cd07a21da9 100644 ---
a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetAsciiTests.cs @@ -176,5 +176,161 @@ public void TestStringWithHyphenIn3rdAnd4thPlace() // Ensure we are not throwing on Linux because of the 3rd and 4th hyphens in the string. Assert.Equal(unicode, new IdnMapping().GetAscii(unicode)); } + + [Theory] + [MemberData(nameof(GetAscii_TestData))] + public void TryGetAscii(string unicode, int index, int count, string expected) + { + var idn = new IdnMapping(); + ReadOnlySpan unicodeSpan = unicode.AsSpan(index, count); + + // Test with exact size buffer + char[] destination = new char[expected.Length]; + Assert.True(idn.TryGetAscii(unicodeSpan, destination, out int charsWritten)); + Assert.Equal(expected.Length, charsWritten); + // IDN names are case-insensitive; the underlying API may lowercase the output + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + + // Test with larger buffer + destination = new char[expected.Length + 10]; + Assert.True(idn.TryGetAscii(unicodeSpan, destination, out charsWritten)); + Assert.Equal(expected.Length, charsWritten); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + } + + [Theory] + [MemberData(nameof(GetAscii_TestData))] + public void TryGetAscii_BufferTooSmall(string unicode, int index, int count, string expected) + { + if (expected.Length == 0) + { + return; + } + + var idn = new IdnMapping(); + ReadOnlySpan unicodeSpan = unicode.AsSpan(index, count); + + // Test with buffer that is too small + char[] destination = new char[expected.Length - 1]; + Assert.False(idn.TryGetAscii(unicodeSpan, destination, out int charsWritten)); + Assert.Equal(0, charsWritten); + } + + [Fact] + public void TryGetAscii_EmptyBuffer() + { + var idn = new IdnMapping(); + + // Test with empty 
destination when result would be non-empty + Assert.False(idn.TryGetAscii("abc", Span.Empty, out int charsWritten)); + Assert.Equal(0, charsWritten); + } + + [Theory] + [InlineData("")] + public void TryGetAscii_Empty_ThrowsArgumentException(string unicode) + { + var idn = new IdnMapping(); + char[] destination = new char[100]; + Assert.Throws(() => idn.TryGetAscii(unicode, destination, out _)); + } + + [Theory] + [InlineData("\u0101\u0000")] + [InlineData("\u0101\u0000\u0101")] + public void TryGetAscii_NullContaining_ThrowsArgumentException(string unicode) + { + var idn = new IdnMapping(); + char[] destination = new char[100]; + Assert.Throws(() => idn.TryGetAscii(unicode, destination, out _)); + } + + [Theory] + [MemberData(nameof(GetAscii_Std3Compatible_TestData))] + public void TryGetAscii_WithFlags(string unicode, int index, int count, string expected) + { + // Test with UseStd3AsciiRules = true and AllowUnassigned = true + var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true, AllowUnassigned = true }; + ReadOnlySpan unicodeSpan = unicode.AsSpan(index, count); + char[] destination = new char[expected.Length + 10]; + + Assert.True(idnStd3.TryGetAscii(unicodeSpan, destination, out int charsWritten)); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + + // Test with AllowUnassigned = false (default) + var idnNoUnassigned = new IdnMapping() { AllowUnassigned = false }; + Assert.True(idnNoUnassigned.TryGetAscii(unicodeSpan, destination, out charsWritten)); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + } + + /// <summary> + /// Test data compatible with UseStd3AsciiRules=true (excludes special ASCII characters).
+ /// </summary> + public static IEnumerable GetAscii_Std3Compatible_TestData() + { + // Only include alphanumeric ASCII and non-ASCII test data that works with Std3 rules + yield return new object[] { "\u0101", 0, 1, "xn--yda" }; + yield return new object[] { "\u0101\u0061\u0041", 0, 3, "xn--aa-cla" }; + yield return new object[] { "\u0061\u0101\u0062", 0, 3, "xn--ab-dla" }; + yield return new object[] { "\u0061\u0062\u0101", 0, 3, "xn--ab-ela" }; + yield return new object[] { "\uD800\uDF00\uD800\uDF01\uD800\uDF02", 0, 6, "xn--097ccd" }; // Surrogate pairs + yield return new object[] { "\u0061\u0062\u0063", 0, 3, "\u0061\u0062\u0063" }; // ASCII only code points + yield return new object[] { "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067", 0, 7, "xn--d9juau41awczczp" }; // Non-ASCII only code points + yield return new object[] { "\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0", 0, 9, "xn--de-jg4avhby1noc0d" }; // ASCII and non-ASCII code points + yield return new object[] { "\u0061\u0062\u0063.\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067.\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0", 0, 21, "abc.xn--d9juau41awczczp.xn--de-jg4avhby1noc0d" }; // Fully qualified domain name + } + + [Theory] + [MemberData(nameof(GetAscii_Invalid_TestData))] + public void TryGetAscii_Invalid(string unicode, int index, int count, Type exceptionType) + { + if (unicode is null) + { + return; // TryGetAscii takes ReadOnlySpan, which can't be null + } + + // Skip entries with invalid index/count (those test the GetAscii(string, int, int) validation, not the span content validation) + if (index < 0 || count < 0 || index > unicode.Length || index + count > unicode.Length) + { + return; + } + + // Also skip empty count tests - they test ArgumentException for empty string validation + // but TryGetAscii span-based API doesn't have index/count overloads + if (count == 0) + { + return; + } + + string slice = unicode.Substring(index, count); + char[] destination = new char[100]; + + var
idnNoStd3 = new IdnMapping() { UseStd3AsciiRules = false }; + Assert.Throws(exceptionType, () => idnNoStd3.TryGetAscii(slice, destination, out _)); + + var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true }; + Assert.Throws(exceptionType, () => idnStd3.TryGetAscii(slice, destination, out _)); + } + + [Fact] + public void TryGetAscii_OverlappingBuffers_ThrowsArgumentException() + { + var idn = new IdnMapping(); + char[] buffer = new char[100]; + + // Write unicode input to the buffer + string unicode = "\u0101\u0062\u0063"; // "ābc" + unicode.AsSpan().CopyTo(buffer); + + // Test overlapping: input and destination start at same location + Assert.Throws(() => idn.TryGetAscii(buffer.AsSpan(0, unicode.Length), buffer.AsSpan(0, buffer.Length), out _)); + + // Test overlapping: destination starts inside input + Assert.Throws(() => idn.TryGetAscii(buffer.AsSpan(0, unicode.Length), buffer.AsSpan(1, buffer.Length - 1), out _)); + + // Test overlapping: input starts inside destination + unicode.AsSpan().CopyTo(buffer.AsSpan(10)); + Assert.Throws(() => idn.TryGetAscii(buffer.AsSpan(10, unicode.Length), buffer.AsSpan(0, buffer.Length), out _)); + } } } diff --git a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs index 35a04512c1595b..4111ceb4bb5724 100644 --- a/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs +++ b/src/libraries/System.Runtime/tests/System.Globalization.Extensions.Tests/IdnMapping/IdnMappingGetUnicodeTests.cs @@ -121,5 +121,135 @@ static void getUnicode_Invalid(IdnMapping idnMapping, string ascii, int index, i getUnicode_Invalid(new IdnMapping() { UseStd3AsciiRules = false }, ascii, index, count, exceptionType); getUnicode_Invalid(new IdnMapping() { UseStd3AsciiRules = true }, ascii, index, count, exceptionType); } + 
+ [Theory] + [MemberData(nameof(GetUnicode_TestData))] + public void TryGetUnicode(string ascii, int index, int count, string expected) + { + var idn = new IdnMapping(); + ReadOnlySpan asciiSpan = ascii.AsSpan(index, count); + + // Test with exact size buffer + char[] destination = new char[expected.Length]; + Assert.True(idn.TryGetUnicode(asciiSpan, destination, out int charsWritten)); + Assert.Equal(expected.Length, charsWritten); + // IDN names are case-insensitive; the underlying API may lowercase the output + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + + // Test with larger buffer + destination = new char[expected.Length + 10]; + Assert.True(idn.TryGetUnicode(asciiSpan, destination, out charsWritten)); + Assert.Equal(expected.Length, charsWritten); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + } + + [Theory] + [MemberData(nameof(GetUnicode_TestData))] + public void TryGetUnicode_BufferTooSmall(string ascii, int index, int count, string expected) + { + if (expected.Length == 0) + { + return; + } + + var idn = new IdnMapping(); + ReadOnlySpan asciiSpan = ascii.AsSpan(index, count); + + // Test with buffer that is too small + char[] destination = new char[expected.Length - 1]; + Assert.False(idn.TryGetUnicode(asciiSpan, destination, out int charsWritten)); + Assert.Equal(0, charsWritten); + } + + [Fact] + public void TryGetUnicode_EmptyBuffer() + { + var idn = new IdnMapping(); + + // Test with empty destination when result would be non-empty + Assert.False(idn.TryGetUnicode("abc", Span.Empty, out int charsWritten)); + Assert.Equal(0, charsWritten); + } + + [Theory] + [InlineData("abc\u0000")] + [InlineData("ab\u0000c")] + public void TryGetUnicode_NullContaining_ThrowsArgumentException(string ascii) + { + var idn = new IdnMapping(); + char[] destination = new char[100]; + Assert.Throws(() => idn.TryGetUnicode(ascii, destination, out _)); + } + + 
[Theory] + [MemberData(nameof(GetUnicode_TestData))] + public void TryGetUnicode_WithFlags(string ascii, int index, int count, string expected) + { + // Test with UseStd3AsciiRules = true and AllowUnassigned = true + var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true, AllowUnassigned = true }; + ReadOnlySpan asciiSpan = ascii.AsSpan(index, count); + char[] destination = new char[expected.Length + 10]; + + Assert.True(idnStd3.TryGetUnicode(asciiSpan, destination, out int charsWritten)); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + + // Test with AllowUnassigned = false (default) + var idnNoUnassigned = new IdnMapping() { AllowUnassigned = false }; + Assert.True(idnNoUnassigned.TryGetUnicode(asciiSpan, destination, out charsWritten)); + Assert.Equal(expected, new string(destination, 0, charsWritten), StringComparer.OrdinalIgnoreCase); + } + + [Theory] + [MemberData(nameof(GetUnicode_Invalid_TestData))] + public void TryGetUnicode_Invalid(string ascii, int index, int count, Type exceptionType) + { + if (ascii is null) + { + return; // TryGetUnicode takes ReadOnlySpan, which can't be null + } + + // Skip entries with invalid index/count (those test the GetUnicode(string, int, int) validation, not the span content validation) + if (index < 0 || count < 0 || index > ascii.Length || index + count > ascii.Length) + { + return; + } + + // Also skip empty count tests - they test ArgumentException for empty string validation + // but TryGetUnicode span-based API doesn't have index/count overloads + if (count == 0) + { + return; + } + + string slice = ascii.Substring(index, count); + char[] destination = new char[100]; + + var idnNoStd3 = new IdnMapping() { UseStd3AsciiRules = false }; + Assert.Throws(exceptionType, () => idnNoStd3.TryGetUnicode(slice, destination, out _)); + + var idnStd3 = new IdnMapping() { UseStd3AsciiRules = true }; + Assert.Throws(exceptionType, () => idnStd3.TryGetUnicode(slice, 
destination, out _)); + } + + [Fact] + public void TryGetUnicode_OverlappingBuffers_ThrowsArgumentException() + { + var idn = new IdnMapping(); + char[] buffer = new char[100]; + + // Write ASCII input to the buffer + string ascii = "xn--ab-dla"; // represents "aāb" + ascii.AsSpan().CopyTo(buffer); + + // Test overlapping: input and destination start at same location + Assert.Throws(() => idn.TryGetUnicode(buffer.AsSpan(0, ascii.Length), buffer.AsSpan(0, buffer.Length), out _)); + + // Test overlapping: destination starts inside input + Assert.Throws(() => idn.TryGetUnicode(buffer.AsSpan(0, ascii.Length), buffer.AsSpan(1, buffer.Length - 1), out _)); + + // Test overlapping: input starts inside destination + ascii.AsSpan().CopyTo(buffer.AsSpan(10)); + Assert.Throws(() => idn.TryGetUnicode(buffer.AsSpan(10, ascii.Length), buffer.AsSpan(0, buffer.Length), out _)); + } } }