Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f5e0842
Initial plan
Copilot Jan 25, 2026
81ddd69
Add TryGetAscii and TryGetUnicode span-based APIs to IdnMapping
Copilot Jan 25, 2026
a4f93e4
Add tests for TryGetAscii and TryGetUnicode methods
Copilot Jan 25, 2026
511b2c5
Remove unnecessary TryGetAsciiCore/TryGetUnicodeCore indirection
Copilot Jan 25, 2026
5e3facf
Remove GetAsciiCore/GetUnicodeCore indirection, inline directly into …
Copilot Jan 25, 2026
d626cb2
Remove unsafe code from TryGetAscii/TryGetUnicode, use span-based int…
Copilot Jan 25, 2026
a909529
Remove unsafe code, use span-based APIs throughout, fix allocation issue
Copilot Jan 25, 2026
ad00162
Fix potential out-of-bounds access in IsSurrogatePair check
Copilot Jan 25, 2026
1b4eaec
Simplify GetBidiCategory(string) to call span-based overload
Copilot Jan 25, 2026
30f67eb
Remove unused pointer-based interop method definitions
Copilot Jan 26, 2026
1488db9
Address review feedback: simplify code and fix docs
Copilot Jan 26, 2026
5d34496
Use span-based TryGetAscii/TryGetUnicode in DomainNameHelper to avoid…
Copilot Jan 26, 2026
b9f72ff
Address copilot-pull-request-reviewer feedback: add tests with UseStd…
Copilot Jan 26, 2026
bbcb649
Address MihaZupan review feedback: fix test build errors, simplify Ap…
Copilot Jan 26, 2026
165ef82
Address MihaZupan feedback: fix comment casing, add debug assert, use…
Copilot Jan 26, 2026
c753f6b
Remove overlapping buffer tests and document that overlapping is NOT …
Copilot Jan 26, 2026
39fd8ae
Add overlapping buffer check for TryGetAscii and TryGetUnicode, throw…
Copilot Jan 26, 2026
354d564
Simplify overlapping buffer check to use source.Overlaps(destination)…
Copilot Jan 26, 2026
12e1a54
Merge branch 'main' into copilot/add-idnmapping-span-apis-again
MihaZupan Jan 26, 2026
b7952d8
Apply formatting suggestions: add blank lines and braces to if statem…
Copilot Jan 27, 2026
ab4609c
Merge branch 'main' into copilot/add-idnmapping-span-apis-again
stephentoub Jan 28, 2026
50a1b96
Merge branch 'main' into copilot/add-idnmapping-span-apis-again
stephentoub Jan 29, 2026
e3397b3
Fix test failures: skip invalid index/count entries in TryGetAscii_In…
Copilot Jan 29, 2026
15aaef5
Address feedback: add blank line before return, move == 0 check insid…
Copilot Jan 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/libraries/Common/src/Interop/Interop.Idna.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.InteropServices;

internal static partial class Interop
Expand All @@ -11,9 +12,9 @@ internal static partial class Globalization
internal const int UseStd3AsciiRules = 0x2;

[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToAscii", StringMarshalling = StringMarshalling.Utf16)]
internal static unsafe partial int ToAscii(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity);
internal static partial int ToAscii(uint flags, ReadOnlySpan<char> src, int srcLen, Span<char> dstBuffer, int dstBufferCapacity);

[LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ToUnicode", StringMarshalling = StringMarshalling.Utf16)]
internal static unsafe partial int ToUnicode(uint flags, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity);
internal static partial int ToUnicode(uint flags, ReadOnlySpan<char> src, int srcLen, Span<char> dstBuffer, int dstBufferCapacity);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.InteropServices;

internal static partial class Interop
Expand All @@ -12,19 +13,19 @@ internal static partial class Normaliz
//

[LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
internal static unsafe partial int IdnToAscii(
internal static partial int IdnToAscii(
uint dwFlags,
char* lpUnicodeCharStr,
ReadOnlySpan<char> lpUnicodeCharStr,
int cchUnicodeChar,
char* lpASCIICharStr,
Span<char> lpASCIICharStr,
int cchASCIIChar);

[LibraryImport("Normaliz.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)]
internal static unsafe partial int IdnToUnicode(
internal static partial int IdnToUnicode(
uint dwFlags,
char* lpASCIICharStr,
ReadOnlySpan<char> lpASCIICharStr,
int cchASCIIChar,
char* lpUnicodeCharStr,
Span<char> lpUnicodeCharStr,
int cchUnicodeChar);

internal const int IDN_ALLOW_UNASSIGNED = 0x1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ internal static StrongBidiCategory GetBidiCategory(string s, int index)
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
}

return GetBidiCategoryNoBoundsChecks((uint)GetCodePointFromString(s, index));
return GetBidiCategory((ReadOnlySpan<char>)s, index);
}

internal static StrongBidiCategory GetBidiCategory(StringBuilder s, int index)
Expand Down Expand Up @@ -91,6 +91,33 @@ private static StrongBidiCategory GetBidiCategoryNoBoundsChecks(uint codePoint)
return bidiCategory;
}

/// <summary>
/// Looks up the strong bidi category for the code point that starts at
/// <paramref name="index"/> in <paramref name="s"/>.
/// </summary>
/// <remarks>
/// When the code unit at <paramref name="index"/> and the one after it form a
/// high/low surrogate pair, the two are combined into a single supplementary
/// code point before the lookup. Otherwise the code unit at
/// <paramref name="index"/> is looked up as-is, so an unpaired surrogate is
/// passed through unchanged. The caller is responsible for bounds checking;
/// this method only asserts the index in debug builds.
/// </remarks>
/// <param name="s">The UTF-16 text to inspect.</param>
/// <param name="index">Position in <paramref name="s"/> of the code unit to examine.</param>
/// <returns>The bidi category of the decoded code point.</returns>
internal static StrongBidiCategory GetBidiCategory(ReadOnlySpan<char> s, int index)
{
    Debug.Assert(index >= 0 && index < s.Length, "index < s.Length");

    // UTF-16 surrogate decoding per Table 3-5 of the Unicode Standard, Sec. 3.9:
    //   high surrogate = 110110wwwwxxxxxx
    //   low surrogate  = 110111xxxxxxxxxx
    int codePoint = s[index];

    // highBits = 000000wwwwxxxxxx when s[index] is a high surrogate.
    int highBits = codePoint - HIGH_SURROGATE_START;

    // A pair can only be decoded if a following code unit exists.
    if ((uint)highBits <= HIGH_SURROGATE_RANGE && index + 1 < s.Length)
    {
        // lowBits = 000000xxxxxxxxxx when s[index + 1] is a low surrogate.
        int lowBits = s[index + 1] - LOW_SURROGATE_START;
        if ((uint)lowBits <= HIGH_SURROGATE_RANGE)
        {
            // Result layout: 00000uuuuuuxxxxxxxxxxxxxxxx (where uuuuu = wwww + 1).
            codePoint = (highBits << 10) + lowBits + UNICODE_PLANE01_START;
        }
    }

    return GetBidiCategoryNoBoundsChecks((uint)codePoint);
}

/*
* GetDecimalDigitValue
* ====================
Expand All @@ -115,7 +142,7 @@ public static int GetDecimalDigitValue(string s, int index)
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
}

return GetDecimalDigitValueInternalNoBoundsCheck((uint)GetCodePointFromString(s, index));
return GetDecimalDigitValueInternalNoBoundsCheck((uint)GetCodePoint(s, index));
}

private static int GetDecimalDigitValueInternalNoBoundsCheck(uint codePoint)
Expand Down Expand Up @@ -149,7 +176,7 @@ public static int GetDigitValue(string s, int index)
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index);
}

return GetDigitValueInternalNoBoundsCheck((uint)GetCodePointFromString(s, index));
return GetDigitValueInternalNoBoundsCheck((uint)GetCodePoint(s, index));
}

private static int GetDigitValueInternalNoBoundsCheck(uint codePoint)
Expand Down Expand Up @@ -228,7 +255,7 @@ public static double GetNumericValue(string s, int index)
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static double GetNumericValueInternal(string s, int index) => GetNumericValueNoBoundsCheck((uint)GetCodePointFromString(s, index));
internal static double GetNumericValueInternal(string s, int index) => GetNumericValueNoBoundsCheck((uint)GetCodePoint(s, index));

private static double GetNumericValueNoBoundsCheck(uint codePoint)
{
Expand Down Expand Up @@ -365,7 +392,7 @@ internal static UnicodeCategory GetUnicodeCategoryInternal(string value, int ind
Debug.Assert(value != null, "value can not be null");
Debug.Assert(index < value.Length);

return GetUnicodeCategoryNoBoundsChecks((uint)GetCodePointFromString(value, index));
return GetUnicodeCategoryNoBoundsChecks((uint)GetCodePoint(value, index));
}

/// <summary>
Expand All @@ -378,7 +405,7 @@ internal static UnicodeCategory GetUnicodeCategoryInternal(string str, int index
Debug.Assert(str.Length > 0);
Debug.Assert(index >= 0 && index < str.Length);

uint codePoint = (uint)GetCodePointFromString(str, index);
uint codePoint = (uint)GetCodePoint(str, index);
UnicodeDebug.AssertIsValidCodePoint(codePoint);

charLength = (codePoint >= UNICODE_PLANE01_START) ? 2 /* surrogate pair */ : 1 /* BMP char */;
Expand Down Expand Up @@ -406,9 +433,8 @@ private static UnicodeCategory GetUnicodeCategoryNoBoundsChecks(uint codePoint)
/// WARNING: since it doesn't throw an exception it CAN return a value
/// in the surrogate range D800-DFFF, which is not a legal scalar value.
/// </summary>
private static int GetCodePointFromString(string s, int index)
private static int GetCodePoint(ReadOnlySpan<char> s, int index)
{
Debug.Assert(s != null);
Debug.Assert((uint)index < (uint)s.Length, "index < s.Length");

int codePoint = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ namespace System.Globalization
{
public sealed partial class IdnMapping
{
private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int count)
private string IcuGetAsciiCore(string unicodeString, int index, int count)
{
Debug.Assert(!GlobalizationMode.Invariant);
Debug.Assert(!GlobalizationMode.UseNls);
Debug.Assert(unicodeString != null && unicodeString.Length >= count);

ReadOnlySpan<char> unicode = unicodeString.AsSpan(index, count);
uint flags = IcuFlags;
CheckInvalidIdnCharacters(unicode, count, flags, nameof(unicode));
CheckInvalidIdnCharacters(unicode, flags, nameof(unicode));

const int StackallocThreshold = 512;
// Each unicode character is represented by up to 3 ASCII chars
Expand All @@ -23,85 +23,126 @@ private unsafe string IcuGetAsciiCore(string unicodeString, char* unicode, int c
int actualLength;
if ((uint)estimatedLength < StackallocThreshold)
{
char* outputStack = stackalloc char[estimatedLength];
Span<char> outputStack = stackalloc char[estimatedLength];
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, outputStack, estimatedLength);
if (actualLength > 0 && actualLength <= estimatedLength)
{
return GetStringForOutput(unicodeString, unicode, count, outputStack, actualLength);
return GetStringForOutput(unicodeString, unicode, outputStack.Slice(0, actualLength));
}
}
else
{
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, null, 0);
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, Span<char>.Empty, 0);
}
if (actualLength == 0)
{
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode));
}

char[] outputHeap = new char[actualLength];
fixed (char* pOutputHeap = &outputHeap[0])
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, outputHeap, actualLength);
if (actualLength == 0 || actualLength > outputHeap.Length)
{
actualLength = Interop.Globalization.ToAscii(flags, unicode, count, pOutputHeap, actualLength);
if (actualLength == 0 || actualLength > outputHeap.Length)
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode));
}

return GetStringForOutput(unicodeString, unicode, outputHeap.AsSpan(0, actualLength));
}

/// <summary>
/// Span-based ICU path for converting a Unicode domain name to its ASCII form,
/// writing the result directly into <paramref name="destination"/>.
/// </summary>
/// <param name="unicode">The Unicode input to convert.</param>
/// <param name="destination">The buffer that receives the converted characters.</param>
/// <param name="charsWritten">
/// On success, the length reported by the native conversion; 0 when this method returns <c>false</c>.
/// </param>
/// <returns>
/// <c>true</c> if the length reported by the native conversion fits in
/// <paramref name="destination"/>; <c>false</c> if it exceeds the destination length.
/// </returns>
/// <exception cref="ArgumentException">
/// The native conversion reported a length of 0, or
/// <c>CheckInvalidIdnCharacters</c> rejected the input.
/// </exception>
private bool IcuTryGetAsciiCore(ReadOnlySpan<char> unicode, Span<char> destination, out int charsWritten)
{
    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(!GlobalizationMode.UseNls);

    uint flags = IcuFlags;
    CheckInvalidIdnCharacters(unicode, flags, nameof(unicode));

    int actualLength = Interop.Globalization.ToAscii(flags, unicode, unicode.Length, destination, destination.Length);

    if (actualLength <= destination.Length)
    {
        // A zero length is treated as an illegal name rather than as an empty result.
        if (actualLength == 0)
        {
            throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(unicode));
        }

        charsWritten = actualLength;
        return true;
    }

    // The reported length does not fit in the caller's buffer.
    charsWritten = 0;
    return false;
}

private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int count)
private string IcuGetUnicodeCore(string asciiString, int index, int count)
{
Debug.Assert(!GlobalizationMode.Invariant);
Debug.Assert(!GlobalizationMode.UseNls);
Debug.Assert(asciiString != null && asciiString.Length >= count);

ReadOnlySpan<char> ascii = asciiString.AsSpan(index, count);
uint flags = IcuFlags;
CheckInvalidIdnCharacters(ascii, count, flags, nameof(ascii));
CheckInvalidIdnCharacters(ascii, flags, nameof(ascii));

const int StackAllocThreshold = 512;
if ((uint)count < StackAllocThreshold)
{
char* output = stackalloc char[count];
return IcuGetUnicodeCore(asciiString, ascii, count, flags, output, count, reattempt: true);
Span<char> output = stackalloc char[count];
return IcuGetUnicodeCore(asciiString, ascii, flags, output, reattempt: true);
}
else
{
char[] output = new char[count];
fixed (char* pOutput = &output[0])
{
return IcuGetUnicodeCore(asciiString, ascii, count, flags, pOutput, count, reattempt: true);
}
return IcuGetUnicodeCore(asciiString, ascii, flags, output, reattempt: true);
}
}

private unsafe string IcuGetUnicodeCore(string asciiString, char* ascii, int count, uint flags, char* output, int outputLength, bool reattempt)
private static string IcuGetUnicodeCore(string asciiString, ReadOnlySpan<char> ascii, uint flags, Span<char> output, bool reattempt)
{
Debug.Assert(!GlobalizationMode.Invariant);
Debug.Assert(!GlobalizationMode.UseNls);
Debug.Assert(asciiString != null && asciiString.Length >= count);

int realLen = Interop.Globalization.ToUnicode(flags, ascii, count, output, outputLength);
int realLen = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, output, output.Length);

if (realLen == 0)
{
throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
}
else if (realLen <= outputLength)
else if (realLen <= output.Length)
{
return GetStringForOutput(asciiString, ascii, count, output, realLen);
return GetStringForOutput(asciiString, ascii, output.Slice(0, realLen));
}
else if (reattempt)
{
char[] newOutput = new char[realLen];
fixed (char* pNewOutput = newOutput)
return IcuGetUnicodeCore(asciiString, ascii, flags, newOutput, reattempt: false);
}

throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
}

/// <summary>
/// Span-based ICU path for converting an ASCII (Punycode) domain name to its
/// Unicode form, writing the result directly into <paramref name="destination"/>.
/// </summary>
/// <param name="ascii">The ASCII input to convert.</param>
/// <param name="destination">The buffer that receives the converted characters.</param>
/// <param name="charsWritten">
/// On success, the length reported by the native conversion; 0 when this method returns <c>false</c>.
/// </param>
/// <returns>
/// <c>true</c> if the length reported by the native conversion fits in
/// <paramref name="destination"/>; <c>false</c> if it exceeds the destination length.
/// </returns>
/// <exception cref="ArgumentException">
/// The native conversion reported a length of 0, or
/// <c>CheckInvalidIdnCharacters</c> rejected the input.
/// </exception>
private bool IcuTryGetUnicodeCore(ReadOnlySpan<char> ascii, Span<char> destination, out int charsWritten)
{
    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(!GlobalizationMode.UseNls);

    uint flags = IcuFlags;
    CheckInvalidIdnCharacters(ascii, flags, nameof(ascii));

    int actualLength = Interop.Globalization.ToUnicode(flags, ascii, ascii.Length, destination, destination.Length);

    if (actualLength <= destination.Length)
    {
        // A zero length is treated as an illegal name rather than as an empty result.
        if (actualLength == 0)
        {
            throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));
        }

        charsWritten = actualLength;
        return true;
    }

    // The reported length does not fit in the caller's buffer.
    charsWritten = 0;
    return false;
}

private uint IcuFlags
Expand All @@ -122,11 +163,11 @@ private uint IcuFlags
/// To match Windows behavior, we walk the string ourselves looking for these
/// bad characters so we can continue to throw ArgumentException in these cases.
/// </summary>
private static unsafe void CheckInvalidIdnCharacters(char* s, int count, uint flags, string paramName)
private static void CheckInvalidIdnCharacters(ReadOnlySpan<char> s, uint flags, string paramName)
{
if ((flags & Interop.Globalization.UseStd3AsciiRules) == 0)
{
for (int i = 0; i < count; i++)
for (int i = 0; i < s.Length; i++)
{
char c = s[i];

Expand Down
Loading
Loading