diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
index a25bcdfc984f28..8684386c6e66f5 100644
--- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
+++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
@@ -1212,6 +1212,7 @@
+
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/RunePosition.cs b/src/libraries/System.Private.CoreLib/src/System/Text/RunePosition.cs
new file mode 100644
index 00000000000000..4292ab93f2d291
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/RunePosition.cs
@@ -0,0 +1,319 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.ComponentModel;
+using System.Buffers;
+using System.Collections.Generic;
+using System.Collections;
+
+namespace System.Text;
+
+/// <summary>
+/// Represents a position in Unicode data, allowing for deeper data inspection.
+/// </summary>
+/// <remarks>
+/// Invalid Unicode symbols will be represented by the <see cref="Rune.ReplacementChar"/> value.
+/// </remarks>
+public readonly struct RunePosition : IEquatable<RunePosition>
+{
+    /// <summary>
+    /// Returns an enumeration of <see cref="RunePosition"/> from the provided span that allows deeper data inspection.
+    /// </summary>
+    /// <param name="span">The <see cref="ReadOnlySpan{T}"/> with UTF-16 Unicode data.</param>
+    /// <returns>
+    /// A <see cref="Utf16Enumerator"/> to enumerate <see cref="RunePosition"/> from the provided span with UTF-16
+    /// Unicode data.
+    /// </returns>
+    /// <remarks>
+    /// Invalid Unicode symbols will be represented by the <see cref="Rune.ReplacementChar"/>
+    /// value.
+    /// </remarks>
+    public static Utf16Enumerator EnumerateUtf16(ReadOnlySpan<char> span) => new(span);
+
+    /// <summary>
+    /// Returns an enumeration of <see cref="RunePosition"/> from the provided span that allows deeper data inspection.
+    /// </summary>
+    /// <param name="span">The <see cref="ReadOnlySpan{T}"/> with UTF-8 Unicode data.</param>
+    /// <returns>
+    /// A <see cref="Utf8Enumerator"/> to enumerate <see cref="RunePosition"/> from the provided span with UTF-8 Unicode
+    /// data.
+    /// </returns>
+    /// <remarks>
+    /// Invalid Unicode symbols will be represented by the <see cref="Rune.ReplacementChar"/> value.
+    /// </remarks>
+    public static Utf8Enumerator EnumerateUtf8(ReadOnlySpan<byte> span) => new(span);
+
+    /// <summary>
+    /// Unicode scalar value <see cref="System.Text.Rune"/> of the current symbol in Unicode data.
+    /// Invalid Unicode symbols will be represented by the <see cref="Rune.ReplacementChar"/> value.
+    /// </summary>
+    public Rune Rune { get; }
+
+    /// <summary>
+    /// The index of the current symbol in Unicode data, measured in code units of the enumerated
+    /// encoding (<see cref="char"/> for UTF-16, <see cref="byte"/> for UTF-8).
+    /// </summary>
+    public int StartIndex { get; }
+
+    /// <summary>
+    /// The length of the current symbol in Unicode data, measured in code units of the enumerated
+    /// encoding (<see cref="char"/> for UTF-16, <see cref="byte"/> for UTF-8).
+    /// </summary>
+    public int Length { get; }
+
+    /// <summary>
+    /// <see langword="false"/> if the current Unicode symbol is correctly encoded and
+    /// <see cref="RunePosition.Rune"/> contains its scalar value.
+    /// <para>
+    /// <see langword="true"/> if the current Unicode symbol is invalidly encoded and was
+    /// replaced by the <see cref="Rune.ReplacementChar"/> value.
+    /// </para>
+    /// </summary>
+    public bool WasReplaced { get; }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="RunePosition"/> struct.
+    /// </summary>
+    /// <param name="rune">The Unicode scalar value.</param>
+    /// <param name="startIndex">The index of the current symbol in Unicode data.</param>
+    /// <param name="length">The length of the current symbol in Unicode data.</param>
+    /// <param name="wasReplaced">Indicates if the current Unicode symbol was replaced.</param>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="startIndex"/> is negative, or <paramref name="length"/> is negative or
+    /// greater than the maximum number of UTF-8 bytes per rune.
+    /// </exception>
+    public RunePosition(Rune rune, int startIndex, int length, bool wasReplaced)
+    {
+        ArgumentOutOfRangeException.ThrowIfNegative(startIndex);
+
+        // Report negative and too-large lengths separately so the exception message
+        // describes the actual violation instead of always claiming "non-negative required".
+        ArgumentOutOfRangeException.ThrowIfNegative(length);
+        ArgumentOutOfRangeException.ThrowIfGreaterThan(length, Rune.MaxUtf8BytesPerRune);
+
+        Rune = rune;
+        StartIndex = startIndex;
+        Length = length;
+        WasReplaced = wasReplaced;
+    }
+
+    /// <summary>
+    /// Determines whether the specified <see cref="RunePosition"/> is equal to the current <see cref="RunePosition"/>.
+    /// </summary>
+    /// <param name="other">The other <see cref="RunePosition"/> to compare with.</param>
+    /// <returns>
+    /// <see langword="true"/> if the specified <see cref="RunePosition"/> is equal to the current
+    /// <see cref="RunePosition"/>; otherwise, <see langword="false"/>.
+    /// </returns>
+    public bool Equals(RunePosition other) =>
+        Rune == other.Rune && StartIndex == other.StartIndex && Length == other.Length && WasReplaced == other.WasReplaced;
+
+    /// <summary>
+    /// Determines whether the specified object is equal to the current <see cref="RunePosition"/>.
+    /// </summary>
+    /// <param name="obj">The object to compare with the current <see cref="RunePosition"/>.</param>
+    /// <returns>
+    /// <see langword="true"/> if the specified object is a <see cref="RunePosition"/> equal to the current
+    /// <see cref="RunePosition"/>; otherwise, <see langword="false"/>.
+    /// </returns>
+    public override bool Equals(object? obj) =>
+        obj is RunePosition runePosition && Equals(runePosition);
+
+    /// <summary>
+    /// Returns the hash code for the current <see cref="RunePosition"/>.
+    /// </summary>
+    /// <returns>The hash code for the current <see cref="RunePosition"/>.</returns>
+    public override int GetHashCode() =>
+        HashCode.Combine(Rune, StartIndex, Length, WasReplaced);
+
+    /// <summary>
+    /// Deconstructs the <see cref="RunePosition"/> into its components.
+    /// </summary>
+    /// <param name="rune">The Unicode scalar value.</param>
+    /// <param name="startIndex">The index of the current symbol in Unicode data.</param>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    public void Deconstruct(out Rune rune, out int startIndex)
+    {
+        rune = Rune;
+        startIndex = StartIndex;
+    }
+
+    /// <summary>
+    /// Deconstructs the <see cref="RunePosition"/> into its components.
+    /// </summary>
+    /// <param name="rune">The Unicode scalar value.</param>
+    /// <param name="startIndex">The index of the current symbol in Unicode data.</param>
+    /// <param name="length">The length of the current symbol in Unicode data.</param>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    public void Deconstruct(out Rune rune, out int startIndex, out int length)
+    {
+        rune = Rune;
+        startIndex = StartIndex;
+        length = Length;
+    }
+
+    /// <summary>
+    /// Determines whether two specified <see cref="RunePosition"/> instances are equal.
+    /// </summary>
+    /// <param name="left">The first <see cref="RunePosition"/> to compare.</param>
+    /// <param name="right">The second <see cref="RunePosition"/> to compare.</param>
+    /// <returns>
+    /// <see langword="true"/> if the two <see cref="RunePosition"/> instances are equal; otherwise,
+    /// <see langword="false"/>.
+    /// </returns>
+    public static bool operator ==(RunePosition left, RunePosition right) => left.Equals(right);
+
+    /// <summary>
+    /// Determines whether two specified <see cref="RunePosition"/> instances are not equal.
+    /// </summary>
+    /// <param name="left">The first <see cref="RunePosition"/> to compare.</param>
+    /// <param name="right">The second <see cref="RunePosition"/> to compare.</param>
+    /// <returns>
+    /// <see langword="true"/> if the two <see cref="RunePosition"/> instances are not equal; otherwise,
+    /// <see langword="false"/>.
+    /// </returns>
+    public static bool operator !=(RunePosition left, RunePosition right) => !(left == right);
+
+    /// <summary>
+    /// An enumerator for retrieving <see cref="RunePosition"/> instances from UTF-16 Unicode data.
+    /// </summary>
+    /// <remarks>
+    /// Methods are pattern-matched by compiler to allow using foreach pattern.
+    /// </remarks>
+    public ref struct Utf16Enumerator : IEnumerator<RunePosition>
+    {
+        // The full span handed to the constructor; kept only so Reset() can rewind.
+        private readonly ReadOnlySpan<char> _original;
+
+        // The not-yet-enumerated tail of _original.
+        private ReadOnlySpan<char> _remaining;
+
+        /// <summary>
+        /// The current <see cref="RunePosition"/> in the Unicode data.
+        /// </summary>
+        public RunePosition Current { get; private set; }
+
+        /// <summary>
+        /// Returns the current enumerator instance.
+        /// </summary>
+        /// <returns>The current enumerator instance.</returns>
+        public Utf16Enumerator GetEnumerator() => this;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Utf16Enumerator"/> struct.
+        /// </summary>
+        /// <param name="buffer">The buffer containing the UTF-16 Unicode data.</param>
+        internal Utf16Enumerator(ReadOnlySpan<char> buffer)
+        {
+            _original = _remaining = buffer;
+            Current = default;
+        }
+
+        /// <summary>
+        /// Moves to the next <see cref="RunePosition"/> in the Unicode data.
+        /// </summary>
+        /// <returns>
+        /// <see langword="true"/> if the enumerator was successfully advanced to the next <see cref="RunePosition"/>;
+        /// <see langword="false"/> if the enumerator has passed the end of the span.
+        /// </returns>
+        public bool MoveNext()
+        {
+            if (_remaining.IsEmpty)
+            {
+                // Reached the end of the buffer.
+                Current = default;
+                return false;
+            }
+
+            // The next symbol starts right after the previous one. Before the first
+            // successful move Current is default, so this evaluates to 0.
+            int startIndex = Current.StartIndex + Current.Length;
+
+            // In UTF-16 specifically, invalid sequences always have length 1, which is the same
+            // length as the replacement character U+FFFD. This means that we can always bump the
+            // next index by the current scalar's UTF-16 sequence length. This optimization is not
+            // generally applicable; for example, enumerating scalars from UTF-8 cannot utilize
+            // this same trick.
+
+            int scalarValue = Rune.ReadFirstRuneFromUtf16Buffer(_remaining);
+            if (scalarValue >= 0)
+            {
+                Rune rune = Rune.UnsafeCreate((uint)scalarValue);
+                int length = rune.Utf16SequenceLength;
+                Current = new RunePosition(rune, startIndex, length, false);
+                _remaining = _remaining.Slice(length);
+            }
+            else
+            {
+                Current = new RunePosition(Rune.ReplacementChar, startIndex, 1, true);
+                _remaining = _remaining.Slice(1);
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Rewinds the enumerator to its initial state, positioned before the first
+        /// <see cref="RunePosition"/> of the original span.
+        /// </summary>
+        public void Reset()
+        {
+            _remaining = _original;
+            Current = default;
+        }
+
+        /// <inheritdoc/>
+        object IEnumerator.Current => Current;
+
+        /// <inheritdoc/>
+        void IEnumerator.Reset() => Reset();
+
+        /// <inheritdoc/>
+        void IDisposable.Dispose() { }
+    }
+
+    /// <summary>
+    /// An enumerator for retrieving <see cref="RunePosition"/> instances from UTF-8 Unicode data.
+    /// </summary>
+    /// <remarks>
+    /// Methods are pattern-matched by compiler to allow using foreach pattern.
+    /// </remarks>
+    public ref struct Utf8Enumerator : IEnumerator<RunePosition>
+    {
+        // The full span handed to the constructor; kept only so Reset() can rewind.
+        private readonly ReadOnlySpan<byte> _original;
+
+        // The not-yet-enumerated tail of _original.
+        private ReadOnlySpan<byte> _remaining;
+
+        /// <summary>
+        /// The current <see cref="RunePosition"/> in the Unicode data.
+        /// </summary>
+        public RunePosition Current { get; private set; }
+
+        /// <summary>
+        /// Returns the current enumerator instance.
+        /// </summary>
+        /// <returns>The current enumerator instance.</returns>
+        public Utf8Enumerator GetEnumerator() => this;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Utf8Enumerator"/> struct.
+        /// </summary>
+        /// <param name="buffer">The buffer containing the UTF-8 Unicode data.</param>
+        internal Utf8Enumerator(ReadOnlySpan<byte> buffer)
+        {
+            _original = _remaining = buffer;
+            Current = default;
+        }
+
+        /// <summary>
+        /// Moves to the next <see cref="RunePosition"/> in the Unicode data.
+        /// </summary>
+        /// <returns>
+        /// <see langword="true"/> if the enumerator was successfully advanced to the next <see cref="RunePosition"/>;
+        /// <see langword="false"/> if the enumerator has passed the end of the span.
+        /// </returns>
+        public bool MoveNext()
+        {
+            if (_remaining.IsEmpty)
+            {
+                // Reached the end of the buffer.
+                Current = default;
+                return false;
+            }
+
+            // Any status other than Done means the bytes at the current position were not a
+            // complete, well-formed sequence; the decoded rune is then reported as replaced.
+            bool wasReplaced = Rune.DecodeFromUtf8(_remaining, out Rune rune, out int bytesConsumed) != OperationStatus.Done;
+
+            // The next symbol starts right after the previous one. Before the first
+            // successful move Current is default, so the sum evaluates to 0.
+            Current = new RunePosition(rune, Current.StartIndex + Current.Length, bytesConsumed, wasReplaced);
+            _remaining = _remaining.Slice(bytesConsumed);
+            return true;
+        }
+
+        /// <summary>
+        /// Rewinds the enumerator to its initial state, positioned before the first
+        /// <see cref="RunePosition"/> of the original span.
+        /// </summary>
+        public void Reset()
+        {
+            _remaining = _original;
+            Current = default;
+        }
+
+        /// <inheritdoc/>
+        object IEnumerator.Current => Current;
+
+        /// <inheritdoc/>
+        void IEnumerator.Reset() => Reset();
+
+        /// <inheritdoc/>
+        void IDisposable.Dispose() { }
+    }
+}
diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs
index e1d6c46cbfe499..0807ee840135d4 100644
--- a/src/libraries/System.Runtime/ref/System.Runtime.cs
+++ b/src/libraries/System.Runtime/ref/System.Runtime.cs
@@ -15720,6 +15720,48 @@ public enum NormalizationForm
public bool TryEncodeToUtf8(System.Span destination, out int bytesWritten) { throw null; }
public static bool TryGetRuneAt(string input, int index, out System.Text.Rune value) { throw null; }
}
+    public readonly partial struct RunePosition : System.IEquatable<System.Text.RunePosition>
+    {
+        private readonly int _dummyPrimitive;
+        public static System.Text.RunePosition.Utf16Enumerator EnumerateUtf16(System.ReadOnlySpan<char> span) { throw null; }
+        public static System.Text.RunePosition.Utf8Enumerator EnumerateUtf8(System.ReadOnlySpan<byte> span) { throw null; }
+        public System.Text.Rune Rune { get { throw null; } }
+        public int StartIndex { get { throw null; } }
+        public int Length { get { throw null; } }
+        public bool WasReplaced { get { throw null; } }
+        public RunePosition(System.Text.Rune rune, int startIndex, int length, bool wasReplaced) { throw null; }
+        public bool Equals(System.Text.RunePosition other) { throw null; }
+        public override bool Equals(object? obj) { throw null; }
+        public override int GetHashCode() { throw null; }
+        [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
+        public void Deconstruct(out System.Text.Rune rune, out int startIndex) { throw null; }
+        [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
+        public void Deconstruct(out System.Text.Rune rune, out int startIndex, out int length) { throw null; }
+        public static bool operator ==(System.Text.RunePosition left, System.Text.RunePosition right) { throw null; }
+        public static bool operator !=(System.Text.RunePosition left, System.Text.RunePosition right) { throw null; }
+        public ref partial struct Utf16Enumerator : System.Collections.Generic.IEnumerator<System.Text.RunePosition>, System.Collections.IEnumerator, System.IDisposable
+        {
+            private readonly int _dummyPrimitive;
+            public System.Text.RunePosition Current { get { throw null; } }
+            public System.Text.RunePosition.Utf16Enumerator GetEnumerator() { throw null; }
+            public bool MoveNext() { throw null; }
+            public void Reset() { throw null; }
+            object System.Collections.IEnumerator.Current { get { throw null; } }
+            void System.Collections.IEnumerator.Reset() { }
+            void System.IDisposable.Dispose() { }
+        }
+        public ref partial struct Utf8Enumerator : System.Collections.Generic.IEnumerator<System.Text.RunePosition>, System.Collections.IEnumerator, System.IDisposable
+        {
+            private readonly int _dummyPrimitive;
+            public System.Text.RunePosition Current { get { throw null; } }
+            public System.Text.RunePosition.Utf8Enumerator GetEnumerator() { throw null; }
+            public bool MoveNext() { throw null; }
+            public void Reset() { throw null; }
+            object System.Collections.IEnumerator.Current { get { throw null; } }
+            void System.Collections.IEnumerator.Reset() { }
+            void System.IDisposable.Dispose() { }
+        }
+    }
public sealed partial class StringBuilder : System.Runtime.Serialization.ISerializable
{
public StringBuilder() { }
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Text/RuneTests.cs b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Text/RuneTests.cs
index 5ef480c0ba0166..378bf7d92d88e8 100644
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Text/RuneTests.cs
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests/System/Text/RuneTests.cs
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
using System.Buffers;
+using System.Collections.Generic;
using System.Globalization;
using System.Text.Unicode;
using Xunit;
@@ -58,7 +59,7 @@ public static void Casing_Invariant(int original, int upper, int lower)
}
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization), nameof(PlatformDetection.IsNotHybridGlobalizationOnApplePlatform))]
- // HybridGlobalization on Apple mobile platforms has issues with casing dotless I
+ // HybridGlobalization on Apple mobile platforms has issues with casing dotless I
[InlineData('0', '0', '0')]
[InlineData('a', 'A', 'a')]
[InlineData('i', 'I', 'i')]
@@ -721,5 +722,259 @@ public static void TryEncodeToUtf8(GeneralTestData testData)
Assert.Equal(testData.Utf8Sequence.Length, rune.EncodeToUtf8(utf8Buffer));
Assert.True(utf8Buffer.Slice(0, testData.Utf8Sequence.Length).SequenceEqual(testData.Utf8Sequence));
}
+
+ //
+ // RunePosition tests
+ //
+
+        // Asserts that every property of runePosition matches the supplied values, and that
+        // runePosition equals a RunePosition built from those same values.
+        private static void RunePosition_TestProps(Rune rune, int startIndex, int length, bool wasReplaced, RunePosition runePosition)
+        {
+            // Property-by-property checks come first.
+            Assert.Equal(rune, runePosition.Rune);
+            Assert.Equal(startIndex, runePosition.StartIndex);
+            Assert.Equal(length, runePosition.Length);
+            Assert.Equal(wasReplaced, runePosition.WasReplaced);
+
+            // Then compare against an instance created through the public constructor.
+            RunePosition rebuilt = new RunePosition(rune, startIndex, length, wasReplaced);
+            Assert.Equal(rebuilt, runePosition);
+        }
+
+        // Exercises every equality surface of RunePosition (Assert.Equal, IEquatable.Equals,
+        // object.Equals, ==, != and, for equal values, GetHashCode) in both argument orders.
+        // Whether the two values should be equal is derived from a field-by-field comparison.
+        private static void RunePosition_TestEquals(RunePosition expected, RunePosition runePosition)
+        {
+            bool sameFields =
+                expected.Rune == runePosition.Rune &&
+                expected.StartIndex == runePosition.StartIndex &&
+                expected.Length == runePosition.Length &&
+                expected.WasReplaced == runePosition.WasReplaced;
+
+            if (!sameFields)
+            {
+                // At least one field differs: every equality surface must report "not equal".
+                Assert.NotEqual(expected, runePosition);
+                Assert.NotEqual(runePosition, expected);
+
+                Assert.False(expected.Equals(runePosition));
+                Assert.False(runePosition.Equals(expected));
+
+                Assert.False(((object)expected).Equals(runePosition));
+                Assert.False(((object)runePosition).Equals(expected));
+
+                Assert.False(expected == runePosition);
+                Assert.False(runePosition == expected);
+
+                Assert.True(expected != runePosition);
+                Assert.True(runePosition != expected);
+                return;
+            }
+
+            // All fields match: every equality surface must report "equal".
+            Assert.Equal(expected, runePosition);
+            Assert.Equal(runePosition, expected);
+
+            Assert.True(expected.Equals(runePosition));
+            Assert.True(runePosition.Equals(expected));
+
+            Assert.True(((object)expected).Equals(runePosition));
+            Assert.True(((object)runePosition).Equals(expected));
+
+            Assert.True(expected == runePosition);
+            Assert.True(runePosition == expected);
+
+            Assert.False(expected != runePosition);
+            Assert.False(runePosition != expected);
+
+            Assert.Equal(expected.GetHashCode(), runePosition.GetHashCode());
+        }
+
+        // Verifies that both Deconstruct overloads hand back the values of the matching properties.
+        private static void RunePosition_TestDeconstruct(RunePosition runePosition)
+        {
+            // Two-element overload: (Rune, StartIndex).
+            (Rune pairRune, int pairStart) = runePosition;
+            Assert.Equal(runePosition.Rune, pairRune);
+            Assert.Equal(runePosition.StartIndex, pairStart);
+
+            // Three-element overload: (Rune, StartIndex, Length).
+            (Rune tripleRune, int tripleStart, int tripleLength) = runePosition;
+            Assert.Equal(runePosition.Rune, tripleRune);
+            Assert.Equal(runePosition.StartIndex, tripleStart);
+            Assert.Equal(runePosition.Length, tripleLength);
+        }
+
+        // A RunePosition obtained via "default" and one obtained via the parameterless
+        // constructor must both expose all-default property values.
+        [Fact]
+        public static void RunePosition_DefaultTest()
+        {
+            RunePosition[] defaultInstances = new RunePosition[] { default, new RunePosition() };
+
+            foreach (RunePosition runePosition in defaultInstances)
+            {
+                RunePosition_TestProps(default, 0, 0, false, runePosition);
+                RunePosition_TestEquals(default, runePosition);
+                RunePosition_TestDeconstruct(runePosition);
+            }
+        }
+
+        // Enumerating an empty span must yield nothing for either encoding.
+        [Fact]
+        public static void EnumerateRunePositions_Empty()
+        {
+            RunePosition.Utf16Enumerator utf16Enumerator = RunePosition.EnumerateUtf16([]).GetEnumerator();
+            Assert.False(utf16Enumerator.MoveNext());
+
+            RunePosition.Utf8Enumerator utf8Enumerator = RunePosition.EnumerateUtf8([]).GetEnumerator();
+            Assert.False(utf8Enumerator.MoveNext());
+        }
+
+        // Walks each UTF-16 input with the RunePosition enumerator and cross-checks every
+        // reported position against Rune.DecodeFromUtf16 applied at the same offset.
+        [Theory]
+        [InlineData(new char[0])] // empty
+        [InlineData(new char[] { 'x', 'y', 'z' })]
+        [InlineData(new char[] { 'x', '\uD86D', '\uDF54', 'y' })] // valid surrogate pair
+        [InlineData(new char[] { 'x', '\uD86D', 'y' })] // standalone high surrogate
+        [InlineData(new char[] { 'x', '\uDF54', 'y' })] // standalone low surrogate
+        [InlineData(new char[] { 'x', '\uD86D' })] // standalone high surrogate at end of string
+        [InlineData(new char[] { 'x', '\uDF54' })] // standalone low surrogate at end of string
+        [InlineData(new char[] { 'x', '\uD86D', '\uD86D', 'y' })] // two high surrogates should be two replacement chars
+        [InlineData(new char[] { 'x', '\uFFFD', 'y' })] // literal U+FFFD
+        public static void EnumerateRunePositions_Battery16(char[] chars)
+        {
+            // Test data is smuggled as char[] instead of straight-up string since the test framework
+            // doesn't like invalid UTF-16 literals.
+
+            int expectedIndex = 0;
+            foreach (RunePosition runePosition in RunePosition.EnumerateUtf16(chars))
+            {
+                OperationStatus status = Rune.DecodeFromUtf16(chars.AsSpan(expectedIndex), out Rune expectedRune, out int charsConsumed);
+                bool wasReplaced = status != OperationStatus.Done;
+
+                RunePosition_TestProps(expectedRune, expectedIndex, charsConsumed, wasReplaced, runePosition);
+
+                expectedIndex += charsConsumed;
+            }
+
+            // The enumerator must have covered the whole input, no more and no less.
+            Assert.Equal(chars.Length, expectedIndex);
+        }
+
+        // Walks each UTF-8 input with the RunePosition enumerator and cross-checks every
+        // reported position against Rune.DecodeFromUtf8 applied at the same offset.
+        [Theory]
+        [InlineData(new byte[0])] // empty
+        [InlineData(new byte[] { 0x30, 0x40, 0x50 })]
+        [InlineData(new byte[] { 0x31, 0x80, 0x41 })] // standalone continuation byte
+        [InlineData(new byte[] { 0x32, 0xC1, 0x42 })] // C1 is never a valid UTF-8 byte
+        [InlineData(new byte[] { 0x33, 0xF5, 0x43 })] // F5 is never a valid UTF-8 byte
+        [InlineData(new byte[] { 0x34, 0xC2, 0x44 })] // C2 is a valid byte; expecting it to be followed by a continuation byte
+        [InlineData(new byte[] { 0x35, 0xED, 0x45 })] // ED is a valid byte; expecting it to be followed by a continuation byte
+        [InlineData(new byte[] { 0x36, 0xF4, 0x46 })] // F4 is a valid byte; expecting it to be followed by a continuation byte
+        [InlineData(new byte[] { 0x37, 0xC2, 0xC2, 0x47 })] // C2 not followed by continuation byte
+        [InlineData(new byte[] { 0x38, 0xC3, 0x90, 0x48 })] // [ C3 90 ] is U+00D0 LATIN CAPITAL LETTER ETH
+        [InlineData(new byte[] { 0x39, 0xC1, 0xBF, 0x49 })] // [ C1 BF ] is overlong 2-byte sequence, all overlong sequences have maximal invalid subsequence length 1
+        [InlineData(new byte[] { 0x40, 0xE0, 0x9F, 0x50 })] // [ E0 9F ] is overlong 3-byte sequence, all overlong sequences have maximal invalid subsequence length 1
+        [InlineData(new byte[] { 0x41, 0xE0, 0xA0, 0x51 })] // [ E0 A0 ] is valid 2-byte start of 3-byte sequence
+        [InlineData(new byte[] { 0x42, 0xED, 0x9F, 0x52 })] // [ ED 9F ] is valid 2-byte start of 3-byte sequence
+        [InlineData(new byte[] { 0x43, 0xED, 0xBF, 0x53 })] // [ ED BF ] would place us in UTF-16 surrogate range, all surrogate sequences have maximal invalid subsequence length 1
+        [InlineData(new byte[] { 0x44, 0xEE, 0x80, 0x54 })] // [ EE 80 ] is valid 2-byte start of 3-byte sequence
+        [InlineData(new byte[] { 0x45, 0xF0, 0x8F, 0x55 })] // [ F0 8F ] is overlong 4-byte sequence, all overlong sequences have maximal invalid subsequence length 1
+        [InlineData(new byte[] { 0x46, 0xF0, 0x90, 0x56 })] // [ F0 90 ] is valid 2-byte start of 4-byte sequence
+        [InlineData(new byte[] { 0x47, 0xF4, 0x90, 0x57 })] // [ F4 90 ] would place us beyond U+10FFFF, all such sequences have maximal invalid subsequence length 1
+        [InlineData(new byte[] { 0x48, 0xE2, 0x88, 0xB4, 0x58 })] // [ E2 88 B4 ] is U+2234 THEREFORE
+        [InlineData(new byte[] { 0x49, 0xE2, 0x88, 0xC0, 0x59 })] // [ E2 88 ] followed by non-continuation byte, maximal invalid subsequence length 2
+        [InlineData(new byte[] { 0x50, 0xF0, 0x9F, 0x98, 0x60 })] // [ F0 9F 98 ] is valid 3-byte start of 4-byte sequence
+        [InlineData(new byte[] { 0x51, 0xF0, 0x9F, 0x98, 0x20, 0x61 })] // [ F0 9F 98 ] followed by non-continuation byte, maximal invalid subsequence length 3
+        [InlineData(new byte[] { 0x52, 0xF0, 0x9F, 0x98, 0xB2, 0x62 })] // [ F0 9F 98 B2 ] is U+1F632 ASTONISHED FACE
+        public static void EnumerateRunePositions_Battery8(byte[] bytes)
+        {
+            int expectedIndex = 0;
+            foreach (RunePosition runePosition in RunePosition.EnumerateUtf8(bytes))
+            {
+                OperationStatus status = Rune.DecodeFromUtf8(bytes.AsSpan(expectedIndex), out Rune expectedRune, out int charsConsumed);
+                bool wasReplaced = status != OperationStatus.Done;
+
+                RunePosition_TestProps(expectedRune, expectedIndex, charsConsumed, wasReplaced, runePosition);
+
+                expectedIndex += charsConsumed;
+            }
+
+            // The enumerator must have covered the whole input, no more and no less.
+            Assert.Equal(bytes.Length, expectedIndex);
+        }
+
+        [Fact]
+        public static void EnumerateRunePositions_DoesNotReadPastEndOfSpan()
+        {
+            // As an optimization, reading scalars from a string *may* read past the end of the string
+            // to the terminating null. This optimization is invalid for arbitrary spans, so this test
+            // ensures that we're not performing this optimization here.
+
+            {
+                ReadOnlySpan<char> span = "xy\U0002B754z".AsSpan(1, 2); // well-formed string, but span splits surrogate pair
+
+                List<int> enumeratedValues = new List<int>();
+                foreach (RunePosition runePosition in RunePosition.EnumerateUtf16(span))
+                {
+                    enumeratedValues.Add(runePosition.Rune.Value);
+                }
+                Assert.Equal(new int[] { 'y', '\uFFFD' }, enumeratedValues.ToArray());
+            }
+
+            {
+                // Well-formed UTF-8, but the slice ends after the first byte of the 4-byte
+                // sequence that encodes U+2B754, so the trailing byte is an incomplete sequence.
+                ReadOnlySpan<byte> span = "xy\U0002B754z"u8.Slice(1, 2);
+
+                List<int> enumeratedValues = new List<int>();
+                foreach (RunePosition runePosition in RunePosition.EnumerateUtf8(span))
+                {
+                    enumeratedValues.Add(runePosition.Rune.Value);
+                }
+                Assert.Equal(new int[] { 'y', '\uFFFD' }, enumeratedValues.ToArray());
+            }
+        }
+
+        // Verifies that Reset() rewinds both enumerators completely: after a reset the
+        // enumeration restarts at the first rune AND positions restart at index 0.
+        // StartIndex is asserted alongside the rune value because each position is derived
+        // from the previous Current; a Reset() that failed to clear Current would still
+        // return the right runes but at shifted indices.
+        [Fact]
+        public static void EnumerateRunePositions_ResetEnumeration()
+        {
+            string text = "AB\U0002B754CD";
+            byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);
+
+            {
+                // UTF-16 layout: A@0, B@1, U+2B754@2 (two chars), C@4, D@5.
+                RunePosition.Utf16Enumerator enumerator = RunePosition.EnumerateUtf16(text).GetEnumerator();
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('A', enumerator.Current.Rune.Value);
+                Assert.Equal(0, enumerator.Current.StartIndex);
+
+                // Reset in the middle of the enumeration.
+                enumerator.Reset();
+
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('A', enumerator.Current.Rune.Value);
+                Assert.Equal(0, enumerator.Current.StartIndex);
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('B', enumerator.Current.Rune.Value);
+                Assert.Equal(1, enumerator.Current.StartIndex);
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal(0x2B754, enumerator.Current.Rune.Value);
+                Assert.Equal(2, enumerator.Current.StartIndex);
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('C', enumerator.Current.Rune.Value);
+                Assert.Equal(4, enumerator.Current.StartIndex);
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('D', enumerator.Current.Rune.Value);
+                Assert.Equal(5, enumerator.Current.StartIndex);
+
+                // Reset after the enumeration has been exhausted.
+                Assert.False(enumerator.MoveNext());
+                enumerator.Reset();
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('A', enumerator.Current.Rune.Value);
+                Assert.Equal(0, enumerator.Current.StartIndex);
+            }
+
+            {
+                // UTF-8 layout: A@0, B@1, U+2B754@2 (four bytes), C@6, D@7.
+                RunePosition.Utf8Enumerator enumerator = RunePosition.EnumerateUtf8(utf8Bytes).GetEnumerator();
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('A', enumerator.Current.Rune.Value);
+                Assert.Equal(0, enumerator.Current.StartIndex);
+
+                // Reset in the middle of the enumeration.
+                enumerator.Reset();
+
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('A', enumerator.Current.Rune.Value);
+                Assert.Equal(0, enumerator.Current.StartIndex);
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('B', enumerator.Current.Rune.Value);
+                Assert.Equal(1, enumerator.Current.StartIndex);
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal(0x2B754, enumerator.Current.Rune.Value);
+                Assert.Equal(2, enumerator.Current.StartIndex);
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('C', enumerator.Current.Rune.Value);
+                Assert.Equal(6, enumerator.Current.StartIndex);
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('D', enumerator.Current.Rune.Value);
+                Assert.Equal(7, enumerator.Current.StartIndex);
+
+                // Reset after the enumeration has been exhausted.
+                Assert.False(enumerator.MoveNext());
+                enumerator.Reset();
+                Assert.True(enumerator.MoveNext());
+                Assert.Equal('A', enumerator.Current.Rune.Value);
+                Assert.Equal(0, enumerator.Current.StartIndex);
+            }
+        }
}
}