diff --git a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
index 8768d19223d4..f209d954bedb 100644
--- a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
+++ b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
@@ -652,8 +652,10 @@
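+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\SpanRuneEnumerator.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\StringRuneEnumerator.cs" />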
diff --git a/src/System.Private.CoreLib/shared/System/MemoryExtensions.cs b/src/System.Private.CoreLib/shared/System/MemoryExtensions.cs
index 6521a5af2e31..6145801faff3 100644
--- a/src/System.Private.CoreLib/shared/System/MemoryExtensions.cs
+++ b/src/System.Private.CoreLib/shared/System/MemoryExtensions.cs
@@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
+using System.Text;
using Internal.Runtime.CompilerServices;
@@ -975,6 +976,28 @@ ref MemoryMarshal.GetReference(value),
valueLength);
}
+        /// <summary>
+        /// Returns an enumeration of <see cref="Rune"/> from the provided span.
+        /// </summary>
+        /// <param name="span">The UTF-16 text to enumerate; it is handed unchanged to the enumerator.</param>
+        /// <returns>A new <see cref="SpanRuneEnumerator"/> positioned before the first element of <paramref name="span"/>.</returns>
+        /// <remarks>
+        /// Invalid sequences will be represented in the enumeration by <see cref="Rune.ReplacementChar"/>.
+        /// </remarks>
+        public static SpanRuneEnumerator EnumerateRunes(this ReadOnlySpan<char> span)
+        {
+            return new SpanRuneEnumerator(span);
+        }
+
+        /// <summary>
+        /// Returns an enumeration of <see cref="Rune"/> from the provided span.
+        /// </summary>
+        /// <param name="span">The UTF-16 text to enumerate.</param>
+        /// <returns>A new <see cref="SpanRuneEnumerator"/> positioned before the first element of <paramref name="span"/>.</returns>
+        /// <remarks>
+        /// Invalid sequences will be represented in the enumeration by <see cref="Rune.ReplacementChar"/>.
+        /// </remarks>
+        public static SpanRuneEnumerator EnumerateRunes(this Span<char> span)
+        {
+            // The enumerator's constructor takes a read-only span, so the writable span is
+            // converted implicitly here; enumeration never writes to the buffer.
+            return new SpanRuneEnumerator(span);
+        }
+
///
/// Reverses the sequence of the elements in the entire span.
///
diff --git a/src/System.Private.CoreLib/shared/System/String.cs b/src/System.Private.CoreLib/shared/System/String.cs
index 7050644d9aac..366b678a22ac 100644
--- a/src/System.Private.CoreLib/shared/System/String.cs
+++ b/src/System.Private.CoreLib/shared/System/String.cs
@@ -532,6 +532,17 @@ IEnumerator IEnumerable.GetEnumerator()
return new CharEnumerator(this);
}
+        /// <summary>
+        /// Returns an enumeration of <see cref="Rune"/> from this string.
+        /// </summary>
+        /// <returns>A new <see cref="StringRuneEnumerator"/> that starts at the beginning of this instance.</returns>
+        /// <remarks>
+        /// Invalid sequences will be represented in the enumeration by <see cref="Rune.ReplacementChar"/>.
+        /// </remarks>
+        public StringRuneEnumerator EnumerateRunes() => new StringRuneEnumerator(this);
+
internal static unsafe int wcslen(char* ptr)
{
char* end = ptr;
diff --git a/src/System.Private.CoreLib/shared/System/Text/Rune.cs b/src/System.Private.CoreLib/shared/System/Text/Rune.cs
index a4ef3a37b731..d405b69b2138 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Rune.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Rune.cs
@@ -249,6 +249,43 @@ public static Rune GetRuneAt(string input, int index)
[CLSCompliant(false)]
public static bool IsValid(uint value) => UnicodeUtility.IsValidUnicodeScalar(value);
+        // Decodes the first Unicode scalar value found at the start of a UTF-16 buffer.
+        // Returns the decoded scalar value on success, or a negative number on failure.
+        // Failure is a "bad data" signal rather than an exception; it is reported when the
+        // buffer is empty, when it starts with a low surrogate, or when it starts with a
+        // high surrogate that is not immediately followed by a low surrogate.
+        internal static int ReadFirstRuneFromUtf16Buffer(ReadOnlySpan<char> input)
+        {
+            if (input.IsEmpty)
+            {
+                return -1;
+            }
+
+            // Optimistically assume input is within BMP.
+
+            uint returnValue = input[0];
+            if (UnicodeUtility.IsSurrogateCodePoint(returnValue))
+            {
+                if (!UnicodeUtility.IsHighSurrogateCodePoint(returnValue))
+                {
+                    // A surrogate that is not a high surrogate cannot begin a valid pair.
+                    return -1;
+                }
+
+                // Treat 'returnValue' as the high surrogate.
+
+                if (1 >= (uint)input.Length)
+                {
+                    return -1; // not an argument exception - just a "bad data" failure
+                }
+
+                uint potentialLowSurrogate = input[1];
+                if (!UnicodeUtility.IsLowSurrogateCodePoint(potentialLowSurrogate))
+                {
+                    return -1;
+                }
+
+                // Both halves are present: combine them into a single scalar value.
+                returnValue = UnicodeUtility.GetScalarFromUtf16SurrogatePair(returnValue, potentialLowSurrogate);
+            }
+
+            return (int)returnValue;
+        }
+
// returns a negative number on failure
private static int ReadRuneFromString(string input, int index)
{
diff --git a/src/System.Private.CoreLib/shared/System/Text/SpanRuneEnumerator.cs b/src/System.Private.CoreLib/shared/System/Text/SpanRuneEnumerator.cs
new file mode 100644
index 000000000000..082a5108c140
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/SpanRuneEnumerator.cs
@@ -0,0 +1,51 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Text
+{
+    // An enumerator for retrieving System.Text.Rune instances from a ROS<char>.
+    // Methods are pattern-matched by compiler to allow using foreach pattern.
+    public ref struct SpanRuneEnumerator
+    {
+        // The not-yet-consumed tail of the buffer; it shrinks on every successful MoveNext.
+        private ReadOnlySpan<char> _remaining;
+        private Rune _current;
+
+        internal SpanRuneEnumerator(ReadOnlySpan<char> buffer)
+        {
+            _remaining = buffer;
+            _current = default;
+        }
+
+        /// <summary>
+        /// Gets the <see cref="Rune"/> produced by the most recent successful call to
+        /// <see cref="MoveNext"/>. Holds the default <see cref="Rune"/> before the first call
+        /// and after <see cref="MoveNext"/> has returned <see langword="false"/>.
+        /// </summary>
+        public Rune Current => _current;
+
+        /// <summary>
+        /// Returns a copy of this enumerator so that the type can be used directly in a foreach statement.
+        /// </summary>
+        public SpanRuneEnumerator GetEnumerator() => this;
+
+        /// <summary>
+        /// Advances to the next <see cref="Rune"/> in the buffer.
+        /// </summary>
+        /// <returns>
+        /// <see langword="true"/> if a value was read into <see cref="Current"/>;
+        /// <see langword="false"/> if the buffer has been fully consumed.
+        /// </returns>
+        public bool MoveNext()
+        {
+            if (_remaining.IsEmpty)
+            {
+                // reached the end of the buffer
+                _current = default;
+                return false;
+            }
+
+            int scalarValue = Rune.ReadFirstRuneFromUtf16Buffer(_remaining);
+            if (scalarValue < 0)
+            {
+                // replace invalid sequences with U+FFFD
+                scalarValue = Rune.ReplacementChar.Value;
+            }
+
+            // In UTF-16 specifically, invalid sequences always have length 1, which is the same
+            // length as the replacement character U+FFFD. This means that we can always bump the
+            // next index by the current scalar's UTF-16 sequence length. This optimization is not
+            // generally applicable; for example, enumerating scalars from UTF-8 cannot utilize
+            // this same trick.
+
+            _current = Rune.UnsafeCreate((uint)scalarValue);
+            _remaining = _remaining.Slice(_current.Utf16SequenceLength);
+            return true;
+        }
+    }
+}
diff --git a/src/System.Private.CoreLib/shared/System/Text/StringRuneEnumerator.cs b/src/System.Private.CoreLib/shared/System/Text/StringRuneEnumerator.cs
new file mode 100644
index 000000000000..fe12dfa4f74b
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/StringRuneEnumerator.cs
@@ -0,0 +1,70 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections;
+using System.Collections.Generic;
+
+namespace System.Text
+{
+    // An enumerator for retrieving System.Text.Rune instances from a System.String.
+    // The struct is both the enumerable and the enumerator, so it can be used directly in a
+    // foreach statement or handed to code that expects IEnumerable<Rune> / IEnumerator<Rune>.
+    public struct StringRuneEnumerator : IEnumerable<Rune>, IEnumerator<Rune>
+    {
+        private readonly string _string;
+        private Rune _current;
+        // Index into _string of the first UTF-16 code unit that has not been consumed yet.
+        private int _nextIndex;
+
+        internal StringRuneEnumerator(string value)
+        {
+            _string = value;
+            _current = default;
+            _nextIndex = 0;
+        }
+
+        /// <summary>
+        /// Gets the <see cref="Rune"/> produced by the most recent successful call to
+        /// <see cref="MoveNext"/>. Holds the default <see cref="Rune"/> before the first call,
+        /// after a reset, and after <see cref="MoveNext"/> has returned <see langword="false"/>.
+        /// </summary>
+        public Rune Current => _current;
+
+        /// <summary>
+        /// Returns a copy of this enumerator so that the type can be used directly in a foreach statement.
+        /// </summary>
+        public StringRuneEnumerator GetEnumerator() => this;
+
+        /// <summary>
+        /// Advances to the next <see cref="Rune"/> in the string.
+        /// </summary>
+        /// <returns>
+        /// <see langword="true"/> if a value was read into <see cref="Current"/>;
+        /// <see langword="false"/> if the end of the string has been reached.
+        /// </returns>
+        public bool MoveNext()
+        {
+            if ((uint)_nextIndex >= _string.Length)
+            {
+                // reached the end of the string
+                _current = default;
+                return false;
+            }
+
+            if (!Rune.TryGetRuneAt(_string, _nextIndex, out _current))
+            {
+                // replace invalid sequences with U+FFFD
+                _current = Rune.ReplacementChar;
+            }
+
+            // In UTF-16 specifically, invalid sequences always have length 1, which is the same
+            // length as the replacement character U+FFFD. This means that we can always bump the
+            // next index by the current scalar's UTF-16 sequence length. This optimization is not
+            // generally applicable; for example, enumerating scalars from UTF-8 cannot utilize
+            // this same trick.
+
+            _nextIndex += _current.Utf16SequenceLength;
+            return true;
+        }
+
+        // Non-generic view of Current; the Rune is boxed on each access.
+        object IEnumerator.Current => _current;
+
+        void IDisposable.Dispose()
+        {
+            // no-op
+        }
+
+        // Both interface-typed GetEnumerator implementations return (a boxed copy of) this
+        // instance, so enumeration continues from wherever this instance currently is.
+        IEnumerator IEnumerable.GetEnumerator() => this;
+
+        IEnumerator<Rune> IEnumerable<Rune>.GetEnumerator() => this;
+
+        void IEnumerator.Reset()
+        {
+            _current = default;
+            _nextIndex = 0;
+        }
+    }
+}