Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -652,8 +652,10 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\NormalizationForm.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Rune.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\SpanRuneEnumerator.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilder.Debug.cs" Condition="'$(Configuration)' == 'Debug'" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringRuneEnumerator.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeDebug.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeEncoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeUtility.cs" />
Expand Down
23 changes: 23 additions & 0 deletions src/System.Private.CoreLib/shared/System/MemoryExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;

using Internal.Runtime.CompilerServices;

Expand Down Expand Up @@ -975,6 +976,28 @@ ref MemoryMarshal.GetReference(value),
valueLength);
}

/// <summary>
/// Creates an enumerator that yields each <see cref="Rune"/> found in <paramref name="span"/>.
/// </summary>
/// <param name="span">The read-only span of UTF-16 characters to walk.</param>
/// <returns>A <see cref="SpanRuneEnumerator"/> constructed over <paramref name="span"/>.</returns>
/// <remarks>
/// Invalid sequences will be represented in the enumeration by <see cref="Rune.ReplacementChar"/>.
/// </remarks>
public static SpanRuneEnumerator EnumerateRunes(this ReadOnlySpan<char> span)
    => new SpanRuneEnumerator(span);

/// <summary>
/// Creates an enumerator that yields each <see cref="Rune"/> found in <paramref name="span"/>.
/// </summary>
/// <param name="span">The span of UTF-16 characters to walk.</param>
/// <returns>A <see cref="SpanRuneEnumerator"/> constructed over a read-only view of <paramref name="span"/>.</returns>
/// <remarks>
/// Invalid sequences will be represented in the enumeration by <see cref="Rune.ReplacementChar"/>.
/// </remarks>
public static SpanRuneEnumerator EnumerateRunes(this Span<char> span)
{
    // The enumerator only ever reads, so hand it a read-only view of the same memory.
    ReadOnlySpan<char> readOnlyView = span;
    return new SpanRuneEnumerator(readOnlyView);
}

/// <summary>
/// Reverses the sequence of the elements in the entire span.
/// </summary>
Expand Down
11 changes: 11 additions & 0 deletions src/System.Private.CoreLib/shared/System/String.cs
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,17 @@ IEnumerator IEnumerable.GetEnumerator()
return new CharEnumerator(this);
}

/// <summary>
/// Creates an enumerator that yields each <see cref="Rune"/> found in this string.
/// </summary>
/// <returns>A <see cref="StringRuneEnumerator"/> constructed over this string instance.</returns>
/// <remarks>
/// Invalid sequences will be represented in the enumeration by <see cref="Rune.ReplacementChar"/>.
/// </remarks>
public StringRuneEnumerator EnumerateRunes() => new StringRuneEnumerator(this);

internal static unsafe int wcslen(char* ptr)
{
char* end = ptr;
Expand Down
37 changes: 37 additions & 0 deletions src/System.Private.CoreLib/shared/System/Text/Rune.cs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,43 @@ public static Rune GetRuneAt(string input, int index)
[CLSCompliant(false)]
public static bool IsValid(uint value)
{
    // The validity decision is delegated entirely to the shared UnicodeUtility helper.
    return UnicodeUtility.IsValidUnicodeScalar(value);
}

// Decodes the scalar value that begins at the start of a UTF-16 buffer.
// Returns the scalar value on success, or a negative number (-1) on failure. Failure means
// the buffer is empty, starts with a lone low surrogate, or starts with a high surrogate
// that is not immediately followed by a low surrogate.
internal static int ReadFirstRuneFromUtf16Buffer(ReadOnlySpan<char> input)
{
    if (input.IsEmpty)
    {
        return -1;
    }

    uint firstUnit = input[0];

    // Common case: a non-surrogate code unit is already a complete scalar value.
    if (!UnicodeUtility.IsSurrogateCodePoint(firstUnit))
    {
        return (int)firstUnit;
    }

    // From here on 'firstUnit' is a surrogate; only a high surrogate may open a pair.
    if (!UnicodeUtility.IsHighSurrogateCodePoint(firstUnit))
    {
        return -1;
    }

    // A high surrogate needs a second code unit after it.
    if ((uint)input.Length <= 1)
    {
        return -1; // not an argument exception - just a "bad data" failure
    }

    uint secondUnit = input[1];
    if (!UnicodeUtility.IsLowSurrogateCodePoint(secondUnit))
    {
        return -1;
    }

    // Well-formed pair: combine both halves into one supplementary-plane scalar.
    uint combined = UnicodeUtility.GetScalarFromUtf16SurrogatePair(firstUnit, secondUnit);
    return (int)combined;
}

// returns a negative number on failure
private static int ReadRuneFromString(string input, int index)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace System.Text
{
    /// <summary>
    /// Enumerates the <see cref="Rune"/> values contained in a <see cref="ReadOnlySpan{T}"/> of <see cref="char"/>.
    /// </summary>
    /// <remarks>
    /// The type implements no enumeration interfaces; it exposes <see cref="GetEnumerator"/>,
    /// <see cref="MoveNext"/> and <see cref="Current"/> so that the compiler can bind to it
    /// by pattern in a foreach statement.
    /// </remarks>
    public ref struct SpanRuneEnumerator
    {
        // The portion of the buffer that has not been consumed yet.
        private ReadOnlySpan<char> _remaining;

        // The value most recently produced by MoveNext (default before the first call and after the end).
        private Rune _current;

        internal SpanRuneEnumerator(ReadOnlySpan<char> buffer)
        {
            _current = default;
            _remaining = buffer;
        }

        /// <summary>Gets the <see cref="Rune"/> produced by the most recent call to <see cref="MoveNext"/>.</summary>
        public Rune Current
        {
            get { return _current; }
        }

        /// <summary>Returns this enumerator (by value) so the type can be used directly in foreach.</summary>
        public SpanRuneEnumerator GetEnumerator()
        {
            return this;
        }

        /// <summary>
        /// Decodes the next <see cref="Rune"/> from the unread part of the buffer.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a value was produced; <see langword="false"/> if the buffer is exhausted,
        /// in which case <see cref="Current"/> is reset to its default value.
        /// </returns>
        public bool MoveNext()
        {
            if (_remaining.IsEmpty)
            {
                // Nothing left to decode.
                _current = default;
                return false;
            }

            // A negative result signals an ill-formed sequence, which is surfaced as U+FFFD.
            int decoded = Rune.ReadFirstRuneFromUtf16Buffer(_remaining);
            uint scalar = (decoded >= 0) ? (uint)decoded : (uint)Rune.ReplacementChar.Value;

            // An invalid UTF-16 sequence is always exactly one code unit long, and so is
            // U+FFFD. The produced rune's own UTF-16 length is therefore always the right
            // amount to advance by. This shortcut is specific to UTF-16 and would not work
            // for UTF-8, for example.
            Rune next = Rune.UnsafeCreate(scalar);
            _current = next;
            _remaining = _remaining.Slice(next.Utf16SequenceLength);
            return true;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections;
using System.Collections.Generic;

namespace System.Text
{
    /// <summary>
    /// Enumerates the <see cref="Rune"/> values contained in a <see cref="string"/>.
    /// </summary>
    /// <remarks>
    /// The struct acts as both the enumerable and the enumerator: every GetEnumerator
    /// overload simply hands back this instance.
    /// </remarks>
    public struct StringRuneEnumerator : IEnumerable<Rune>, IEnumerator<Rune>
    {
        // The string being walked.
        private readonly string _string;

        // The value most recently produced by MoveNext (default before the first call and after the end).
        private Rune _current;

        // Index into _string of the next UTF-16 code unit to decode.
        private int _nextIndex;

        internal StringRuneEnumerator(string value)
        {
            _nextIndex = 0;
            _current = default;
            _string = value;
        }

        /// <summary>Gets the <see cref="Rune"/> produced by the most recent call to <see cref="MoveNext"/>.</summary>
        public Rune Current
        {
            get { return _current; }
        }

        /// <summary>Returns this enumerator (by value) so the type can be used directly in foreach.</summary>
        public StringRuneEnumerator GetEnumerator()
        {
            return this;
        }

        /// <summary>
        /// Decodes the next <see cref="Rune"/> from the string.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if a value was produced; <see langword="false"/> if the end of the string
        /// has been reached, in which case <see cref="Current"/> is reset to its default value.
        /// </returns>
        public bool MoveNext()
        {
            if ((uint)_nextIndex < _string.Length)
            {
                // A failed decode is surfaced as U+FFFD instead of an error.
                bool decodedOk = Rune.TryGetRuneAt(_string, _nextIndex, out Rune decoded);
                _current = decodedOk ? decoded : Rune.ReplacementChar;

                // An invalid UTF-16 sequence is always exactly one code unit long, and so is
                // U+FFFD. The produced rune's own UTF-16 length is therefore always the right
                // amount to advance by. This shortcut is specific to UTF-16 and would not work
                // for UTF-8, for example.
                _nextIndex += _current.Utf16SequenceLength;
                return true;
            }

            // Past the last code unit: nothing more to produce.
            _current = default;
            return false;
        }

        object IEnumerator.Current
        {
            get { return _current; }
        }

        void IDisposable.Dispose()
        {
            // Intentionally empty: nothing is released here.
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return this;
        }

        IEnumerator<Rune> IEnumerable<Rune>.GetEnumerator()
        {
            return this;
        }

        void IEnumerator.Reset()
        {
            // Rewind to the state the constructor establishes.
            _nextIndex = 0;
            _current = default;
        }
    }
}