Skip to content

Commit

Permalink
ReadOnlySpan.Split (#216)
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonCropp authored Sep 12, 2024
1 parent 27fe680 commit 9c5bcfe
Show file tree
Hide file tree
Showing 9 changed files with 586 additions and 2 deletions.
2 changes: 1 addition & 1 deletion apiCount.include.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
**API count: 333**
**API count: 338**
4 changes: 4 additions & 0 deletions api_list.include.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@
#### ReadOnlySpan<T>

* `Boolean Contains<T>(T) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.contains#system-memoryextensions-contains-1(system-readonlyspan((-0))-0))
* `Polyfills.Polyfill/SpanSplitEnumerator<T> Split<T>(T) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split#system-memoryextensions-split-1(system-readonlyspan((-0))-0))
* `Polyfills.Polyfill/SpanSplitEnumerator<T> Split<T>(ReadOnlySpan<T>) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split#system-memoryextensions-split-1(system-readonlyspan((-0))-system-readonlyspan((-0))))
* `Polyfills.Polyfill/SpanSplitEnumerator<T> SplitAny<T>(ReadOnlySpan<T>) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.splitany#system-memoryextensions-splitany-1(system-readonlyspan((-0))-system-readonlyspan((-0))))
* `Polyfills.Polyfill/SpanSplitEnumerator<T> SplitAny<T>(Buffers.SearchValues<T>) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.splitany#system-memoryextensions-splitany-1(system-readonlyspan((-0))-system-buffers-searchvalues((-0))))


#### Reflection.EventInfo
Expand Down
6 changes: 5 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The package targets `netstandard2.0` and is designed to support the following ru
* `net5.0`, `net6.0`, `net7.0`, `net8.0`, `net9.0`


**API count: 333**<!-- singleLineInclude: apiCount. path: /apiCount.include.md -->
**API count: 338**<!-- singleLineInclude: apiCount. path: /apiCount.include.md -->


**See [Milestones](../../milestones?state=closed) for release notes.**
Expand Down Expand Up @@ -653,6 +653,10 @@ The class `Polyfill` includes the following extension methods:
#### ReadOnlySpan<T>

* `Boolean Contains<T>(T) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.contains#system-memoryextensions-contains-1(system-readonlyspan((-0))-0))
* `Polyfills.Polyfill/SpanSplitEnumerator<T> Split<T>(T) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split#system-memoryextensions-split-1(system-readonlyspan((-0))-0))
* `Polyfills.Polyfill/SpanSplitEnumerator<T> Split<T>(ReadOnlySpan<T>) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split#system-memoryextensions-split-1(system-readonlyspan((-0))-system-readonlyspan((-0))))
* `Polyfills.Polyfill/SpanSplitEnumerator<T> SplitAny<T>(ReadOnlySpan<T>) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.splitany#system-memoryextensions-splitany-1(system-readonlyspan((-0))-system-readonlyspan((-0))))
* `Polyfills.Polyfill/SpanSplitEnumerator<T> SplitAny<T>(Buffers.SearchValues<T>) where T : IEquatable<T>` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.splitany#system-memoryextensions-splitany-1(system-readonlyspan((-0))-system-buffers-searchvalues((-0))))


#### Reflection.EventInfo
Expand Down
4 changes: 4 additions & 0 deletions src/Consume/Consume.cs
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,10 @@ void ReadOnlySpan_Methods()
result = readOnlySpan.SequenceEqual("value");
result = readOnlySpan.StartsWith("value");
result = readOnlySpan.StartsWith("value", StringComparison.Ordinal);
var split = readOnlySpan.Split('a');
split = readOnlySpan.Split("a".AsSpan());
split = readOnlySpan.SplitAny('a');
split = readOnlySpan.SplitAny("a".AsSpan());
}

#endif
Expand Down
93 changes: 93 additions & 0 deletions src/Polyfill/Polyfill_Memory_SpanSplit.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// <auto-generated />

#pragma warning disable

#if FeatureMemory

namespace Polyfills;

using System;
using System.Buffers;
using System.Diagnostics.CodeAnalysis;
using Link = System.ComponentModel.DescriptionAttribute;

static partial class Polyfill
{

#if !NET9_0_OR_GREATER
    // Ported from https://github.com/bbartels/runtime/blob/master/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs

    /// <summary>
    /// Creates an enumerator over the segments of <paramref name="source"/> that are
    /// delimited by a single separator element.
    /// </summary>
    /// <typeparam name="T">The element type of the span.</typeparam>
    /// <param name="source">The span whose segments are enumerated.</param>
    /// <param name="separator">The element that marks the boundary between segments.</param>
    /// <returns>A <see cref="SpanSplitEnumerator{T}"/> configured for single-element splitting.</returns>
    [Link("https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split#system-memoryextensions-split-1(system-readonlyspan((-0))-0)")]
    public static SpanSplitEnumerator<T> Split<T>(this ReadOnlySpan<T> source, T separator)
        where T : IEquatable<T>
    {
        return new SpanSplitEnumerator<T>(source, separator);
    }

    /// <summary>
    /// Creates an enumerator over the segments of <paramref name="source"/> that are
    /// delimited by a separator sequence, where the whole sequence acts as one separator.
    /// </summary>
    /// <typeparam name="T">The element type of the span.</typeparam>
    /// <param name="source">The span whose segments are enumerated.</param>
    /// <param name="separator">The sequence of elements that marks the boundary between segments.</param>
    /// <returns>A <see cref="SpanSplitEnumerator{T}"/> configured for sequence splitting.</returns>
    [Link("https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split#system-memoryextensions-split-1(system-readonlyspan((-0))-system-readonlyspan((-0)))")]
    public static SpanSplitEnumerator<T> Split<T>(this ReadOnlySpan<T> source, ReadOnlySpan<T> separator)
        where T : IEquatable<T>
    {
        // The boolean selects the "whole span is one separator" constructor overload.
        return new SpanSplitEnumerator<T>(source, separator, true);
    }

    /// <summary>
    /// Creates an enumerator over the segments of <paramref name="source"/> that are
    /// delimited by any one of the supplied separator elements.
    /// </summary>
    /// <typeparam name="T">The element type of the span.</typeparam>
    /// <param name="source">The span whose segments are enumerated.</param>
    /// <param name="separators">The set of elements, each of which marks a boundary between segments.</param>
    /// <returns>A <see cref="SpanSplitEnumerator{T}"/> configured for any-of splitting.</returns>
    /// <remarks>
    /// When <typeparamref name="T"/> is <see cref="char"/> and <paramref name="separators"/> is empty,
    /// the Unicode whitespace characters are used as the separators. This mirrors what
    /// <see cref="string.Split(char[])"/> and related overloads do when given an empty separator array,
    /// and what <see cref="SplitAny(ReadOnlySpan{char}, Span{Range}, ReadOnlySpan{char}, StringSplitOptions)"/>
    /// does when given an empty separator span.
    /// </remarks>
    [Link("https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.splitany#system-memoryextensions-splitany-1(system-readonlyspan((-0))-system-readonlyspan((-0)))")]
    public static SpanSplitEnumerator<T> SplitAny<T>(this ReadOnlySpan<T> source, [UnscopedRef] params ReadOnlySpan<T> separators)
        where T : IEquatable<T>
    {
        return new SpanSplitEnumerator<T>(source, separators);
    }

#if NET8_0

    /// <summary>
    /// Creates an enumerator over the segments of <paramref name="source"/> that are
    /// delimited by any element contained in the supplied <see cref="SearchValues{T}"/>.
    /// </summary>
    /// <typeparam name="T">The element type of the span.</typeparam>
    /// <param name="source">The span whose segments are enumerated.</param>
    /// <param name="separators">The <see cref="SearchValues{T}"/> holding the elements that mark boundaries between segments.</param>
    /// <returns>A <see cref="SpanSplitEnumerator{T}"/> configured for search-values splitting.</returns>
    /// <remarks>
    /// Unlike <see cref="SplitAny{T}(ReadOnlySpan{T}, ReadOnlySpan{T})"/>, <paramref name="separators"/> is not checked for being empty.
    /// An empty <paramref name="separators"/> results in no separators being found, regardless of <typeparamref name="T"/>,
    /// whereas <see cref="SplitAny{T}(ReadOnlySpan{T}, ReadOnlySpan{T})"/> falls back to the Unicode whitespace characters
    /// when its separators are empty and <typeparamref name="T"/> is <see cref="char"/>.
    /// </remarks>
    [Link("https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.splitany#system-memoryextensions-splitany-1(system-readonlyspan((-0))-system-buffers-searchvalues((-0)))")]
    public static SpanSplitEnumerator<T> SplitAny<T>(this ReadOnlySpan<T> source, SearchValues<T> separators)
        where T : IEquatable<T>
    {
        return new SpanSplitEnumerator<T>(source, separators);
    }

#endif

#endif

}

#endif
209 changes: 209 additions & 0 deletions src/Polyfill/Polyfill_Memory_SpanSplitEnumerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// <auto-generated />
#pragma warning disable

#if FeatureMemory && !NET9_0_OR_GREATER

namespace Polyfills;
using System;
using System.Buffers;
using System.Linq;
using System.Runtime.CompilerServices;
using Link = System.ComponentModel.DescriptionAttribute;

static partial class Polyfill
{
    /// <summary>
    /// Enables enumerating each split within a <see cref="ReadOnlySpan{T}"/> that has been divided using one or more separators.
    /// </summary>
    //https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
    [Link("https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.spansplitenumerator-1")]
    public ref struct SpanSplitEnumerator<T>
        where T : IEquatable<T>
    {
        /// <summary>The input span being split.</summary>
        readonly ReadOnlySpan<T> _span;

        /// <summary>A single separator to use when <see cref="_splitMode"/> is <see cref="SpanSplitEnumeratorMode.SingleElement"/>.</summary>
        readonly T _separator = default!;

        /// <summary>
        /// A separator span to use when <see cref="_splitMode"/> is <see cref="SpanSplitEnumeratorMode.Sequence"/> (in which case
        /// it's treated as a single separator) or <see cref="SpanSplitEnumeratorMode.Any"/> (in which case it's treated as a set of separators).
        /// </summary>
        readonly ReadOnlySpan<T> _separatorBuffer;

#if NET8_0
        /// <summary>A set of separators to use when <see cref="_splitMode"/> is <see cref="SpanSplitEnumeratorMode.SearchValues"/>.</summary>
        readonly SearchValues<T> _searchValues = default!;
#endif

        /// <summary>Mode that dictates how the instance was configured and how its fields should be used in <see cref="MoveNext"/>.</summary>
        SpanSplitEnumeratorMode _splitMode;

        /// <summary>The inclusive starting index in <see cref="_span"/> of the current range.</summary>
        int _startCurrent = 0;

        /// <summary>The exclusive ending index in <see cref="_span"/> of the current range.</summary>
        int _endCurrent = 0;

        /// <summary>The index in <see cref="_span"/> from which the next separator search should start.</summary>
        int _startNext = 0;

        /// <summary>Gets an enumerator that allows for iteration over the split span.</summary>
        /// <returns>Returns a <see cref="SpanSplitEnumerator{T}"/> that can be used to iterate over the split span.</returns>
        public SpanSplitEnumerator<T> GetEnumerator() => this;

        /// <summary>Gets the current element of the enumeration.</summary>
        /// <returns>Returns a <see cref="Range"/> instance that indicates the bounds of the current element within the source span.</returns>
        public Range Current => new Range(_startCurrent, _endCurrent);

#if NET8_0
        /// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.SearchValues"/>.</summary>
        /// <param name="span">The span to split.</param>
        /// <param name="searchValues">The set of separator elements; it is stored as-is and not checked for emptiness.</param>
        internal SpanSplitEnumerator(ReadOnlySpan<T> span, SearchValues<T> searchValues)
        {
            _span = span;
            _splitMode = SpanSplitEnumeratorMode.SearchValues;
            _searchValues = searchValues;
        }
#endif

        /// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.Any"/>.</summary>
        /// <param name="span">The span to split.</param>
        /// <param name="separators">The set of separator elements, each treated independently.</param>
        /// <remarks>
        /// If <paramref name="separators"/> is empty and <typeparamref name="T"/> is <see cref="char"/>, the whitespace
        /// characters in <see cref="WhiteSpaceChars"/> are used instead. When compiled with <c>NET8_0</c> this switches to
        /// <see cref="SpanSplitEnumeratorMode.SearchValues"/> backed by the cached <see cref="SearchValues{Char}"/>;
        /// otherwise it stays in <see cref="SpanSplitEnumeratorMode.Any"/> and uses the shared whitespace array as the separator buffer.
        /// </remarks>
        internal SpanSplitEnumerator(ReadOnlySpan<T> span, ReadOnlySpan<T> separators)
        {
            _span = span;

            if (typeof(T) == typeof(char) && separators.Length == 0)
            {
#if NET8_0
                _searchValues = Unsafe.As<SearchValues<T>>(WhiteSpaceChars);
                _splitMode = SpanSplitEnumeratorMode.SearchValues;
#else
                // T is char in this branch, so the shared char[] is already a T[]; reinterpret it
                // through object instead of copying it via LINQ on every enumerator construction.
                _separatorBuffer = (T[])(object)WhiteSpaceChars;
                _splitMode = SpanSplitEnumeratorMode.Any;
#endif
                return;
            }

            _separatorBuffer = separators;
            _splitMode = SpanSplitEnumeratorMode.Any;
        }

        /// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.Sequence"/> (or <see cref="SpanSplitEnumeratorMode.EmptySequence"/> if the separator is empty).</summary>
        /// <param name="span">The span to split.</param>
        /// <param name="separator">The sequence of elements treated as one separator.</param>
        /// <param name="treatAsSingleSeparator">Not read by the constructor body; it gives this overload a signature distinct from the any-of constructor.</param>
        /// <remarks><paramref name="treatAsSingleSeparator"/> must be true.</remarks>
        internal SpanSplitEnumerator(ReadOnlySpan<T> span, ReadOnlySpan<T> separator, bool treatAsSingleSeparator)
        {
            _span = span;
            _separatorBuffer = separator;
            _splitMode = separator.Length == 0 ? SpanSplitEnumeratorMode.EmptySequence : SpanSplitEnumeratorMode.Sequence;
        }

        /// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.SingleElement"/>.</summary>
        /// <param name="span">The span to split.</param>
        /// <param name="separator">The single element that separates segments.</param>
        internal SpanSplitEnumerator(ReadOnlySpan<T> span, T separator)
        {
            _span = span;
            _separator = separator;
            _splitMode = SpanSplitEnumeratorMode.SingleElement;
        }

        /// <summary>
        /// Advances the enumerator to the next element of the enumeration.
        /// </summary>
        /// <returns><see langword="true"/> if the enumerator was successfully advanced to the next element; <see langword="false"/> if the enumerator has passed the end of the enumeration.</returns>
        /// <exception cref="InvalidOperationException">The enumerator is in a split mode that this build does not handle.</exception>
        public bool MoveNext()
        {
            // Search for the next separator index, relative to _startNext.
            int separatorIndex, separatorLength;
            switch (_splitMode)
            {
                case SpanSplitEnumeratorMode.None:
                    return false;

                case SpanSplitEnumeratorMode.SingleElement:
                    separatorLength = 1;
#if NETFRAMEWORK
                    // NOTE(review): a null separator is searched for by hand on .NET Framework,
                    // presumably because IndexOf there cannot be given a null value - confirm.
                    if (_separator is null)
                    {
                        separatorIndex = -1;
                        for (int i = _startNext; i < _span.Length; i++)
                        {
                            if (_span[i] is null)
                            {
                                // Unlike the IndexOf results below, this index is relative to the
                                // start of _span rather than to _startNext.
                                separatorIndex = i;
                                break;
                            }
                        }
                        break;
                    }
#endif
                    separatorIndex = _span.Slice(_startNext)
                        .IndexOf(_separator);
                    break;

                case SpanSplitEnumeratorMode.Any:
                    separatorLength = 1;
#if !NETCOREAPP
                    //https://github.com/dotnet/coreclr/pull/25075
                    // With no separators, skip IndexOfAny and report "not found" directly.
                    if (_separatorBuffer.Length == 0)
                    {
                        separatorIndex = -1;
                        break;
                    }
#endif
                    separatorIndex = _span.Slice(_startNext)
                        .IndexOfAny(_separatorBuffer);
                    break;

                case SpanSplitEnumeratorMode.Sequence:
                    separatorIndex = _span.Slice(_startNext)
                        .IndexOf(_separatorBuffer);
                    separatorLength = _separatorBuffer.Length;
                    break;

                case SpanSplitEnumeratorMode.EmptySequence:
                    // An empty sequence never matches, so the whole remaining span is one segment.
                    separatorIndex = -1;
                    separatorLength = 1;
                    break;
#if NET8_0
                case SpanSplitEnumeratorMode.SearchValues:
                    separatorIndex = _span.Slice(_startNext).IndexOfAny(_searchValues);
                    separatorLength = 1;
                    break;
#endif

                default:
                    throw new InvalidOperationException($"Invalid split mode: {_splitMode}");
            }

            _startCurrent = _startNext;
            if (separatorIndex >= 0)
            {
                _endCurrent = _startCurrent + separatorIndex;
                _startNext = _endCurrent + separatorLength;
            }
            else
            {
                _startNext = _endCurrent = _span.Length;

                // Set _splitMode to None so that subsequent MoveNext calls will return false.
                _splitMode = SpanSplitEnumeratorMode.None;
            }

            return true;
        }
    }

#if NET8_0_OR_GREATER
    /// <summary>The whitespace characters used as separators when a <see cref="char"/> span is split with an empty separator set.</summary>
    public static readonly SearchValues<char> WhiteSpaceChars =
        SearchValues.Create("\t\n\v\f\r\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000".AsSpan());
#else
    /// <summary>The whitespace characters used as separators when a <see cref="char"/> span is split with an empty separator set.</summary>
    public static readonly char[] WhiteSpaceChars = "\t\n\v\f\r\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000".ToCharArray();
#endif

}

#endif
38 changes: 38 additions & 0 deletions src/Polyfill/Polyfill_Memory_SpanSplitEnumeratorMode.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// <auto-generated />
#pragma warning disable

#if FeatureMemory && !NET9_0_OR_GREATER

namespace Polyfills;

static partial class Polyfill
{
    /// <summary>
    /// Identifies how a <see cref="SpanSplitEnumerator{T}"/> was configured, and therefore which of its
    /// fields are meaningful while it searches for the next separator.
    /// </summary>
    enum SpanSplitEnumeratorMode
    {
        /// <summary>
        /// The enumerator is a default instance, or it has already produced its final segment;
        /// in both cases there is nothing left to enumerate.
        /// </summary>
        None = 0,

        /// <summary>Splitting is driven by one separator element of type T.</summary>
        SingleElement = 1,

        /// <summary>Splitting is driven by a span of separators, any one of which ends a segment.</summary>
        Any = 2,

        /// <summary>Splitting is driven by a span of elements that must match as a whole to end a segment.</summary>
        Sequence = 3,

        /// <summary>The separator sequence is empty, so the input is never split.</summary>
        EmptySequence = 4,

#if NET8_0
        /// <summary>
        /// Splitting is driven by a <see cref="SearchValues{Char}"/>. It behaves like <see cref="Any"/>, except that the
        /// separators live in the <see cref="SearchValues"/> instance rather than in a <see cref="ReadOnlySpan{Char}"/>.
        /// </summary>
        SearchValues = 5
#endif
    }
}

#endif
Loading

0 comments on commit 9c5bcfe

Please sign in to comment.