Introduce a Utf8String type #933

GrabYourPitchforks · 2018-06-19T03:05:48Z

AB#1117209
This is the API proposal for Utf8String, an immutable, heap-allocated representation of UTF-8 string data. See dotnet/corefxlab#2368 for the scenarios and design philosophy behind this proposal.

This proposal also includes APIs that improve text processing across the framework as a whole,
including changes to existing types like String and CultureInfo.

// n.b. System namespace
namespace System
{
    // New APIs added to System.String
    public sealed partial class String
    {
        public UnicodeScalar GetScalarAt(int index) { throw null; }
        public bool TryGetScalarAt(int index, out UnicodeScalar value) { throw null; }
    }

    // Represents a string whose internal representation consists of UTF-8 subsequences.
    // Like the String class, developers are *strongly discouraged* from creating instances of
    // this type that have invalid UTF-8 subsequences (and our APIs try to encourage good hygiene
    // in this regard), but instances of this type are *not guaranteed* to consist only of well-
    // formed UTF-8 subsequences. The APIs hanging off this type have well-defined, predictable
    // behavior regardless of whether the UTF-8 string contains invalid subsequences.
    //
    // The class isn't directly enumerable, instead relying on the developer to
    // go through one of the AsBytes / AsSpan / GetScalars APIs. It does expose an indexer
    // over individual UTF-8 code units.
    //
    // Whenever length / index / offset / count / etc. occurs in these APIs, it's in terms of number
    // of Utf8Char elements. (Or, "byte length" if you prefer.)
    public sealed unsafe class Utf8String : IEquatable<Utf8String>
    {
        /*
         * CONSTRUCTORS
         * All public ctors are validating ctors.
         * Complexity is O(n) for memcpy and O(n) for validation.
         * Behavior given invalid input: bad sequences replaced with U+FFFD.
         * Scroll down further in the file for static factories that suppress validation.
         */

        // For null-terminated UTF-8 and UTF-16 sequences.
        // If not null-terminated, wrap (ptr, length) in a Span and call the Span-based ctors.
        public unsafe Utf8String(byte* value) { }
        public Utf8String(char* value) => { }

        // For non null-terminated UTF-8 and UTF-16 sequences.
        public Utf8String(ReadOnlySpan<byte> value) { }
        public Utf8String(ReadOnlySpan<char> value) { }
        public Utf8String(byte[] value, int startIndex, int length) { }        
        public Utf8String(char[] value, int startIndex, int length) { }

        // For discoverability / ease of use, equivalent to ROS<char>-based ctor
        public Utf8String(String value) { }

        // No Utf8String(ReadOnlySpan<Utf8Char>) or similar ctor due to complexity of plumbing this through
        // the VM, but can call ROS<Utf8Char>.ToUtf8String() extension method for now as workaround.

        /*
         * COMPARISON
         * All equality / comparison methods which don't explicitly take a StringComparison
         * are ordinal by default. This differs slightly from System.String but is self-consistent
         * within the Utf8String class.
         */

        public static bool operator ==(Utf8String a, Utf8String b) => throw null;
        public static bool operator !=(Utf8String a, Utf8String b) => throw null;

        /*
         * PROJECTION
         * n.b. No implicit or explicit cast from Utf8String <-> String.
         * Reason for this is that the cast would have O(n) complexity, which would be
         * potentially surprising for developers. Use ToString() / ToUtf8String() instead.
         */

        public static implicit operator ReadOnlySpan<Utf8Char>(Utf8String value) => throw null;

        // static readonly field, not property or const, to match String.Empty
        public static readonly Utf8String Empty;

        // Length (in UTF-8 code units)
        public int Length { get => throw null; }

        // Indexer (in UTF-8 code units)
        // Returns 'ref readonly' since enables more scenarios for callers
        public ref readonly Utf8Char this[int index] => throw null;

        /*
         * CONCAT
         * This set of overloads may change based on how language and compiler support for '+' works
         * with Utf8String instances, including whether struct-based builder types come online.
         * Let's go with this for now pending how those other features shake out.
         */

        public static Utf8String Concat(Utf8String str0, Utf8String str1) => throw null;
        public static Utf8String Concat(Utf8String str0, Utf8String str1, Utf8String str2) => throw null;
        public static Utf8String Concat(Utf8String str0, Utf8String str1, Utf8String str2, Utf8String str3) => throw null;

        // Contains: overloads which don't take a StringComparison assume Ordinal.

        public bool Contains(char value) => throw null;
        public bool Contains(char value, StringComparison comparisonType) => throw null;
        public bool Contains(Utf8String value) => throw null;
        public bool Contains(Utf8String value, StringComparison comparisonType) => throw null;
        public bool Contains(UnicodeScalar value) => throw null;
        public bool Contains(UnicodeScalar value, StringComparison comparisonType) => throw null;

        public void CopyTo(Span<byte> destination) => throw null;
        public void CopyTo(Span<Utf8Char> destination) => throw null;

        // The static factories below allow the developer to control how (or even whether) validation is performed
        // against the incoming data. Developers should exercise caution when calling this API with the "allow invalid
        // data" flag, taking into account such considerations as:
        // (a) whether the input came from a trustworthy source,
        // (b) which component the constructed instance will be passed to, and
        // (c) the behavior such component might exhibit if faced with invalid sequences.
        //
        // As an example of such a scenario that requires further scrutiny, consider a forum that allows users to
        // sign up for new accounts and post messages. Forum administrators use a web interface to perform such
        // tasks as deleting abusive accounts, moving messages, and so forth. If a malicious user attempts to sign
        // up with a username that contains an invalid UTF-8 sequence, and if such sequence round-trips through the
        // messages database, the username that appears in the page's HTML (as a string) might be different than the
        // username that actually exists in the database (as an arbitrary byte sequence). One potential consequence
        // of this is that if such user starts posting abusive messages, admins will be powerless to do anything via
        // the web interface since the "delete account" API will return "user does not exist", instead requiring the
        // IT administrator to go directly to the database and purge the abuser's account.

        public static Utf8String Create(ReadOnlySpan<byte> value, InvalidSequenceBehavior behavior = InvalidSequenceBehavior.ReplaceInvalidSequence) => throw null;
        public static Utf8String Create(ReadOnlySpan<Utf8Char> value, InvalidSequenceBehavior behavior = InvalidSequenceBehavior.ReplaceInvalidSequence) => throw null;
        public static Utf8String Create<TState>(int length, TState state, System.Buffers.SpanAction<byte, TState> action, InvalidSequenceBehavior behavior = InvalidSequenceBehavior.ReplaceInvalidSequence) => throw null;

        // "CreateFromBytes" is renamed so that type inference doesn't fail if the developer
        // passes an untyped lambda as the third parameter. O(n) for memcpy + O(n) for validation.
        // Behavior given invalid input: fixes up invalid sequences on-the-fly.

        public static Utf8String Create<TState>(int length, TState state, SpanAction<Char8, TState> action) => throw null;
        public static Utf8String CreateFromBytes<TState>(int length, TState state, SpanAction<byte, TState> action) => throw null;

        // EndsWith: for simplicity, only implemented as Ordinal for now.
        
        public bool EndsWith(char value) => throw null;
        public bool EndsWith(UnicodeScalar value) => throw null;
        public bool EndsWith(Utf8String value) => throw null;

        /*
         * EQUALS
         * The Equals(object) overload only matches Utf8String, not String.
         *
         * OPEN QUESTION: Do we need an Equals(Utf8String, String) overload? The performance of that method could be
         * somewhat rough and might involve a transcoding operation, which may surprise the developer.
         *
         * When transcoding is required, comparison is by ordinal scalar, and invalid subsequences immediately return failure.
         * Example: the UTF-8 string [ C1 80 ] will *never* match any UTF-16 string.
         */

        public override bool Equals(object obj) => throw null;
        public bool Equals(Utf8String value) => throw null;
        public static bool Equals(Utf8String a, Utf8String b) => throw null;
        public static bool Equals(Utf8String a, Utf8String b, StringComparison comparisonType) => throw null;

        public int GetHashCode(StringComparison comparisonType) => throw null;
        public static int GetHashCode(ReadOnlySpan<byte> value) => throw null;
        public static int GetHashCode(ReadOnlySpan<byte> value, StringComparison comparisonType) => throw null;
        public static int GetHashCode(ReadOnlySpan<Utf8Char> value) => throw null;
        public static int GetHashCode(ReadOnlySpan<Utf8Char> value, StringComparison comparisonType) => throw null;

        // Used for pinning. Typed as 'byte' instead of 'Utf8Char' because the scenario for calling this
        // is p/invoke, and we don't want to require a reinterpret_cast.

        [EditorBrowsable(EditorBrowsableState.Never)]
        public ref readonly byte GetPinnableReference() => throw null;

        public UnicodeScalar GetScalarAt(int index) => throw null;
        public bool TryGetScalarAt(int index, out UnicodeScalar scalar) => throw null;

        // GetStream: Returns a read-only Stream which wraps this instance. Useful for networking and other i/o scenarios.
        // ** OPEN QUESTION ** Should we ditch this and simply have a ReadOnlyMemory<byte>.GetStream() extension method?

        public System.IO.Stream GetStream() => throw null;

        // Literal: A stopgap measure to support literal UTF-8 values until we get first-class compiler support.
        // Syntax is Utf8String theString = Utf8String.Literal("I am a literal string.");
        // JIT will special-case this call and optimize it just as it would've done with a String literal.

        public static Utf8String Literal(string value) => throw null;

        // IndexOf / LastIndexOf: Ordinal for simplicity for now.

        public int IndexOf(char value) => throw null;
        public int IndexOf(char value, int startIndex) => throw null;
        public int IndexOf(char value, int startIndex, int count) => throw null;
        public int IndexOf(UnicodeScalar value) => throw null;
        public int IndexOf(UnicodeScalar value, int startIndex) => throw null;
        public int IndexOf(UnicodeScalar value, int startIndex, int count) => throw null;
        public int IndexOf(Utf8String value) => throw null;
        public int IndexOf(Utf8String value, int startIndex) => throw null;
        public int IndexOf(Utf8String value, int startIndex, int count) => throw null;
        public int LastIndexOf(char value) => throw null;
        public int LastIndexOf(char value, int startIndex) => throw null;
        public int LastIndexOf(char value, int startIndex, int count) => throw null;
        public int LastIndexOf(UnicodeScalar value) => throw null;
        public int LastIndexOf(UnicodeScalar value, int startIndex) => throw null;
        public int LastIndexOf(UnicodeScalar value, int startIndex, int count) => throw null;
        public int LastIndexOf(Utf8String value) => throw null;
        public int LastIndexOf(Utf8String value, int startIndex) => throw null;
        public int LastIndexOf(Utf8String value, int startIndex, int count) => throw null;

        public static bool IsNullOrEmpty(Utf8String value) => throw null;
        public static bool IsNullOrWhiteSpace(Utf8String value) => throw null;
        public static bool IsEmptyOrWhiteSpace(ReadOnlySpan<byte> value) => throw null;
        public static bool IsEmptyOrWhiteSpace(ReadOnlySpan<Utf8Char> value) => throw null;

        // Replace: Ordinal only for now for simplicity.

        public Utf8String Replace(Utf8String oldValue, Utf8String newValue) => throw null;

        // n.b. Utf8String.Split returns its results in an array, just like String.Split. There will be non-allocating
        // Split APIs hanging off of ROM<Char8> / ROS<Char8> and other types for more advanced use cases.

        public Utf8String[] Split(UnicodeScalar separator) => throw null;
        public Utf8String[] Split(UnicodeScalar separator, int count) => throw null;
        public Utf8String[] Split(UnicodeScalar separator, int count, StringSplitOptions options) => throw null;
        public Utf8String[] Split(ReadOnlySpan<UnicodeScalar> separator) => throw null;
        public Utf8String[] Split(ReadOnlySpan<UnicodeScalar> separator, int count) => throw null;
        public Utf8String[] Split(ReadOnlySpan<UnicodeScalar> separator, int count, StringSplitOptions options) => throw null;
        public Utf8String[] Split(Utf8String separator) => throw null;
        public Utf8String[] Split(Utf8String separator, int count) => throw null;
        public Utf8String[] Split(Utf8String separator, int count, StringSplitOptions options) => throw null;

        public bool StartsWith(UnicodeScalar value) => throw null;
        public bool StartsWith(Utf8String value, StringComparison comparisonType) => throw null;

        // The natural way to use Substring is first to call IndexOf(...), then to substring on the index
        // that is returned. Since the parameter passed to IndexOf is generally a literal or some other value
        // under the developer's control, this means that the natural way of calling Substring shouldn't
        // inadvertently lead to splitting the string in the middle of a UTF-8 sequence. (This same argument
        // holds for the String class.)
        //
        // If the developer wants to go out of their way to substring a valid string in such a way that the
        // result is invalid UTF-8, we won't stop them.

        public Utf8String Substring(int startIndex) => throw null;
        public Utf8String Substring(int startIndex, int length) => throw null;

        // No ToLower() method - method name contains 'invariant' or culture must be specified
        public Utf8String ToLowerInvariant() => throw null;
        public Utf8String ToLower(CultureInfo culture) => throw null;

        public override string ToString() => throw null;

        public Utf8String ToUpperInvariant() => throw null;
        public Utf8String ToUpper(CultureInfo culture) => throw null;

        // Trim: only trims whitespace (not arbitrary charaters) for now for simplicity.

        public Utf8String Trim() => throw null;
        public Utf8String TrimEnd() => throw null;
        public Utf8String TrimStart() => throw null;

        // IsWellFormed: Determines whether a given input is well-formed UTF-8

        public static bool IsWellFormed(Utf8String value) => throw null;
        public static bool IsWellFormed(ReadOnlySpan<byte> span) => throw null;
        public static bool IsWellFormed(ReadOnlySpan<Utf8Char> span) => throw null;
    }

    // New APIs added to System.MemoryExtensions
    public static partial class MemoryExtensions
    {
        // Convert a Utf8String to a ROS<Utf8Char> or a ROS<byte>

        public static ReadOnlySpan<Utf8Char> AsSpan(this Utf8String text) { throw null; }
        public static ReadOnlySpan<Utf8Char> AsSpan(this Utf8String text, int start) { throw null; }
        public static ReadOnlySpan<Utf8Char> AsSpan(this Utf8String text, int start, int length) { throw null; }
        public static ReadOnlySpan<byte> AsBytes(this Utf8String value) { throw null; }
        public static ReadOnlySpan<byte> AsBytes(this Utf8String value, int start) { throw null; }
        public static ReadOnlySpan<byte> AsBytes(this Utf8String value, int start, int length) { throw null; }

        // Convert a {ReadOnly}Span<Utf8Char> to a ReadOnlySpan<byte>
        //
        // Conversion is one-way - it's ok to take UTF-8 data and treat it as binary data for the purpose of
        // bit-blasting it across i/o, but we don't want to encourage developers to take incoming arbitrary
        // binary data and treat it as structured UTF-8 text. Use an API like Utf8Parser which is intended
        // for accepting ROS<byte> input - which allows you to avoid the T/U conversion altogether, or use
        // an unsafe API like MemoryMarshal.Cast to convert back and forth between the two representations.

        public static ReadOnlySpan<byte> AsBytes(this Span<Utf8Char> value) { throw null; }
        public static ReadOnlySpan<byte> AsBytes(this ReadOnlySpan<Utf8Char> value) { throw null; }

        // Convert a Utf8String to a ROM<Utf8Char> or a ROM<byte>.
        // This means ROM<T> can now be backed by one of four things:
        //   1. T[],
        //   2. MemoryManager<T>,
        //   3. String (only if T = char), or
        //   4. Utf8String (only if T = byte or T = Utf8Char) ** NEW **
        //
        // Consequently, the ReadOnlyMemory<byte>.Span property getter will now have to check for Utf8String
        // in addition to checking for byte[]. From the results in https://github.com/dotnet/coreclr/pull/20386
        // I expect a 5%-ish performance regression in the ROM<byte>.Span property getter, but since we've
        // optimized that code path so aggressively in recent weeks this extra check shouldn't be very impactful.
        //
        // Finally, unlike ROS<Utf8Char> to ROS<byte>, ROM<Utf8Char> and ROM<byte> *cannot* be converted back and
        // forth between each other. The caller is expected to know which T representation (text or binary?) is
        // applicable for their scenario and to call the appropriate AsMemory API. This matches existing ROM<T>
        // behavior; where e.g., it's valid to reinterpret cast between Span<ushort> and Span<char> (and such APIs
        // are provided), but reinterpret casting between Memory<ushort> and Memory<char> is forbidden.

        public static ReadOnlyMemory<Utf8Char> AsMemory(this Utf8String text) { throw null; }
        public static ReadOnlyMemory<Utf8Char> AsMemory(this Utf8String text, int start) { throw null; }
        public static ReadOnlyMemory<Utf8Char> AsMemory(this Utf8String text, int start, int length) { throw null; }
        public static ReadOnlyMemory<byte> AsMemoryBytes(this Utf8String text) { throw null; }
        public static ReadOnlyMemory<byte> AsMemoryBytes(this Utf8String text, int start) { throw null; }
        public static ReadOnlyMemory<byte> AsMemoryBytes(this Utf8String text, int start, int length) { throw null; }
    }
}

namespace System.Runtime.InteropServices
{
    // New APIs added to System.Runtime.InteropServices.MemoryMarshal
    public static partial class MemoryMarshal
    {
        // These methods have the same behavior as the existing TryGetString method.

        public static bool TryGetUtf8String(ReadOnlyMemory<byte> memory, out Utf8String text, out int start, out int length) { throw null; }
        public static bool TryGetUtf8String(ReadOnlyMemory<Utf8Char> memory, out Utf8String text, out int start, out int length) { throw null; }
    }
}

namespace System.Runtime.CompilerServices {
    // New APIs added to System.Runtime.CompilerServices.RuntimeHelpers
    public static class RuntimeHelpers
    {
        // For compiler use, provides support for constant (literal) Utf8String values
        public static Utf8String GetUtf8StringLiteral(string s) => throw null;
    }
}

namespace System.Text
{
    // Represents the fundamental elemental type of UTF-8 textual data and is distinct
    // from System.Byte, similar to how System.Char is the fundamental elemental type
    // of UTF-16 textual data and is distinct from System.UInt16.
    //
    // Ideally the compiler would support various syntaxes for this, like:
    // Utf8Char theChar = 63; // Implicit assignment of const to local of type Utf8Char
    public readonly struct Utf8Char : IComparable<Utf8Char>, IEquatable<Utf8Char>
    {
        private readonly int _dummy;

        // Construction is performed via a cast. All casts are checked for overflow
        // but not for correctness. For example, casting -1 to Utf8Char will fail
        // with an OverflowException, but casting 0xFF to Utf8Char will succeed even
        // though 0xFF is never a valid UTF-8 code unit. Additionally, even though
        // the cast from Byte to Utf8Char can never overflow, it's still an explicit
        // cast because we don't want devs to fall into the habit of treating arbitrary
        // integral types as equivalent to textual data types. As an existing example of
        // this in the current compiler, there's no implicit cast from Byte to Char even
        // though it's a widening operation, but there is an explicit cast.

        public static explicit operator Utf8Char(byte value) => throw null;
        public static explicit operator Utf8Char(sbyte value) => throw null;
        public static explicit operator Utf8Char(char value) => throw null;
        public static explicit operator Utf8Char(short value) => throw null;
        public static explicit operator Utf8Char(ushort value) => throw null;
        public static explicit operator Utf8Char(int value) => throw null;
        public static explicit operator Utf8Char(uint value) => throw null;
        public static explicit operator Utf8Char(long value) => throw null;
        public static explicit operator Utf8Char(ulong value) => throw null;

        // Casts to the various primitive integral types. All casts are implicit
        // with two exceptions, which are explicit:
        // - Cast to SByte, because it could result in an OverflowException.
        // - Cast to Char, for the same reason as the Byte-to-Utf8Char cast.

        public static implicit operator byte(Utf8Char value) => throw null;
        public static explicit operator sbyte(Utf8Char value) => throw null;
        public static explicit operator char(Utf8Char value) => throw null;
        public static implicit operator short(Utf8Char value) => throw null;
        public static implicit operator ushort(Utf8Char value) => throw null;
        public static implicit operator int(Utf8Char value) => throw null;
        public static implicit operator uint(Utf8Char value) => throw null;
        public static implicit operator long(Utf8Char value) => throw null;
        public static implicit operator ulong(Utf8Char value) => throw null;

        public static bool operator ==(Utf8Char a, Utf8Char b) => throw null;
        public static bool operator !=(Utf8Char a, Utf8Char b) => throw null;
        public static bool operator <(Utf8Char a, Utf8Char b) => throw null;
        public static bool operator <=(Utf8Char a, Utf8Char b) => throw null;
        public static bool operator >(Utf8Char a, Utf8Char b) => throw null;
        public static bool operator >=(Utf8Char a, Utf8Char b) => throw null;
        public int CompareTo(Utf8Char other) => throw null;
        public override bool Equals(object obj) => throw null;
        public bool Equals(Utf8Char other) => throw null;
        public override int GetHashCode() => throw null;
        public override string ToString() => throw null;
    }

    // Represents a Unicode scalar value ([ U+0000..U+D7FF ], inclusive; or [ U+E000..U+10FFFF ], inclusive).
    // This type's ctors are guaranteed to validate the input, and consumers can call the APIs assuming
    // that the input is well-formed.
    //
    // This type's ctors validate, but that shouldn't be a terrible imposition because very few components
    // are going to need to create instances of this type. UnicodeScalar instances will almost always be
    // created as a result of enumeration over a UTF-8 or UTF-16 sequence, or instances will be created
    // by the compiler from known good constants in source. In both cases validation can be elided, which
    // means that there's *no runtime check at all* - not in the ctors nor in the instance methods hanging
    // off this type. This gives improved performance over APIs which require the consumer to call an
    // IsValid method before operating on instances of this type, and it means that we can get away without
    // potentially expensive branching logic in many of our property getters.
    public readonly partial struct UnicodeScalar : IComparable<UnicodeScalar>, IEquatable<UnicodeScalar>
    {
        private readonly int _dummyPrimitive;

        public static UnicodeScalar ReplacementChar { get => throw null; } // = U+FFFD

        // ctors - throw if out of range

        public UnicodeScalar(char ch) => throw null; // from UTF-16 code unit (must not be surrogate)
        public UnicodeScalar(int scalarValue) => throw null;
        public UnicodeScalar(uint scalarValue) => throw null;

        // try pattern - returns (false, default(UnicodeScalar)) on failure

        public static bool TryCreate(char value, out UnicodeScalar result) => throw null;
        public static bool TryCreate(int value, out UnicodeScalar result) => throw null;
        public static bool TryCreate(uint value, out UnicodeScalar result) => throw null;

        // cast operators are explicit because value is checked

        public static explicit operator UnicodeScalar(char value) => throw null;
        public static explicit operator UnicodeScalar(uint value) =>throw null;
        public static explicit operator UnicodeScalar(int value) => throw null;

        public static bool operator ==(UnicodeScalar a, UnicodeScalar b) => throw null;
        public static bool operator !=(UnicodeScalar a, UnicodeScalar b) => throw null;
        public static bool operator <(UnicodeScalar a, UnicodeScalar b) => throw null;
        public static bool operator <=(UnicodeScalar a, UnicodeScalar b) => throw null;
        public static bool operator >(UnicodeScalar a, UnicodeScalar b) => throw null;
        public static bool operator >=(UnicodeScalar a, UnicodeScalar b) => throw null;

        public bool IsAscii { get => throw null; } // returns true iff Value <= 0x7F
        public bool IsBmp { get => throw null; } // returns true iff Value <= 0xFFFF
        public int Plane { get => throw null; } // returns 0 .. 16
        public int Utf16SequenceLength { get => throw null; } // returns 1 .. 2
        public int Utf8SequenceLength { get => throw null; } // return 1 .. 4
        public uint Value { get => throw null; }

        // Determines whether an arbitrary integer is a valid Unicode scalar value.
        // Not an instance method because we always assume 'this' is valid.

        public static bool IsValid(int value) => throw null;
        public static bool IsValid(uint value) => throw null;

        public int CompareTo(UnicodeScalar other) => throw null;
        public override bool Equals(object obj) => throw null;
        public bool Equals(UnicodeScalar other) => throw null;
        public override int GetHashCode() => throw null;

        // returns the scalar as a standalone UTF-16 string or a standalone UTF-8 string;
        // or writes the scalar to a UTF-16 span or a UTF-8 span

        public override string ToString() => throw null;
        public int ToUtf16(Span<char> output) => throw null;
        public int ToUtf8(Span<Utf8Char> output) => throw null;
        public Utf8String ToUtf8String() => throw null;

        // These are analogs of APIs on System.Char

        public static double GetNumericValue(UnicodeScalar s) => throw null;
        public static System.Globalization.UnicodeCategory GetUnicodeCategory(UnicodeScalar s) => throw null;
        public static bool IsControl(UnicodeScalar s) => throw null;
        public static bool IsDigit(UnicodeScalar s) => throw null;
        public static bool IsLetter(UnicodeScalar s) => throw null;
        public static bool IsLetterOrDigit(UnicodeScalar s) => throw null;
        public static bool IsLower(UnicodeScalar s) => throw null;
        public static bool IsNumber(UnicodeScalar s) => throw null;
        public static bool IsPunctuation(UnicodeScalar s) => throw null;
        public static bool IsSeparator(UnicodeScalar s) => throw null;
        public static bool IsSymbol(UnicodeScalar s) => throw null;
        public static bool IsUpper(UnicodeScalar s) => throw null;
        public static bool IsWhiteSpace(UnicodeScalar s) => throw null;
        public static UnicodeScalar ToLower(UnicodeScalar s, System.Globalization.CultureInfo culture) => throw null;
        public static UnicodeScalar ToLowerInvariant(UnicodeScalar s) => throw null;
        public static UnicodeScalar ToUpper(UnicodeScalar s, System.Globalization.CultureInfo culture) => throw null;
        public static UnicodeScalar ToUpperInvariant(UnicodeScalar s) => throw null;
    }

    // Allows enumerating UnicodeScalar instances from an underlying UTF-16 or UTF-8 string or span.
    //
    // **OPEN QUESTION**
    // It would be a lot easier if we just used UnicodeScalar directly as our elemental type of enumeration,
    // but this has two important consequences. First, it's possible that developers may care about the StartIndex
    // of the element (though 'foreach' really doesn't lend itself well to this). Second, if we return U+FFFD when
    // we see an invalid sequence, callers won't be able to tell the difference between an invalid sequence and a
    // valid sequence that really did read U+FFFD.
    public static partial class UnicodeExtensions
    {
        // Single-scalar helpers. Every overload hands back a (Validity, Scalar, SequenceLength) tuple, so
        // validity is reported separately from the scalar itself. Going by the names, the First / Last
        // variants look at the start / end of the span -- confirm against the implementation.
        public static (SequenceValidity Validity, UnicodeScalar Scalar, int SequenceLength) GetFirstScalar(ReadOnlySpan<char> span) { throw null; }
        public static (SequenceValidity Validity, UnicodeScalar Scalar, int SequenceLength) GetFirstScalar(ReadOnlySpan<Utf8Char> span) { throw null; }
        public static (SequenceValidity Validity, UnicodeScalar Scalar, int SequenceLength) GetLastScalar(ReadOnlySpan<char> span) { throw null; }
        public static (SequenceValidity Validity, UnicodeScalar Scalar, int SequenceLength) GetLastScalar(ReadOnlySpan<Utf8Char> span) { throw null; }

        // Enumeration entry points, one per input type. The span-based overloads return ref struct
        // enumerators; the string / Utf8String overloads return ordinary struct enumerators.
        public static CharSpanScalarEnumerator GetScalars(ReadOnlySpan<char> value) { throw null; }
        public static Utf8CharSpanScalarEnumerator GetScalars(ReadOnlySpan<Utf8Char> value) { throw null; }
        public static StringScalarEnumerator GetScalars(string value) { throw null; }
        public static Utf8StringScalarEnumerator GetScalars(Utf8String value) { throw null; }

        // Enumerator returned by GetScalars(string). It is both the enumerable and the enumerator:
        // GetEnumerator() returns this same struct type, which lets 'foreach' bind to it by pattern.
        // Each element is a (ScalarValue, StartIndex, Length) tuple. ScalarValue is nullable; presumably
        // null marks an invalid sequence so it cannot be mistaken for a genuine U+FFFD -- confirm.
        public struct StringScalarEnumerator :
            System.Collections.Generic.IEnumerable<(UnicodeScalar? ScalarValue, int StartIndex, int Length)>,
            System.Collections.Generic.IEnumerator<(UnicodeScalar? ScalarValue, int StartIndex, int Length)>
        {
            // Placeholder field; the real enumerator state is not part of this API sketch.
            private int _dummy;

            // Members used by the 'foreach' pattern.
            public (UnicodeScalar? ScalarValue, int StartIndex, int Length) Current { get { throw null; } }
            public bool MoveNext() { throw null; }

            // Hidden from editor completion lists; callers are expected to reach it through 'foreach'.
            [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
            public StringScalarEnumerator GetEnumerator() { throw null; }

            // Explicit interface implementations. The non-generic Current forwards to the typed one
            // (boxing the tuple); Reset and Dispose are declared as no-ops.
            System.Collections.Generic.IEnumerator<(UnicodeScalar? ScalarValue, int StartIndex, int Length)> System.Collections.Generic.IEnumerable<(UnicodeScalar? ScalarValue, int StartIndex, int Length)>.GetEnumerator() { throw null; }
            System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }
            object System.Collections.IEnumerator.Current { get { return Current; } }
            void System.Collections.IEnumerator.Reset() { }
            void IDisposable.Dispose() { }
        }

        // Enumerator returned by GetScalars(ReadOnlySpan<char>). Declared as a ref struct with no
        // interfaces, so it exposes only what the 'foreach' pattern needs.
        public ref struct CharSpanScalarEnumerator
        {
            // Placeholder field; the real enumerator state is not part of this API sketch.
            private int _dummy;

            public (UnicodeScalar? ScalarValue, int StartIndex, int Length) Current { get { throw null; } }
            public bool MoveNext() { throw null; }

            // Hidden from editor completion lists; callers are expected to reach it through 'foreach'.
            [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
            public CharSpanScalarEnumerator GetEnumerator() { throw null; }
        }

        // Enumerator returned by GetScalars(Utf8String). Same shape as StringScalarEnumerator: it is
        // both the enumerable and the enumerator, and yields (ScalarValue, StartIndex, Length) tuples.
        public struct Utf8StringScalarEnumerator :
            System.Collections.Generic.IEnumerable<(UnicodeScalar? ScalarValue, int StartIndex, int Length)>,
            System.Collections.Generic.IEnumerator<(UnicodeScalar? ScalarValue, int StartIndex, int Length)>
        {
            // Placeholder field; the real enumerator state is not part of this API sketch.
            private int _dummy;

            // Members used by the 'foreach' pattern.
            public (UnicodeScalar? ScalarValue, int StartIndex, int Length) Current { get { throw null; } }
            public bool MoveNext() { throw null; }

            // Hidden from editor completion lists; callers are expected to reach it through 'foreach'.
            [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
            public Utf8StringScalarEnumerator GetEnumerator() { throw null; }

            // Explicit interface implementations. The non-generic Current forwards to the typed one
            // (boxing the tuple); Reset and Dispose are declared as no-ops.
            System.Collections.Generic.IEnumerator<(UnicodeScalar? ScalarValue, int StartIndex, int Length)> System.Collections.Generic.IEnumerable<(UnicodeScalar? ScalarValue, int StartIndex, int Length)>.GetEnumerator() { throw null; }
            System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { throw null; }
            object System.Collections.IEnumerator.Current { get { return Current; } }
            void System.Collections.IEnumerator.Reset() { }
            void IDisposable.Dispose() { }
        }

        // Enumerator returned by GetScalars(ReadOnlySpan<Utf8Char>). Declared as a ref struct with no
        // interfaces, so it exposes only what the 'foreach' pattern needs.
        public ref struct Utf8CharSpanScalarEnumerator
        {
            // Placeholder field; the real enumerator state is not part of this API sketch.
            private int _dummy;

            public (UnicodeScalar? ScalarValue, int StartIndex, int Length) Current { get { throw null; } }
            public bool MoveNext() { throw null; }

            // Hidden from editor completion lists; callers are expected to reach it through 'foreach'.
            [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
            public Utf8CharSpanScalarEnumerator GetEnumerator() { throw null; }
        }
    }

    // When creating / transcoding UTF-8 data, controls what happens when an invalid sequence is detected
    // in the input stream.
    //
    // The Transcode.ToUtf8 / Transcode.FromUtf8 overloads accept this as an optional 'behavior' parameter
    // whose default is Fail.
    public enum InvalidSequenceBehavior
    {
        // Fails the operation immediately. The failure mode is caller-implemented. For example, a constructor
        // could throw an exception, while an OperationStatus-returning method could return InvalidData.
        // Because this member is 0, it is also the value of default(InvalidSequenceBehavior).
        Fail = 0,

        // Replaces invalid sequences with U+FFFD (REPLACEMENT CHARACTER). This matches the current behavior
        // of Encoding.UTF8 and is the recommendation from the Unicode Consortium for scenarios where the
        // application should attempt to continue.
        ReplaceInvalidSequence = 1,

        // "Garbage in - garbage out." Propagates invalid sequences as-is without validation. This enum option is
        // not valid for all scenarios. For example, APIs which transcode between UTF-8 and UTF-16 cannot use
        // this value. How such APIs reject it is not specified here.
        LeaveUnchanged = 2,
    }

    // Represents the validity of a UTF-8 or UTF-16 code unit sequence.
    // Returned as the 'Validity' element of the tuples produced by UnicodeExtensions.GetFirstScalar /
    // GetLastScalar.
    public enum SequenceValidity
    {
        // The input sequence is well-formed, i.e., it is an unambiguous representation of a Unicode scalar value.
        // Examples:
        // The UTF-8 sequence [ CE A9 ] is well-formed because it unambiguously represents the Unicode scalar value U+03A9.
        // The UTF-8 sequence [ F2 AB B3 9E ] is well-formed because it unambiguously represents the Unicode scalar value U+ABCDE.
        // This is the first member and has no explicit value, so it is 0 and therefore default(SequenceValidity).
        Valid,

        // The input sequence is not well-formed, i.e., it does not correspond to a valid Unicode scalar value.
        // Examples:
        // The UTF-8 sequence [ C0 ] is not well-formed (C0 can only begin an overlong encoding, so it never
        // appears in well-formed UTF-8).
        // The UTF-8 sequence [ C2 20 ] is not well-formed (the lead byte C2 must be followed by a continuation
        // byte in the range 80..BF, and 20 is not one).
        // The UTF-8 sequence [ ED A0 80 ] is not well-formed (it would decode to the surrogate code point U+D800,
        // which is not a Unicode scalar value).
        Invalid,

        // The input sequence is incomplete (or empty). It is not valid on its own, but it could be the start of a longer valid
        // sequence. The caller should supply more input data if available. If no further input data is available, the
        // sequence should be treated as not well-formed.
        // Examples:
        // The UTF-8 sequence [ C2 ] is incomplete (a two-byte lead byte with its continuation byte missing).
        // The UTF-8 sequence [ F2 AB B3 ] is incomplete (a four-byte lead byte followed by only two continuation bytes).
        Incomplete
    }

    // APIs for fast transcoding of data between different UTF-* representations.
    // These make up for some shortcomings in the Encoding class when dealing with streaming data, such as needing to know
    // ahead of time how large the output buffer should be (which potentially requires two passes over the input).
    public static partial class Transcode
    {
        // There are byte and Utf8Char versions of the below APIs because we want to support the scenario where the
        // caller is writing directly to or reading directly from a network stream. APIs which are designed to be used
        // against any possible input (including potentially malicious or malformed input) and which properly perform
        // validation can be written in terms of byte for convenience.
        //
        // Shared shape of all four overloads:
        //   source / destination - input buffer to read from and output buffer to write to.
        //   isFinalBlock         - presumably signals that no further input will follow, so a trailing incomplete
        //                          sequence can be treated as invalid instead of "need more data" -- TODO confirm.
        //   out ...Read          - number of source elements consumed.
        //   out ...Written       - number of destination elements produced.
        //   behavior             - what to do on an invalid sequence; defaults to InvalidSequenceBehavior.Fail.
        //                          LeaveUnchanged is documented on that enum as unusable for UTF-8 <-> UTF-16
        //                          transcoding, which is what these methods do.
        //   returns              - a System.Buffers.OperationStatus describing how the operation ended.
        //
        // The bodies are 'throw null' placeholders, matching the other API stubs in this proposal. The return type
        // is fully qualified because no 'using System.Buffers;' directive is visible in this file.

        // UTF-16 -> UTF-8, writing raw bytes.
        public static System.Buffers.OperationStatus ToUtf8(ReadOnlySpan<char> source, Span<byte> destination, bool isFinalBlock, out int charsRead, out int bytesWritten, InvalidSequenceBehavior behavior = InvalidSequenceBehavior.Fail) => throw null;

        // UTF-16 -> UTF-8, writing Utf8Char elements.
        public static System.Buffers.OperationStatus ToUtf8(ReadOnlySpan<char> source, Span<Utf8Char> destination, bool isFinalBlock, out int charsRead, out int utf8CharsWritten, InvalidSequenceBehavior behavior = InvalidSequenceBehavior.Fail) => throw null;

        // UTF-8 -> UTF-16, reading raw bytes.
        public static System.Buffers.OperationStatus FromUtf8(ReadOnlySpan<byte> source, Span<char> destination, bool isFinalBlock, out int bytesRead, out int charsWritten, InvalidSequenceBehavior behavior = InvalidSequenceBehavior.Fail) => throw null;

        // UTF-8 -> UTF-16, reading Utf8Char elements.
        public static System.Buffers.OperationStatus FromUtf8(ReadOnlySpan<Utf8Char> source, Span<char> destination, bool isFinalBlock, out int utf8CharsRead, out int charsWritten, InvalidSequenceBehavior behavior = InvalidSequenceBehavior.Fail) => throw null;
    }
}

Edits:

Nov. 8 - 9, 2018 - Updated API proposals in preparation for upcoming review.

The text was updated successfully, but these errors were encountered:

terrajobst · 2018-06-20T01:52:30Z

Notes for our initial review are here.

Nukepayload2 · 2019-08-22T03:14:39Z

The indexer of Utf8String is not consumable in VB.

public ref readonly Utf8Char this[int index] => throw null;

Please add the following member to solve this problem:

[System.Runtime.CompilerServices.SpecialName]
public Utf8Char get_Chars(int index) => throw null;

This is what it looks like in VB:

Public ReadOnly Property Chars(index As Integer) As Utf8Char

tannergooding · 2020-03-30T18:58:17Z

Has there been any update on this in general and what other considerations/design changes have happened since the initial review?

The current implementation in the NuGet package is vastly different from the proposed API.

GrabYourPitchforks · 2020-03-30T19:16:43Z

The NuGet package generally follows the proposal in dotnet/corefxlab#2350, which is where most of the discussion has taken place. It's a bit aggravating that the discussion is split across so many different forums, I know. :(

jeffhandley · 2021-01-14T04:51:27Z

The next steps on this are to:

Collect the list of scenarios where the Utf8String type is desired from our partner teams
For each of those scenarios:
1. Identify the acceptance criteria for the scenario
2. Collect the desired code for all code paths involved in the scenario
Create an end-to-end prototype that would make that desired code compile and run for each scenario
Determine the feasibility of meeting the other acceptance criteria with the technical approach
Review the prototype with all partner teams that would be involved if the prototype moved toward full implementation
Compile and document the findings to present a recommended path forward

jkotas · 2021-01-14T05:16:44Z

We may also want to evaluate alternative that does not introduce the Utf8String type at all. We had a good discussion about it in dotnet/corefxlab#2350 recently.

ceztko · 2021-03-11T22:06:12Z

I noticed dotnet/corefxlab#2350 just got closed. Has the discussion about first-class UTF-8 support moved somewhere else?

GrabYourPitchforks · 2021-03-12T02:19:52Z

@ceztko The corefxlab repo was archived, so open issues were closed to support that effort. That thread also got so large that it was difficult to follow. @krwq is working on restructuring the conversation so that we can continue the discussion in a better forum.

sfiruch · 2021-05-25T17:42:02Z

Collect the list of scenarios where the Utf8String type is desired from our partner teams

I'm currently (pre-)processing multi-TB data sets in C#. I have to match and join millions of strings, which are taking up A LOT of memory (100+ GB). Because my machine only has 64GB of memory, I had to switch to a more efficient string representation:

Implement my own byte[] backed string-type, with hash code, comparisons, etc.
Read, parse and write of my own strings from/to binary streams and from/to regular strings.
Switch the whole codebase to this new type

It would've saved me days of work if UTF8 string representation was a runtime configuration switch.

Identify the acceptance criteria for the scenario

Main goal: Only use 50% of the memory with equal performance in managed code. I don't care about marshalling performance. Having to change the type in all existing code would be doable, but not ideal.

We may also want to evaluate alternative that does not introduce the Utf8String type at all. We had a good discussion about it in dotnet/corefxlab#2350 recently.

Personally, I'd strongly prefer this approach.

sgf · 2021-05-28T18:29:45Z

Maybe we need to step up the pace: if we keep discussing an API for a couple of years, the process is going to be as slow and inefficient as the C++ standard.
Obviously, UTF8String saves memory to some extent and speeds up processing of specific scenarios without consuming cpu and additional memory for conversion.

Make UTF8String as an option for developers.

krwq · 2021-06-07T13:54:28Z

@sgf we are currently discussing options here. We need to be really careful about what we do with UTF-8 strings, because we do not want to duplicate all String APIs with Utf8String overloads, but at the same time we do want UTF-8 strings. Just swapping the internals of string would break lots of apps at the moment, as plenty of them rely on things like fixed (char* foo = someString), which might cause really bad bugs. Once we have analyzed all the options, we will figure out what the next step should be, and based on that we will decide whether to do this work soon, gradually, or not at all. With the experimental features available now, there is a chance something will show up in the next release, but we have not made a firm decision either way.

sfiruch · 2021-10-15T22:25:51Z

What are the chances of getting UTF8 strings into the 7.0.0 milestone?

LifeIsStrange · 2022-04-14T17:40:17Z

Sorry for the low-effort comment; I have not read the whole thread. My opinion is that a separate Utf8String type is inconvenient in terms of discoverability and readability: it explicitly exposes an implementation detail when the developer's intent is simply to say "a string", which increases cognitive overhead. For those reasons, its usage will be niche.
A better solution would be to make Strings UTF 8 by default like Java 18 https://news.ycombinator.com/item?id=31024255 OR at the very least, provide a global setter (can be called at program initialization, or a compiler flag) that set the String representation mode for any future String that will be constructed. As such, people would opt-in to UTF-8 by default in their projects and experience better performance/lower memory for most usages.
You can still introduce an explicit Utf8 type (useful when a program has uses for mixing multiple representations) but the proposed global optin config flag would fit the mainstream use case that most people desire since the dawn of time.

ufcpp · 2022-04-15T02:42:15Z

Does the internal representation of String remain UTF-16 or Latin-1 (Compact String) in Java 18? Only default encoding might be changed to UTF-8.

LifeIsStrange · 2022-04-15T11:42:01Z

Yes, UTF-8 is not the default internal representation as of now, although they use Latin-1 for characters that fit in the ASCII table.

timcassell · 2022-06-09T20:04:56Z

If roles becomes a thing, would it make sense to have Utf8String be a role for ReadOnlySpan<byte>? It wouldn't need to be a full type and would work with existing utf8 string usage.

sfiruch · 2022-06-09T20:25:40Z

The primary problem is that all of today's useful API is defined on string, not on Utf8String. Requiring conversions for all existing functions defined on string would cost too much performance to be useful, in my use cases.

However, one could augment all the existing types and methods with Utf8String overloads. Because that seems like a lot of work which will never be complete, I favor a runtime switch instead, where strings internal backing memory can be switched to UTF-8. This way few things in managed code would have to be implemented twice. Vectorized string code and runtime functions would have to be adapted, and conversions become necessary cost when using Win32 UTF-16 APIs.

ceztko · 2022-06-09T20:51:22Z

I understand from dotnet/corefxlab#2350 that the Utf8String proposal has been either dismissed or put on hold at best. If the former, I recommend to close this issue as well. I ask again if further public discussions about UTF8 first citizen support (in whatever form this will be actually implemented) can be found elsewhere.

hez2010 · 2022-07-10T06:05:39Z

@timcassell Exactly.

Instead of introducing a Utf8String class, I think we should utilize the roles feature which may be shipped in future release of C# to introduce a Utf8String role of ROS<byte>/Span<char>/ROM<byte>/Memory<byte>/byte[].
No breaking changes required, same functionality provided, more light-weighted than classes.

dmitry-azaraev · 2022-07-10T06:59:15Z

Utf8String is required as an identifiable on-heap type, and it can't be substituted by byte[] by normal means. byte[] can already be used today, but it is brutally incompatible with diagnostics/profiling. This is why VST is needed.

hez2010 · 2022-07-10T07:01:11Z

If we introduce a role type from runtime, then the debugger/profiler can recognize the role type to provide diagnostics functionality.

dmitry-azaraev · 2022-07-10T07:09:12Z

@hez2010 I'm not against roles. I'm just pointing out that a named heap type (e.g. Utf8String) is useful for memory diagnostics. The two are not mutually exclusive.

inforithmics · 2022-11-10T06:12:40Z

With static interfaces wouldn't it be possible to Introduce an IString<T> Interface like INumber<T> and Implement this on String and UTF8String so that String Apis could be written that use an IString<T> Type. Maybe an IChar<T> Interfaces needs to be introduced to handle Char Operations (UTF8Char and Char implements IChar<T> ).

Neme12 · 2023-08-25T16:56:19Z

Another option instead of a runtime switch is to add new Utf8String and Utf8Char types and add a compiler switch for C# to use these types instead of the old ones when using the keywords string and char.

Neme12 · 2023-08-25T17:00:20Z

I find it hard to imagine a runtime switch to change the representation of System.String to UTF-8, because that would mean that indexing the string is now O(n) instead of O(1). Unless char is also changed to be 1 byte, but that would be a huge breaking change and basically every existing app would now be broken.

AshleighAdams · 2023-08-26T20:25:09Z

@Neme12 No it isn't. A UTF-8 string would not index into the code points, but into the raw bytes. It also would not change System.String, but rather re-alias string to a brand-new type. Indexing code points should not be in the BCL, but be done through an extension method from an additional NuGet package, as 99% of people shouldn't be worrying about code points unless they're rendering the string, validating, or normalizing, none of which the BCL should be doing. Parsing things like CSVs, HTTP, and source code is also fine; it's safe to assume the string contains only 7-bit ASCII.

For cases of normalization, parsing, runes and such, it's always a cat and mouse game, with new "runes" added all the time, which is why I say it's not suited for the BCL itself.

vrubleg · 2023-08-26T20:49:18Z

@AshleighAdams

Indexing a standard UTF-16 string doesn't give you code points, it gives you just 16-bit code units. A lot of Unicode characters (code points) take two 16-bit code units. UTF-16 is not different from UTF-8 on this matter. Only UTF-32 allows to index code points directly.

Neme12 · 2023-08-26T21:40:15Z

No it isn't.

Which part were you responding to?

AshleighAdams · 2023-08-28T13:07:03Z

@vrubleg I never said the built in string gives you code points? It just adds to my reasoning that a BCL UTF-8 string should not try to enumerate code points, giving only O(1) access to the raw bytes

vrubleg · 2023-08-28T13:18:54Z

It gives O(1) access to code units that is consistent with standard UTF-16 strings. Yes, in case of UTF-8 code units are equal to raw bytes, and in case of UTF-16 code units are equal to raw words. So what?

AshleighAdams · 2023-08-28T13:27:32Z

@vrubleg I'm refuting @Neme12's assertion that indexing the string would be O(n)

vrubleg · 2023-08-28T13:32:10Z

Probably, @Neme12 meant that it would be O(n) if just internal representation of System.String was changed to UTF-8 while maintaining external visibility as a normal UTF-16 string. Hence the proposal to introduce a separate type System.Utf8String and a compile time switch for string to represent either System.String or System.Utf8String (that could be even per file, like nullable).

AshleighAdams · 2023-08-28T13:52:02Z

@vrubleg Oooh my bad, I misinterpreted what they were saying. I apologize, and am in agreement then 🙈

ceztko · 2024-01-19T11:39:56Z

Hence the proposal to introduce a separate type System.Utf8String and a compile time switch for string to represent either System.String or System.Utf8String (that could be even per file, like nullable).

@vrubleg If viable this seems a great solution. It's not clear if this is currently the official Microsoft stance, though: are there more references on this?

ramonsmits · 2024-06-15T14:46:21Z

A Utf8String type would be a good addition. Having ""u8 produce a ROS isn't great, and a good way to pass a value of that type directly to a stream, where the encoding is very often UTF-8, would be a huge gain.

ramonsmits · 2024-06-15T14:50:16Z

There exists https://github.com/U8String/U8String but likely .net internals could also benefit from a native utf8 string type.

AshleighAdams · 2024-07-28T03:16:19Z

@ramonsmits Interesting, though if a standard Utf8String type is added, I don't think any type of parsing should be added like that library does

People using UTF8 strings are in 99% of cases just moving the string around, or doing simple ASCII parsing. Very rarely are people actually rendering it themselves.

I think the type .NET gets (if it does get one) should have constant time access to each byte/Char8, along with an EnumerateCodepoints() method, as the UTF-8 spec is fixed with how those are parsed, but trying to do runes in the language's runtime/BCL would be a mistake. String normalization, understanding how many code points form a "single visible character" is hard, error prone, and a lot of data to store. Instead I think those can be provided via an extension method in a NuGet package, and that package could then move fast without being tied to a specific .NET version or what not

Edit: Actually no, I misunderstood, a rune is a Unicode codepoint, in which case that library is perfect I think, not over-extending into areas it shouldn't, nice!

maryamariyan transferred this issue from dotnet/corefx Dec 16, 2019

Dotnet-GitSync-Bot added area-System.Runtime untriaged New issue has not been triaged by the area owner labels Dec 16, 2019

maryamariyan added this to the Future milestone Dec 16, 2019

bgrainger mentioned this issue Feb 1, 2020

Add ADO.NET support for Utf8String #28966

Closed

tannergooding mentioned this issue Mar 30, 2020

Utf8String design proposal dotnet/corefxlab#2350

Closed

joperezr added enhancement Product code improvement that does NOT require public API changes/additions api-suggestion Early API idea and discussion, it is NOT ready for implementation and removed untriaged New issue has not been triaged by the area owner labels Jul 2, 2020

jeffhandley self-assigned this Jan 6, 2021

jeffhandley assigned krwq and GrabYourPitchforks Jan 14, 2021

jeffhandley added the Priority:2 Work that is important, but not critical for the release label Jan 14, 2021

krwq added the Cost:XL Work that requires one engineer more than 4 weeks label Jan 15, 2021

alexrp mentioned this issue Aug 22, 2021

Request: Small Roslyn changes to aid runtimes without UTF-16 strings dotnet/roslyn#55798

Closed

This comment was marked as off-topic.

Sign in to view

Introduce a Utf8String type #933

Introduce a Utf8String type #933

Comments

GrabYourPitchforks commented Jun 19, 2018 • edited by StephenBonikowsky Loading

terrajobst commented Jun 20, 2018

Nukepayload2 commented Aug 22, 2019 • edited Loading

tannergooding commented Mar 30, 2020

GrabYourPitchforks commented Mar 30, 2020

jeffhandley commented Jan 14, 2021

jkotas commented Jan 14, 2021

ceztko commented Mar 11, 2021

GrabYourPitchforks commented Mar 12, 2021

sfiruch commented May 25, 2021 • edited Loading

sgf commented May 28, 2021 • edited Loading

krwq commented Jun 7, 2021

sfiruch commented Oct 15, 2021

LifeIsStrange commented Apr 14, 2022 • edited Loading

ufcpp commented Apr 15, 2022

LifeIsStrange commented Apr 15, 2022

timcassell commented Jun 9, 2022

sfiruch commented Jun 9, 2022

ceztko commented Jun 9, 2022 • edited Loading

This comment was marked as off-topic.

hez2010 commented Jul 10, 2022

dmitry-azaraev commented Jul 10, 2022

hez2010 commented Jul 10, 2022 • edited Loading

dmitry-azaraev commented Jul 10, 2022 • edited Loading

inforithmics commented Nov 10, 2022 • edited Loading

Neme12 commented Aug 25, 2023 • edited Loading

Neme12 commented Aug 25, 2023 • edited Loading

AshleighAdams commented Aug 26, 2023

vrubleg commented Aug 26, 2023

Neme12 commented Aug 26, 2023

AshleighAdams commented Aug 28, 2023

vrubleg commented Aug 28, 2023

AshleighAdams commented Aug 28, 2023

vrubleg commented Aug 28, 2023 • edited Loading

AshleighAdams commented Aug 28, 2023

ceztko commented Jan 19, 2024

ramonsmits commented Jun 15, 2024

ramonsmits commented Jun 15, 2024

AshleighAdams commented Jul 28, 2024 • edited Loading

GrabYourPitchforks commented Jun 19, 2018 •

edited by StephenBonikowsky

Loading

Nukepayload2 commented Aug 22, 2019 •

edited

Loading

sfiruch commented May 25, 2021 •

edited

Loading

sgf commented May 28, 2021 •

edited

Loading

LifeIsStrange commented Apr 14, 2022 •

edited

Loading

ceztko commented Jun 9, 2022 •

edited

Loading

hez2010 commented Jul 10, 2022 •

edited

Loading

dmitry-azaraev commented Jul 10, 2022 •

edited

Loading

inforithmics commented Nov 10, 2022 •

edited

Loading

Neme12 commented Aug 25, 2023 •

edited

Loading

Neme12 commented Aug 25, 2023 •

edited

Loading

vrubleg commented Aug 28, 2023 •

edited

Loading

AshleighAdams commented Jul 28, 2024 •

edited

Loading