diff --git a/eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml b/eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml index 9f2b06ec638bfd..2c9d95a807d11b 100644 --- a/eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml +++ b/eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml @@ -153,4 +153,12 @@ extends: onefuzzDropDirectory: $(fuzzerProject)/deployment/UTF8Fuzzer SYSTEM_ACCESSTOKEN: $(System.AccessToken) displayName: Send UTF8Fuzzer to OneFuzz + + - task: onefuzz-task@0 + inputs: + onefuzzOSes: 'Windows' + env: + onefuzzDropDirectory: $(fuzzerProject)/deployment/Utf8JsonWriterFuzzer + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + displayName: Send Utf8JsonWriterFuzzer to OneFuzz # ONEFUZZ_TASK_WORKAROUND_END diff --git a/src/libraries/Fuzzing/DotnetFuzzing/Assert.cs b/src/libraries/Fuzzing/DotnetFuzzing/Assert.cs index a5f2a9dd1d195b..2814de3f08bf49 100644 --- a/src/libraries/Fuzzing/DotnetFuzzing/Assert.cs +++ b/src/libraries/Fuzzing/DotnetFuzzing/Assert.cs @@ -45,4 +45,24 @@ static void Throw(ReadOnlySpan<T> expected, ReadOnlySpan<T> actual) throw new Exception($"Expected={expected[diffIndex]} Actual={actual[diffIndex]} at index {diffIndex}"); } } + + public static TException Throws<TException, TState>(Action<TState> action, TState state) + where TException : Exception + where TState : allows ref struct + { + try + { + action(state); + } + catch (TException ex) + { + return ex; + } + catch (Exception ex) + { + throw new Exception($"Expected exception of type {typeof(TException).Name} but got {ex.GetType().Name}"); + } + + throw new Exception($"Expected exception of type {typeof(TException).Name} but no exception was thrown"); + } } diff --git a/src/libraries/Fuzzing/DotnetFuzzing/DotnetFuzzing.csproj b/src/libraries/Fuzzing/DotnetFuzzing/DotnetFuzzing.csproj index f538468d180f19..a392983c364e9f 100644 --- a/src/libraries/Fuzzing/DotnetFuzzing/DotnetFuzzing.csproj +++ b/src/libraries/Fuzzing/DotnetFuzzing/DotnetFuzzing.csproj @@ -1,4 +1,4 @@ -<Project Sdk="Microsoft.NET.Sdk"> +<Project Sdk="Microsoft.NET.Sdk"> <PropertyGroup> <OutputType>Exe</OutputType> @@ -29,6 +29,7 @@ <Compile Include="Fuzzers\TextEncodingFuzzer.cs" /> <Compile Include="Fuzzers\TypeNameFuzzer.cs" /> <Compile Include="Fuzzers\UTF8Fuzzer.cs" /> + <Compile Include="Fuzzers\Utf8JsonWriterFuzzer.cs" /> <Compile Include="IFuzzer.cs" /> <Compile Include="PooledBoundedMemory.cs" /> <Compile Include="Program.cs" /> diff --git a/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs b/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs new file mode 100644 index 00000000000000..767821bcf875b1 --- /dev/null +++ b/src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs @@ -0,0 +1,214 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Buffers; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; +using System.Text.Encodings.Web; +using System.Text.Json; +using System.Text.Unicode; +using SharpFuzz; + +namespace DotnetFuzzing.Fuzzers; + +internal sealed class Utf8JsonWriterFuzzer : IFuzzer +{ + public string[] TargetAssemblies { get; } = ["System.Text.Json"]; + + public string[] TargetCoreLibPrefixes => []; + + // One of the bytes in the input is used to set various test options. 
+ // Each bit in that byte represents a different option as indicated here. + + // Options for JsonWriterOptions + private const byte IndentFlag = 1; + private const byte EncoderFlag = 1 << 1; + private const byte MaxDepthFlag = 1 << 2; + private const byte NewLineFlag = 1 << 3; + private const byte SkipValidationFlag = 1 << 4; + + // Options for choosing between UTF-8 and UTF-16 encoding + private const byte EncodingFlag = 1 << 5; + + public void FuzzTarget(ReadOnlySpan<byte> bytes) + { + const int minLength = 10; // 2 ints, 1 byte, and 1 padding to align chars + if (bytes.Length < minLength) + { + return; + } + + // First 2 ints are used as indices to slice the input and the following byte is used for options + ReadOnlySpan<int> ints = MemoryMarshal.Cast<byte, int>(bytes); + int slice1 = ints[0]; + int slice2 = ints[1]; + byte optionsByte = bytes[8]; + bytes = bytes.Slice(minLength); + ReadOnlySpan<char> chars = MemoryMarshal.Cast<byte, char>(bytes); + + // Validate that the indices are within bounds of the input + bool utf8 = (optionsByte & EncodingFlag) == 0; + if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (utf8 ? bytes.Length : chars.Length))) + { + return; + } + + // Set up options based on the first byte + bool indented = (optionsByte & IndentFlag) == 0; + JsonWriterOptions options = new() + { + Encoder = (optionsByte & EncodingFlag) == 0 ? JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping, + Indented = indented, + MaxDepth = (optionsByte & MaxDepthFlag) == 0 ? 1 : 0, + NewLine = (optionsByte & NewLineFlag) == 0 ? "\n" : "\r\n", + SkipValidation = (optionsByte & SkipValidationFlag) == 0, + }; + + // Compute the expected result by using the encoder directly and the input + int maxExpandedSizeBytes = 6 * bytes.Length + 2; + byte[] expectedBuffer = ArrayPool<byte>.Shared.Rent(maxExpandedSizeBytes); + Span<byte> expected = + expectedBuffer.AsSpan(0, utf8 + ? EncodeToUtf8(bytes, expectedBuffer, options.Encoder) + : EncodeToUtf8(chars, expectedBuffer, options.Encoder)); + + // Compute the actual result by using Utf8JsonWriter. Each iteration is a different slice of the input, but the result should be the same. + byte[] actualBuffer = new byte[expected.Length]; + foreach (ReadOnlySpan<Range> ranges in new[] + { + new[] { 0.. }, + new[] { 0..slice1, slice1.. }, + new[] { 0..slice1, slice1..slice2, slice2.. }, + }) + { + using MemoryStream stream = new(actualBuffer); + using Utf8JsonWriter writer = new(stream, options); + + if (utf8) + { + WriteStringValueSegments(writer, bytes, ranges); + } + else + { + WriteStringValueSegments(writer, chars, ranges); + } + + writer.Flush(); + + // Compare the expected and actual results + Assert.SequenceEqual(expected, actualBuffer); + Assert.Equal(expected.Length, writer.BytesCommitted); + Assert.Equal(0, writer.BytesPending); + + Array.Clear(actualBuffer); + } + + // Additional test for mixing UTF-8 and UTF-16 encoding. The alignment math is easier in UTF-16 mode so just run it for that. 
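(Reviewer sketch, not part of the patch.) The loop above drives the new Utf8JsonWriter.WriteStringValueSegment API that this change introduces (see the ref update to System.Text.Json.cs further down). A minimal standalone usage, assuming only the two signatures shown in that ref file, would look like:

// Minimal sketch: producing one JSON string value from two partial buffers.
using System;
using System.IO;
using System.Text;
using System.Text.Json;

using MemoryStream stream = new();
using (Utf8JsonWriter writer = new(stream))
{
    writer.WriteStringValueSegment("Hello, ".AsSpan(), isFinalSegment: false); // opens the string, does not close it
    writer.WriteStringValueSegment("world".AsSpan(), isFinalSegment: true);    // final segment closes the quoted string
}
Console.WriteLine(Encoding.UTF8.GetString(stream.ToArray())); // "Hello, world"

Per the new CannotMixEncodings resource added below, continuing a string that was started with char segments by then writing a byte segment (or vice versa) is expected to throw InvalidOperationException, which is exactly what the if (!utf8) block that follows asserts via Assert.Throws. One small observation on the options initializer above: it tests EncodingFlag for the Encoder property even though a dedicated EncoderFlag constant is declared and otherwise unused, so EncoderFlag was presumably intended there.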
+ if (!utf8) + { + Array.Clear(expectedBuffer); + + { + ReadOnlySpan<char> firstSegment = chars[slice1..]; + ReadOnlySpan<byte> secondSegment = bytes[0..(2 * slice1)]; + + expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, expectedBuffer, options.Encoder)); + + actualBuffer = new byte[expected.Length]; + using MemoryStream stream = new(actualBuffer); + using Utf8JsonWriter writer = new(stream, options); + + writer.WriteStringValueSegment(firstSegment, false); + + Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteStringValueSegment(state, true), secondSegment); + } + + Array.Clear(expectedBuffer); + + { + ReadOnlySpan<byte> firstSegment = bytes[0..(2 * slice1)]; + ReadOnlySpan<char> secondSegment = chars[slice1..]; + + expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, secondSegment, expectedBuffer, options.Encoder)); + + actualBuffer = new byte[expected.Length]; + using MemoryStream stream = new(actualBuffer); + using Utf8JsonWriter writer = new(stream, options); + + writer.WriteStringValueSegment(firstSegment, false); + Assert.Throws<InvalidOperationException, ReadOnlySpan<char>>(state => writer.WriteStringValueSegment(state, true), secondSegment); + } + } + + ArrayPool<byte>.Shared.Return(expectedBuffer); + } + + private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<byte> bytes, ReadOnlySpan<Range> ranges) + { + for (int i = 0; i < ranges.Length; i++) + { + writer.WriteStringValueSegment(bytes[ranges[i]], i == ranges.Length - 1); + } + } + + private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<char> chars, ReadOnlySpan<Range> ranges) + { + for (int i = 0; i < ranges.Length; i++) + { + writer.WriteStringValueSegment(chars[ranges[i]], i == ranges.Length - 1); + } + } + + private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder) + { + destBuffer[0] = (byte)'"'; + encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int written, isFinalBlock: true); + destBuffer[++written] = (byte)'"'; + return written + 1; + } + + private static int EncodeToUtf8(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder) + { + int written = 1; + destBuffer[0] = (byte)'"'; + destBuffer[written += EncodeTranscode(chars, destBuffer[1..], encoder)] = (byte)'"'; + return written + 1; + } + + private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder) + { + int written = 1; + destBuffer[0] = (byte)'"'; + encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int writtenTemp, isFinalBlock: true); + written += writtenTemp; + destBuffer[written += EncodeTranscode(chars, destBuffer[written..], encoder, isFinalBlock: true)] = (byte)'"'; + return written + 1; + } + + private static int EncodeToUtf8(ReadOnlySpan<char> chars, ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder) + { + int written = 1; + destBuffer[0] = (byte)'"'; + written += EncodeTranscode(chars, destBuffer[1..], encoder, isFinalBlock: true); + encoder.EncodeUtf8(bytes, destBuffer[written..], out _, out int writtenTemp, isFinalBlock: true); + written += writtenTemp; + destBuffer[written] = (byte)'"'; + return written + 1; + } + + private static int EncodeTranscode(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder, bool isFinalBlock = true) + { + var utf16buffer = ArrayPool<char>.Shared.Rent(6 * chars.Length); + encoder.Encode(chars, utf16buffer, out _, out int 
written, isFinalBlock: true); + + Utf8.FromUtf16(utf16buffer.AsSpan(0, written), destBuffer, out _, out written, isFinalBlock); + ArrayPool<char>.Shared.Return(utf16buffer); + return written; + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index b283674d8ace46..325d64bb278bb3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -8,6 +8,10 @@ using System.Runtime.CompilerServices; using System.Text.Unicode; +#if !SYSTEM_PRIVATE_CORELIB +#pragma warning disable CS3019 // CLS compliance checking will not be performed because it is not visible from outside this assembly +#endif + namespace System.Text { /// <summary> @@ -18,7 +22,12 @@ namespace System.Text /// assuming that the underlying <see cref="Rune"/> instance is well-formed. /// </remarks> [DebuggerDisplay("{DebuggerDisplay,nq}")] - public readonly struct Rune : IComparable, IComparable<Rune>, IEquatable<Rune> +#if SYSTEM_PRIVATE_CORELIB + public +#else + internal +#endif + readonly struct Rune : IComparable, IComparable<Rune>, IEquatable<Rune> #if SYSTEM_PRIVATE_CORELIB #pragma warning disable SA1001 // Commas should be spaced correctly , ISpanFormattable @@ -141,7 +150,14 @@ private Rune(uint scalarValue, bool _) public static explicit operator Rune(int value) => new Rune(value); // Displayed as "'<char>' (U+XXXX)"; e.g., "'e' (U+0065)" - private string DebuggerDisplay => string.Create(CultureInfo.InvariantCulture, $"U+{_value:X4} '{(IsValid(_value) ? ToString() : "\uFFFD")}'"); + private string DebuggerDisplay => +#if SYSTEM_PRIVATE_CORELIB + string.Create( + CultureInfo.InvariantCulture, +#else + FormattableString.Invariant( +#endif + $"U+{_value:X4} '{(IsValid(_value) ? ToString() : "\uFFFD")}'"); /// <summary> /// Returns true if and only if this scalar value is ASCII ([ U+0000..U+007F ]) @@ -242,7 +258,6 @@ private static Rune ChangeCaseCultureAware(Rune rune, TextInfo textInfo, bool to #else private static Rune ChangeCaseCultureAware(Rune rune, CultureInfo culture, bool toUpper) { - Debug.Assert(!GlobalizationMode.Invariant, "This should've been checked by the caller."); Debug.Assert(culture != null, "This should've been checked by the caller."); Span<char> original = stackalloc char[MaxUtf16CharsPerRune]; // worst case scenario = 2 code units (for a surrogate pair) @@ -1375,12 +1390,12 @@ public static Rune ToLower(Rune value, CultureInfo culture) // ASCII characters differently than the invariant culture (e.g., Turkish I). Instead // we'll just jump straight to the globalization tables if they're available. +#if SYSTEM_PRIVATE_CORELIB if (GlobalizationMode.Invariant) { return ToLowerInvariant(value); } -#if SYSTEM_PRIVATE_CORELIB return ChangeCaseCultureAware(value, culture.TextInfo, toUpper: false); #else return ChangeCaseCultureAware(value, culture, toUpper: false); @@ -1399,6 +1414,7 @@ public static Rune ToLowerInvariant(Rune value) return UnsafeCreate(Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(value._value)); } +#if SYSTEM_PRIVATE_CORELIB if (GlobalizationMode.Invariant) { return UnsafeCreate(CharUnicodeInfo.ToLower(value._value)); @@ -1406,7 +1422,6 @@ public static Rune ToLowerInvariant(Rune value) // Non-ASCII data requires going through the case folding tables. 
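(Reviewer note.) The #if shuffle in ToLower/ToUpper(Invariant) hoists the GlobalizationMode.Invariant checks into the SYSTEM_PRIVATE_CORELIB-only path, presumably because GlobalizationMode is not visible to the out-of-tree consumers (System.Text.Json and System.Text.Encodings.Web) that now compile this shared Rune.cs. A sketch of the effective shape outside CoreLib, reconstructed from the surrounding context lines rather than quoted verbatim:

public static Rune ToLowerInvariant(Rune value)
{
    // ASCII fast path is unchanged.
    if (value.IsAscii)
    {
        return UnsafeCreate(Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(value._value));
    }

    // Non-ASCII data requires going through the case folding tables.
    return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: false);
}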
-#if SYSTEM_PRIVATE_CORELIB return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: false); #else return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: false); @@ -1424,12 +1439,12 @@ public static Rune ToUpper(Rune value, CultureInfo culture) // ASCII characters differently than the invariant culture (e.g., Turkish I). Instead // we'll just jump straight to the globalization tables if they're available. +#if SYSTEM_PRIVATE_CORELIB if (GlobalizationMode.Invariant) { return ToUpperInvariant(value); } -#if SYSTEM_PRIVATE_CORELIB return ChangeCaseCultureAware(value, culture.TextInfo, toUpper: true); #else return ChangeCaseCultureAware(value, culture, toUpper: true); @@ -1448,6 +1463,7 @@ public static Rune ToUpperInvariant(Rune value) return UnsafeCreate(Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(value._value)); } +#if SYSTEM_PRIVATE_CORELIB if (GlobalizationMode.Invariant) { return UnsafeCreate(CharUnicodeInfo.ToUpper(value._value)); @@ -1455,7 +1471,6 @@ public static Rune ToUpperInvariant(Rune value) // Non-ASCII data requires going through the case folding tables. -#if SYSTEM_PRIVATE_CORELIB return ChangeCaseCultureAware(value, TextInfo.Invariant, toUpper: true); #else return ChangeCaseCultureAware(value, CultureInfo.InvariantCulture, toUpper: true); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs index 8961529dfed83c..7a79a3a6592657 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs @@ -3,7 +3,10 @@ using System.Diagnostics; using System.Runtime.CompilerServices; + +#if SYSTEM_PRIVATE_CORELIB using System.Runtime.Intrinsics; +#endif namespace System.Text.Unicode { @@ -277,6 +280,7 @@ internal static bool UInt64OrdinalIgnoreCaseAscii(ulong valueA, ulong valueB) return (differentBits & indicator) == 0; } +#if SYSTEM_PRIVATE_CORELIB /// <summary> /// Returns true iff the TVector represents ASCII UTF-16 characters in machine endianness. /// </summary> @@ -286,5 +290,6 @@ internal static bool AllCharsInVectorAreAscii<TVector>(TVector vec) { return (vec & TVector.Create(unchecked((ushort)~0x007F))).Equals(TVector.Zero); } +#endif } } diff --git a/src/libraries/System.Text.Encodings.Web/src/Polyfills/System.Text.Rune.netstandard20.cs b/src/libraries/System.Text.Encodings.Web/src/Polyfills/System.Text.Rune.netstandard20.cs deleted file mode 100644 index f48490179238eb..00000000000000 --- a/src/libraries/System.Text.Encodings.Web/src/Polyfills/System.Text.Rune.netstandard20.cs +++ /dev/null @@ -1,547 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Buffers; -using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Text.Encodings.Web; - -// Contains a polyfill implementation of System.Text.Rune that works on netstandard2.0. 
-// Implementation copied from: -// https://github.com/dotnet/runtime/blob/177d6f1a0bfdc853ae9ffeef4be99ff984c4f5dd/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs - -namespace System.Text -{ - internal readonly struct Rune : IEquatable<Rune> - { - private const int MaxUtf16CharsPerRune = 2; // supplementary plane code points are encoded as 2 UTF-16 code units - - private const char HighSurrogateStart = '\ud800'; - private const char LowSurrogateStart = '\udc00'; - private const int HighSurrogateRange = 0x3FF; - - private readonly uint _value; - - /// <summary> - /// Creates a <see cref="Rune"/> from the provided Unicode scalar value. - /// </summary> - /// <exception cref="ArgumentOutOfRangeException"> - /// If <paramref name="value"/> does not represent a value Unicode scalar value. - /// </exception> - public Rune(uint value) - { - if (!UnicodeUtility.IsValidUnicodeScalar(value)) - { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.value); - } - _value = value; - } - - /// <summary> - /// Creates a <see cref="Rune"/> from the provided Unicode scalar value. - /// </summary> - /// <exception cref="ArgumentOutOfRangeException"> - /// If <paramref name="value"/> does not represent a value Unicode scalar value. - /// </exception> - public Rune(int value) - : this((uint)value) - { - } - - // non-validating ctor - private Rune(uint scalarValue, bool _) - { - UnicodeDebug.AssertIsValidScalar(scalarValue); - _value = scalarValue; - } - - /// <summary> - /// Returns true if and only if this scalar value is ASCII ([ U+0000..U+007F ]) - /// and therefore representable by a single UTF-8 code unit. - /// </summary> - public bool IsAscii => UnicodeUtility.IsAsciiCodePoint(_value); - - /// <summary> - /// Returns true if and only if this scalar value is within the BMP ([ U+0000..U+FFFF ]) - /// and therefore representable by a single UTF-16 code unit. - /// </summary> - public bool IsBmp => UnicodeUtility.IsBmpCodePoint(_value); - - public static bool operator ==(Rune left, Rune right) => left._value == right._value; - - public static bool operator !=(Rune left, Rune right) => left._value != right._value; - - public static bool IsControl(Rune value) - { - // Per the Unicode stability policy, the set of control characters - // is forever fixed at [ U+0000..U+001F ], [ U+007F..U+009F ]. No - // characters will ever be added to or removed from the "control characters" - // group. See https://www.unicode.org/policies/stability_policy.html. - - // Logic below depends on Rune.Value never being -1 (since Rune is a validating type) - // 00..1F (+1) => 01..20 (&~80) => 01..20 - // 7F..9F (+1) => 80..A0 (&~80) => 00..20 - - return ((value._value + 1) & ~0x80u) <= 0x20u; - } - - /// <summary> - /// A <see cref="Rune"/> instance that represents the Unicode replacement character U+FFFD. - /// </summary> - public static Rune ReplacementChar => UnsafeCreate(UnicodeUtility.ReplacementChar); - - /// <summary> - /// Returns the length in code units (<see cref="char"/>) of the - /// UTF-16 sequence required to represent this scalar value. - /// </summary> - /// <remarks> - /// The return value will be 1 or 2. - /// </remarks> - public int Utf16SequenceLength - { - get - { - int codeUnitCount = UnicodeUtility.GetUtf16SequenceLength(_value); - Debug.Assert(codeUnitCount > 0 && codeUnitCount <= MaxUtf16CharsPerRune); - return codeUnitCount; - } - } - - /// <summary> - /// Returns the Unicode scalar value as an integer. 
- /// </summary> - public int Value => (int)_value; - - /// <summary> - /// Decodes the <see cref="Rune"/> at the beginning of the provided UTF-16 source buffer. - /// </summary> - /// <returns> - /// <para> - /// If the source buffer begins with a valid UTF-16 encoded scalar value, returns <see cref="OperationStatus.Done"/>, - /// and outs via <paramref name="result"/> the decoded <see cref="Rune"/> and via <paramref name="charsConsumed"/> the - /// number of <see langword="char"/>s used in the input buffer to encode the <see cref="Rune"/>. - /// </para> - /// <para> - /// If the source buffer is empty or contains only a standalone UTF-16 high surrogate character, returns <see cref="OperationStatus.NeedMoreData"/>, - /// and outs via <paramref name="result"/> <see cref="ReplacementChar"/> and via <paramref name="charsConsumed"/> the length of the input buffer. - /// </para> - /// <para> - /// If the source buffer begins with an ill-formed UTF-16 encoded scalar value, returns <see cref="OperationStatus.InvalidData"/>, - /// and outs via <paramref name="result"/> <see cref="ReplacementChar"/> and via <paramref name="charsConsumed"/> the number of - /// <see langword="char"/>s used in the input buffer to encode the ill-formed sequence. - /// </para> - /// </returns> - /// <remarks> - /// The general calling convention is to call this method in a loop, slicing the <paramref name="source"/> buffer by - /// <paramref name="charsConsumed"/> elements on each iteration of the loop. On each iteration of the loop <paramref name="result"/> - /// will contain the real scalar value if successfully decoded, or it will contain <see cref="ReplacementChar"/> if - /// the data could not be successfully decoded. This pattern provides convenient automatic U+FFFD substitution of - /// invalid sequences while iterating through the loop. - /// </remarks> - public static OperationStatus DecodeFromUtf16(ReadOnlySpan<char> source, out Rune result, out int charsConsumed) - { - if (!source.IsEmpty) - { - // First, check for the common case of a BMP scalar value. - // If this is correct, return immediately. - - char firstChar = source[0]; - if (TryCreate(firstChar, out result)) - { - charsConsumed = 1; - return OperationStatus.Done; - } - - // First thing we saw was a UTF-16 surrogate code point. - // Let's optimistically assume for now it's a high surrogate and hope - // that combining it with the next char yields useful results. - - if (1 < (uint)source.Length) - { - char secondChar = source[1]; - if (TryCreate(firstChar, secondChar, out result)) - { - // Success! Formed a supplementary scalar value. - charsConsumed = 2; - return OperationStatus.Done; - } - else - { - // Either the first character was a low surrogate, or the second - // character was not a low surrogate. This is an error. - goto InvalidData; - } - } - else if (!char.IsHighSurrogate(firstChar)) - { - // Quick check to make sure we're not going to report NeedMoreData for - // a single-element buffer where the data is a standalone low surrogate - // character. Since no additional data will ever make this valid, we'll - // report an error immediately. - goto InvalidData; - } - } - - // If we got to this point, the input buffer was empty, or the buffer - // was a single element in length and that element was a high surrogate char. 
- - charsConsumed = source.Length; - result = ReplacementChar; - return OperationStatus.NeedMoreData; - - InvalidData: - - charsConsumed = 1; // maximal invalid subsequence for UTF-16 is always a single code unit in length - result = ReplacementChar; - return OperationStatus.InvalidData; - } - - /// <summary> - /// Decodes the <see cref="Rune"/> at the beginning of the provided UTF-8 source buffer. - /// </summary> - /// <returns> - /// <para> - /// If the source buffer begins with a valid UTF-8 encoded scalar value, returns <see cref="OperationStatus.Done"/>, - /// and outs via <paramref name="result"/> the decoded <see cref="Rune"/> and via <paramref name="bytesConsumed"/> the - /// number of <see langword="byte"/>s used in the input buffer to encode the <see cref="Rune"/>. - /// </para> - /// <para> - /// If the source buffer is empty or contains only a partial UTF-8 subsequence, returns <see cref="OperationStatus.NeedMoreData"/>, - /// and outs via <paramref name="result"/> <see cref="ReplacementChar"/> and via <paramref name="bytesConsumed"/> the length of the input buffer. - /// </para> - /// <para> - /// If the source buffer begins with an ill-formed UTF-8 encoded scalar value, returns <see cref="OperationStatus.InvalidData"/>, - /// and outs via <paramref name="result"/> <see cref="ReplacementChar"/> and via <paramref name="bytesConsumed"/> the number of - /// <see langword="char"/>s used in the input buffer to encode the ill-formed sequence. - /// </para> - /// </returns> - /// <remarks> - /// The general calling convention is to call this method in a loop, slicing the <paramref name="source"/> buffer by - /// <paramref name="bytesConsumed"/> elements on each iteration of the loop. On each iteration of the loop <paramref name="result"/> - /// will contain the real scalar value if successfully decoded, or it will contain <see cref="ReplacementChar"/> if - /// the data could not be successfully decoded. This pattern provides convenient automatic U+FFFD substitution of - /// invalid sequences while iterating through the loop. - /// </remarks> - public static OperationStatus DecodeFromUtf8(ReadOnlySpan<byte> source, out Rune result, out int bytesConsumed) - { - // This method follows the Unicode Standard's recommendation for detecting - // the maximal subpart of an ill-formed subsequence. See The Unicode Standard, - // Ch. 3.9 for more details. In summary, when reporting an invalid subsequence, - // it tries to consume as many code units as possible as long as those code - // units constitute the beginning of a longer well-formed subsequence per Table 3-7. - - int index = 0; - - // Try reading input[0]. - - if ((uint)index >= (uint)source.Length) - { - goto NeedsMoreData; - } - - uint tempValue = source[index]; - if (!UnicodeUtility.IsAsciiCodePoint(tempValue)) - { - goto NotAscii; - } - - Finish: - - bytesConsumed = index + 1; - Debug.Assert(1 <= bytesConsumed && bytesConsumed <= 4); // Valid subsequences are always length [1..4] - result = UnsafeCreate(tempValue); - return OperationStatus.Done; - - NotAscii: - - // Per Table 3-7, the beginning of a multibyte sequence must be a code unit in - // the range [C2..F4]. If it's outside of that range, it's either a standalone - // continuation byte, or it's an overlong two-byte sequence, or it's an out-of-range - // four-byte sequence. - - if (!UnicodeUtility.IsInRangeInclusive(tempValue, 0xC2, 0xF4)) - { - goto FirstByteInvalid; - } - - tempValue = (tempValue - 0xC2) << 6; - - // Try reading input[1]. 
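(Reviewer-added worked example.) To make the two's-complement trick in the deleted decoder easier to follow, here is a trace of the well-formed two-byte input [ C3 A9 ], mirroring the arithmetic around this point:

uint tempValue = (0xC3u - 0xC2u) << 6;   // 0x40: lead-byte payload, rebased at 0xC2
int next = unchecked((sbyte)0xA9);       // -87: continuation bytes 0x80..0xBF sign-extend to [-128, -65], i.e. below -64
tempValue += unchecked((uint)next);      // add the continuation payload via two's-complement wraparound
tempValue += 0x80;                       // remove the continuation byte marker
tempValue += (0xC2 - 0xC0) << 6;         // remove the leading byte marker
// tempValue == 0xE9, which falls in [0x0080, 0x07FF], so [ C3 A9 ] decodes to U+00E9 as a valid 2-byte sequence.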
- - index++; - if ((uint)index >= (uint)source.Length) - { - goto NeedsMoreData; - } - - // Continuation bytes are of the form [10xxxxxx], which means that their two's - // complement representation is in the range [-65..-128]. This allows us to - // perform a single comparison to see if a byte is a continuation byte. - - int thisByteSignExtended = (sbyte)source[index]; - if (thisByteSignExtended >= -64) - { - goto Invalid; - } - - tempValue += (uint)thisByteSignExtended; - tempValue += 0x80; // remove the continuation byte marker - tempValue += (0xC2 - 0xC0) << 6; // remove the leading byte marker - - if (tempValue < 0x0800) - { - Debug.Assert(UnicodeUtility.IsInRangeInclusive(tempValue, 0x0080, 0x07FF)); - goto Finish; // this is a valid 2-byte sequence - } - - // This appears to be a 3- or 4-byte sequence. Since per Table 3-7 we now have - // enough information (from just two code units) to detect overlong or surrogate - // sequences, we need to perform these checks now. - - if (!UnicodeUtility.IsInRangeInclusive(tempValue, ((0xE0 - 0xC0) << 6) + (0xA0 - 0x80), ((0xF4 - 0xC0) << 6) + (0x8F - 0x80))) - { - // The first two bytes were not in the range [[E0 A0]..[F4 8F]]. - // This is an overlong 3-byte sequence or an out-of-range 4-byte sequence. - goto Invalid; - } - - if (UnicodeUtility.IsInRangeInclusive(tempValue, ((0xED - 0xC0) << 6) + (0xA0 - 0x80), ((0xED - 0xC0) << 6) + (0xBF - 0x80))) - { - // This is a UTF-16 surrogate code point, which is invalid in UTF-8. - goto Invalid; - } - - if (UnicodeUtility.IsInRangeInclusive(tempValue, ((0xF0 - 0xC0) << 6) + (0x80 - 0x80), ((0xF0 - 0xC0) << 6) + (0x8F - 0x80))) - { - // This is an overlong 4-byte sequence. - goto Invalid; - } - - // The first two bytes were just fine. We don't need to perform any other checks - // on the remaining bytes other than to see that they're valid continuation bytes. - - // Try reading input[2]. - - index++; - if ((uint)index >= (uint)source.Length) - { - goto NeedsMoreData; - } - - thisByteSignExtended = (sbyte)source[index]; - if (thisByteSignExtended >= -64) - { - goto Invalid; // this byte is not a UTF-8 continuation byte - } - - tempValue <<= 6; - tempValue += (uint)thisByteSignExtended; - tempValue += 0x80; // remove the continuation byte marker - tempValue -= (0xE0 - 0xC0) << 12; // remove the leading byte marker - - if (tempValue <= 0xFFFF) - { - Debug.Assert(UnicodeUtility.IsInRangeInclusive(tempValue, 0x0800, 0xFFFF)); - goto Finish; // this is a valid 3-byte sequence - } - - // Try reading input[3]. - - index++; - if ((uint)index >= (uint)source.Length) - { - goto NeedsMoreData; - } - - thisByteSignExtended = (sbyte)source[index]; - if (thisByteSignExtended >= -64) - { - goto Invalid; // this byte is not a UTF-8 continuation byte - } - - tempValue <<= 6; - tempValue += (uint)thisByteSignExtended; - tempValue += 0x80; // remove the continuation byte marker - tempValue -= (0xF0 - 0xE0) << 18; // remove the leading byte marker - - UnicodeDebug.AssertIsValidSupplementaryPlaneScalar(tempValue); - goto Finish; // this is a valid 4-byte sequence - - FirstByteInvalid: - - index = 1; // Invalid subsequences are always at least length 1. 
- - Invalid: - - Debug.Assert(1 <= index && index <= 3); // Invalid subsequences are always length 1..3 - bytesConsumed = index; - result = ReplacementChar; - return OperationStatus.InvalidData; - - NeedsMoreData: - - Debug.Assert(0 <= index && index <= 3); // Incomplete subsequences are always length 0..3 - bytesConsumed = index; - result = ReplacementChar; - return OperationStatus.NeedMoreData; - } - - public override bool Equals([NotNullWhen(true)] object? obj) => (obj is Rune other) && Equals(other); - - public bool Equals(Rune other) => this == other; - - public override int GetHashCode() => Value; - - /// <summary> - /// Attempts to create a <see cref="Rune"/> from the provided input value. - /// </summary> - public static bool TryCreate(char ch, out Rune result) - { - uint extendedValue = ch; - if (!UnicodeUtility.IsSurrogateCodePoint(extendedValue)) - { - result = UnsafeCreate(extendedValue); - return true; - } - else - { - result = default; - return false; - } - } - - /// <summary> - /// Attempts to create a <see cref="Rune"/> from the provided UTF-16 surrogate pair. - /// Returns <see langword="false"/> if the input values don't represent a well-formed UTF-16surrogate pair. - /// </summary> - public static bool TryCreate(char highSurrogate, char lowSurrogate, out Rune result) - { - // First, extend both to 32 bits, then calculate the offset of - // each candidate surrogate char from the start of its range. - - uint highSurrogateOffset = (uint)highSurrogate - HighSurrogateStart; - uint lowSurrogateOffset = (uint)lowSurrogate - LowSurrogateStart; - - // This is a single comparison which allows us to check both for validity at once since - // both the high surrogate range and the low surrogate range are the same length. - // If the comparison fails, we call to a helper method to throw the correct exception message. - - if ((highSurrogateOffset | lowSurrogateOffset) <= HighSurrogateRange) - { - // The 0x40u << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding. - result = UnsafeCreate((highSurrogateOffset << 10) + ((uint)lowSurrogate - LowSurrogateStart) + (0x40u << 10)); - return true; - } - else - { - // Didn't have a high surrogate followed by a low surrogate. - result = default; - return false; - } - } - - /// <summary> - /// Encodes this <see cref="Rune"/> to a UTF-16 destination buffer. - /// </summary> - /// <param name="destination">The buffer to which to write this value as UTF-16.</param> - /// <param name="charsWritten"> - /// The number of <see cref="char"/>s written to <paramref name="destination"/>, - /// or 0 if the destination buffer is not large enough to contain the output.</param> - /// <returns>True if the value was written to the buffer; otherwise, false.</returns> - public bool TryEncodeToUtf16(Span<char> destination, out int charsWritten) - { - if (destination.Length >= 1) - { - if (IsBmp) - { - destination[0] = (char)_value; - charsWritten = 1; - return true; - } - else if (destination.Length >= 2) - { - UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(_value, out destination[0], out destination[1]); - charsWritten = 2; - return true; - } - } - - // Destination buffer not large enough - - charsWritten = default; - return false; - } - - /// <summary> - /// Encodes this <see cref="Rune"/> to a destination buffer as UTF-8 bytes. 
- /// </summary> - /// <param name="destination">The buffer to which to write this value as UTF-8.</param> - /// <param name="bytesWritten"> - /// The number of <see cref="byte"/>s written to <paramref name="destination"/>, - /// or 0 if the destination buffer is not large enough to contain the output.</param> - /// <returns>True if the value was written to the buffer; otherwise, false.</returns> - public bool TryEncodeToUtf8(Span<byte> destination, out int bytesWritten) - { - // The bit patterns below come from the Unicode Standard, Table 3-6. - - if (destination.Length >= 1) - { - if (IsAscii) - { - destination[0] = (byte)_value; - bytesWritten = 1; - return true; - } - - if (destination.Length >= 2) - { - if (_value <= 0x7FFu) - { - // Scalar 00000yyy yyxxxxxx -> bytes [ 110yyyyy 10xxxxxx ] - destination[0] = (byte)((_value + (0b110u << 11)) >> 6); - destination[1] = (byte)((_value & 0x3Fu) + 0x80u); - bytesWritten = 2; - return true; - } - - if (destination.Length >= 3) - { - if (_value <= 0xFFFFu) - { - // Scalar zzzzyyyy yyxxxxxx -> bytes [ 1110zzzz 10yyyyyy 10xxxxxx ] - destination[0] = (byte)((_value + (0b1110 << 16)) >> 12); - destination[1] = (byte)(((_value & (0x3Fu << 6)) >> 6) + 0x80u); - destination[2] = (byte)((_value & 0x3Fu) + 0x80u); - bytesWritten = 3; - return true; - } - - if (destination.Length >= 4) - { - // Scalar 000uuuuu zzzzyyyy yyxxxxxx -> bytes [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ] - destination[0] = (byte)((_value + (0b11110 << 21)) >> 18); - destination[1] = (byte)(((_value & (0x3Fu << 12)) >> 12) + 0x80u); - destination[2] = (byte)(((_value & (0x3Fu << 6)) >> 6) + 0x80u); - destination[3] = (byte)((_value & 0x3Fu) + 0x80u); - bytesWritten = 4; - return true; - } - } - } - } - - // Destination buffer not large enough - - bytesWritten = default; - return false; - } - - /// <summary> - /// Creates a <see cref="Rune"/> without performing validation on the input. - /// </summary> - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Rune UnsafeCreate(uint scalarValue) => new Rune(scalarValue, false); - } -} diff --git a/src/libraries/System.Text.Encodings.Web/src/Resources/Strings.resx b/src/libraries/System.Text.Encodings.Web/src/Resources/Strings.resx index 2a0d862e398987..b6d9de5a23fa29 100644 --- a/src/libraries/System.Text.Encodings.Web/src/Resources/Strings.resx +++ b/src/libraries/System.Text.Encodings.Web/src/Resources/Strings.resx @@ -117,6 +117,18 @@ <resheader name="writer"> <value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value> </resheader> + <data name="ArgumentOutOfRange_IndexMustBeLess" xml:space="preserve"> + <value>Index was out of range. 
Must be non-negative and less than the size of the collection.</value> + </data> + <data name="Argument_CannotExtractScalar" xml:space="preserve"> + <value>Cannot extract a Unicode scalar value from the specified index in the input.</value> + </data> + <data name="Argument_DestinationTooShort" xml:space="preserve"> + <value>Destination is too short.</value> + </data> + <data name="Arg_MustBeRune" xml:space="preserve"> + <value>Object must be of type Rune.</value> + </data> <data name="TextEncoderDoesNotImplementMaxOutputCharsPerInputChar" xml:space="preserve"> <value>TextEncoder does not implement MaxOutputCharsPerInputChar correctly.</value> </data> diff --git a/src/libraries/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj b/src/libraries/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj index 426e51a961445e..9536c99694bd46 100644 --- a/src/libraries/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj +++ b/src/libraries/System.Text.Encodings.Web/src/System.Text.Encodings.Web.csproj @@ -5,8 +5,7 @@ <TargetFrameworks Condition="'$(NetCoreAppPrevious)' != ''">$(TargetFrameworks);$(NetCoreAppPrevious)-windows;$(NetCoreAppPrevious)</TargetFrameworks> <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- CS3011: Only CLS-compliant members can be abstract --> - <!-- CS3019: CLS attributes on internal types. Some shared source files are internal in this project. --> - <NoWarn>$(NoWarn);CS3011;CS3019</NoWarn> + <NoWarn>$(NoWarn);CS3011</NoWarn> <UseCompilerGeneratedDocXmlFile>false</UseCompilerGeneratedDocXmlFile> <IsPackable>true</IsPackable> <PackageDescription>Provides types for encoding and escaping strings for use in JavaScript, HyperText Markup Language (HTML), and uniform resource locators (URL). @@ -51,6 +50,7 @@ System.Text.Encodings.Web.JavaScriptEncoder</PackageDescription> <Compile Include="$(CoreLibSharedDir)System\Text\UnicodeUtility.cs" Link="System\Text\UnicodeUtility.cs" /> <Compile Include="$(CommonPath)System\HexConverter.cs" Link="Common\System\HexConverter.cs" /> <Compile Include="$(CommonPath)System\Text\ValueStringBuilder.cs" Link="Common\System\Text\ValueStringBuilder.cs" /> + <Compile Include="System\ThrowHelper.cs" /> </ItemGroup> <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETCoreApp'"> @@ -59,8 +59,9 @@ System.Text.Encodings.Web.JavaScriptEncoder</PackageDescription> </ItemGroup> <ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'"> + <Compile Include="$(CoreLibSharedDir)System\Text\Rune.cs" Link="System\Text\Rune.cs" /> + <Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf16Utility.cs" Link="System\Text\Unicode\Utf16Utility.cs" /> <Compile Include="Polyfills\System.Numerics.BitOperations.netstandard20.cs" /> - <Compile Include="Polyfills\System.Text.Rune.netstandard20.cs" /> </ItemGroup> <ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', '$(NetCoreAppCurrent)'))"> diff --git a/src/libraries/System.Text.Encodings.Web/src/System/ThrowHelper.cs b/src/libraries/System.Text.Encodings.Web/src/System/ThrowHelper.cs new file mode 100644 index 00000000000000..0c3bc8378e5d56 --- /dev/null +++ b/src/libraries/System.Text.Encodings.Web/src/System/ThrowHelper.cs @@ -0,0 +1,115 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using SR = System.SR; + +namespace System +{ + internal static class ThrowHelper + { + [DoesNotReturn] + internal static void ThrowArgumentOutOfRangeException() + { + throw new ArgumentOutOfRangeException(); + } + + [DoesNotReturn] + internal static void ThrowArgumentException_DestinationTooShort() + { + throw new ArgumentException(SR.Argument_DestinationTooShort, "destination"); + } + + [DoesNotReturn] + internal static void ThrowArgumentException_CannotExtractScalar(ExceptionArgument argument) + { + throw GetArgumentException(ExceptionResource.Argument_CannotExtractScalar, argument); + } + + [DoesNotReturn] + internal static void ThrowArgumentOutOfRange_IndexMustBeLessException() + { + throw GetArgumentOutOfRangeException(ExceptionArgument.index, + ExceptionResource.ArgumentOutOfRange_IndexMustBeLess); + } + + [DoesNotReturn] + internal static void ThrowArgumentNullException(ExceptionArgument argument) + { + throw new ArgumentNullException(GetArgumentName(argument)); + } + + [DoesNotReturn] + internal static void ThrowArgumentOutOfRangeException(ExceptionArgument argument) + { + throw new ArgumentOutOfRangeException(GetArgumentName(argument)); + } + + private static ArgumentOutOfRangeException GetArgumentOutOfRangeException(ExceptionArgument argument, ExceptionResource resource) + { + return new ArgumentOutOfRangeException(GetArgumentName(argument), GetResourceString(resource)); + } + + private static ArgumentException GetArgumentException(ExceptionResource resource, ExceptionArgument argument) + { + return new ArgumentException(GetResourceString(resource), GetArgumentName(argument)); + } + + private static string GetArgumentName(ExceptionArgument argument) + { + switch (argument) + { + case ExceptionArgument.ch: + return nameof(ExceptionArgument.ch); + case ExceptionArgument.culture: + return nameof(ExceptionArgument.culture); + case ExceptionArgument.index: + return nameof(ExceptionArgument.index); + case ExceptionArgument.input: + return nameof(ExceptionArgument.input); + case ExceptionArgument.value: + return nameof(ExceptionArgument.value); + default: + Debug.Fail("The enum value is not defined, please check the ExceptionArgument Enum."); + return ""; + + }; + } + + private static string GetResourceString(ExceptionResource resource) + { + switch (resource) + { + case ExceptionResource.ArgumentOutOfRange_IndexMustBeLess: + return SR.ArgumentOutOfRange_IndexMustBeLess; + case ExceptionResource.Argument_CannotExtractScalar: + return SR.Argument_CannotExtractScalar; + default: + Debug.Fail("The enum value is not defined, please check the ExceptionResource Enum."); + return ""; + } + } + } + + // + // The convention for this enum is using the argument name as the enum name + // + internal enum ExceptionArgument + { + ch, + culture, + index, + input, + value, + } + + // + // The convention for this enum is using the resource name as the enum name + // + internal enum ExceptionResource + { + Argument_CannotExtractScalar, + ArgumentOutOfRange_IndexMustBeLess + } +} diff --git a/src/libraries/System.Text.Encodings.Web/tests/SR.cs b/src/libraries/System.Text.Encodings.Web/tests/SR.cs new file mode 100644 index 00000000000000..bfdbe18e4ba9d9 --- /dev/null +++ b/src/libraries/System.Text.Encodings.Web/tests/SR.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
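(Reviewer sketch.) This ThrowHelper exists so that the shared Rune.cs source, which this project now compiles in place of the deleted netstandard2.0 polyfill, can keep its existing throw pattern. The call shape, quoted from the polyfill constructor removed earlier in this diff, is:

public Rune(uint value)
{
    if (!UnicodeUtility.IsValidUnicodeScalar(value))
    {
        ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.value);
    }
    _value = value;
}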
+ +namespace System +{ + internal static partial class SR + { + /// <summary>Index was out of range. Must be non-negative and less than the size of the collection.</summary> + internal static string @ArgumentOutOfRange_IndexMustBeLess => @"Index was out of range. Must be non-negative and less than the size of the collection."; + /// <summary>Cannot extract a Unicode scalar value from the specified index in the input.</summary> + internal static string @Argument_CannotExtractScalar => @"Cannot extract a Unicode scalar value from the specified index in the input."; + /// <summary>Destination is too short.</summary> + internal static string @Argument_DestinationTooShort => @"Destination is too short."; + /// <summary>Object must be of type Rune.</summary> + internal static string @Arg_MustBeRune => @"Object must be of type Rune."; + } +} diff --git a/src/libraries/System.Text.Encodings.Web/tests/System.Text.Encodings.Web.Tests.csproj b/src/libraries/System.Text.Encodings.Web/tests/System.Text.Encodings.Web.Tests.csproj index 80ee57a4d5013b..59ebeba971090e 100644 --- a/src/libraries/System.Text.Encodings.Web/tests/System.Text.Encodings.Web.Tests.csproj +++ b/src/libraries/System.Text.Encodings.Web/tests/System.Text.Encodings.Web.Tests.csproj @@ -3,6 +3,8 @@ <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <TargetFrameworks>$(NetCoreAppCurrent);$(NetFrameworkMinimum)</TargetFrameworks> <UnicodeUcdVersion>15.0</UnicodeUcdVersion> + <!-- CS3021: 'type' does not need a CLSCompliant attribute because the assembly does not have a CLSCompliant attribute --> + <NoWarn Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'">$(NoWarn);CS3021</NoWarn> </PropertyGroup> <ItemGroup> <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies"> @@ -19,6 +21,7 @@ <Compile Include="AsciiByteMapTests.cs" /> <Compile Include="AsciiPreescapedDataTests.cs" /> <Compile Include="AllowedAsciiCodePointsTests.cs" /> + <Compile Include="SR.cs" /> <Compile Include="InboxEncoderCommonTests.cs" /> <Compile Include="AllowedBmpCodePointsBitmapTests.cs" /> <Compile Include="TextEncoderBatteryTests.cs" /> @@ -39,9 +42,10 @@ <Compile Include="UnicodeTestHelpers.cs" /> <Compile Include="UrlEncoderTests.cs" /> </ItemGroup> - <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETFramework'"> - <Compile Include="..\src\Polyfills\System.Text.Rune.netstandard20.cs" Link="Polyfills\System.Text.Rune.netstandard20.cs" /> - <Compile Include="..\src\System\Text\Encodings\Web\ThrowHelper.cs" Link="System\Text\Encodings\Web\ThrowHelper.cs" /> + <ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'"> + <Compile Include="$(CoreLibSharedDir)System\Text\Rune.cs" Link="System\Text\Rune.cs" /> + <Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf16Utility.cs" Link="System\Text\Unicode\Utf16Utility.cs" /> + <Compile Include="..\src\System\ThrowHelper.cs" Link="System\ThrowHelper.cs" /> </ItemGroup> <ItemGroup> <PackageReference Include="System.Private.Runtime.UnicodeData" Version="$(SystemPrivateRuntimeUnicodeDataVersion)" ExcludeAssets="contentFiles" GeneratePathProperty="true" /> diff --git a/src/libraries/System.Text.Json/ref/System.Text.Json.cs b/src/libraries/System.Text.Json/ref/System.Text.Json.cs index fb0c0103f2c89a..469ea5e4f07c89 100644 --- a/src/libraries/System.Text.Json/ref/System.Text.Json.cs +++ b/src/libraries/System.Text.Json/ref/System.Text.Json.cs @@ -679,6 +679,8 @@ public void WriteStringValue(System.ReadOnlySpan<byte> utf8Value) 
{ } public void WriteStringValue(System.ReadOnlySpan<char> value) { } public void WriteStringValue(string? value) { } public void WriteStringValue(System.Text.Json.JsonEncodedText value) { } + public void WriteStringValueSegment(System.ReadOnlySpan<byte> value, bool isFinalSegment) { } + public void WriteStringValueSegment(System.ReadOnlySpan<char> value, bool isFinalSegment) { } } } namespace System.Text.Json.Nodes diff --git a/src/libraries/System.Text.Json/src/Resources/Strings.resx b/src/libraries/System.Text.Json/src/Resources/Strings.resx index 17f409dc4fa9d3..cd8a5e392ca66e 100644 --- a/src/libraries/System.Text.Json/src/Resources/Strings.resx +++ b/src/libraries/System.Text.Json/src/Resources/Strings.resx @@ -767,7 +767,6 @@ <data name="JsonSchemaExporter_DepthTooLarge" xml:space="preserve"> <value>The depth of the generated JSON schema exceeds the JsonSerializerOptions.MaxDepth setting.</value> </data> - <!-- System.Collections polyfills --> <data name="Arg_WrongType" xml:space="preserve"> <value>The value '{0}' is not of type '{1}' and cannot be used in this generic collection.</value> </data> @@ -810,4 +809,22 @@ <data name="Argument_InvalidOffLen" xml:space="preserve"> <value>Offset and length were out of bounds for the array or count is greater than the number of elements from index to the end of the source collection.</value> </data> -</root> + <data name="CannotWriteWithinString" xml:space="preserve"> + <value>Writing a JSON property or value before writing the final string value segment is not supported.</value> + </data> + <data name="Arg_MustBeRune" xml:space="preserve"> + <value>Object must be of type Rune.</value> + </data> + <data name="ArgumentOutOfRange_IndexMustBeLess" xml:space="preserve"> + <value>Index was out of range. Must be non-negative and less than the size of the collection.</value> + </data> + <data name="Argument_DestinationTooShort" xml:space="preserve"> + <value>Destination is too short.</value> + </data> + <data name="Argument_CannotExtractScalar" xml:space="preserve"> + <value>Cannot extract a Unicode scalar value from the specified index in the input.</value> + </data> + <data name="CannotMixEncodings" xml:space="preserve"> + <value>Mixing UTF encodings in a single multi-segment JSON string is not supported. 
The previous segment's encoding was '{0}' and the current segment's encoding is '{1}'.</value> + </data> +</root> \ No newline at end of file diff --git a/src/libraries/System.Text.Json/src/System.Text.Json.csproj b/src/libraries/System.Text.Json/src/System.Text.Json.csproj index 5c429898a5fdfe..b7130feea79ea8 100644 --- a/src/libraries/System.Text.Json/src/System.Text.Json.csproj +++ b/src/libraries/System.Text.Json/src/System.Text.Json.csproj @@ -160,6 +160,7 @@ The System.Text.Json library is built-in as part of the shared framework in .NET <Compile Include="System\Text\Json\Serialization\PolymorphicSerializationState.cs" /> <Compile Include="System\Text\Json\StackHelper.cs" /> <Compile Include="System\Text\Json\ValueQueue.cs" /> + <Compile Include="System\Text\Json\Writer\Utf8JsonWriter.WriteValues.StringSegment.cs" /> <Compile Include="System\Text\Json\Writer\Utf8JsonWriterCache.cs" /> <Compile Include="System\Text\Json\Serialization\ReferenceEqualsWrapper.cs" /> <Compile Include="System\Text\Json\Serialization\ConverterStrategy.cs" /> @@ -308,7 +309,6 @@ The System.Text.Json library is built-in as part of the shared framework in .NET <Compile Include="System\Text\Json\Writer\JsonWriterHelper.Date.cs" /> <Compile Include="System\Text\Json\Writer\JsonWriterHelper.Escaping.cs" /> <Compile Include="System\Text\Json\Writer\JsonWriterOptions.cs" /> - <Compile Include="System\Text\Json\Writer\SequenceValidity.cs" /> <Compile Include="System\Text\Json\Writer\Utf8JsonWriter.cs" /> <Compile Include="System\Text\Json\Writer\Utf8JsonWriter.WriteProperties.Bytes.cs" /> <Compile Include="System\Text\Json\Writer\Utf8JsonWriter.WriteProperties.DateTime.cs" /> @@ -340,6 +340,7 @@ The System.Text.Json library is built-in as part of the shared framework in .NET <Compile Include="System\Text\Json\Writer\Utf8JsonWriter.WriteValues.UnsignedNumber.cs" /> <Compile Include="System\ReflectionExtensions.cs" /> <Compile Include="$(CommonPath)System\Obsoletions.cs" Link="Common\System\Obsoletions.cs" /> + <Compile Include="System\ThrowHelper.cs" /> </ItemGroup> <ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'"> @@ -389,6 +390,14 @@ The System.Text.Json library is built-in as part of the shared framework in .NET <Compile Include="System\Text\Json\Reader\JsonReaderHelper.netstandard.cs" /> </ItemGroup> + <!-- Polyfills for working with Unicode --> + <ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'"> + <Compile Include="$(CoreLibSharedDir)System\Text\Rune.cs" Link="System\Text\Rune.cs" /> + <Compile Include="$(CoreLibSharedDir)System\Text\UnicodeDebug.cs" Link="System\Text\UnicodeDebug.cs" /> + <Compile Include="$(CoreLibSharedDir)System\Text\UnicodeUtility.cs" Link="System\Text\UnicodeUtility.cs" /> + <Compile Include="$(CoreLibSharedDir)System\Text\Unicode\Utf16Utility.cs" Link="System\Text\Unicode\Utf16Utility.cs" /> + </ItemGroup> + <!-- Application tfms (.NETCoreApp, .NETFramework) need to use the same or higher version of .NETStandard's dependencies. 
--> <ItemGroup Condition="'$(TargetFramework)' != '$(NetCoreAppCurrent)'"> <ProjectReference Include="$(LibrariesProjectRoot)System.Text.Encodings.Web\src\System.Text.Encodings.Web.csproj" /> diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.Unescaping.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.Unescaping.cs index 8d9145febf4234..63f69942b3b7ad 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.Unescaping.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.Unescaping.cs @@ -572,12 +572,8 @@ private static bool TryUnescape(ReadOnlySpan<byte> source, Span<byte> destinatio + JsonConstants.UnicodePlane01StartValue; } -#if NET var rune = new Rune(scalar); bool success = rune.TryEncodeToUtf8(destination.Slice(written), out int bytesWritten); -#else - bool success = TryEncodeToUtf8Bytes((uint)scalar, destination.Slice(written), out int bytesWritten); -#endif if (!success) { goto DestinationTooShort; @@ -644,73 +640,5 @@ private static bool TryUnescape(ReadOnlySpan<byte> source, Span<byte> destinatio DestinationTooShort: return false; } - -#if !NET - /// <summary> - /// Copies the UTF-8 code unit representation of this scalar to an output buffer. - /// The buffer must be large enough to hold the required number of <see cref="byte"/>s. - /// </summary> - private static bool TryEncodeToUtf8Bytes(uint scalar, Span<byte> utf8Destination, out int bytesWritten) - { - Debug.Assert(JsonHelpers.IsValidUnicodeScalar(scalar)); - - if (scalar < 0x80U) - { - // Single UTF-8 code unit - if ((uint)utf8Destination.Length < 1u) - { - bytesWritten = 0; - return false; - } - - utf8Destination[0] = (byte)scalar; - bytesWritten = 1; - } - else if (scalar < 0x800U) - { - // Two UTF-8 code units - if ((uint)utf8Destination.Length < 2u) - { - bytesWritten = 0; - return false; - } - - utf8Destination[0] = (byte)(0xC0U | (scalar >> 6)); - utf8Destination[1] = (byte)(0x80U | (scalar & 0x3FU)); - bytesWritten = 2; - } - else if (scalar < 0x10000U) - { - // Three UTF-8 code units - if ((uint)utf8Destination.Length < 3u) - { - bytesWritten = 0; - return false; - } - - utf8Destination[0] = (byte)(0xE0U | (scalar >> 12)); - utf8Destination[1] = (byte)(0x80U | ((scalar >> 6) & 0x3FU)); - utf8Destination[2] = (byte)(0x80U | (scalar & 0x3FU)); - bytesWritten = 3; - } - else - { - // Four UTF-8 code units - if ((uint)utf8Destination.Length < 4u) - { - bytesWritten = 0; - return false; - } - - utf8Destination[0] = (byte)(0xF0U | (scalar >> 18)); - utf8Destination[1] = (byte)(0x80U | ((scalar >> 12) & 0x3FU)); - utf8Destination[2] = (byte)(0x80U | ((scalar >> 6) & 0x3FU)); - utf8Destination[3] = (byte)(0x80U | (scalar & 0x3FU)); - bytesWritten = 4; - } - - return true; - } -#endif } } diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/ThrowHelper.cs b/src/libraries/System.Text.Json/src/System/Text/Json/ThrowHelper.cs index 9d5ac90d8a83e2..2bc50fcfeb4d39 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/ThrowHelper.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/ThrowHelper.cs @@ -311,6 +311,12 @@ public static void ThrowInvalidOperationException_CannotSkipOnPartial() throw GetInvalidOperationException(SR.CannotSkip); } + [DoesNotReturn] + public static void ThrowInvalidOperationException_CannotMixEncodings(Utf8JsonWriter.SegmentEncoding previousEncoding, Utf8JsonWriter.SegmentEncoding currentEncoding) + { + throw 
GetInvalidOperationException(SR.Format(SR.CannotMixEncodings, previousEncoding, currentEncoding)); + } + private static InvalidOperationException GetInvalidOperationException(string message, JsonTokenType tokenType) { return GetInvalidOperationException(SR.Format(SR.InvalidCast, tokenType, message)); @@ -616,6 +622,9 @@ private static string GetResourceString(ExceptionResource resource, int currentD case ExceptionResource.CannotWriteValueAfterPrimitiveOrClose: message = SR.Format(SR.CannotWriteValueAfterPrimitiveOrClose, tokenType); break; + case ExceptionResource.CannotWriteWithinString: + message = SR.CannotWriteWithinString; + break; default: Debug.Fail($"The ExceptionResource enum value: {resource} is not part of the switch. Add the appropriate case and exception message."); break; @@ -782,6 +791,7 @@ internal enum ExceptionResource ExpectedOneCompleteToken, NotEnoughData, InvalidLeadingZeroInNumber, + CannotWriteWithinString, } internal enum NumericType diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs index 140ecfb9112314..3010e31cbd6fd8 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs @@ -79,37 +79,42 @@ public static int GetMaxEscapedLength(int textLength, int firstIndexToEscape) return firstIndexToEscape + JsonConstants.MaxExpansionFactorWhileEscaping * (textLength - firstIndexToEscape); } - private static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination, JavaScriptEncoder encoder, ref int written) + private static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination, JavaScriptEncoder encoder, ref int consumed, ref int written, bool isFinalBlock) { Debug.Assert(encoder != null); - OperationStatus result = encoder.EncodeUtf8(value, destination, out int encoderBytesConsumed, out int encoderBytesWritten); + OperationStatus result = encoder.EncodeUtf8(value, destination, out int encoderBytesConsumed, out int encoderBytesWritten, isFinalBlock); Debug.Assert(result != OperationStatus.DestinationTooSmall); - Debug.Assert(result != OperationStatus.NeedMoreData); + Debug.Assert(result != OperationStatus.NeedMoreData || !isFinalBlock); - if (result != OperationStatus.Done) + if (!(result == OperationStatus.Done || (result == OperationStatus.NeedMoreData && !isFinalBlock))) { ThrowHelper.ThrowArgumentException_InvalidUTF8(value.Slice(encoderBytesWritten)); } - Debug.Assert(encoderBytesConsumed == value.Length); + Debug.Assert(encoderBytesConsumed == value.Length || (result == OperationStatus.NeedMoreData && !isFinalBlock)); written += encoderBytesWritten; + consumed += encoderBytesConsumed; } public static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination, int indexOfFirstByteToEscape, JavaScriptEncoder? encoder, out int written) + => EscapeString(value, destination, indexOfFirstByteToEscape, encoder, out _, out written, isFinalBlock: true); + + public static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination, int indexOfFirstByteToEscape, JavaScriptEncoder? 
encoder, out int consumed, out int written, bool isFinalBlock = true) { Debug.Assert(indexOfFirstByteToEscape >= 0 && indexOfFirstByteToEscape < value.Length); value.Slice(0, indexOfFirstByteToEscape).CopyTo(destination); written = indexOfFirstByteToEscape; + consumed = indexOfFirstByteToEscape; if (encoder != null) { destination = destination.Slice(indexOfFirstByteToEscape); value = value.Slice(indexOfFirstByteToEscape); - EscapeString(value, destination, encoder, ref written); + EscapeString(value, destination, encoder, ref consumed, ref written, isFinalBlock); } else { @@ -124,12 +129,14 @@ public static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination { EscapeNextBytes(val, destination, ref written); indexOfFirstByteToEscape++; + consumed++; } else { destination[written] = val; written++; indexOfFirstByteToEscape++; + consumed++; } } else @@ -137,7 +144,7 @@ public static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination // Fall back to default encoder. destination = destination.Slice(written); value = value.Slice(indexOfFirstByteToEscape); - EscapeString(value, destination, JavaScriptEncoder.Default, ref written); + EscapeString(value, destination, JavaScriptEncoder.Default, ref consumed, ref written, isFinalBlock); break; } } @@ -190,37 +197,42 @@ private static void EscapeNextBytes(byte value, Span<byte> destination, ref int private static bool IsAsciiValue(char value) => value <= LastAsciiCharacter; - private static void EscapeString(ReadOnlySpan<char> value, Span<char> destination, JavaScriptEncoder encoder, ref int written) + private static void EscapeString(ReadOnlySpan<char> value, Span<char> destination, JavaScriptEncoder encoder, ref int consumed, ref int written, bool isFinalBlock) { Debug.Assert(encoder != null); - OperationStatus result = encoder.Encode(value, destination, out int encoderBytesConsumed, out int encoderCharsWritten); + OperationStatus result = encoder.Encode(value, destination, out int encoderBytesConsumed, out int encoderCharsWritten, isFinalBlock); Debug.Assert(result != OperationStatus.DestinationTooSmall); - Debug.Assert(result != OperationStatus.NeedMoreData); + Debug.Assert(result != OperationStatus.NeedMoreData || !isFinalBlock); - if (result != OperationStatus.Done) + if (!(result == OperationStatus.Done || (result == OperationStatus.NeedMoreData && !isFinalBlock))) { ThrowHelper.ThrowArgumentException_InvalidUTF16(value[encoderCharsWritten]); } - Debug.Assert(encoderBytesConsumed == value.Length); + Debug.Assert(encoderBytesConsumed == value.Length || (result == OperationStatus.NeedMoreData && !isFinalBlock)); written += encoderCharsWritten; + consumed += encoderBytesConsumed; } public static void EscapeString(ReadOnlySpan<char> value, Span<char> destination, int indexOfFirstByteToEscape, JavaScriptEncoder? encoder, out int written) + => EscapeString(value, destination, indexOfFirstByteToEscape, encoder, out _, out written, isFinalBlock: true); + + public static void EscapeString(ReadOnlySpan<char> value, Span<char> destination, int indexOfFirstByteToEscape, JavaScriptEncoder? 
encoder, out int consumed, out int written, bool isFinalBlock = true) { Debug.Assert(indexOfFirstByteToEscape >= 0 && indexOfFirstByteToEscape < value.Length); value.Slice(0, indexOfFirstByteToEscape).CopyTo(destination); written = indexOfFirstByteToEscape; + consumed = indexOfFirstByteToEscape; if (encoder != null) { destination = destination.Slice(indexOfFirstByteToEscape); value = value.Slice(indexOfFirstByteToEscape); - EscapeString(value, destination, encoder, ref written); + EscapeString(value, destination, encoder, ref consumed, ref written, isFinalBlock); } else { @@ -235,12 +247,14 @@ public static void EscapeString(ReadOnlySpan<char> value, Span<char> destination { EscapeNextChars(val, destination, ref written); indexOfFirstByteToEscape++; + consumed++; } else { destination[written] = val; written++; indexOfFirstByteToEscape++; + consumed++; } } else @@ -248,7 +262,7 @@ public static void EscapeString(ReadOnlySpan<char> value, Span<char> destination // Fall back to default encoder. destination = destination.Slice(written); value = value.Slice(indexOfFirstByteToEscape); - EscapeString(value, destination, JavaScriptEncoder.Default, ref written); + EscapeString(value, destination, JavaScriptEncoder.Default, ref consumed, ref written, isFinalBlock); break; } } diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/SequenceValidity.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/SequenceValidity.cs deleted file mode 100644 index 6d7ec2ce08e397..00000000000000 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/SequenceValidity.cs +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -namespace System.Buffers.Text -{ - /// <summary> - /// Represents the validity of a UTF code unit sequence. - /// </summary> - internal enum SequenceValidity - { - /// <summary> - /// The sequence is empty. - /// </summary> - Empty = 0, - - /// <summary> - /// The sequence is well-formed and unambiguously represents a proper Unicode scalar value. - /// </summary> - /// <remarks> - /// [ 20 ] (U+0020 SPACE) is a well-formed UTF-8 sequence. - /// [ C3 A9 ] (U+00E9 LATIN SMALL LETTER E WITH ACUTE) is a well-formed UTF-8 sequence. - /// [ F0 9F 98 80 ] (U+1F600 GRINNING FACE) is a well-formed UTF-8 sequence. - /// [ D83D DE00 ] (U+1F600 GRINNING FACE) is a well-formed UTF-16 sequence. - /// </remarks> - WellFormed = 1, - - /// <summary> - /// The sequence is not well-formed on its own, but it could appear as a prefix - /// of a longer well-formed sequence. More code units are needed to make a proper - /// determination as to whether this sequence is well-formed. Incomplete sequences - /// can only appear at the end of a string. - /// </summary> - /// <remarks> - /// [ C2 ] is an incomplete UTF-8 sequence if it is followed by nothing. - /// [ F0 9F ] is an incomplete UTF-8 sequence if it is followed by nothing. - /// [ D83D ] is an incomplete UTF-16 sequence if it is followed by nothing. - /// </remarks> - Incomplete = 2, - - /// <summary> - /// The sequence is never well-formed anywhere, or this sequence can never appear as a prefix - /// of a longer well-formed sequence, or the sequence was improperly terminated by the code - /// unit which appeared immediately after this sequence. - /// </summary> - /// <remarks> - /// [ 80 ] is an invalid UTF-8 sequence (code unit cannot appear at start of sequence). 
- /// [ FE ] is an invalid UTF-8 sequence (sequence is never well-formed anywhere in UTF-8 string). - /// [ C2 ] is an invalid UTF-8 sequence if it is followed by [ 20 ] (sequence improperly terminated). - /// [ ED A0 ] is an invalid UTF-8 sequence (sequence is never well-formed anywhere in UTF-8 string). - /// [ DE00 ] is an invalid UTF-16 sequence (code unit cannot appear at start of sequence). - /// </remarks> - Invalid = 3 - } -} diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteProperties.Helpers.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteProperties.Helpers.cs index 95a0a67451641c..b3b85281b82cc9 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteProperties.Helpers.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteProperties.Helpers.cs @@ -36,6 +36,9 @@ private void ValidateWritingProperty() { if (!_options.SkipValidation) { + // Make sure a new property is not attempted within an unfinalized string. + ValidateNotWithinUnfinalizedString(); + if (!_inObject || _tokenType == JsonTokenType.PropertyName) { Debug.Assert(_tokenType != JsonTokenType.StartObject); @@ -49,6 +52,9 @@ private void ValidateWritingProperty(byte token) { if (!_options.SkipValidation) { + // Make sure a new property is not attempted within an unfinalized string. + ValidateNotWithinUnfinalizedString(); + if (!_inObject || _tokenType == JsonTokenType.PropertyName) { Debug.Assert(_tokenType != JsonTokenType.StartObject); diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Comment.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Comment.cs index aa62396df6c876..e0fa3e91cb1ad8 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Comment.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Comment.cs @@ -61,6 +61,13 @@ public void WriteCommentValue(ReadOnlySpan<char> value) private void WriteCommentByOptions(ReadOnlySpan<char> value) { + if (!_options.SkipValidation) + { + // Comments generally can be placed anywhere in JSON, but not after a non-final + // string segment. 
+ ValidateNotWithinUnfinalizedString(); + } + if (_options.Indented) { WriteCommentIndented(value); diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs index eeee39e0447d67..a8440144d4cf88 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs @@ -4,16 +4,44 @@ using System.Buffers; using System.Buffers.Text; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace System.Text.Json { public sealed partial class Utf8JsonWriter { + private bool HasPartialCodePoint => PartialCodePointLength != 0; + + private void ClearPartialCodePoint() => PartialCodePointLength = 0; + + private void ValidateEncodingDidNotChange(SegmentEncoding currentSegmentEncoding) + { + if (PreviousSegmentEncoding != currentSegmentEncoding) + { + ThrowHelper.ThrowInvalidOperationException_CannotMixEncodings(PreviousSegmentEncoding, currentSegmentEncoding); + } + } + + private void ValidateNotWithinUnfinalizedString() + { + if (_tokenType == StringSegmentSentinel) + { + ThrowHelper.ThrowInvalidOperationException(ExceptionResource.CannotWriteWithinString, currentDepth: default, maxDepth: _options.MaxDepth, token: default, _tokenType); + } + + Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None); + Debug.Assert(!HasPartialCodePoint); + } + private void ValidateWritingValue() { Debug.Assert(!_options.SkipValidation); + // Make sure a new value is not attempted within an unfinalized string. + ValidateNotWithinUnfinalizedString(); + if (_inObject) { if (_tokenType != JsonTokenType.PropertyName) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.StringSegment.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.StringSegment.cs new file mode 100644 index 00000000000000..08b517cce9648d --- /dev/null +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.StringSegment.cs @@ -0,0 +1,472 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Buffers; +using System.Buffers.Text; +using System.ComponentModel; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace System.Text.Json +{ + public sealed partial class Utf8JsonWriter + { + /// <summary> + /// Writes the text value segment as a partial JSON string. + /// </summary> + /// <param name="value">The value to write.</param> + /// <param name="isFinalSegment">Indicates that this is the final segment of the string.</param> + /// <exception cref="ArgumentException"> + /// Thrown when the specified value is too large. + /// </exception> + /// <exception cref="InvalidOperationException"> + /// Thrown if this would result in invalid JSON being written (while validation is enabled) or + /// if the previously written segment (if any) was not written with this same overload. + /// </exception> + /// <remarks> + /// The value is escaped before writing. 
+        /// </remarks>
+        public void WriteStringValueSegment(ReadOnlySpan<char> value, bool isFinalSegment)
+        {
+            JsonWriterHelper.ValidateValue(value);
+
+            if (_tokenType != Utf8JsonWriter.StringSegmentSentinel)
+            {
+                Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None);
+                Debug.Assert(!HasPartialCodePoint);
+
+                if (!_options.SkipValidation)
+                {
+                    ValidateWritingValue();
+                }
+
+                WriteStringSegmentPrologue();
+
+                PreviousSegmentEncoding = SegmentEncoding.Utf16;
+                _tokenType = Utf8JsonWriter.StringSegmentSentinel;
+            }
+            else
+            {
+                ValidateEncodingDidNotChange(SegmentEncoding.Utf16);
+            }
+
+            // The steps to write a string segment are to complete the previous partial code point
+            // and to escape, either of which might not be required, so there is a fast path for each of these steps.
+            if (HasPartialCodePoint)
+            {
+                WriteStringSegmentWithLeftover(value, isFinalSegment);
+            }
+            else
+            {
+                WriteStringSegmentEscape(value, isFinalSegment);
+            }
+
+            if (isFinalSegment)
+            {
+                WriteStringSegmentEpilogue();
+
+                SetFlagToAddListSeparatorBeforeNextItem();
+                PreviousSegmentEncoding = SegmentEncoding.None;
+                _tokenType = JsonTokenType.String;
+            }
+        }
+
+        private void WriteStringSegmentWithLeftover(scoped ReadOnlySpan<char> value, bool isFinalSegment)
+        {
+            Debug.Assert(HasPartialCodePoint);
+            Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Utf16);
+
+            scoped ReadOnlySpan<char> partialCodePointBuffer = PartialUtf16CodePoint;
+
+            Span<char> combinedBuffer = stackalloc char[2];
+            combinedBuffer = combinedBuffer.Slice(0, ConcatInto(partialCodePointBuffer, value, combinedBuffer));
+
+            switch (Rune.DecodeFromUtf16(combinedBuffer, out _, out int charsConsumed))
+            {
+                case OperationStatus.NeedMoreData:
+                    Debug.Assert(value.Length + partialCodePointBuffer.Length < 2);
+                    Debug.Assert(charsConsumed == value.Length + partialCodePointBuffer.Length);
+                    // Let the encoder deal with the error if this is a final buffer.
+                    value = combinedBuffer.Slice(0, charsConsumed);
+                    partialCodePointBuffer = ReadOnlySpan<char>.Empty;
+                    break;
+                case OperationStatus.Done:
+                    Debug.Assert(charsConsumed > partialCodePointBuffer.Length);
+                    Debug.Assert(charsConsumed <= 2);
+                    // Divide up the code point chars into its own buffer and the remainder of the input buffer.
+                    value = value.Slice(charsConsumed - partialCodePointBuffer.Length);
+                    partialCodePointBuffer = combinedBuffer.Slice(0, charsConsumed);
+                    break;
+                case OperationStatus.InvalidData:
+                    Debug.Assert(charsConsumed >= partialCodePointBuffer.Length);
+                    Debug.Assert(charsConsumed <= 2);
+                    value = value.Slice(charsConsumed - partialCodePointBuffer.Length);
+                    partialCodePointBuffer = combinedBuffer.Slice(0, charsConsumed);
+                    break;
+                case OperationStatus.DestinationTooSmall:
+                default:
+                    Debug.Fail("Unexpected OperationStatus return value.");
+                    break;
+            }
+
+            // The "isFinalSegment" argument indicates whether input that needs more data should be consumed as an error or not.
+            // Because we have validated above that partialCodePointBuffer will be the next consumed chars during Rune decoding
+            // (even if this is because it is invalid), we should pass isFinalSegment = true to indicate to the decoder to
+            // parse the code units without extra data.
+            //
+            // This is relevant in the case of having ['\uD800', 'C'], where the validation above would have needed both code units
+            // to determine that only the first unit should be consumed (as invalid). So this method will get only ['\uD800'].
+ // Because we know more data will not be able to complete this code point, we need to pass isFinalSegment = true + // to ensure that the encoder consumes this data eagerly instead of leaving it and returning NeedsMoreData. + WriteStringSegmentEscape(partialCodePointBuffer, true); + + ClearPartialCodePoint(); + + WriteStringSegmentEscape(value, isFinalSegment); + } + + private void WriteStringSegmentEscape(ReadOnlySpan<char> value, bool isFinalSegment) + { + if (value.IsEmpty) return; + + int escapeIdx = JsonWriterHelper.NeedsEscaping(value, _options.Encoder); + if (escapeIdx != -1) + { + WriteStringSegmentEscapeValue(value, escapeIdx, isFinalSegment); + } + else + { + WriteStringSegmentData(value); + } + } + + private void WriteStringSegmentEscapeValue(ReadOnlySpan<char> value, int firstEscapeIndexVal, bool isFinalSegment) + { + Debug.Assert(int.MaxValue / JsonConstants.MaxExpansionFactorWhileEscaping >= value.Length); + Debug.Assert(firstEscapeIndexVal >= 0 && firstEscapeIndexVal < value.Length); + + char[]? valueArray = null; + + int length = JsonWriterHelper.GetMaxEscapedLength(value.Length, firstEscapeIndexVal); + + Span<char> escapedValue = length <= JsonConstants.StackallocCharThreshold ? + stackalloc char[JsonConstants.StackallocCharThreshold] : + (valueArray = ArrayPool<char>.Shared.Rent(length)); + + JsonWriterHelper.EscapeString(value, escapedValue, firstEscapeIndexVal, _options.Encoder, out int consumed, out int written, isFinalSegment); + + WriteStringSegmentData(escapedValue.Slice(0, written)); + + Debug.Assert(consumed == value.Length || !isFinalSegment); + if (value.Length != consumed) + { + Debug.Assert(!isFinalSegment); + Debug.Assert(value.Length - consumed < 2); + PartialUtf16CodePoint = value.Slice(consumed); + } + + if (valueArray != null) + { + ArrayPool<char>.Shared.Return(valueArray); + } + } + + private void WriteStringSegmentData(ReadOnlySpan<char> escapedValue) + { + Debug.Assert(escapedValue.Length < (int.MaxValue / JsonConstants.MaxExpansionFactorWhileTranscoding)); + + int requiredBytes = escapedValue.Length * JsonConstants.MaxExpansionFactorWhileTranscoding; + + if (_memory.Length - BytesPending < requiredBytes) + { + Grow(requiredBytes); + } + + Span<byte> output = _memory.Span; + + TranscodeAndWrite(escapedValue, output); + } + + /// <summary> + /// Writes the UTF-8 text value segment as a partial JSON string. + /// </summary> + /// <param name="value">The UTF-8 encoded value to be written as a JSON string element of a JSON array.</param> + /// <param name="isFinalSegment">Indicates that this is the final segment of the string.</param> + /// <exception cref="ArgumentException"> + /// Thrown when the specified value is too large. + /// </exception> + /// <exception cref="InvalidOperationException"> + /// Thrown if this would result in invalid JSON being written (while validation is enabled) or + /// if the previously written segment (if any) was not written with this same overload. + /// </exception> + /// <remarks> + /// The value is escaped before writing. 
+        /// </remarks>
+        public void WriteStringValueSegment(ReadOnlySpan<byte> value, bool isFinalSegment)
+        {
+            JsonWriterHelper.ValidateValue(value);
+
+            if (_tokenType != Utf8JsonWriter.StringSegmentSentinel)
+            {
+                Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None);
+                Debug.Assert(!HasPartialCodePoint);
+
+                if (!_options.SkipValidation)
+                {
+                    ValidateWritingValue();
+                }
+
+                WriteStringSegmentPrologue();
+
+                PreviousSegmentEncoding = SegmentEncoding.Utf8;
+                _tokenType = Utf8JsonWriter.StringSegmentSentinel;
+            }
+            else
+            {
+                ValidateEncodingDidNotChange(SegmentEncoding.Utf8);
+            }
+
+            // The steps to write a string segment are to complete the previous partial code point
+            // and to escape, either of which might not be required, so there is a fast path for each of these steps.
+            if (HasPartialCodePoint)
+            {
+                WriteStringSegmentWithLeftover(value, isFinalSegment);
+            }
+            else
+            {
+                WriteStringSegmentEscape(value, isFinalSegment);
+            }
+
+            if (isFinalSegment)
+            {
+                WriteStringSegmentEpilogue();
+
+                SetFlagToAddListSeparatorBeforeNextItem();
+                PreviousSegmentEncoding = SegmentEncoding.None;
+                _tokenType = JsonTokenType.String;
+            }
+        }
+
+        private void WriteStringSegmentWithLeftover(scoped ReadOnlySpan<byte> utf8Value, bool isFinalSegment)
+        {
+            Debug.Assert(HasPartialCodePoint);
+            Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Utf8);
+
+            scoped ReadOnlySpan<byte> partialCodePointBuffer = PartialUtf8CodePoint;
+
+            Span<byte> combinedBuffer = stackalloc byte[4];
+            combinedBuffer = combinedBuffer.Slice(0, ConcatInto(partialCodePointBuffer, utf8Value, combinedBuffer));
+
+            switch (Rune.DecodeFromUtf8(combinedBuffer, out _, out int bytesConsumed))
+            {
+                case OperationStatus.NeedMoreData:
+                    Debug.Assert(utf8Value.Length + partialCodePointBuffer.Length < 4);
+                    Debug.Assert(bytesConsumed == utf8Value.Length + partialCodePointBuffer.Length);
+                    // Let the encoder deal with the error if this is a final buffer.
+                    utf8Value = combinedBuffer.Slice(0, bytesConsumed);
+                    partialCodePointBuffer = ReadOnlySpan<byte>.Empty;
+                    break;
+                case OperationStatus.Done:
+                    Debug.Assert(bytesConsumed > partialCodePointBuffer.Length);
+                    Debug.Assert(bytesConsumed <= 4);
+                    // Divide up the code point bytes into its own buffer and the remainder of the input buffer.
+                    utf8Value = utf8Value.Slice(bytesConsumed - partialCodePointBuffer.Length);
+                    partialCodePointBuffer = combinedBuffer.Slice(0, bytesConsumed);
+                    break;
+                case OperationStatus.InvalidData:
+                    Debug.Assert(bytesConsumed >= partialCodePointBuffer.Length);
+                    Debug.Assert(bytesConsumed <= 4);
+                    utf8Value = utf8Value.Slice(bytesConsumed - partialCodePointBuffer.Length);
+                    partialCodePointBuffer = combinedBuffer.Slice(0, bytesConsumed);
+                    break;
+                case OperationStatus.DestinationTooSmall:
+                default:
+                    Debug.Fail("Unexpected OperationStatus return value.");
+                    break;
+            }
+
+            // The "isFinalSegment" argument indicates whether input that needs more data should be consumed as an error or not.
+            // Because we have validated above that partialCodePointBuffer will be the next consumed bytes during Rune decoding
+            // (even if this is because it is invalid), we should pass isFinalSegment = true to indicate to the decoder to
+            // parse the code units without extra data.
+            //
+            // This is relevant in the case of having [<3-length prefix code unit>, <continuation>, <ascii>], where the validation
+            // above would have needed all 3 code units to determine that only the first 2 units should be consumed (as invalid).
+ // So this method will get only <3-size prefix code unit><continuation>. Because we know more data will not be able + // to complete this code point, we need to pass isFinalSegment = true to ensure that the encoder consumes this data eagerly + // instead of leaving it and returning NeedsMoreData. + WriteStringSegmentEscape(partialCodePointBuffer, true); + + ClearPartialCodePoint(); + + WriteStringSegmentEscape(utf8Value, isFinalSegment); + } + + private void WriteStringSegmentEscape(ReadOnlySpan<byte> utf8Value, bool isFinalSegment) + { + if (utf8Value.IsEmpty) return; + + int escapeIdx = JsonWriterHelper.NeedsEscaping(utf8Value, _options.Encoder); + if (escapeIdx != -1) + { + WriteStringSegmentEscapeValue(utf8Value, escapeIdx, isFinalSegment); + } + else + { + WriteStringSegmentData(utf8Value); + } + } + + private void WriteStringSegmentEscapeValue(ReadOnlySpan<byte> utf8Value, int firstEscapeIndexVal, bool isFinalSegment) + { + Debug.Assert(int.MaxValue / JsonConstants.MaxExpansionFactorWhileEscaping >= utf8Value.Length); + Debug.Assert(firstEscapeIndexVal >= 0 && firstEscapeIndexVal < utf8Value.Length); + byte[]? valueArray = null; + int length = JsonWriterHelper.GetMaxEscapedLength(utf8Value.Length, firstEscapeIndexVal); + Span<byte> escapedValue = length <= JsonConstants.StackallocByteThreshold ? + stackalloc byte[JsonConstants.StackallocByteThreshold] : + (valueArray = ArrayPool<byte>.Shared.Rent(length)); + + JsonWriterHelper.EscapeString(utf8Value, escapedValue, firstEscapeIndexVal, _options.Encoder, out int consumed, out int written, isFinalSegment); + + WriteStringSegmentData(escapedValue.Slice(0, written)); + + Debug.Assert(consumed == utf8Value.Length || !isFinalSegment); + if (utf8Value.Length != consumed) + { + Debug.Assert(!isFinalSegment); + Debug.Assert(utf8Value.Length - consumed < 4); + PartialUtf8CodePoint = utf8Value.Slice(consumed); + } + + if (valueArray != null) + { + ArrayPool<byte>.Shared.Return(valueArray); + } + } + + private void WriteStringSegmentData(ReadOnlySpan<byte> escapedValue) + { + Debug.Assert(escapedValue.Length < int.MaxValue - 3); + + int requiredBytes = escapedValue.Length; + + if (_memory.Length - BytesPending < requiredBytes) + { + Grow(requiredBytes); + } + + Span<byte> output = _memory.Span; + + escapedValue.CopyTo(output.Slice(BytesPending)); + BytesPending += escapedValue.Length; + } + + private void WriteStringSegmentPrologue() + { + if (_options.Indented) + { + WriteStringSegmentIndentedPrologue(); + } + else + { + WriteStringSegmentMinimizedPrologue(); + } + } + + private void WriteStringSegmentIndentedPrologue() + { + int indent = Indentation; + Debug.Assert(indent <= _indentLength * _options.MaxDepth); + + // One quote and optionally 1 indent, 1 list separator and 1-2 bytes for new line + int bytesRequired = 1 + indent + 1 + _newLineLength; + if (_memory.Length - BytesPending < bytesRequired) + { + Grow(bytesRequired); + } + + Span<byte> output = _memory.Span; + + if (_currentDepth < 0) + { + output[BytesPending++] = JsonConstants.ListSeparator; + } + + if (_tokenType != JsonTokenType.PropertyName) + { + if (_tokenType != JsonTokenType.None) + { + WriteNewLine(output); + } + WriteIndentation(output.Slice(BytesPending), indent); + BytesPending += indent; + } + + output[BytesPending++] = JsonConstants.Quote; + } + + private void WriteStringSegmentMinimizedPrologue() + { + // One quote and optionally 1 list separator + int bytesRequired = 2; + if (_memory.Length - BytesPending < bytesRequired) + { + Grow(bytesRequired); + } + + 
Span<byte> output = _memory.Span; + + if (_currentDepth < 0) + { + output[BytesPending++] = JsonConstants.ListSeparator; + } + + output[BytesPending++] = JsonConstants.Quote; + } + + private void WriteStringSegmentEpilogue() + { + if (_memory.Length == BytesPending) + { + Grow(1); + } + + _memory.Span[BytesPending++] = JsonConstants.Quote; + } + + /// <summary> + /// Given a byte buffer <paramref name="dest"/>, concatenates as much of <paramref name="srcLeft"/> followed + /// by <paramref name="srcRight"/> into it as will fit, then returns the total number of bytes copied. + /// </summary> + private static int ConcatInto<T>(ReadOnlySpan<T> srcLeft, ReadOnlySpan<T> srcRight, Span<T> dest) + { + int total = 0; + for (int i = 0; i < srcLeft.Length; i++) + { + if ((uint)total >= (uint)dest.Length) + { + goto Finish; + } + else + { + dest[total++] = srcLeft[i]; + } + } + for (int i = 0; i < srcRight.Length; i++) + { + if ((uint)total >= (uint)dest.Length) + { + goto Finish; + } + else + { + dest[total++] = srcRight[i]; + } + } + Finish: + return total; + } + } +} diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.cs index f357a89d5d2d04..da9dc4b6bac503 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.cs @@ -5,12 +5,10 @@ using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; - -#if !NET -using System.Runtime.InteropServices; -#endif +using System.ComponentModel; namespace System.Text.Json { @@ -37,6 +35,16 @@ public sealed partial class Utf8JsonWriter : IDisposable, IAsyncDisposable private const int DefaultGrowthSize = 4096; private const int InitialGrowthSize = 256; + // A special value for JsonTokenType that lets the writer keep track of string segments. + private const JsonTokenType StringSegmentSentinel = (JsonTokenType)255; + + // Masks and flags for the length and encoding of the partial code point + private const byte PartialCodePointLengthMask = 0b000_000_11; + private const byte PartialCodePointEncodingMask = 0b000_111_00; + + private const byte PartialCodePointUtf8EncodingFlag = 0b000_001_00; + private const byte PartialCodePointUtf16EncodingFlag = 0b000_010_00; + private IBufferWriter<byte>? _output; private Stream? _stream; private ArrayBufferWriter<byte>? _arrayBufferWriter; @@ -48,6 +56,31 @@ public sealed partial class Utf8JsonWriter : IDisposable, IAsyncDisposable private JsonTokenType _tokenType; private BitStack _bitStack; + /// <summary> + /// This 3-byte array stores the partial code point leftover when writing a string value + /// segment that is split across multiple segment write calls. + /// </summary> +#if !NET + private byte[]? _partialCodePoint; + private Span<byte> PartialCodePointRaw => _partialCodePoint ??= new byte[3]; +#else + private Inline3ByteArray _partialCodePoint; + private Span<byte> PartialCodePointRaw => _partialCodePoint; + + [InlineArray(3)] + private struct Inline3ByteArray + { + public byte byte0; + } +#endif + + /// <summary> + /// Stores the length and encoding of the partial code point. Outside of segment writes, this value is 0. + /// Across segment writes, this value is always non-zero even if the length is 0, to indicate the encoding of the segment. 
+ /// This allows detection of encoding changes across segment writes. + /// </summary> + private byte _partialCodePointFlags; + // The highest order bit of _currentDepth is used to discern whether we are writing the first item in a list or not. // if (_currentDepth >> 31) == 1, add a list separator before writing the item // else, no list separator is needed since we are writing the first item. @@ -94,6 +127,91 @@ public sealed partial class Utf8JsonWriter : IDisposable, IAsyncDisposable /// </summary> public int CurrentDepth => _currentDepth & JsonConstants.RemoveFlagsBitMask; + /// <summary> + /// Length of the partial code point. + /// </summary> + private byte PartialCodePointLength + { + get => (byte)(_partialCodePointFlags & PartialCodePointLengthMask); + set => _partialCodePointFlags = (byte)((_partialCodePointFlags & ~PartialCodePointLengthMask) | (byte)value); + } + + /// <summary> + /// The partial UTF-8 code point. + /// </summary> + private ReadOnlySpan<byte> PartialUtf8CodePoint + { + get + { + Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Utf8); + + ReadOnlySpan<byte> partialCodePointBytes = PartialCodePointRaw; + Debug.Assert(partialCodePointBytes.Length == 3); + + byte length = PartialCodePointLength; + Debug.Assert(length < 4); + + return partialCodePointBytes.Slice(0, length); + } + + set + { + Debug.Assert(value.Length <= 3); + + Span<byte> partialCodePointBytes = PartialCodePointRaw; + + value.CopyTo(partialCodePointBytes); + PartialCodePointLength = (byte)value.Length; + } + } + + /// <summary> + /// The partial UTF-16 code point. + /// </summary> + private ReadOnlySpan<char> PartialUtf16CodePoint + { + get + { + Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.Utf16); + + ReadOnlySpan<byte> partialCodePointBytes = PartialCodePointRaw; + Debug.Assert(partialCodePointBytes.Length == 3); + + byte length = PartialCodePointLength; + Debug.Assert(length is 2 or 0); + + return MemoryMarshal.Cast<byte, char>(partialCodePointBytes.Slice(0, length)); + } + set + { + Debug.Assert(value.Length <= 1); + + Span<byte> partialCodePointBytes = PartialCodePointRaw; + + value.CopyTo(MemoryMarshal.Cast<byte, char>(partialCodePointBytes)); + PartialCodePointLength = (byte)(2 * value.Length); + } + } + + /// <summary> + /// Encoding used for the previous string segment write. + /// </summary> + private SegmentEncoding PreviousSegmentEncoding + { + get => (SegmentEncoding)(_partialCodePointFlags & PartialCodePointEncodingMask); + set => _partialCodePointFlags = (byte)((_partialCodePointFlags & ~PartialCodePointEncodingMask) | (byte)value); + } + + /// <summary> + /// Convenience enumeration to track the encoding of the partial code point. This must be kept in sync with the PartialCodePoint*Encoding flags. + /// </summary> + internal enum SegmentEncoding : byte + { + None = 0, + Utf8 = PartialCodePointUtf8EncodingFlag, + Utf16 = PartialCodePointUtf16EncodingFlag, + } + private Utf8JsonWriter() { } @@ -271,6 +389,9 @@ private void ResetHelper() _currentDepth = default; _bitStack = default; + + _partialCodePoint = default; + _partialCodePointFlags = default; } private void CheckNotDisposed() @@ -534,6 +655,9 @@ private void WriteStartSlow(byte token) private void ValidateStart() { + // Make sure a new object or array is not attempted within an unfinalized string. 
+ ValidateNotWithinUnfinalizedString(); + if (_inObject) { if (_tokenType != JsonTokenType.PropertyName) @@ -959,6 +1083,9 @@ private void WriteEndSlow(byte token) private void ValidateEnd(byte token) { + // Make sure an object is not ended within an unfinalized string. + ValidateNotWithinUnfinalizedString(); + if (_bitStack.CurrentDepth <= 0 || _tokenType == JsonTokenType.PropertyName) ThrowHelper.ThrowInvalidOperationException(ExceptionResource.MismatchedObjectArray, currentDepth: default, maxDepth: _options.MaxDepth, token, _tokenType); diff --git a/src/libraries/System.Text.Json/src/System/ThrowHelper.cs b/src/libraries/System.Text.Json/src/System/ThrowHelper.cs new file mode 100644 index 00000000000000..0c3bc8378e5d56 --- /dev/null +++ b/src/libraries/System.Text.Json/src/System/ThrowHelper.cs @@ -0,0 +1,115 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using SR = System.SR; + +namespace System +{ + internal static class ThrowHelper + { + [DoesNotReturn] + internal static void ThrowArgumentOutOfRangeException() + { + throw new ArgumentOutOfRangeException(); + } + + [DoesNotReturn] + internal static void ThrowArgumentException_DestinationTooShort() + { + throw new ArgumentException(SR.Argument_DestinationTooShort, "destination"); + } + + [DoesNotReturn] + internal static void ThrowArgumentException_CannotExtractScalar(ExceptionArgument argument) + { + throw GetArgumentException(ExceptionResource.Argument_CannotExtractScalar, argument); + } + + [DoesNotReturn] + internal static void ThrowArgumentOutOfRange_IndexMustBeLessException() + { + throw GetArgumentOutOfRangeException(ExceptionArgument.index, + ExceptionResource.ArgumentOutOfRange_IndexMustBeLess); + } + + [DoesNotReturn] + internal static void ThrowArgumentNullException(ExceptionArgument argument) + { + throw new ArgumentNullException(GetArgumentName(argument)); + } + + [DoesNotReturn] + internal static void ThrowArgumentOutOfRangeException(ExceptionArgument argument) + { + throw new ArgumentOutOfRangeException(GetArgumentName(argument)); + } + + private static ArgumentOutOfRangeException GetArgumentOutOfRangeException(ExceptionArgument argument, ExceptionResource resource) + { + return new ArgumentOutOfRangeException(GetArgumentName(argument), GetResourceString(resource)); + } + + private static ArgumentException GetArgumentException(ExceptionResource resource, ExceptionArgument argument) + { + return new ArgumentException(GetResourceString(resource), GetArgumentName(argument)); + } + + private static string GetArgumentName(ExceptionArgument argument) + { + switch (argument) + { + case ExceptionArgument.ch: + return nameof(ExceptionArgument.ch); + case ExceptionArgument.culture: + return nameof(ExceptionArgument.culture); + case ExceptionArgument.index: + return nameof(ExceptionArgument.index); + case ExceptionArgument.input: + return nameof(ExceptionArgument.input); + case ExceptionArgument.value: + return nameof(ExceptionArgument.value); + default: + Debug.Fail("The enum value is not defined, please check the ExceptionArgument Enum."); + return ""; + + }; + } + + private static string GetResourceString(ExceptionResource resource) + { + switch (resource) + { + case ExceptionResource.ArgumentOutOfRange_IndexMustBeLess: + return SR.ArgumentOutOfRange_IndexMustBeLess; + case ExceptionResource.Argument_CannotExtractScalar: + return SR.Argument_CannotExtractScalar; + 
default: + Debug.Fail("The enum value is not defined, please check the ExceptionResource Enum."); + return ""; + } + } + } + + // + // The convention for this enum is using the argument name as the enum name + // + internal enum ExceptionArgument + { + ch, + culture, + index, + input, + value, + } + + // + // The convention for this enum is using the resource name as the enum name + // + internal enum ExceptionResource + { + Argument_CannotExtractScalar, + ArgumentOutOfRange_IndexMustBeLess + } +} diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/JsonTestHelper.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/JsonTestHelper.cs index a3c139ee4831ee..7979d4eca35e3e 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/JsonTestHelper.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/JsonTestHelper.cs @@ -4,6 +4,7 @@ using System.Buffers; using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; using System.Text.Json.Tests; @@ -694,7 +695,13 @@ public static string GetCompactString(string jsonString) } } - public static void AssertContents(string expectedValue, ArrayBufferWriter<byte> buffer, bool skipSpecialRules = false) + public static void AssertContents( +#if NET + [StringSyntax(StringSyntaxAttribute.Json)] +#endif + string expectedValue, + ArrayBufferWriter<byte> buffer, + bool skipSpecialRules = false) { string value = Encoding.UTF8.GetString( buffer.WrittenSpan @@ -706,14 +713,26 @@ public static void AssertContents(string expectedValue, ArrayBufferWriter<byte> AssertContentsAgainstJsonNet(expectedValue, value, skipSpecialRules); } - public static void AssertContents(string expectedValue, MemoryStream stream, bool skipSpecialRules = false) + public static void AssertContents( +#if NET + [StringSyntax(StringSyntaxAttribute.Json)] +#endif + string expectedValue, + MemoryStream stream, + bool skipSpecialRules = false) { string value = Encoding.UTF8.GetString(stream.ToArray()); AssertContentsAgainstJsonNet(expectedValue, value, skipSpecialRules); } - public static void AssertContentsNotEqual(string expectedValue, ArrayBufferWriter<byte> buffer, bool skipSpecialRules = false) + public static void AssertContentsNotEqual( +#if NET + [StringSyntax(StringSyntaxAttribute.Json)] +#endif + string expectedValue, + ArrayBufferWriter<byte> buffer, + bool skipSpecialRules = false) { string value = Encoding.UTF8.GetString( buffer.WrittenSpan @@ -725,12 +744,24 @@ public static void AssertContentsNotEqual(string expectedValue, ArrayBufferWrite AssertContentsNotEqualAgainstJsonNet(expectedValue, value, skipSpecialRules); } - public static void AssertContentsAgainstJsonNet(string expectedValue, string value, bool skipSpecialRules) + public static void AssertContentsAgainstJsonNet( +#if NET + [StringSyntax(StringSyntaxAttribute.Json)] +#endif + string expectedValue, + string value, + bool skipSpecialRules) { Assert.Equal(expectedValue.NormalizeToJsonNetFormat(skipSpecialRules), value.NormalizeToJsonNetFormat(skipSpecialRules), ignoreLineEndingDifferences: true); } - public static void AssertContentsNotEqualAgainstJsonNet(string expectedValue, string value, bool skipSpecialRules) + public static void AssertContentsNotEqualAgainstJsonNet( +#if NET + [StringSyntax(StringSyntaxAttribute.Json)] +#endif + string expectedValue, + string value, + bool skipSpecialRules) { Assert.NotEqual(expectedValue.NormalizeToJsonNetFormat(skipSpecialRules), 
value.NormalizeToJsonNetFormat(skipSpecialRules)); } diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/System.Text.Json.Tests.csproj b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/System.Text.Json.Tests.csproj index 17437b0b6c9a8f..b70e73d3b83254 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/System.Text.Json.Tests.csproj +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/System.Text.Json.Tests.csproj @@ -245,6 +245,7 @@ <Compile Include="Utf8JsonReaderTests.TryGet.cs" /> <Compile Include="Utf8JsonReaderTests.TryGet.Date.cs" /> <Compile Include="Utf8JsonReaderTests.ValueTextEquals.cs" /> + <Compile Include="Utf8JsonWriterTests.Values.StringSegment.cs" /> <Compile Include="Utf8JsonWriterTests.cs" /> <Compile Include="Utf8JsonWriterTests.WriteRaw.cs" /> diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.Values.StringSegment.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.Values.StringSegment.cs new file mode 100644 index 00000000000000..9d95eeb0f9a26a --- /dev/null +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.Values.StringSegment.cs @@ -0,0 +1,1086 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + + + +using System.Buffers; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text.Encodings.Web; +using Xunit; + +namespace System.Text.Json.Tests +{ + public partial class Utf8JsonWriterTests + { + public static IEnumerable<JsonWriterOptions> BasicStringJsonOptions => + from indented in new[] { true, false } + from encoding in new[] { JavaScriptEncoder.Default, JavaScriptEncoder.UnsafeRelaxedJsonEscaping, JavaScriptEncoder.Create() } + select new JsonWriterOptions + { + Indented = indented, + Encoder = encoding + }; + + public static IEnumerable<object[]> BasicStringJsonOptions_TestData => + from option in BasicStringJsonOptions + select new object[] { option }; + + public static IEnumerable<char[]> InvalidUtf16Data() + { + char[][] input = [ + // Unpaired low surrogate + ['\uDC00'], + + // Unpaired high surrogate + ['\uD800'], + ['\uD800', '\uD800'], + + // Two unpaired low surrogates + ['a', '\uDC00', '\uDC00'], + ]; + + // Separate each case with a character + yield return input.SelectMany(arr => arr.Concat(['j'])).ToArray(); + + // Test without separation + yield return input.SelectMany(arr => arr).ToArray(); + } + + public static IEnumerable<object[]> InvalidUtf16DataWithOptions_TestData => + from data in InvalidUtf16Data() + from option in BasicStringJsonOptions + select new object[] { data, option }; + + [Theory] + [MemberData(nameof(InvalidUtf16DataWithOptions_TestData))] + public static void WriteStringValueSegment_Utf16_SplitCodePointsReplacement(char[] inputArr, JsonWriterOptions options) + { + var expectedChars = new char[inputArr.Length * MaxExpansionFactorWhileEscaping]; + + options.Encoder.Encode(inputArr, expectedChars, out int charsConsumed, out int charsWritten); + Assert.Equal(inputArr.Length, charsConsumed); + + SplitCodePointsHelper(inputArr, $@"""{new string(expectedChars, 0, charsWritten)}""", options); + } + + public static IEnumerable<byte[]> InvalidUtf8Data() + { + byte[][] input = [ + // Continuation without start + [0b10_111111], + + // 2-byte sequence containing < 2 bytes + [0b110_11111], + + // 2-byte overlong + 
[0b110_00000, 0b10_111111], + + // 3-byte sequence containing < 3 bytes + [0b1110_1111], + [0b1110_1111, 0b10_111111], + + // 3-byte overlong + [0b1110_0000, 0b10_000000, 0b10_000000], + + // 4-byte sequence containing < 4 bytes + [0b11110_100], + [0b11110_100, 0b10_001111], + [0b11110_100, 0b10_001111, 0b10_111111], + + // 4-byte overlong + [0b11110_000, 0b10_000000, 0b10_000000, 0b10_000000], + + // Greater than Unicode max value + [0b11110_111, 0b10_000000], + [0b11110_100, 0b10_100000, 0b10_000000], + ]; + + // Separate each case with a character + yield return input.SelectMany(arr => arr.Concat([(byte)'j'])).ToArray(); + + // Test without separation + yield return input.SelectMany(arr => arr).ToArray(); + } + + public static IEnumerable<object[]> InvalidUtf8DataWithOptions_TestData => + from data in InvalidUtf8Data() + from option in BasicStringJsonOptions + select new object[] { data, option }; + + [Theory] + [MemberData(nameof(InvalidUtf8DataWithOptions_TestData))] + public static void WriteStringValueSegment_Utf8_SplitCodePointsReplacement(byte[] inputArr, JsonWriterOptions options) + { + var expectedBytes = new byte[inputArr.Length * MaxExpansionFactorWhileEscaping]; + + options.Encoder.EncodeUtf8(inputArr, expectedBytes, out int bytesConsumed, out int bytesWritten); + Assert.Equal(inputArr.Length, bytesConsumed); + + string expectedString = $@"""{Encoding.UTF8.GetString(expectedBytes, 0, bytesWritten)}"""; + + SplitCodePointsHelper(inputArr, expectedString, options); + } + + private static void SplitCodePointsHelper<T>( + T[] inputArr, + string expected, + JsonWriterOptions options) + where T : struct + { + SplitCodePointsHelper<T>(inputArr, options, output => JsonTestHelper.AssertContents(expected, output)); + } + + private static void SplitCodePointsHelper<T>( + T[] inputArr, + JsonWriterOptions options, + Action<ArrayBufferWriter<byte>> assert) + where T : struct + { + SplitCodePointsHelper<T>(inputArr.AsSpan(), options, assert); + } + + private static void SplitCodePointsHelper<T>( + ReadOnlySpan<T> inputArr, + JsonWriterOptions options, + Action<ArrayBufferWriter<byte>> assert) + where T : struct + { + ReadOnlySpan<T> input = inputArr; + + // Sanity check with non-segmented API + { + var output = new ArrayBufferWriter<byte>(1024); + + using (var writer = new Utf8JsonWriter(output, options)) + { + WriteStringValueHelper(writer, input); + writer.Flush(); + } + + assert(output); + } + + for (int splitIndex = 0; splitIndex <= input.Length; splitIndex++) + { + var output = new ArrayBufferWriter<byte>(1024); + + using (var writer = new Utf8JsonWriter(output, options)) + { + WriteStringValueSegmentsHelper(writer, input.Slice(0, splitIndex), input.Slice(splitIndex)); + writer.Flush(); + } + + assert(output); + } + + for (int splitIndex = 0; splitIndex <= input.Length; splitIndex++) + { + for (int splitIndex2 = splitIndex; splitIndex2 <= input.Length; splitIndex2++) + { + var output = new ArrayBufferWriter<byte>(1024); + + using (var writer = new Utf8JsonWriter(output, options)) + { + WriteStringValueSegmentsHelper(writer, input.Slice(0, splitIndex), input.Slice(splitIndex, splitIndex2 - splitIndex), input.Slice(splitIndex2)); + writer.Flush(); + } + + assert(output); + } + } + } + + [Theory] + [MemberData(nameof(BasicStringJsonOptions_TestData))] + public static void WriteStringValueSegment_Utf16_Basic(JsonWriterOptions options) + { + WriteStringValueSegment_BasicHelper( + "Hello".AsSpan(), + " Wor".AsSpan(), + "ld!".AsSpan(), + options.Encoder.Encode("Hello"), + 
options.Encoder.Encode(" Wor"), + options.Encoder.Encode("ld!"), + options); + } + + [Theory] + [MemberData(nameof(BasicStringJsonOptions_TestData))] + public static void WriteStringValueSegment_Utf8_Basic(JsonWriterOptions options) + { + WriteStringValueSegment_BasicHelper( + "Hello"u8, + " Wor"u8, + "ld!"u8, + options.Encoder.Encode("Hello"), + options.Encoder.Encode(" Wor"), + options.Encoder.Encode("ld!"), + options); + } + + private static void WriteStringValueSegment_BasicHelper<T>( + ReadOnlySpan<T> segment1, + ReadOnlySpan<T> segment2, + ReadOnlySpan<T> segment3, + string expected1, + string expected2, + string expected3, + JsonWriterOptions options) + where T : struct + { + string indent = options.Indented ? new string(options.IndentCharacter, options.IndentSize) : ""; + string n = options.Indented ? options.NewLine : ""; + string ni = n + indent; + string nii = ni + indent; + string s = options.Indented ? " " : ""; + string e1 = '"' + expected1 + '"'; + string e2 = '"' + expected1 + expected2 + '"'; + string e3 = '"' + expected1 + expected2 + expected3 + '"'; + string foo = '"' + options.Encoder.Encode("foo") + '"'; + string bar = '"' + options.Encoder.Encode("bar") + '"'; + string baz = '"' + options.Encoder.Encode("baz") + '"'; + string inner = '"' + options.Encoder.Encode("inner") + '"'; + + // JSON string + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + WriteStringValueSegmentsHelper(jsonUtf8, segment1); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents(e1, output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents(e2, output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents(e3, output); + } + + // JSON array + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + WriteStringValueSegmentsHelper(jsonUtf8, segment1); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{n}{indent}{e1}{n}]", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{ni}{e2}{n}]", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{ni}{e3}{n}]", + output); + } + + // Middle item in array + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteBooleanValue(true); + WriteStringValueSegmentsHelper(jsonUtf8, segment1); + jsonUtf8.WriteBooleanValue(false); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{ni}true,{ni}{e1},{ni}false{n}]", + output); + } + + { + var output = new 
ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteBooleanValue(true); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + jsonUtf8.WriteBooleanValue(false); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{ni}true,{ni}{e2},{ni}false{n}]", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteBooleanValue(true); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + jsonUtf8.WriteBooleanValue(false); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{ni}true,{ni}{e3},{ni}false{n}]", + output); + } + + // Nested array + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteBooleanValue(true); + WriteStringValueSegmentsHelper(jsonUtf8, segment1); + jsonUtf8.WriteBooleanValue(false); + jsonUtf8.WriteEndArray(); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{ni}[{nii}true,{nii}{e1},{nii}false{ni}]{n}]", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteBooleanValue(true); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + jsonUtf8.WriteBooleanValue(false); + jsonUtf8.WriteEndArray(); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{ni}[{nii}true,{nii}{e2},{nii}false{ni}]{n}]", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteStartArray(); + jsonUtf8.WriteBooleanValue(true); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + jsonUtf8.WriteBooleanValue(false); + jsonUtf8.WriteEndArray(); + jsonUtf8.WriteEndArray(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $"[{ni}[{nii}true,{nii}{e3},{nii}false{ni}]{n}]", + output); + } + + // JSON object + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{foo}:{s}{e1}{n}}}", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{foo}:{s}{e2}{n}}}", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{foo}:{s}{e3}{n}}}", + output); + } + + // Middle item in object + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new 
Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WriteBoolean("bar", true); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1); + jsonUtf8.WriteBoolean("baz", false); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{bar}:{s}true,{ni}{foo}:{s}{e1},{ni}{baz}:{s}false{n}}}", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WriteBoolean("bar", true); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + jsonUtf8.WriteBoolean("baz", false); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{bar}:{s}true,{ni}{foo}:{s}{e2},{ni}{baz}:{s}false{n}}}", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WriteBoolean("bar", true); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + jsonUtf8.WriteBoolean("baz", false); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{bar}:{s}true,{ni}{foo}:{s}{e3},{ni}{baz}:{s}false{n}}}", + output); + } + + // Nested object + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WriteStartObject("inner"); + jsonUtf8.WriteBoolean("bar", true); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1); + jsonUtf8.WriteBoolean("baz", false); + jsonUtf8.WriteEndObject(); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{inner}:{s}{{{nii}{bar}:{s}true,{nii}{foo}:{s}{e1},{nii}{baz}:{s}false{ni}}}{n}}}", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WriteStartObject("inner"); + jsonUtf8.WriteBoolean("bar", true); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2); + jsonUtf8.WriteBoolean("baz", false); + jsonUtf8.WriteEndObject(); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{inner}:{s}{{{nii}{bar}:{s}true,{nii}{foo}:{s}{e2},{nii}{baz}:{s}false{ni}}}{n}}}", + output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, options); + jsonUtf8.WriteStartObject(); + jsonUtf8.WriteStartObject("inner"); + jsonUtf8.WriteBoolean("bar", true); + jsonUtf8.WritePropertyName("foo"); + WriteStringValueSegmentsHelper(jsonUtf8, segment1, segment2, segment3); + jsonUtf8.WriteBoolean("baz", false); + jsonUtf8.WriteEndObject(); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents( + $@"{{{ni}{inner}:{s}{{{nii}{bar}:{s}true,{nii}{foo}:{s}{e3},{nii}{baz}:{s}false{ni}}}{n}}}", + output); + } + } + + [Fact] + public static void WriteStringValueSegment_Utf16_BadSurrogatePairs() + { + const string result = "\\uFFFD\\uD83D\\uDE00\\uFFFD"; + + ReadOnlySpan<char> surrogates = ['\uD83D', '\uD83D', '\uDE00', '\uDE00']; + + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStartObject(); + jsonUtf8.WritePropertyName("full"); + // 
complete string -> expect 0xFFFD 0xD83D 0xDE00 0xFFFD + jsonUtf8.WriteStringValue(surrogates); + jsonUtf8.WritePropertyName("segmented"); + // only high surrogate -> expect cached + jsonUtf8.WriteStringValueSegment(surrogates.Slice(0, 1), isFinalSegment: false); + // only high surrogate -> expect 0xFFFD + jsonUtf8.WriteStringValueSegment(surrogates.Slice(0, 1), isFinalSegment: false); + // only low surrogate -> expect 0xD83D 0xDE00 + jsonUtf8.WriteStringValueSegment(surrogates.Slice(2, 1), isFinalSegment: false); + // only low surrogate -> expect 0xFFFD + jsonUtf8.WriteStringValueSegment(surrogates.Slice(2, 1), isFinalSegment: true); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents($"{{\"full\":\"{result}\",\"segmented\":\"{result}\"}}", output); + } + + [Fact] + public static void WriteStringValueSegment_Utf16_SplitInSurrogatePair() + { + const string result = "\\uD83D\\uDE00\\uD83D\\uDE00\\uD83D\\uDE00"; + + Span<char> surrogates = stackalloc char[] { '\uD83D', '\uDE00', '\uD83D', '\uDE00', '\uD83D', '\uDE00' }; + + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStartObject(); + jsonUtf8.WritePropertyName("full"); + // complete string -> expect 0xD83D 0xDE00 0xD83D 0xDE00 0xD83D 0xDE00 + jsonUtf8.WriteStringValue(surrogates); + jsonUtf8.WritePropertyName("segmented"); + // complete surrogate pair -> expect 0xD83D 0xDE00 + jsonUtf8.WriteStringValueSegment(surrogates.Slice(0, 2), isFinalSegment: false); + // only high surrogate -> expect cached + jsonUtf8.WriteStringValueSegment(surrogates.Slice(0, 1), isFinalSegment: false); + // low surrogate followed by another high surrogate -> expect 0xD83D 0xDE00 + cached + jsonUtf8.WriteStringValueSegment(surrogates.Slice(1, 2), isFinalSegment: false); + // only low surrogate -> expect 0xD83D 0xDE00 + jsonUtf8.WriteStringValueSegment(surrogates.Slice(1, 1), isFinalSegment: true); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents($"{{\"full\":\"{result}\",\"segmented\":\"{result}\"}}", output); + } + + [Fact] + public static void WriteStringValueSegment_Utf8_Split8CodePointsBasic() + { + const string result = "\\uD83D\\uDE00"; + + Span<byte> utf8Bytes = Encoding.UTF8.GetBytes("\uD83D\uDE00"); + + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStartObject(); + jsonUtf8.WritePropertyName("full"); + // complete string -> expect 0xD83D 0xDE00 + jsonUtf8.WriteStringValue(utf8Bytes); + jsonUtf8.WritePropertyName("segmented"); + // incomplete UTF-8 sequence -> expect cached + jsonUtf8.WriteStringValueSegment(utf8Bytes.Slice(0, 1), isFinalSegment: false); + // incomplete UTF-8 sequence -> expect cached + jsonUtf8.WriteStringValueSegment(utf8Bytes.Slice(1, 1), isFinalSegment: false); + // remainder of UTF-8 sequence -> expect 0xD83D 0xDE00 + jsonUtf8.WriteStringValueSegment(utf8Bytes.Slice(2, 2), isFinalSegment: true); + jsonUtf8.WriteEndObject(); + jsonUtf8.Flush(); + + JsonTestHelper.AssertContents($"{{\"full\":\"{result}\",\"segmented\":\"{result}\"}}", output); + } + + [Fact] + public static void WriteStringValueSegment_Utf8_ClearedPartial() + { + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + jsonUtf8.WriteStartArray(); + + jsonUtf8.WriteStringValueSegment([0b110_11111], false); + jsonUtf8.WriteStringValueSegment([0b10_111111], true); + + jsonUtf8.WriteStringValueSegment([0b10_111111], true); + + 
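+ // 0b110_11111 0b10_111111 is the two-byte UTF-8 encoding of U+07FF; the extra continuation
+ // byte below has no lead byte to complete and is therefore replaced with U+FFFD, which is
+ // what the AssertContents call at the end of this block expects.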
jsonUtf8.WriteStringValueSegment([0b110_11111], false); + jsonUtf8.WriteStringValueSegment([0b10_111111], false); + jsonUtf8.WriteStringValueSegment([0b10_111111], true); + + jsonUtf8.WriteEndArray(); + + jsonUtf8.Flush(); + + // First code point is written (escaped) and the second is replaced. + JsonTestHelper.AssertContents("""["\u07ff","\uFFFD","\u07ff\uFFFD"]""", output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output, new JsonWriterOptions { Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping }); + + jsonUtf8.WriteStartArray(); + + jsonUtf8.WriteStringValueSegment([0b110_11111], false); + jsonUtf8.WriteStringValueSegment([0b10_111111], true); + + jsonUtf8.WriteStringValueSegment([0b10_111111], true); + + jsonUtf8.WriteStringValueSegment([0b110_11111], false); + jsonUtf8.WriteStringValueSegment([0b10_111111], false); + jsonUtf8.WriteStringValueSegment([0b10_111111], true); + + jsonUtf8.WriteEndArray(); + + jsonUtf8.Flush(); + + // First code point is written (unescaped) and the second is replaced. + JsonTestHelper.AssertContents($"""["{'\u07ff'}","\uFFFD","{'\u07ff'}\uFFFD"]""", output); + } + } + + [Fact] + public static void WriteStringValueSegment_Utf16_ClearedPartial() + { + var output = new ArrayBufferWriter<byte>(); + + { + using var jsonUtf8 = new Utf8JsonWriter(output); + + jsonUtf8.WriteStartArray(); + + WriteStringValueSegmentsHelper(jsonUtf8, ['\uD800'], ['\uDC00']); + WriteStringValueSegmentsHelper(jsonUtf8, ['\uDC00']); + WriteStringValueSegmentsHelper(jsonUtf8, ['\uD800'], ['\uDC00'], ['\uDC00']); + + jsonUtf8.WriteEndArray(); + + jsonUtf8.Flush(); + + // First code point is written and the second is replaced. + JsonTestHelper.AssertContents("""["\uD800\uDC00","\uFFFD","\uD800\uDC00\uFFFD"]""", output); + } + } + + [Fact] + public static void WriteStringValueSegment_Flush() + { + var noEscape = JavaScriptEncoder.UnsafeRelaxedJsonEscaping; + TestFlushImpl('\uD800', '\uDC00', new(), @"""\uD800\uDC00"""); + TestFlushImpl<byte>(0b110_11111, 0b10_111111, new(), @"""\u07FF"""); + TestFlushImpl<byte>(0b110_11111, 0b10_111111, new() { Encoder = noEscape }, "\"\u07FF\""); + + void TestFlushImpl<T>(T unit1, T unit2, JsonWriterOptions options, string expected) + where T : struct + { + byte[] expectedBytes = Encoding.UTF8.GetBytes(expected); + var output = new ArrayBufferWriter<byte>(); + using Utf8JsonWriter jsonUtf8 = new(output, options); + + WriteStringValueSegmentHelper(jsonUtf8, [unit1], false); + + Assert.Equal(0, output.WrittenCount); + Assert.Equal(0, jsonUtf8.BytesCommitted); + Assert.Equal(1, jsonUtf8.BytesPending); + + jsonUtf8.Flush(); + Assert.Equal(1, output.WrittenCount); + Assert.Equal(1, jsonUtf8.BytesCommitted); + Assert.Equal(0, jsonUtf8.BytesPending); + + WriteStringValueSegmentHelper(jsonUtf8, [unit2], true); + + Assert.Equal(1, output.WrittenCount); + Assert.Equal(1, jsonUtf8.BytesCommitted); + Assert.Equal(expectedBytes.Length - 1, jsonUtf8.BytesPending); + + jsonUtf8.Flush(); + Assert.Equal(expectedBytes.Length, output.WrittenCount); + Assert.Equal(expectedBytes.Length, jsonUtf8.BytesCommitted); + Assert.Equal(0, jsonUtf8.BytesPending); + + JsonTestHelper.AssertContents(expected, output); + } + } + + [Fact] + public static void WriteStringValueSegment_Utf16_Reset() + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + jsonUtf8.WriteStringValueSegment("\uD800".AsSpan(), false); + jsonUtf8.Flush(); + + Assert.Equal(0, jsonUtf8.BytesPending); + 
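+ // Only the opening quote (one byte) has been committed; the unpaired high surrogate is held
+ // as partial-code-point state rather than being counted as pending output bytes.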
Assert.Equal(1, jsonUtf8.BytesCommitted); + + jsonUtf8.Reset(); + + Assert.Equal(0, jsonUtf8.BytesPending); + Assert.Equal(0, jsonUtf8.BytesCommitted); + + jsonUtf8.WriteStringValueSegment("\uDC00".AsSpan(), true); + + string expected = @"""\uFFFD"""; + Assert.Equal(expected.Length, jsonUtf8.BytesPending); + Assert.Equal(0, jsonUtf8.BytesCommitted); + + jsonUtf8.Flush(); + + Assert.Equal(0, jsonUtf8.BytesPending); + Assert.Equal(expected.Length, jsonUtf8.BytesCommitted); + JsonTestHelper.AssertContents('"' + expected, output); + } + + [Fact] + public static void WriteStringValueSegment_Utf8_Reset() + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + jsonUtf8.WriteStringValueSegment([0b110_11111], false); + jsonUtf8.Flush(); + + Assert.Equal(0, jsonUtf8.BytesPending); + Assert.Equal(1, jsonUtf8.BytesCommitted); + + jsonUtf8.Reset(); + + Assert.Equal(0, jsonUtf8.BytesPending); + Assert.Equal(0, jsonUtf8.BytesCommitted); + + jsonUtf8.WriteStringValueSegment([0b10_111111], true); + + string expected = @"""\uFFFD"""; + Assert.Equal(expected.Length, jsonUtf8.BytesPending); + Assert.Equal(0, jsonUtf8.BytesCommitted); + + jsonUtf8.Flush(); + + Assert.Equal(0, jsonUtf8.BytesPending); + Assert.Equal(expected.Length, jsonUtf8.BytesCommitted); + JsonTestHelper.AssertContents('"' + expected, output); + } + + [Fact] + public static void WriteStringValueSegment_MixEncoding() + { + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + // High surrogate + jsonUtf8.WriteStringValueSegment("\uD8D8".AsSpan(), false); + + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WriteStringValueSegment([0b10_111111], true)); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + + // Start of a 3-byte sequence + jsonUtf8.WriteStringValueSegment([0b1110_1111], false); + + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WriteStringValueSegment("\u8080".AsSpan(), true)); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment([0b110_11111], false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-8 sequence will still keep the partial code point + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-16 sequence will throw + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, false)); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(['\uD800'], false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-16 sequence will still keep the partial code point + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + + // Writing empty UTF-8 sequence will throw + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, false)); + } + } + + [Fact] + public static void WriteStringValueSegment_Empty() + { + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, true); + jsonUtf8.Flush(); + 
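+ // A lone empty segment marked final still opens and closes the string, producing an empty JSON string.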
JsonTestHelper.AssertContents("\"\"", output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, false); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, true); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"\"", output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, false); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, false); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<byte>.Empty, true); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"\"", output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, true); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"\"", output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, false); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"", output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, false); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, true); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"\"", output); + } + + { + var output = new ArrayBufferWriter<byte>(); + using var jsonUtf8 = new Utf8JsonWriter(output); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, false); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, false); + jsonUtf8.WriteStringValueSegment(ReadOnlySpan<char>.Empty, true); + jsonUtf8.Flush(); + JsonTestHelper.AssertContents("\"\"", output); + } + } + + // Switch this to use an enum discriminator input when base64 is supported + private static void WriteStringValueHelper<T>(Utf8JsonWriter writer, ReadOnlySpan<T> value) + where T : struct + { + if (typeof(T) == typeof(char)) + { + writer.WriteStringValue(MemoryMarshal.Cast<T, char>(value)); + } + else if (typeof(T) == typeof(byte)) + { + writer.WriteStringValue(MemoryMarshal.Cast<T, byte>(value)); + } + else + { + if (typeof(T) == typeof(int)) + { + Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {nameof(WriteStringValueHelper)}."); + } + else + { + Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueHelper)}."); + } + } + } + + // Switch this to use an enum discriminator input when base64 is supported + private static void WriteStringValueSegmentHelper<T>(Utf8JsonWriter writer, ReadOnlySpan<T> value, bool isFinal) + where T : struct + { + if (typeof(T) == typeof(char)) + { + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, char>(value), isFinal); + } + else if (typeof(T) == typeof(byte)) + { + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, byte>(value), isFinal); + } + else + { + if (typeof(T) == typeof(int)) + { + Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? 
Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + } + else + { + Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + } + } + } + + // Switch this to use an enum discriminator input when base64 is supported + private static void WriteStringValueSegmentsHelper<T>(Utf8JsonWriter writer, ReadOnlySpan<T> value) + where T : struct + { + if (typeof(T) == typeof(char)) + { + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, char>(value), true); + } + else if (typeof(T) == typeof(byte)) + { + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, byte>(value), true); + } + else + { + if (typeof(T) == typeof(int)) + { + Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + } + else + { + Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + } + } + } + + // Switch this to use an enum discriminator input when base64 is supported + private static void WriteStringValueSegmentsHelper<T>(Utf8JsonWriter writer, ReadOnlySpan<T> value1, ReadOnlySpan<T> value2) + where T : struct + { + if (typeof(T) == typeof(char)) + { + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, char>(value1), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, char>(value2), true); + } + else if (typeof(T) == typeof(byte)) + { + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, byte>(value1), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, byte>(value2), true); + } + else + { + if (typeof(T) == typeof(int)) + { + Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + } + else + { + Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + } + } + } + + // Switch this to use an enum discriminator input when base64 is supported + private static void WriteStringValueSegmentsHelper<T>(Utf8JsonWriter writer, ReadOnlySpan<T> value1, ReadOnlySpan<T> value2, ReadOnlySpan<T> value3) + where T : struct + { + if (typeof(T) == typeof(char)) + { + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, char>(value1), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, char>(value2), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, char>(value3), true); + } + else if (typeof(T) == typeof(byte)) + { + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, byte>(value1), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, byte>(value2), false); + writer.WriteStringValueSegment(MemoryMarshal.Cast<T, byte>(value3), true); + } + else + { + if (typeof(T) == typeof(int)) + { + Assert.Fail($"Did you pass in int or int[] instead of byte or byte[]? 
Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + } + else + { + Assert.Fail($"Type {typeof(T)} is not supported by {nameof(WriteStringValueSegmentsHelper)}."); + } + } + } + + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, string value) + => WriteStringValueSegmentsHelper(writer, value.AsSpan()); + + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, string value1, string value2) + => WriteStringValueSegmentsHelper(writer, value1.AsSpan(), value2.AsSpan()); + + private static void WriteStringValueSegmentsHelper(Utf8JsonWriter writer, string value1, string value2, string value3) + => WriteStringValueSegmentsHelper(writer, value1.AsSpan(), value2.AsSpan(), value3.AsSpan()); + } +} diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs index 4515e69412d3f3..382349214beb1b 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs @@ -230,6 +230,12 @@ public void EscapingTestWhileWriting(char replacementChar, JavaScriptEncoder enc written = WriteUtf8StringHelper(writerOptions, Array.Empty<byte>()); Assert.Equal(-1, written.Span.IndexOf((byte)'\\')); + + written = WriteStringSegmentHelper(writerOptions, Array.Empty<char>()); + Assert.Equal(-1, written.Span.IndexOf((byte)'\\')); + + written = WriteUtf8StringSegmentHelper(writerOptions, Array.Empty<byte>()); + Assert.Equal(-1, written.Span.IndexOf((byte)'\\')); } var random = new Random(42); @@ -263,6 +269,21 @@ public void EscapingTestWhileWriting(char replacementChar, JavaScriptEncoder enc written = WriteUtf8StringHelper(writerOptions, sourceUtf8); escapedIndex = written.Span.IndexOf((byte)'\\'); Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex); // Account for the start quote + + if (dataLength < 10) + { + SplitCodePointsHelper(changed, writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex); // Account for the start quote + }); + + SplitCodePointsHelper(changed, writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex); // Account for the start quote + }); + } } if (dataLength != 0) @@ -279,6 +300,21 @@ public void EscapingTestWhileWriting(char replacementChar, JavaScriptEncoder enc written = WriteUtf8StringHelper(writerOptions, sourceUtf8); escapedIndex = written.Span.IndexOf((byte)'\\'); Assert.Equal(requiresEscaping ? 1 : -1, escapedIndex); // Account for the start quote + + if (dataLength < 10) + { + SplitCodePointsHelper(changed, writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + Assert.Equal(requiresEscaping ? 1 : -1, escapedIndex); // Account for the start quote + }); + + SplitCodePointsHelper(sourceUtf8, writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + Assert.Equal(requiresEscaping ? 
1 : -1, escapedIndex); // Account for the start quote + }); + } } } } @@ -288,82 +324,82 @@ public static IEnumerable<object[]> EscapingTestData get { return new List<object[]> - { - new object[] { 'a', null, false }, // ASCII not escaped - new object[] { '\u001F', null, true }, // control character within single byte range - new object[] { '\u2000', null, true }, // space character outside single byte range - new object[] { '\u00A2', null, true }, // non-ASCII but < 255 - new object[] { '\uA686', null, true }, // non-ASCII above short.MaxValue - new object[] { '\u6C49', null, true }, // non-ASCII from chinese alphabet - multibyte - new object[] { '"', null, true }, // ASCII but must always be escaped in JSON - new object[] { '\\', null, true }, // ASCII but must always be escaped in JSON - new object[] { '<', null, true }, // ASCII but escaped by default - new object[] { '>', null, true }, // ASCII but escaped by default - new object[] { '&', null, true }, // ASCII but escaped by default - new object[] { '`', null, true }, // ASCII but escaped by default - new object[] { '\'', null, true }, // ASCII but escaped by default - new object[] { '+', null, true }, // ASCII but escaped by default - - new object[] { 'a', JavaScriptEncoder.Default, false }, - new object[] { '\u001F', JavaScriptEncoder.Default, true }, - new object[] { '\u2000', JavaScriptEncoder.Default, true }, - new object[] { '\u00A2', JavaScriptEncoder.Default, true }, - new object[] { '\uA686', JavaScriptEncoder.Default, true }, - new object[] { '\u6C49', JavaScriptEncoder.Default, true }, - new object[] { '"', JavaScriptEncoder.Default, true }, - new object[] { '\\', JavaScriptEncoder.Default, true }, - new object[] { '<', JavaScriptEncoder.Default, true }, - new object[] { '>', JavaScriptEncoder.Default, true }, - new object[] { '&', JavaScriptEncoder.Default, true }, - new object[] { '`', JavaScriptEncoder.Default, true }, - new object[] { '\'', JavaScriptEncoder.Default, true }, - new object[] { '+', JavaScriptEncoder.Default, true }, - - new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), false }, - new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, - - new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false }, - new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false }, - new object[] { 
'\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false }, - new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false }, - new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - - new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, - new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, - new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, - new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, - new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '`', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - }; + { + new object[] { 'a', null, false }, // ASCII not escaped + new object[] { '\u001F', null, true }, // control character within single byte range + new object[] { '\u2000', null, true }, // space character outside single byte range + new object[] { '\u00A2', null, true }, // non-ASCII but < 255 + new object[] { '\uA686', null, true }, // non-ASCII above short.MaxValue + new object[] { '\u6C49', null, true }, // non-ASCII from chinese alphabet - multibyte + new object[] { '"', null, true }, // ASCII but must always be escaped in JSON + new object[] { '\\', null, true }, // ASCII but must always be escaped in JSON + new object[] { '<', null, true }, // ASCII but escaped by default + new object[] { '>', null, true }, // ASCII but escaped by default + new object[] { '&', null, true }, // ASCII but escaped by default + new object[] { '`', null, true }, // ASCII but escaped by default + new object[] { '\'', null, true }, // ASCII but escaped by default + new object[] { '+', null, true }, // ASCII but escaped by default + + new object[] { 'a', JavaScriptEncoder.Default, false }, + new object[] { '\u001F', JavaScriptEncoder.Default, true }, + new object[] { '\u2000', JavaScriptEncoder.Default, true }, + new object[] { '\u00A2', JavaScriptEncoder.Default, true }, + new object[] { '\uA686', JavaScriptEncoder.Default, true }, + new object[] { '\u6C49', JavaScriptEncoder.Default, true }, + new object[] { '"', JavaScriptEncoder.Default, true }, + new object[] { '\\', JavaScriptEncoder.Default, true }, + new object[] { '<', JavaScriptEncoder.Default, true }, + new object[] { '>', JavaScriptEncoder.Default, true }, + new object[] { '&', JavaScriptEncoder.Default, true }, + new object[] { '`', 
JavaScriptEncoder.Default, true }, + new object[] { '\'', JavaScriptEncoder.Default, true }, + new object[] { '+', JavaScriptEncoder.Default, true }, + + new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), false }, + new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true }, + + new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false }, + new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false }, + new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false }, + new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false }, + new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + + new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, + new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, + new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, + new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, + new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '`', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + }; } } @@ -389,6 +425,19 @@ public unsafe void 
WriteString_NonAscii(char replacementChar, JavaScriptEncoder written = WriteUtf8StringHelper(writerOptions, sourceUtf8); Assert.Equal(-1, written.Span.IndexOf((byte)'\\')); + if (dataLength < 10) + { + SplitCodePointsHelper(str, writerOptions, output => + { + Assert.Equal(-1, output.WrittenSpan.IndexOf((byte)'\\')); + }); + + SplitCodePointsHelper(sourceUtf8, writerOptions, output => + { + Assert.Equal(-1, output.WrittenSpan.IndexOf((byte)'\\')); + }); + } + for (int i = 0; i < dataLength; i++) { string source = baseStr.Insert(i, new string(replacementChar, 1)); @@ -403,6 +452,23 @@ public unsafe void WriteString_NonAscii(char replacementChar, JavaScriptEncoder escapedIndex = written.Span.IndexOf((byte)'\\'); // Each CJK character expands to 3 utf-8 bytes. Assert.Equal(requiresEscaping ? ((i * 3) + 1) : -1, escapedIndex); // Account for the start quote + + if (dataLength < 10) + { + SplitCodePointsHelper(source.ToCharArray(), writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + // Each CJK character expands to 3 utf-8 bytes. + Assert.Equal(requiresEscaping ? ((i * 3) + 1) : -1, escapedIndex); // Account for the start quote + }); + + SplitCodePointsHelper(sourceUtf8, writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + // Each CJK character expands to 3 utf-8 bytes. + Assert.Equal(requiresEscaping ? ((i * 3) + 1) : -1, escapedIndex); // Account for the start quote + }); + } } } } @@ -412,37 +478,37 @@ public static IEnumerable<object[]> EscapingTestData_NonAscii get { return new List<object[]> - { - new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false }, - new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false }, - new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false }, - new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false }, - new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true }, - - new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, - new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, - new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, - new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, - new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '`', 
JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, - }; + { + new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false }, + new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false }, + new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false }, + new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false }, + new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true }, + + new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, + new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, + new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, + new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true }, + new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '`', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false }, + }; } } @@ -470,6 +536,19 @@ public void EscapingTestWhileWritingSurrogate(JavaScriptEncoder encoder) written = WriteUtf8StringHelper(writerOptions, sourceUtf8); Assert.Equal(-1, written.Span.IndexOf((byte)'\\')); + if (dataLength < 10) + { + SplitCodePointsHelper(str, writerOptions, output => + { + Assert.Equal(-1, output.WrittenSpan.IndexOf((byte)'\\')); + }); + + SplitCodePointsHelper(sourceUtf8, writerOptions, output => + { + Assert.Equal(-1, output.WrittenSpan.IndexOf((byte)'\\')); + }); + } + for (int i = 0; i < dataLength - 1; i++) { char[] changed = baseStr.ToCharArray(); @@ -485,6 +564,21 @@ public void EscapingTestWhileWritingSurrogate(JavaScriptEncoder encoder) written = WriteUtf8StringHelper(writerOptions, sourceUtf8); escapedIndex = written.Span.IndexOf((byte)'\\'); Assert.Equal(i + 1, escapedIndex); // Account for the start quote + + if (dataLength < 10) + { + SplitCodePointsHelper(changed, writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + Assert.Equal(i + 1, escapedIndex); // Account for the start quote + }); + + SplitCodePointsHelper(sourceUtf8, writerOptions, output => + { + escapedIndex = 
output.WrittenSpan.IndexOf((byte)'\\'); + Assert.Equal(i + 1, escapedIndex); // Account for the start quote + }); + } } { @@ -506,6 +600,21 @@ public void EscapingTestWhileWritingSurrogate(JavaScriptEncoder encoder) written = WriteUtf8StringHelper(writerOptions, sourceUtf8); escapedIndex = written.Span.IndexOf((byte)'\\'); Assert.Equal(1, escapedIndex); // Account for the start quote + + if (dataLength < 10) + { + SplitCodePointsHelper(changed, writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + Assert.Equal(1, escapedIndex); // Account for the start quote + }); + + SplitCodePointsHelper(sourceUtf8, writerOptions, output => + { + escapedIndex = output.WrittenSpan.IndexOf((byte)'\\'); + Assert.Equal(1, escapedIndex); // Account for the start quote + }); + } } } } @@ -515,13 +624,13 @@ public static IEnumerable<object[]> JavaScriptEncoders get { return new List<object[]> - { - new object[] { null }, - new object[] { JavaScriptEncoder.Default }, - new object[] { JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) }, - new object[] { JavaScriptEncoder.Create(UnicodeRanges.All) }, - new object[] { JavaScriptEncoder.UnsafeRelaxedJsonEscaping }, - }; + { + new object[] { null }, + new object[] { JavaScriptEncoder.Default }, + new object[] { JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) }, + new object[] { JavaScriptEncoder.Create(UnicodeRanges.All) }, + new object[] { JavaScriptEncoder.UnsafeRelaxedJsonEscaping }, + }; } } @@ -555,6 +664,19 @@ public unsafe void WriteStringInvalidCharacter(char replacementChar, JavaScriptE written = WriteUtf8StringHelper(writerOptions, sourceUtf8); Assert.True(BeginsWithReplacementCharacter(written.Span.Slice(i + 1))); // +1 to account for starting quote + + if (dataLength < 10) + { + SplitCodePointsHelper(changed, writerOptions, output => + { + Assert.True(BeginsWithReplacementCharacter(output.WrittenSpan.Slice(i + 1))); // +1 to account for starting quote + }); + + SplitCodePointsHelper(sourceUtf8, writerOptions, output => + { + Assert.True(BeginsWithReplacementCharacter(output.WrittenSpan.Slice(i + 1))); // +1 to account for starting quote + }); + } } } @@ -586,19 +708,19 @@ public static IEnumerable<object[]> InvalidEscapingTestData get { return new List<object[]> - { - new object[] { '\uD801', JavaScriptEncoder.Default }, // Invalid, high surrogate alone - new object[] { '\uDC01', JavaScriptEncoder.Default }, // Invalid, low surrogate alone + { + new object[] { '\uD801', JavaScriptEncoder.Default }, // Invalid, high surrogate alone + new object[] { '\uDC01', JavaScriptEncoder.Default }, // Invalid, low surrogate alone - new object[] { '\uD801', JavaScriptEncoder.UnsafeRelaxedJsonEscaping }, - new object[] { '\uDC01', JavaScriptEncoder.UnsafeRelaxedJsonEscaping }, + new object[] { '\uD801', JavaScriptEncoder.UnsafeRelaxedJsonEscaping }, + new object[] { '\uDC01', JavaScriptEncoder.UnsafeRelaxedJsonEscaping }, - new object[] { '\uD801', JavaScriptEncoder.Create(UnicodeRanges.All) }, - new object[] { '\uDC01', JavaScriptEncoder.Create(UnicodeRanges.All) }, + new object[] { '\uD801', JavaScriptEncoder.Create(UnicodeRanges.All) }, + new object[] { '\uDC01', JavaScriptEncoder.Create(UnicodeRanges.All) }, - new object[] { '\uD801', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) }, - new object[] { '\uDC01', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) }, - }; + new object[] { '\uD801', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) }, + new object[] { '\uDC01', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) 
}, + }; } } @@ -622,6 +744,26 @@ private static ReadOnlyMemory<byte> WriteUtf8StringHelper(JsonWriterOptions writ return output.WrittenMemory; } + private static ReadOnlyMemory<byte> WriteStringSegmentHelper(JsonWriterOptions writerOptions, ReadOnlySpan<char> str) + { + var output = new ArrayBufferWriter<byte>(); + using (var writer = new Utf8JsonWriter(output, writerOptions)) + { + writer.WriteStringValueSegment(str, true); + } + return output.WrittenMemory; + } + + private static ReadOnlyMemory<byte> WriteUtf8StringSegmentHelper(JsonWriterOptions writerOptions, ReadOnlySpan<byte> utf8str) + { + var output = new ArrayBufferWriter<byte>(); + using (var writer = new Utf8JsonWriter(output, writerOptions)) + { + writer.WriteStringValueSegment(utf8str, true); + } + return output.WrittenMemory; + } + [Fact] public void WriteJsonWritesToIBWOnDemand_Dispose() { @@ -1653,6 +1795,7 @@ public void FixedSizeBufferWriter_Decimal(JsonWriterOptions options) } } + private const JsonValueKind JsonValueKindStringSegment = (JsonValueKind)(1 << 7); public static IEnumerable<object[]> InvalidJsonDueToWritingMultipleValues_TestData() => JsonOptionsWith([ JsonValueKind.Array, @@ -1662,6 +1805,7 @@ public static IEnumerable<object[]> InvalidJsonDueToWritingMultipleValues_TestDa JsonValueKind.True, JsonValueKind.False, JsonValueKind.Null, + JsonValueKindStringSegment ]); [Theory] @@ -1718,6 +1862,30 @@ public void InvalidJsonDueToWritingMultipleValues(JsonWriterOptions options, Jso ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValue("foo"), options.SkipValidation); } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo".AsSpan(), true), options.SkipValidation); + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo".AsSpan(), false), options.SkipValidation); + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo"u8, true), options.SkipValidation); + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo"u8, false), options.SkipValidation); + } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) { WritePreamble(jsonUtf8, kind); @@ -1828,6 +1996,30 @@ public void InvalidJsonDueToWritingMultipleValuesWithComments(JsonWriterOptions ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValue("foo"), options.SkipValidation); } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind, addComments: true); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo".AsSpan(), true), options.SkipValidation); + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind, addComments: true); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo".AsSpan(), false), options.SkipValidation); + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind, addComments: true); + ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo"u8, true), options.SkipValidation); + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + WritePreamble(jsonUtf8, kind, addComments: true); + 
ValidateAction(jsonUtf8, () => jsonUtf8.WriteStringValueSegment("foo"u8, false), options.SkipValidation); + } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) { WritePreamble(jsonUtf8, kind, addComments: true); @@ -1918,6 +2110,10 @@ private void WritePreamble(Utf8JsonWriter writer, JsonValueKind kind, bool addCo case JsonValueKind.Null: writer.WriteNullValue(); break; + case JsonValueKindStringSegment: + writer.WriteStringValueSegment("foo".ToCharArray(), false); + writer.WriteStringValueSegment("bar".ToCharArray(), true); + break; default: Debug.Fail($"Invalid JsonValueKind passed in '{kind}'."); break; @@ -2042,7 +2238,7 @@ public void InvalidJsonMismatch(JsonWriterOptions options) jsonUtf8.WriteStartObject(); if (options.SkipValidation) { - jsonUtf8.WriteStringValue("key"); + jsonUtf8.WriteStringValue("value"); } else { @@ -2050,6 +2246,28 @@ public void InvalidJsonMismatch(JsonWriterOptions options) } } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + jsonUtf8.WriteStartObject(); + if (options.SkipValidation) + { + jsonUtf8.WriteStringValueSegment(['a', 'b'], true); + jsonUtf8.WriteStringValueSegment(['a', 'b'], false); + jsonUtf8.WriteStringValueSegment(['a', 'b'], true); + + jsonUtf8.WriteStringValueSegment([65, 66], true); + jsonUtf8.WriteStringValueSegment([65, 66], false); + jsonUtf8.WriteStringValueSegment([65, 66], true); + } + else + { + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WriteStringValueSegment(['a', 'b'], true)); + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WriteStringValueSegment(['a', 'b'], false)); + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WriteStringValueSegment([65, 66], true)); + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WriteStringValueSegment([65, 66], false)); + } + } + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) { jsonUtf8.WriteStartArray(); @@ -2586,6 +2804,81 @@ public void InvalidJsonPrimitive(JsonWriterOptions options) Assert.Throws<InvalidOperationException>(() => jsonUtf8.WritePropertyName("test name")); } } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + jsonUtf8.WriteStringValueSegment("a".AsSpan(), true); + if (options.SkipValidation) + { + jsonUtf8.WritePropertyName("test name"); + } + else + { + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WritePropertyName("test name")); + } + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + jsonUtf8.WriteStringValueSegment("a"u8, true); + if (options.SkipValidation) + { + jsonUtf8.WritePropertyName("test name"); + } + else + { + Assert.Throws<InvalidOperationException>(() => jsonUtf8.WritePropertyName("test name")); + } + } + } + + // Name is present in the test data to make it easier to identify the test case + public static IEnumerable<object[]> InvalidJsonStringValueSegment_TestData => + from write in new (string methodName, Action<Utf8JsonWriter> method)[] { + (nameof(Utf8JsonWriter.WriteStartObject), writer => writer.WriteStartObject()), + (nameof(Utf8JsonWriter.WriteEndObject), writer => writer.WriteEndObject()), + (nameof(Utf8JsonWriter.WriteStartArray), writer => writer.WriteStartArray()), + (nameof(Utf8JsonWriter.WriteEndArray), writer => writer.WriteEndArray()), + (nameof(Utf8JsonWriter.WriteBooleanValue), writer => writer.WriteBooleanValue(true)), + (nameof(Utf8JsonWriter.WriteBoolean), writer => writer.WriteBoolean("foo", true)), + (nameof(Utf8JsonWriter.WriteCommentValue), writer => writer.WriteCommentValue("comment")), + 
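+ // As with the other value kinds above, writing a string segment after the preamble has already
+ // produced a complete root value (even with interleaving comments) is expected to succeed only
+ // when SkipValidation is enabled; otherwise the writer throws.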
(nameof(Utf8JsonWriter.WriteNullValue), writer => writer.WriteNullValue()), + (nameof(Utf8JsonWriter.WriteStringValue), writer => writer.WriteStringValue("foo")), + (nameof(Utf8JsonWriter.WritePropertyName), writer => writer.WritePropertyName("foo")), + } + from option in new [] { new JsonWriterOptions { SkipValidation = true }, new JsonWriterOptions { SkipValidation = false } } + select new object[] { write.methodName, write.method, option }; + + [Theory] + [MemberData(nameof(InvalidJsonStringValueSegment_TestData))] + public void InvalidJsonStringValueSegment(string _, Action<Utf8JsonWriter> write, JsonWriterOptions options) + { + var output = new ArrayBufferWriter<byte>(1024); + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + jsonUtf8.WriteStringValueSegment("foo"u8, isFinalSegment: false); + if (options.SkipValidation) + { + write(jsonUtf8); + } + else + { + Assert.Throws<InvalidOperationException>(() => write(jsonUtf8)); + } + } + + using (var jsonUtf8 = new Utf8JsonWriter(output, options)) + { + jsonUtf8.WriteStringValueSegment("foo".ToCharArray(), isFinalSegment: false); + if (options.SkipValidation) + { + write(jsonUtf8); + } + else + { + Assert.Throws<InvalidOperationException>(() => write(jsonUtf8)); + } + } } [Theory] @@ -2872,7 +3165,7 @@ public void WritingTooLargeProperty(JsonWriterOptions options) key.AsSpan().Fill((byte)'a'); keyChars.AsSpan().Fill('a'); - var output = new ArrayBufferWriter<byte>(1024); + var output = new ArrayBufferWriter<byte>(1024); using (var jsonUtf8 = new Utf8JsonWriter(output, options)) { @@ -3160,7 +3453,7 @@ public void WriteHelloWorld(JsonWriterOptions options) ReadOnlySpan<byte> utf8PropertyName = "message"u8; ReadOnlySpan<byte> utf8Value = "Hello, World!"u8; - + for (int i = 0; i < 32; i++) { var output = new ArrayBufferWriter<byte>(32); @@ -3346,7 +3639,7 @@ public void WriteHelloWorldEscaped(JsonWriterOptions options) string propertyName = "mess><age"; string value = "Hello,>< World!"; string expectedStr = GetHelloWorldExpectedString(options, propertyName, value); - + ReadOnlySpan<char> propertyNameSpan = propertyName.AsSpan(); ReadOnlySpan<char> valueSpan = value.AsSpan(); ReadOnlySpan<byte> propertyNameSpanUtf8 = Encoding.UTF8.GetBytes(propertyName); @@ -3545,7 +3838,6 @@ public void WriteHelloWorldEscaped(JsonWriterOptions options) [MemberData(nameof(JsonOptions_TestData))] public void WritePartialHelloWorld(JsonWriterOptions options) { - var output = new ArrayBufferWriter<byte>(10); using var jsonUtf8 = new Utf8JsonWriter(output, options); @@ -3745,7 +4037,7 @@ public void WriteInvalidPartialJson(JsonWriterOptions options) public void WriteInvalidBase64(JsonWriterOptions options) { { - var output = new ArrayBufferWriter<byte>(10); + var output = new ArrayBufferWriter<byte>(10); using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartObject(); @@ -3770,7 +4062,7 @@ public void WriteInvalidBase64(JsonWriterOptions options) } } { - var output = new ArrayBufferWriter<byte>(10); + var output = new ArrayBufferWriter<byte>(10); using var jsonUtf8 = new Utf8JsonWriter(output, options); jsonUtf8.WriteStartArray();