|
| 1 | +// Licensed to the .NET Foundation under one or more agreements. |
| 2 | +// The .NET Foundation licenses this file to you under the MIT license. |
| 3 | + |
| 4 | +using System; |
| 5 | +using System.Buffers; |
| 6 | +using System.Collections; |
| 7 | +using System.Collections.Generic; |
| 8 | +using System.Diagnostics; |
| 9 | +using System.IO; |
| 10 | +using System.Runtime.CompilerServices; |
| 11 | +using System.Runtime.InteropServices; |
| 12 | +using System.Text; |
| 13 | +using System.Text.Encodings.Web; |
| 14 | +using System.Text.Json; |
| 15 | +using System.Text.Unicode; |
| 16 | +using SharpFuzz; |
| 17 | + |
| 18 | +namespace DotnetFuzzing.Fuzzers; |
| 19 | + |
/// <summary>
/// Fuzz target for <see cref="Utf8JsonWriter"/> segmented string writes. The same payload is written
/// as one, two and three segments, and each result is compared with the output of using the
/// configured <see cref="JavaScriptEncoder"/> directly.
/// </summary>
internal sealed class Utf8JsonWriterFuzzer : IFuzzer
{
    public string[] TargetAssemblies { get; } = ["System.Text.Json"];

    public string[] TargetCoreLibPrefixes => [];

    // One of the bytes in the input (index 8) is used to set various test options.
    // Each bit in that byte represents a different option as indicated here.

    // Options for JsonWriterOptions
    private const byte IndentFlag = 1;
    private const byte EncoderFlag = 1 << 1;
    private const byte MaxDepthFlag = 1 << 2;
    private const byte NewLineFlag = 1 << 3;
    private const byte SkipValidationFlag = 1 << 4;

    // Options for choosing between UTF-8 and UTF-16 encoding
    private const byte EncodingFlag = 1 << 5;

    /// <summary>
    /// Runs one fuzz iteration.
    /// </summary>
    /// <param name="bytes">
    /// Raw fuzz input. Layout: two <see cref="int"/> slice indices, one options byte, one padding byte,
    /// then the payload that is written as a JSON string. Inputs shorter than the header are ignored.
    /// </param>
    public void FuzzTarget(ReadOnlySpan<byte> bytes)
    {
        const int minLength = 10; // 2 ints, 1 options byte, and 1 padding byte so the char payload starts at an even offset
        if (bytes.Length < minLength)
        {
            return;
        }

        // First 2 ints are used as indices to slice the input and the following byte is used for options
        ReadOnlySpan<int> ints = MemoryMarshal.Cast<byte, int>(bytes);
        int slice1 = ints[0];
        int slice2 = ints[1];
        byte optionsByte = bytes[8];
        bytes = bytes.Slice(minLength);
        ReadOnlySpan<char> chars = MemoryMarshal.Cast<byte, char>(bytes);

        // Validate that the indices are ordered and within bounds of the payload.
        // In UTF-16 mode the indices count chars, otherwise they count bytes.
        bool utf8 = (optionsByte & EncodingFlag) == 0;
        if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (utf8 ? bytes.Length : chars.Length)))
        {
            return;
        }

        JsonWriterOptions options = CreateWriterOptions(optionsByte);

        // Sized for the two surrounding quotes plus six output bytes per payload byte
        // (assumes the worst case is a "\uXXXX" escape per input byte).
        int maxExpandedSizeBytes = 6 * bytes.Length + 2;
        byte[] expectedBuffer = ArrayPool<byte>.Shared.Rent(maxExpandedSizeBytes);
        try
        {
            // Compute the expected result by using the encoder directly on the whole payload
            Span<byte> expected =
                expectedBuffer.AsSpan(0, utf8
                    ? EncodeToUtf8(bytes, expectedBuffer, options.Encoder)
                    : EncodeToUtf8(chars, expectedBuffer, options.Encoder));

            // Compute the actual result by using Utf8JsonWriter. Each iteration splits the payload
            // differently, but the result should be the same.
            byte[] actualBuffer = new byte[expected.Length];
            foreach (ReadOnlySpan<Range> ranges in new[]
            {
                new[] { 0.. },
                new[] { 0..slice1, slice1.. },
                new[] { 0..slice1, slice1..slice2, slice2.. },
            })
            {
                using MemoryStream stream = new(actualBuffer);
                using Utf8JsonWriter writer = new(stream, options);

                if (utf8)
                {
                    WriteStringValueSegments(writer, bytes, ranges);
                }
                else
                {
                    WriteStringValueSegments(writer, chars, ranges);
                }

                writer.Flush();

                // Compare the expected and actual results
                Assert.SequenceEqual(expected, actualBuffer);
                Assert.Equal(expected.Length, writer.BytesCommitted);
                Assert.Equal(0, writer.BytesPending);

                // Reset the shared output buffer for the next split
                Array.Clear(actualBuffer);
            }

            // Additional test for mixing UTF-8 and UTF-16 encoding. The alignment math is easier in UTF-16 mode so just run it for that.
            if (!utf8)
            {
                Array.Clear(expectedBuffer);

                // UTF-16 segment followed by a UTF-8 segment must be rejected
                {
                    ReadOnlySpan<char> firstSegment = chars[slice1..];
                    ReadOnlySpan<byte> secondSegment = bytes[0..(2 * slice1)];

                    // The encoded length is only used to size the stream backing the writer
                    expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, expectedBuffer, options.Encoder));

                    actualBuffer = new byte[expected.Length];
                    using MemoryStream stream = new(actualBuffer);
                    using Utf8JsonWriter writer = new(stream, options);

                    writer.WriteStringValueSegment(firstSegment, false);

                    Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteStringValueSegment(state, true), secondSegment);
                }

                Array.Clear(expectedBuffer);

                // UTF-8 segment followed by a UTF-16 segment must be rejected
                {
                    ReadOnlySpan<byte> firstSegment = bytes[0..(2 * slice1)];
                    ReadOnlySpan<char> secondSegment = chars[slice1..];

                    // The encoded length is only used to size the stream backing the writer
                    expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, secondSegment, expectedBuffer, options.Encoder));

                    actualBuffer = new byte[expected.Length];
                    using MemoryStream stream = new(actualBuffer);
                    using Utf8JsonWriter writer = new(stream, options);

                    writer.WriteStringValueSegment(firstSegment, false);
                    Assert.Throws<InvalidOperationException, ReadOnlySpan<char>>(state => writer.WriteStringValueSegment(state, true), secondSegment);
                }
            }
        }
        finally
        {
            // Return the rented buffer even when an assertion or the writer throws
            ArrayPool<byte>.Shared.Return(expectedBuffer);
        }
    }

    /// <summary>
    /// Builds the writer options selected by the bits of <paramref name="optionsByte"/>.
    /// </summary>
    /// <param name="optionsByte">Bit field combining the <c>*Flag</c> constants of this class.</param>
    /// <returns>The options used for every writer created in this iteration.</returns>
    private static JsonWriterOptions CreateWriterOptions(byte optionsByte)
    {
        return new JsonWriterOptions
        {
            // Chosen by its own bit so that both encoders are exercised with both UTF-8 and UTF-16 input
            Encoder = (optionsByte & EncoderFlag) == 0 ? JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
            Indented = (optionsByte & IndentFlag) == 0,
            MaxDepth = (optionsByte & MaxDepthFlag) == 0 ? 1 : 0,
            NewLine = (optionsByte & NewLineFlag) == 0 ? "\n" : "\r\n",
            SkipValidation = (optionsByte & SkipValidationFlag) == 0,
        };
    }

    /// <summary>
    /// Writes each range of <paramref name="bytes"/> as a UTF-8 string segment; only the last range is marked final.
    /// </summary>
    private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<byte> bytes, ReadOnlySpan<Range> ranges)
    {
        int lastIndex = ranges.Length - 1;
        for (int i = 0; i < ranges.Length; i++)
        {
            writer.WriteStringValueSegment(bytes[ranges[i]], i == lastIndex);
        }
    }

    /// <summary>
    /// Writes each range of <paramref name="chars"/> as a UTF-16 string segment; only the last range is marked final.
    /// </summary>
    private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<char> chars, ReadOnlySpan<Range> ranges)
    {
        int lastIndex = ranges.Length - 1;
        for (int i = 0; i < ranges.Length; i++)
        {
            writer.WriteStringValueSegment(chars[ranges[i]], i == lastIndex);
        }
    }

    /// <summary>
    /// Encodes UTF-8 input into <paramref name="destBuffer"/>, surrounded by double quotes.
    /// </summary>
    /// <returns>The total number of bytes written, including both quotes.</returns>
    private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder)
    {
        destBuffer[0] = (byte)'"';
        encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int encodedLength, isFinalBlock: true);

        int closingQuoteIndex = 1 + encodedLength;
        destBuffer[closingQuoteIndex] = (byte)'"';
        return closingQuoteIndex + 1;
    }

    /// <summary>
    /// Encodes UTF-16 input and transcodes it to UTF-8 in <paramref name="destBuffer"/>, surrounded by double quotes.
    /// </summary>
    /// <returns>The total number of bytes written, including both quotes.</returns>
    private static int EncodeToUtf8(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder)
    {
        destBuffer[0] = (byte)'"';

        int closingQuoteIndex = 1 + EncodeTranscode(chars, destBuffer[1..], encoder);
        destBuffer[closingQuoteIndex] = (byte)'"';
        return closingQuoteIndex + 1;
    }

    /// <summary>
    /// Encodes a UTF-8 part followed by a UTF-16 part into <paramref name="destBuffer"/> as one quoted string.
    /// Each part is encoded independently as a final block.
    /// </summary>
    /// <returns>The total number of bytes written, including both quotes.</returns>
    private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder)
    {
        destBuffer[0] = (byte)'"';
        encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int utf8Length, isFinalBlock: true);

        int charsStart = 1 + utf8Length;
        int closingQuoteIndex = charsStart + EncodeTranscode(chars, destBuffer[charsStart..], encoder, isFinalBlock: true);
        destBuffer[closingQuoteIndex] = (byte)'"';
        return closingQuoteIndex + 1;
    }

    /// <summary>
    /// Encodes a UTF-16 part followed by a UTF-8 part into <paramref name="destBuffer"/> as one quoted string.
    /// Each part is encoded independently as a final block.
    /// </summary>
    /// <returns>The total number of bytes written, including both quotes.</returns>
    private static int EncodeToUtf8(ReadOnlySpan<char> chars, ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder)
    {
        destBuffer[0] = (byte)'"';

        int bytesStart = 1 + EncodeTranscode(chars, destBuffer[1..], encoder, isFinalBlock: true);
        encoder.EncodeUtf8(bytes, destBuffer[bytesStart..], out _, out int utf8Length, isFinalBlock: true);

        int closingQuoteIndex = bytesStart + utf8Length;
        destBuffer[closingQuoteIndex] = (byte)'"';
        return closingQuoteIndex + 1;
    }

    /// <summary>
    /// Encodes <paramref name="chars"/> with <paramref name="encoder"/> into a temporary UTF-16 buffer and
    /// transcodes the result to UTF-8 in <paramref name="destBuffer"/>.
    /// </summary>
    /// <param name="chars">UTF-16 input to encode.</param>
    /// <param name="destBuffer">Destination for the UTF-8 output.</param>
    /// <param name="encoder">Encoder used for escaping.</param>
    /// <param name="isFinalBlock">
    /// Forwarded to the UTF-16 to UTF-8 transcoding step only; the escaping step always treats its input as final.
    /// </param>
    /// <returns>The number of UTF-8 bytes written to <paramref name="destBuffer"/>.</returns>
    private static int EncodeTranscode(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder, bool isFinalBlock = true)
    {
        char[] utf16buffer = ArrayPool<char>.Shared.Rent(6 * chars.Length);
        try
        {
            encoder.Encode(chars, utf16buffer, out _, out int written, isFinalBlock: true);

            Utf8.FromUtf16(utf16buffer.AsSpan(0, written), destBuffer, out _, out written, isFinalBlock);
            return written;
        }
        finally
        {
            // Return the scratch buffer even if encoding or transcoding throws
            ArrayPool<char>.Shared.Return(utf16buffer);
        }
    }
}
0 commit comments