From b84c3df846ec00b0a5a965b7da8dbd3ec93ccd7e Mon Sep 17 00:00:00 2001 From: Eirik Tsarpalis Date: Fri, 11 Nov 2022 12:46:45 +0000 Subject: [PATCH 1/3] Ensure the async reader state resets the BOM offset in every AdvanceBuffer() call. --- .../Json/Serialization/ReadBufferState.cs | 4 +-- .../Serialization/ContinuationTests.cs | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs index efbc842b3687f..c84c00a35247a 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs @@ -122,7 +122,6 @@ public void AdvanceBuffer(int bytesConsumed) // Copy the unprocessed data to the new buffer while shifting the processed bytes. Buffer.BlockCopy(oldBuffer, _offset + bytesConsumed, newBuffer, 0, _count); _buffer = newBuffer; - _offset = 0; _maxCount = _count; // Clear and return the old buffer @@ -133,9 +132,10 @@ public void AdvanceBuffer(int bytesConsumed) { // Shift the processed bytes to the beginning of buffer to make more room. Buffer.BlockCopy(_buffer, _offset + bytesConsumed, _buffer, 0, _count); - _offset = 0; } } + + _offset = 0; } private void ProcessReadBytes() diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs index de4bcaa919123..f1c73024c5a14 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs @@ -235,6 +235,31 @@ public static void InvalidJsonShouldFailAtAnyPosition_Sequence( Assert.Equal(expectedFailure.Column, ex.BytePositionInLine); } + [Fact] + public static async Task BomHandlingRegressionTest() + { + byte[] utf8Bom = Encoding.UTF8.GetPreamble(); + byte[] json = """{ "Value" : "Hello" }"""u8.ToArray(); + + using var stream = new MemoryStream(); + stream.Write(utf8Bom, 0, utf8Bom.Length); + stream.Write(json, 0, json.Length); + stream.Position = 0; + + var options = new JsonSerializerOptions + { + DefaultBufferSize = 32 + }; + + Test result = await JsonSerializer.DeserializeAsync(stream, options); + Assert.Equal("Hello", result.Value); + } + + private class Test + { + public string Value { get; set; } + } + private class Chunk : ReadOnlySequenceSegment { public Chunk(string json, int firstSegmentLength) From eb6722c04bff62d37cf187e2c975b78c1f611b7f Mon Sep 17 00:00:00 2001 From: Eirik Tsarpalis Date: Fri, 11 Nov 2022 14:26:15 +0000 Subject: [PATCH 2/3] Add BOM insertion to async serialization stress testing --- .../JsonSerializerWrapper.Reflection.cs | 120 ++++++++++++++++-- 1 file changed, 107 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs index 24636171cfc7d..2ba22e924972a 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; using System.Text.Json.Nodes; @@ -15,9 +16,9 @@ public abstract partial class JsonSerializerWrapper public static JsonSerializerWrapper SpanSerializer { get; } = new SpanSerializerWrapper(); public static JsonSerializerWrapper StringSerializer { get; } = new StringSerializerWrapper(); public static StreamingJsonSerializerWrapper AsyncStreamSerializer { get; } = new AsyncStreamSerializerWrapper(); - public static StreamingJsonSerializerWrapper AsyncStreamSerializerWithSmallBuffer { get; } = new AsyncStreamSerializerWrapper(forceSmallBufferInOptions: true); + public static StreamingJsonSerializerWrapper AsyncStreamSerializerWithSmallBuffer { get; } = new AsyncStreamSerializerWrapper(forceSmallBufferInOptions: true, forceBomInsertions: true); public static StreamingJsonSerializerWrapper SyncStreamSerializer { get; } = new SyncStreamSerializerWrapper(); - public static StreamingJsonSerializerWrapper SyncStreamSerializerWithSmallBuffer { get; } = new SyncStreamSerializerWrapper(forceSmallBufferInOptions: true); + public static StreamingJsonSerializerWrapper SyncStreamSerializerWithSmallBuffer { get; } = new SyncStreamSerializerWrapper(forceSmallBufferInOptions: true, forceBomInsertions: true); public static JsonSerializerWrapper ReaderWriterSerializer { get; } = new ReaderWriterSerializerWrapper(); public static JsonSerializerWrapper DocumentSerializer { get; } = new DocumentSerializerWrapper(); public static JsonSerializerWrapper ElementSerializer { get; } = new ElementSerializerWrapper(); @@ -120,17 +121,22 @@ public override Task DeserializeWrapper(string json, Type type, JsonSeri private class AsyncStreamSerializerWrapper : StreamingJsonSerializerWrapper { private readonly bool _forceSmallBufferInOptions; + private readonly bool _forceBomInsertions; public override bool IsAsyncSerializer => true; - public AsyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false) + public AsyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false, bool forceBomInsertions = false) { _forceSmallBufferInOptions = forceSmallBufferInOptions; + _forceBomInsertions = forceBomInsertions; } private JsonSerializerOptions? ResolveOptionsInstance(JsonSerializerOptions? options) => _forceSmallBufferInOptions ? JsonSerializerOptionsSmallBufferMapper.ResolveOptionsInstanceWithSmallBuffer(options) : options; + private Stream ResolveReadStream(Stream stream) + => stream is not null && _forceBomInsertions ? new Utf8BomInsertingStream(stream) : stream; + public override Task SerializeWrapper(Stream utf8Json, T value, JsonSerializerOptions options = null) { return JsonSerializer.SerializeAsync(utf8Json, value, ResolveOptionsInstance(options)); @@ -153,38 +159,43 @@ public override Task SerializeWrapper(Stream stream, object value, Type inputTyp public override async Task DeserializeWrapper(Stream utf8Json, JsonSerializerOptions options = null) { - return await JsonSerializer.DeserializeAsync(utf8Json, ResolveOptionsInstance(options)); + return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), ResolveOptionsInstance(options)); } public override async Task DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerOptions options = null) { - return await JsonSerializer.DeserializeAsync(utf8Json, returnType, ResolveOptionsInstance(options)); + return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), returnType, ResolveOptionsInstance(options)); } public override async Task DeserializeWrapper(Stream utf8Json, JsonTypeInfo jsonTypeInfo) { - return await JsonSerializer.DeserializeAsync(utf8Json, jsonTypeInfo); + return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), jsonTypeInfo); } public override async Task DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerContext context) { - return await JsonSerializer.DeserializeAsync(utf8Json, returnType, context); + return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), returnType, context); } } private class SyncStreamSerializerWrapper : StreamingJsonSerializerWrapper { private readonly bool _forceSmallBufferInOptions; + private readonly bool _forceBomInsertions; + + public override bool IsAsyncSerializer => false; - public SyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false) + public SyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false, bool forceBomInsertions = false) { _forceSmallBufferInOptions = forceSmallBufferInOptions; + _forceBomInsertions = forceBomInsertions; } private JsonSerializerOptions? ResolveOptionsInstance(JsonSerializerOptions? options) => _forceSmallBufferInOptions ? JsonSerializerOptionsSmallBufferMapper.ResolveOptionsInstanceWithSmallBuffer(options) : options; - public override bool IsAsyncSerializer => false; + private Stream ResolveReadStream(Stream stream) + => stream is not null && _forceBomInsertions ? new Utf8BomInsertingStream(stream) : stream; public override Task SerializeWrapper(Stream utf8Json, T value, JsonSerializerOptions options = null) { @@ -212,25 +223,25 @@ public override Task SerializeWrapper(Stream stream, object value, Type inputTyp public override Task DeserializeWrapper(Stream utf8Json, JsonSerializerOptions options = null) { - T result = JsonSerializer.Deserialize(utf8Json, ResolveOptionsInstance(options)); + T result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), ResolveOptionsInstance(options)); return Task.FromResult(result); } public override Task DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerOptions options = null) { - object result = JsonSerializer.Deserialize(utf8Json, returnType, ResolveOptionsInstance(options)); + object result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), returnType, ResolveOptionsInstance(options)); return Task.FromResult(result); } public override Task DeserializeWrapper(Stream utf8Json, JsonTypeInfo jsonTypeInfo) { - T result = JsonSerializer.Deserialize(utf8Json, jsonTypeInfo); + T result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), jsonTypeInfo); return Task.FromResult(result); } public override Task DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerContext context) { - object result = JsonSerializer.Deserialize(utf8Json, returnType, context); + object result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), returnType, context); return Task.FromResult(result); } } @@ -653,5 +664,88 @@ public static JsonSerializerOptions ResolveOptionsInstanceWithSmallBuffer(JsonSe return smallBufferCopy; } } + + private sealed class Utf8BomInsertingStream : Stream + { + private const int Utf8BomLength = 3; + private readonly static byte[] s_utf8Bom = Encoding.UTF8.GetPreamble(); + + private readonly Stream _source; + private byte[]? _prefixBytes; + private int _prefixBytesOffset = 0; + private int _prefixBytesCount = 0; + + public Utf8BomInsertingStream(Stream source) + { + Debug.Assert(source.CanRead); + _source = source; + } + + public override bool CanRead => _source.CanRead; + public override bool CanSeek => false; + public override bool CanWrite => false; + + public override int Read(byte[] buffer, int offset, int count) + { + if (_prefixBytes is null) + { + // This is the first read operation; read the first 3 bytes + // from the source to determine if it already includes a BOM. + // Only insert a BOM if it's missing from the source stream. + + _prefixBytes = new byte[2 * Utf8BomLength]; + int bytesRead = ReadExactlyFromSource(_prefixBytes, Utf8BomLength, Utf8BomLength); + + if (_prefixBytes.AsSpan(Utf8BomLength).SequenceEqual(s_utf8Bom)) + { + _prefixBytesOffset = Utf8BomLength; + _prefixBytesCount = Utf8BomLength; + } + else + { + s_utf8Bom.CopyTo(_prefixBytes, 0); + _prefixBytesOffset = 0; + _prefixBytesCount = Utf8BomLength + bytesRead; + } + } + + int prefixBytesToWrite = Math.Min(_prefixBytesCount, count); + if (prefixBytesToWrite > 0) + { + _prefixBytes.AsSpan(_prefixBytesOffset, prefixBytesToWrite).CopyTo(buffer.AsSpan(offset, count)); + _prefixBytesOffset += prefixBytesToWrite; + _prefixBytesCount -= prefixBytesToWrite; + offset += prefixBytesToWrite; + count -= prefixBytesToWrite; + } + + return prefixBytesToWrite + _source.Read(buffer, offset, count); + } + + private int ReadExactlyFromSource(byte[] buffer, int offset, int count) + { + int totalRead = 0; + + while (totalRead < count) + { + int read = _source.Read(buffer, offset + totalRead, count - totalRead); + if (read == 0) + { + break; + } + + totalRead += read; + }; + + return totalRead; + } + + public override long Length => throw new NotSupportedException(); + public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); } + public override void Flush() => throw new NotSupportedException(); + public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); + public override void SetLength(long value) => throw new NotSupportedException(); + public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); + } } } From 74a5a895c79c843cf1cefaa4cdacc85d72c4aa10 Mon Sep 17 00:00:00 2001 From: Eirik Tsarpalis Date: Fri, 11 Nov 2022 14:33:36 +0000 Subject: [PATCH 3/3] Update src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs --- .../Serialization/JsonSerializerWrapper.Reflection.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs index 2ba22e924972a..6296522dd91d3 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs @@ -735,7 +735,7 @@ private int ReadExactlyFromSource(byte[] buffer, int offset, int count) } totalRead += read; - }; + } return totalRead; }