-
Notifications
You must be signed in to change notification settings - Fork 695
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create class for reading Json files in chunks (#5530)
* Moved files over and addressed some PR comments * added comment * switched to true and false strings * Added ctr to specify buffer for testing purposes. * remove commented code * switch to use Utf8 preamble for BOM * Create method for checking complete * combined code for ReadStringArray * Updated buffer size to match STJ's default buffer size * Switch Utf8JsonStreamReader to be disposable. * Switch to read the value for numbers into a string directly * revert back to using private var for utf8Bom * Remove ReadStringArrayAsList * Avoid referencing buffer after returning * Actually avoid referencing _buffer after returning * Update how buffers are fed into Utf8JsonReader to avoid feeding extra empty data. * remove extra line * Reverted back to using try get int for ReadTokenAsString * Update src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReader.cs Co-authored-by: Andy Zivkovic <zivkan@users.noreply.github.com> * Remove ValueTextEquals taking in string * Switched to Skip instead of TrySkip * Update src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReader.cs Co-authored-by: Andy Zivkovic <zivkan@users.noreply.github.com> * Added some unit tests * fix Bom * Switched to using Moq * Update src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReader.cs Co-authored-by: Andy Zivkovic <zivkan@users.noreply.github.com> * loop through stream when reading to ensure reading full bytes or to the end * update signature comment * Switched stream back to field and supress warning --------- Co-authored-by: Andy Zivkovic <zivkan@users.noreply.github.com>
- Loading branch information
Showing
6 changed files
with
1,194 additions
and
1 deletion.
There are no files selected for viewing
40 changes: 40 additions & 0 deletions
40
src/NuGet.Core/NuGet.ProjectModel/Utf8JsonReaderExtensions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using System.Text.Json; | ||
|
||
namespace NuGet.ProjectModel | ||
{ | ||
/// <summary>
/// Extension methods for <see cref="Utf8JsonReader"/> that convert the current token to a string.
/// </summary>
internal static class Utf8JsonReaderExtensions
{
    /// <summary>
    /// Converts the token the reader is currently positioned on into its string representation.
    /// </summary>
    /// <param name="reader">The reader whose current token is converted. The reader is not advanced.</param>
    /// <returns>
    /// <see cref="bool.TrueString"/> or <see cref="bool.FalseString"/> for boolean tokens, the number formatted
    /// with the invariant culture for number tokens, the unescaped text for string tokens, and
    /// <see langword="null"/> for <see cref="JsonTokenType.Null"/> and <see cref="JsonTokenType.None"/>.
    /// </returns>
    /// <exception cref="InvalidCastException">The current token is of any other type (for example, an object or array start).</exception>
    internal static string ReadTokenAsString(this ref Utf8JsonReader reader)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.True:
                return bool.TrueString;
            case JsonTokenType.False:
                return bool.FalseString;
            case JsonTokenType.Number:
                return reader.ReadNumberAsString();
            case JsonTokenType.String:
                return reader.GetString();
            case JsonTokenType.None:
            case JsonTokenType.Null:
                return null;
            default:
                throw new InvalidCastException();
        }
    }

    /// <summary>
    /// Formats the current number token as a string, preferring an integer representation when the value fits in an <see cref="long"/>.
    /// </summary>
    private static string ReadNumberAsString(this ref Utf8JsonReader reader)
    {
        // JSON numbers are culture-neutral, so the result must not depend on the current thread culture
        // (otherwise "3.14" could come back as "3,14", or with a localized negative sign).
        if (reader.TryGetInt64(out long value))
        {
            return value.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        return reader.GetDouble().ToString(System.Globalization.CultureInfo.InvariantCulture);
    }
}
} |
273 changes: 273 additions & 0 deletions
273
src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReader.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,273 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using System.Buffers; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Text.Json; | ||
|
||
namespace NuGet.ProjectModel | ||
{ | ||
/// <summary>
/// This struct is used to read over a stream in parts, in order to avoid reading the entire stream into memory.
/// It functions as a wrapper around <see cref="Utf8JsonReader"/>, while maintaining a stream and a pooled buffer to read from.
/// </summary>
internal ref struct Utf8JsonStreamReader
{
    private static readonly char[] DelimitedStringDelimiters = [' ', ','];
    private static readonly byte[] Utf8Bom = [0xEF, 0xBB, 0xBF];

    private const int BufferSizeDefault = 16 * 1024;
    private const int MinBufferSize = 1024;
    private Utf8JsonReader _reader;
#pragma warning disable CA2213 // Disposable fields should be disposed
    private Stream _stream;
#pragma warning restore CA2213 // Disposable fields should be disposed
    // The buffer is used to read from the stream in chunks.
    private byte[] _buffer;
    private bool _disposed;
    private ArrayPool<byte> _bufferPool;
    // Number of bytes at the start of _buffer that currently hold data read from the stream.
    private int _bufferUsed = 0;

    /// <summary>
    /// Creates a reader over <paramref name="stream"/>, skips a leading UTF-8 byte order mark if present,
    /// and advances to the first JSON token.
    /// </summary>
    /// <param name="stream">The stream containing UTF-8 encoded JSON. It is not disposed by this type.</param>
    /// <param name="bufferSize">The minimum size of the buffer to rent; must be at least 1024 bytes.</param>
    /// <param name="arrayPool">The pool to rent buffers from. <see cref="ArrayPool{T}.Shared"/> is used when null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="bufferSize"/> is smaller than the minimum buffer size.</exception>
    internal Utf8JsonStreamReader(Stream stream, int bufferSize = BufferSizeDefault, ArrayPool<byte> arrayPool = null)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        if (bufferSize < MinBufferSize)
        {
            throw new ArgumentException($"Buffer size must be at least {MinBufferSize} bytes", nameof(bufferSize));
        }

        _bufferPool = arrayPool ?? ArrayPool<byte>.Shared;
        _buffer = _bufferPool.Rent(bufferSize);
        _disposed = false;
        _stream = stream;

        // Stream.Read may return fewer bytes than requested, and the stream may be shorter than a BOM,
        // so keep reading until a BOM-sized prefix is available or the stream ends.
        int prefixLength = 0;
        while (prefixLength < Utf8Bom.Length)
        {
            int prefixBytesRead = _stream.Read(_buffer, prefixLength, Utf8Bom.Length - prefixLength);
            if (prefixBytesRead == 0)
            {
                break;
            }
            prefixLength += prefixBytesRead;
        }

        // A BOM is dropped by leaving _bufferUsed at 0 so the next read overwrites it. Otherwise keep exactly
        // the bytes that were read; rented buffers are not cleared, so counting more would expose stale data.
        bool startsWithBom = prefixLength == Utf8Bom.Length
            && Utf8Bom.AsSpan().SequenceEqual(_buffer.AsSpan(0, prefixLength));
        _bufferUsed = startsWithBom ? 0 : prefixLength;

        var initialJsonReaderState = new JsonReaderState(new JsonReaderOptions
        {
            AllowTrailingCommas = true,
            CommentHandling = JsonCommentHandling.Skip,
        });

        ReadStreamIntoBuffer(initialJsonReaderState);

        // Use the refilling Read so the reader ends up on the first token even when leading
        // whitespace or comments do not fit in the first buffer.
        Read();
    }

    /// <summary>Whether the underlying reader was created over the last block of data from the stream.</summary>
    internal bool IsFinalBlock => _reader.IsFinalBlock;

    /// <summary>The type of the token the reader is currently positioned on.</summary>
    internal JsonTokenType TokenType => _reader.TokenType;

    /// <summary>Compares the current token's text to <paramref name="utf8Text"/>.</summary>
    internal bool ValueTextEquals(ReadOnlySpan<byte> utf8Text) => _reader.ValueTextEquals(utf8Text);

    /// <summary>Tries to parse the current token as an <see cref="int"/>.</summary>
    internal bool TryGetInt32(out int value) => _reader.TryGetInt32(out value);

    /// <summary>Gets the current token as a string.</summary>
    internal string GetString() => _reader.GetString();

    /// <summary>Gets the current token as a <see cref="bool"/>.</summary>
    internal bool GetBoolean() => _reader.GetBoolean();

    /// <summary>Gets the current token as an <see cref="int"/>.</summary>
    internal int GetInt32() => _reader.GetInt32();

    /// <summary>
    /// Advances to the next token, pulling more bytes from the stream whenever the buffered data
    /// does not contain a complete token.
    /// </summary>
    /// <returns>True if a token was read; false if the end of the data was reached.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal bool Read()
    {
        ThrowExceptionIfDisposed();

        bool wasRead;
        while (!(wasRead = _reader.Read()) && !_reader.IsFinalBlock)
        {
            GetMoreBytesFromStream();
        }
        return wasRead;
    }

    /// <summary>
    /// Skips the children of the current token, pulling more bytes from the stream as needed.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal void Skip()
    {
        ThrowExceptionIfDisposed();

        bool wasSkipped;
        while (!(wasSkipped = _reader.TrySkip()) && !_reader.IsFinalBlock)
        {
            GetMoreBytesFromStream();
        }
        if (!wasSkipped)
        {
            // On the final block, let Skip surface the underlying reader's error for incomplete data.
            _reader.Skip();
        }
    }

    /// <summary>
    /// Advances to the next token and returns it as a string.
    /// </summary>
    /// <returns>The string form of the next token, or null if there was no token to read.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal string ReadNextTokenAsString()
    {
        ThrowExceptionIfDisposed();

        if (Read())
        {
            return _reader.ReadTokenAsString();
        }

        return null;
    }

    /// <summary>
    /// When positioned on the start of an array, reads each element as a string and appends it to <paramref name="strings"/>.
    /// </summary>
    /// <param name="strings">An optional list to append to. A new list is created on the first element when null.</param>
    /// <returns>
    /// The list holding the values. This is <paramref name="strings"/> unchanged (possibly null) when the current token
    /// is not an array start or the array is empty.
    /// </returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal IList<string> ReadStringArrayAsIList(IList<string> strings = null)
    {
        ThrowExceptionIfDisposed();

        if (TokenType == JsonTokenType.StartArray)
        {
            while (Read() && TokenType != JsonTokenType.EndArray)
            {
                string value = _reader.ReadTokenAsString();

                strings ??= new List<string>();

                strings.Add(value);
            }
        }
        return strings;
    }

    /// <summary>
    /// Advances to the next token, which must be a string, and splits it on spaces and commas.
    /// </summary>
    /// <returns>The non-empty segments of the string, or null if there was no token to read.</returns>
    /// <exception cref="JsonException">The next token is not a string.</exception>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal IReadOnlyList<string> ReadDelimitedString()
    {
        ThrowExceptionIfDisposed();

        if (Read())
        {
            switch (TokenType)
            {
                case JsonTokenType.String:
                    var value = GetString();

                    return value.Split(DelimitedStringDelimiters, StringSplitOptions.RemoveEmptyEntries);

                default:
                    var invalidCastException = new InvalidCastException();
                    throw new JsonException(invalidCastException.Message, invalidCastException);
            }
        }

        return null;
    }

    /// <summary>
    /// Advances to the next token and returns its value if it is a boolean.
    /// </summary>
    /// <returns>The boolean value of the next token; false if there is no next token or it is not a boolean.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal bool ReadNextTokenAsBoolOrFalse()
    {
        ThrowExceptionIfDisposed();

        if (Read() && (TokenType == JsonTokenType.False || TokenType == JsonTokenType.True))
        {
            return GetBoolean();
        }
        return false;
    }

    /// <summary>
    /// Advances to the next token and reads it as either a single string or an array of strings.
    /// </summary>
    /// <returns>
    /// A one-element list for a string token, the array's elements for an array, or null for any other token
    /// or when there is no token to read.
    /// </returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal IReadOnlyList<string> ReadNextStringOrArrayOfStringsAsReadOnlyList()
    {
        ThrowExceptionIfDisposed();

        if (Read())
        {
            switch (_reader.TokenType)
            {
                case JsonTokenType.String:
                    return new[] { _reader.GetString() };

                case JsonTokenType.StartArray:
                    return ReadStringArrayAsReadOnlyListFromArrayStart();

                case JsonTokenType.StartObject:
                    return null;
            }
        }

        return null;
    }

    /// <summary>
    /// Reads tokens as strings until the end of the current array. The reader is expected to already be
    /// positioned on the array's start token.
    /// </summary>
    /// <returns>The elements read, or an empty list if the array had no elements.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal IReadOnlyList<string> ReadStringArrayAsReadOnlyListFromArrayStart()
    {
        ThrowExceptionIfDisposed();

        List<string> strings = null;

        while (Read() && _reader.TokenType != JsonTokenType.EndArray)
        {
            string value = _reader.ReadTokenAsString();

            strings ??= new List<string>();

            strings.Add(value);
        }

        return (IReadOnlyList<string>)strings ?? Array.Empty<string>();
    }

    // This function is called when Read() returns false and we're not already in the final block
    private void GetMoreBytesFromStream()
    {
        int bytesConsumed = (int)_reader.BytesConsumed;
        if (bytesConsumed < _bufferUsed)
        {
            // If the number of bytes consumed by the reader is less than the amount set in the buffer then we have leftover bytes.
            // Only the filled part of the buffer counts as leftover; anything past _bufferUsed is not stream data.
            byte[] oldBuffer = _buffer;
            ReadOnlySpan<byte> leftover = oldBuffer.AsSpan(bytesConsumed, _bufferUsed - bytesConsumed);
            _bufferUsed = leftover.Length;

            // If the leftover bytes are the same as the buffer size then we are at capacity and need to double the buffer size
            if (leftover.Length == _buffer.Length)
            {
                _buffer = _bufferPool.Rent(_buffer.Length * 2);
                leftover.CopyTo(_buffer);
                _bufferPool.Return(oldBuffer, true);
            }
            else
            {
                // Source and destination overlap within the same array; Span.CopyTo handles overlapping ranges.
                leftover.CopyTo(_buffer);
            }
        }
        else
        {
            _bufferUsed = 0;
        }

        ReadStreamIntoBuffer(_reader.CurrentState);
    }

    /// <summary>
    /// Loops through the stream and reads it into the buffer until the buffer is full or the stream is empty, creates the Utf8JsonReader.
    /// </summary>
    private void ReadStreamIntoBuffer(JsonReaderState jsonReaderState)
    {
        int bytesRead;
        do
        {
            var spaceLeftInBuffer = _buffer.Length - _bufferUsed;
            bytesRead = _stream.Read(_buffer, _bufferUsed, spaceLeftInBuffer);
            _bufferUsed += bytesRead;
        }
        while (bytesRead != 0 && _bufferUsed != _buffer.Length);

        // A read of zero bytes means the stream is exhausted, so this is the final block.
        _reader = new Utf8JsonReader(_buffer.AsSpan(0, _bufferUsed), isFinalBlock: bytesRead == 0, jsonReaderState);
    }

    /// <summary>
    /// Returns the rented buffer to the pool (cleared). The stream is not disposed. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (!_disposed)
        {
            _disposed = true;
            byte[] toReturn = _buffer;
            _buffer = null!;

            // A default-initialized instance never rented a buffer; Dispose must not throw in that case.
            if (toReturn is not null)
            {
                _bufferPool.Return(toReturn, true);
            }
        }
    }

    private void ThrowExceptionIfDisposed()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(Utf8JsonStreamReader));
        }
    }
}
} |
13 changes: 13 additions & 0 deletions
13
src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReaderConverter.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
namespace NuGet.ProjectModel | ||
{ | ||
/// <summary>
/// Base class for converters that read a <typeparamref name="T"/> from a <see cref="Utf8JsonStreamReader"/>.
/// </summary>
/// <typeparam name="T">The type of value produced by the converter.</typeparam>
internal abstract class Utf8JsonStreamReaderConverter<T>
{
    /// <summary>
    /// Reads a <typeparamref name="T"/> from <paramref name="reader"/>.
    /// </summary>
    /// <param name="reader">The reader to read from; passed by reference.</param>
    /// <returns>The value read from <paramref name="reader"/>.</returns>
    public abstract T Read(ref Utf8JsonStreamReader reader);
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
test/NuGet.Core.Tests/NuGet.ProjectModel.Test/Utf8JsonReaderExtensionsTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information. | ||
|
||
using System.Text; | ||
using System.Text.Json; | ||
using Xunit; | ||
|
||
namespace NuGet.ProjectModel.Test | ||
{ | ||
[UseCulture("")] // Fix tests failing on systems with non-English locales
public class Utf8JsonReaderExtensionsTests
{
    /// <summary>
    /// Verifies that each supported JSON value, read as the value of property "a", is converted to the expected string.
    /// </summary>
    [Theory]
    [InlineData("null", null)]
    [InlineData("true", "True")]
    [InlineData("false", "False")]
    [InlineData("-2", "-2")]
    [InlineData("9223372036854775807", "9223372036854775807")]
    [InlineData("3.14", "3.14")]
    [InlineData("\"b\"", "b")]
    public void ReadTokenAsString_WhenValueIsConvertibleToString_ReturnsValueAsString(
        string value,
        string expectedResult)
    {
        // Arrange: wrap the raw JSON value in an object, i.e. {"a":<value>}
        byte[] utf8Json = Encoding.UTF8.GetBytes("{\"a\":" + value + "}");
        var reader = new Utf8JsonReader(utf8Json);

        // Advance past the object start and the property name so the reader lands on the value token.
        for (int remainingReads = 3; remainingReads > 0; remainingReads--)
        {
            reader.Read();
        }

        // Act & Assert
        Assert.Equal(expectedResult, reader.ReadTokenAsString());
    }
}
} |
Oops, something went wrong.