Skip to content

Commit

Permalink
Create class for reading Json files in chunks (#5530)
Browse files Browse the repository at this point in the history
* Moved files over and addressed some PR comments

* added comment

* switched to true and false strings

* Added ctr to specify buffer for testing purposes.

* remove commented code

* switch to use Utf8 preamble for BOM

* Create method for checking complete

* combined code for ReadStringArray

* Updated buffer size to match STJ's default buffer size

* Switch Utf8JsonStreamReader to be disposable.

* Switch to read the value for numbers into a string directly

* revert back to using private var for utf8Bom

* Remove ReadStringArrayAsList

* Avoid referencing buffer after returning

* Actually avoid referencing _buffer after returning

* Update how buffers are fed into Utf8JsonReader to avoid feeding extra empty data.

* remove extra line

* Reverted back to using try get int for ReadTokenAsString

* Update src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReader.cs

Co-authored-by: Andy Zivkovic <zivkan@users.noreply.github.com>

* Remove ValueTextEquals taking in string

* Switched to Skip instead of TrySkip

* Update src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReader.cs

Co-authored-by: Andy Zivkovic <zivkan@users.noreply.github.com>

* Added some unit tests

* fix Bom

* Switched to using Moq

* Update src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReader.cs

Co-authored-by: Andy Zivkovic <zivkan@users.noreply.github.com>

* loop through stream when reading to ensure reading full bytes or to the end

* update signature comment

* Switched stream back to field and suppress warning

---------

Co-authored-by: Andy Zivkovic <zivkan@users.noreply.github.com>
  • Loading branch information
jgonz120 and zivkan authored Dec 12, 2023
1 parent 54ece9a commit 22f3566
Show file tree
Hide file tree
Showing 6 changed files with 1,194 additions and 1 deletion.
40 changes: 40 additions & 0 deletions src/NuGet.Core/NuGet.ProjectModel/Utf8JsonReaderExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Text.Json;

namespace NuGet.ProjectModel
{
/// <summary>
/// Extension methods that convert the token a <see cref="Utf8JsonReader"/> is positioned on into a string.
/// </summary>
internal static class Utf8JsonReaderExtensions
{
    /// <summary>
    /// Returns the string form of the token the reader is currently positioned on.
    /// </summary>
    /// <param name="reader">The reader whose current token is converted. The reader is not advanced.</param>
    /// <returns>
    /// <see cref="bool.TrueString"/> or <see cref="bool.FalseString"/> for boolean tokens, the culture-invariant
    /// text of a number token, the value of a string token, or <c>null</c> for
    /// <see cref="JsonTokenType.None"/> and <see cref="JsonTokenType.Null"/>.
    /// </returns>
    /// <exception cref="InvalidCastException">The current token is any other token type.</exception>
    internal static string ReadTokenAsString(this ref Utf8JsonReader reader)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.True:
                return bool.TrueString;
            case JsonTokenType.False:
                return bool.FalseString;
            case JsonTokenType.Number:
                return reader.ReadNumberAsString();
            case JsonTokenType.String:
                return reader.GetString();
            case JsonTokenType.None:
            case JsonTokenType.Null:
                return null;
            default:
                throw new InvalidCastException();
        }
    }

    /// <summary>
    /// Formats the current number token as a string, preferring an integral representation
    /// and falling back to a double when the value does not fit in an <see cref="long"/>.
    /// </summary>
    private static string ReadNumberAsString(this ref Utf8JsonReader reader)
    {
        // JSON numbers are machine-readable data: always format them with the invariant culture so the
        // result does not depend on the current thread's culture (e.g. "3,14" under de-DE).
        if (reader.TryGetInt64(out long value))
        {
            return value.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        return reader.GetDouble().ToString(System.Globalization.CultureInfo.InvariantCulture);
    }
}
}
273 changes: 273 additions & 0 deletions src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Buffers;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.Json;

namespace NuGet.ProjectModel
{
/// <summary>
/// This struct is used to read over a stream in parts, in order to avoid reading the entire stream into memory.
/// It functions as a wrapper around <see cref="Utf8JsonReader"/>, while maintaining a stream and a pooled buffer to read from.
/// </summary>
internal ref struct Utf8JsonStreamReader
{
    private static readonly char[] DelimitedStringDelimiters = [' ', ','];
    private static readonly byte[] Utf8Bom = [0xEF, 0xBB, 0xBF];

    private const int BufferSizeDefault = 16 * 1024;
    private const int MinBufferSize = 1024;
    private Utf8JsonReader _reader;
#pragma warning disable CA2213 // Disposable fields should be disposed
    private Stream _stream;
#pragma warning restore CA2213 // Disposable fields should be disposed
    // The buffer is used to read from the stream in chunks.
    private byte[] _buffer;
    private bool _disposed;
    private ArrayPool<byte> _bufferPool;
    // Number of bytes at the start of _buffer that currently hold data read from the stream.
    private int _bufferUsed = 0;

    /// <summary>
    /// Creates a reader over <paramref name="stream"/>, skips a leading UTF-8 byte order mark if present,
    /// fills the buffer and attempts to position the reader on the first JSON token.
    /// </summary>
    /// <param name="stream">The stream to read JSON from. It is not disposed by this type.</param>
    /// <param name="bufferSize">Size requested from the pool for the initial buffer; at least 1024 bytes.</param>
    /// <param name="arrayPool">Pool to rent buffers from; <see cref="ArrayPool{T}.Shared"/> when <c>null</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="bufferSize"/> is smaller than the minimum buffer size.</exception>
    internal Utf8JsonStreamReader(Stream stream, int bufferSize = BufferSizeDefault, ArrayPool<byte> arrayPool = null)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        if (bufferSize < MinBufferSize)
        {
            throw new ArgumentException($"Buffer size must be at least {MinBufferSize} bytes", nameof(bufferSize));
        }

        _bufferPool = arrayPool ?? ArrayPool<byte>.Shared;
        _buffer = _bufferPool.Rent(bufferSize);
        _disposed = false;
        _stream = stream;

        // Stream.Read may return fewer bytes than requested, and the stream may be shorter than the BOM.
        // Loop until the preamble is complete or the stream ends, and only count bytes that were really read;
        // otherwise stale bytes of the rented buffer would be treated as input.
        int preambleBytes = 0;
        while (preambleBytes < Utf8Bom.Length)
        {
            int read = _stream.Read(_buffer, preambleBytes, Utf8Bom.Length - preambleBytes);
            if (read == 0)
            {
                break;
            }
            preambleBytes += read;
        }

        bool startsWithBom = preambleBytes == Utf8Bom.Length
            && Utf8Bom.AsSpan().SequenceEqual(_buffer.AsSpan(0, Utf8Bom.Length));

        // A BOM is dropped (the next read overwrites it); anything else is JSON payload and must be kept.
        _bufferUsed = startsWithBom ? 0 : preambleBytes;

        var initialJsonReaderState = new JsonReaderState(new JsonReaderOptions
        {
            AllowTrailingCommas = true,
            CommentHandling = JsonCommentHandling.Skip,
        });

        ReadStreamIntoBuffer(initialJsonReaderState);

        // Use the refilling Read so the first token is found even when it is not complete in the first buffer.
        Read();
    }

    /// <summary>Whether the underlying reader was created over the last block of data from the stream.</summary>
    internal bool IsFinalBlock => _reader.IsFinalBlock;

    /// <summary>The type of the token the reader is currently positioned on.</summary>
    internal JsonTokenType TokenType => _reader.TokenType;

    internal bool ValueTextEquals(ReadOnlySpan<byte> utf8Text) => _reader.ValueTextEquals(utf8Text);

    internal bool TryGetInt32(out int value) => _reader.TryGetInt32(out value);

    internal string GetString() => _reader.GetString();

    internal bool GetBoolean() => _reader.GetBoolean();

    internal int GetInt32() => _reader.GetInt32();

    /// <summary>
    /// Advances to the next token, pulling more bytes from the stream whenever the buffered data
    /// does not contain a complete token.
    /// </summary>
    /// <returns><c>true</c> if a token was read; <c>false</c> if the final block has no further token.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal bool Read()
    {
        ThrowExceptionIfDisposed();

        bool wasRead;
        while (!(wasRead = _reader.Read()) && !_reader.IsFinalBlock)
        {
            GetMoreBytesFromStream();
        }
        return wasRead;
    }

    /// <summary>
    /// Skips the children of the current token, pulling more bytes from the stream until the
    /// whole subtree is available in the buffer.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal void Skip()
    {
        ThrowExceptionIfDisposed();

        bool wasSkipped;
        while (!(wasSkipped = _reader.TrySkip()) && !_reader.IsFinalBlock)
        {
            GetMoreBytesFromStream();
        }
        if (!wasSkipped)
        {
            // Defensive: on the final block let Skip() itself report any problem with the remaining data.
            _reader.Skip();
        }
    }

    /// <summary>
    /// Reads the next token and returns it converted to a string.
    /// </summary>
    /// <returns>The token as a string, or <c>null</c> when no token could be read.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal string ReadNextTokenAsString()
    {
        ThrowExceptionIfDisposed();

        if (Read())
        {
            return _reader.ReadTokenAsString();
        }

        return null;
    }

    /// <summary>
    /// When positioned on the start of an array, reads every element as a string and appends it to
    /// <paramref name="strings"/>, creating a list on the first element if none was supplied.
    /// </summary>
    /// <param name="strings">Optional list to append to.</param>
    /// <returns>
    /// The list that was appended to. This is <paramref name="strings"/> unchanged (possibly <c>null</c>)
    /// when the current token is not an array start or the array is empty.
    /// </returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal IList<string> ReadStringArrayAsIList(IList<string> strings = null)
    {
        // Checked up front, like the sibling methods, so a disposed reader fails before TokenType is inspected.
        ThrowExceptionIfDisposed();

        if (TokenType == JsonTokenType.StartArray)
        {
            while (Read() && TokenType != JsonTokenType.EndArray)
            {
                string value = _reader.ReadTokenAsString();

                strings ??= new List<string>();

                strings.Add(value);
            }
        }
        return strings;
    }

    /// <summary>
    /// Reads the next token, which must be a string, and splits it on spaces and commas.
    /// </summary>
    /// <returns>The non-empty parts of the string, or <c>null</c> when no token could be read.</returns>
    /// <exception cref="JsonException">The next token is not a string.</exception>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal IReadOnlyList<string> ReadDelimitedString()
    {
        ThrowExceptionIfDisposed();

        if (!Read())
        {
            return null;
        }

        if (TokenType != JsonTokenType.String)
        {
            var invalidCastException = new InvalidCastException();
            throw new JsonException(invalidCastException.Message, invalidCastException);
        }

        return GetString().Split(DelimitedStringDelimiters, StringSplitOptions.RemoveEmptyEntries);
    }

    /// <summary>
    /// Reads the next token and returns its value when it is a boolean.
    /// </summary>
    /// <returns>The boolean value, or <c>false</c> when no token was read or the token is not a boolean.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal bool ReadNextTokenAsBoolOrFalse()
    {
        ThrowExceptionIfDisposed();

        if (Read() && (TokenType == JsonTokenType.False || TokenType == JsonTokenType.True))
        {
            return GetBoolean();
        }
        return false;
    }

    /// <summary>
    /// Reads the next token and returns it as a list of strings when it is a single string or an array.
    /// </summary>
    /// <returns>
    /// A one-element list for a string token, the array elements as strings for an array,
    /// or <c>null</c> for any other token or when no token could be read.
    /// </returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal IReadOnlyList<string> ReadNextStringOrArrayOfStringsAsReadOnlyList()
    {
        ThrowExceptionIfDisposed();

        if (Read())
        {
            switch (_reader.TokenType)
            {
                case JsonTokenType.String:
                    return new[] { _reader.GetString() };

                case JsonTokenType.StartArray:
                    return ReadStringArrayAsReadOnlyListFromArrayStart();
            }
        }

        return null;
    }

    /// <summary>
    /// Reads tokens as strings until the end of the array (or the end of the data) is reached.
    /// The reader is expected to already be positioned on the array start.
    /// </summary>
    /// <returns>The elements as strings; an empty list when there are none.</returns>
    /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
    internal IReadOnlyList<string> ReadStringArrayAsReadOnlyListFromArrayStart()
    {
        ThrowExceptionIfDisposed();

        List<string> strings = null;

        while (Read() && _reader.TokenType != JsonTokenType.EndArray)
        {
            string value = _reader.ReadTokenAsString();

            strings ??= new List<string>();

            strings.Add(value);
        }

        return (IReadOnlyList<string>)strings ?? Array.Empty<string>();
    }

    // This function is called when Read() returns false and we're not already in the final block
    private void GetMoreBytesFromStream()
    {
        int consumed = (int)_reader.BytesConsumed;

        if (consumed < _bufferUsed)
        {
            // If the number of bytes consumed by the reader is less than the amount set in the buffer then we have leftover bytes.
            // Slice only up to _bufferUsed: bytes past that point were never read from the stream.
            byte[] oldBuffer = _buffer;
            ReadOnlySpan<byte> leftover = oldBuffer.AsSpan(consumed, _bufferUsed - consumed);
            _bufferUsed = leftover.Length;

            // If the leftover bytes are the same as the buffer size then we are at capacity and need to double the buffer size
            if (leftover.Length == oldBuffer.Length)
            {
                _buffer = _bufferPool.Rent(oldBuffer.Length * 2);
                leftover.CopyTo(_buffer);
                _bufferPool.Return(oldBuffer, true);
            }
            else
            {
                // Move the leftover bytes to the front of the same buffer.
                leftover.CopyTo(_buffer);
            }
        }
        else
        {
            _bufferUsed = 0;
        }

        ReadStreamIntoBuffer(_reader.CurrentState);
    }

    /// <summary>
    /// Loops through the stream and reads it into the buffer until the buffer is full or the stream is empty, creates the Utf8JsonReader.
    /// </summary>
    /// <param name="jsonReaderState">The state the new <see cref="Utf8JsonReader"/> continues from.</param>
    private void ReadStreamIntoBuffer(JsonReaderState jsonReaderState)
    {
        int bytesRead;
        do
        {
            var spaceLeftInBuffer = _buffer.Length - _bufferUsed;
            bytesRead = _stream.Read(_buffer, _bufferUsed, spaceLeftInBuffer);
            _bufferUsed += bytesRead;
        }
        while (bytesRead != 0 && _bufferUsed != _buffer.Length);

        // A read of zero bytes means the stream is exhausted, so this is the final block.
        _reader = new Utf8JsonReader(_buffer.AsSpan(0, _bufferUsed), isFinalBlock: bytesRead == 0, jsonReaderState);
    }

    /// <summary>
    /// Returns the rented buffer to the pool (cleared). Safe to call more than once. The stream is not disposed.
    /// </summary>
    public void Dispose()
    {
        if (!_disposed)
        {
            _disposed = true;
            byte[] toReturn = _buffer;
            _buffer = null!;

            // A default-initialized instance never rented a buffer; Dispose must not throw for it.
            if (toReturn is not null)
            {
                _bufferPool.Return(toReturn, true);
            }
        }
    }

    private void ThrowExceptionIfDisposed()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(Utf8JsonStreamReader));
        }
    }
}
}
13 changes: 13 additions & 0 deletions src/NuGet.Core/NuGet.ProjectModel/Utf8JsonStreamReaderConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
namespace NuGet.ProjectModel
{
/// <summary>
/// An abstract class that defines a function for reading a <see cref="Utf8JsonStreamReader"/> into a <typeparamref name="T"/>
/// </summary>
/// <typeparam name="T">The type of the value produced by <see cref="Read(ref Utf8JsonStreamReader)"/>.</typeparam>
internal abstract class Utf8JsonStreamReaderConverter<T>
{
/// <summary>
/// Reads a <typeparamref name="T"/> from <paramref name="reader"/>.
/// </summary>
/// <param name="reader">The reader to read from; passed by reference, so the caller observes any advancement made by the implementation.</param>
/// <returns>The value read from <paramref name="reader"/>.</returns>
public abstract T Read(ref Utf8JsonStreamReader reader);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ public void LockFileFormat_ReadsLockFileWithNoTools()

var target = lockFile.Targets.Single();
Assert.Equal(NuGetFramework.Parse("dotnet"), target.TargetFramework);

var runtimeTargetLibrary = target.Libraries.Single();
Assert.Equal("System.Runtime", runtimeTargetLibrary.Name);
Assert.Equal(NuGetVersion.Parse("4.0.20-beta-22927"), runtimeTargetLibrary.Version);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System.Text;
using System.Text.Json;
using Xunit;

namespace NuGet.ProjectModel.Test
{
[UseCulture("")] // Fix tests failing on systems with non-English locales
public class Utf8JsonReaderExtensionsTests
{
    /// <summary>
    /// Verifies that each supported JSON value token is converted to the expected string
    /// (or <c>null</c> for a JSON null).
    /// </summary>
    [Theory]
    [InlineData("null", null)]
    [InlineData("true", "True")]
    [InlineData("false", "False")]
    [InlineData("-2", "-2")]
    [InlineData("9223372036854775807", "9223372036854775807")]
    [InlineData("3.14", "3.14")]
    [InlineData("\"b\"", "b")]
    public void ReadTokenAsString_WhenValueIsConvertibleToString_ReturnsValueAsString(
        string value,
        string expectedResult)
    {
        // Arrange: embed the raw JSON value as the value of property "a".
        byte[] utf8Json = Encoding.UTF8.GetBytes("{\"a\":" + value + "}");
        var reader = new Utf8JsonReader(utf8Json);

        // Three reads: object start, property name, then the value token under test.
        const int ReadsToReachValue = 3;
        for (int readCount = 0; readCount < ReadsToReachValue; readCount++)
        {
            reader.Read();
        }

        // Act
        string actualResult = reader.ReadTokenAsString();

        // Assert
        Assert.Equal(expectedResult, actualResult);
    }
}
}
Loading

0 comments on commit 22f3566

Please sign in to comment.