Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce allocations in BinaryReader. #80331

Merged
merged 18 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1732,9 +1732,6 @@
<data name="ArgumentOutOfRange_BiggerThanCollection" xml:space="preserve">
<value>Larger than collection size.</value>
</data>
<data name="ArgumentOutOfRange_BinaryReaderFillBuffer" xml:space="preserve">
<value>The number of bytes requested does not fit into BinaryReader's internal buffer.</value>
</data>
<data name="ArgumentOutOfRange_Bounds_Lower_Upper" xml:space="preserve">
<value>Argument must be between {0} and {1}.</value>
</data>
Expand Down
169 changes: 75 additions & 94 deletions src/libraries/System.Private.CoreLib/src/System/IO/BinaryReader.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Buffers.Binary;
using System.Diagnostics;
using System.Runtime.CompilerServices;
Expand All @@ -16,9 +17,8 @@ public class BinaryReader : IDisposable
private const int MaxCharBytesSize = 128;

private readonly Stream _stream;
private readonly byte[] _buffer;
private readonly Decoder _decoder;
private byte[]? _charBytes;
private readonly Encoding _encoding;
private Decoder? _decoder;
private char[]? _charBuffer;
private readonly int _maxCharsSize; // From MaxCharBytesSize & Encoding

Expand Down Expand Up @@ -47,26 +47,16 @@ public BinaryReader(Stream input, Encoding encoding, bool leaveOpen)
}

_stream = input;
_decoder = encoding.GetDecoder();
_encoding = encoding;
_maxCharsSize = encoding.GetMaxCharCount(MaxCharBytesSize);
int minBufferSize = encoding.GetMaxByteCount(1); // max bytes per one char
if (minBufferSize < 16)
{
minBufferSize = 16;
}

_buffer = new byte[minBufferSize];
// _charBuffer and _charBytes will be left null.

// For Encodings that always use 2 bytes per char (or more),
// special case them here to make Read() & Peek() faster.
_2BytesPerChar = encoding is UnicodeEncoding;
// check if BinaryReader is based on MemoryStream, and keep this for it's life
// we cannot use "as" operator, since derived classes are not allowed
_isMemoryStream = (_stream.GetType() == typeof(MemoryStream));
_isMemoryStream = _stream.GetType() == typeof(MemoryStream);
_leaveOpen = leaveOpen;

Debug.Assert(_decoder != null, "[BinaryReader.ctor]_decoder!=null");
}

public virtual Stream BaseStream => _stream;
Expand Down Expand Up @@ -132,7 +122,8 @@ public virtual int Read()
posSav = _stream.Position;
}

_charBytes ??= new byte[MaxCharBytesSize];
_decoder ??= _encoding.GetDecoder();
Span<byte> charBytes = stackalloc byte[MaxCharBytesSize];

char singleChar = '\0';

Expand All @@ -145,15 +136,15 @@ public virtual int Read()
numBytes = _2BytesPerChar ? 2 : 1;

int r = _stream.ReadByte();
_charBytes[0] = (byte)r;
charBytes[0] = (byte)r;
if (r == -1)
{
numBytes = 0;
}
if (numBytes == 2)
{
r = _stream.ReadByte();
_charBytes[1] = (byte)r;
charBytes[1] = (byte)r;
if (r == -1)
{
numBytes = 1;
Expand All @@ -165,11 +156,11 @@ public virtual int Read()
return -1;
}

Debug.Assert(numBytes == 1 || numBytes == 2, "BinaryReader::ReadOneChar assumes it's reading one or 2 bytes only.");
Debug.Assert(numBytes is 1 or 2, "BinaryReader::ReadOneChar assumes it's reading one or two bytes only.");

try
{
charsRead = _decoder.GetChars(new ReadOnlySpan<byte>(_charBytes, 0, numBytes), new Span<char>(ref singleChar), flush: false);
charsRead = _decoder.GetChars(charBytes[..numBytes], new Span<char>(ref singleChar), flush: false);
}
catch
{
Expand Down Expand Up @@ -220,24 +211,24 @@ public virtual char ReadChar()
return (char)value;
}

public virtual short ReadInt16() => BinaryPrimitives.ReadInt16LittleEndian(InternalRead(2));
public virtual short ReadInt16() => BinaryPrimitives.ReadInt16LittleEndian(InternalRead(stackalloc byte[sizeof(short)]));

[CLSCompliant(false)]
public virtual ushort ReadUInt16() => BinaryPrimitives.ReadUInt16LittleEndian(InternalRead(2));
public virtual ushort ReadUInt16() => BinaryPrimitives.ReadUInt16LittleEndian(InternalRead(stackalloc byte[sizeof(ushort)]));

public virtual int ReadInt32() => BinaryPrimitives.ReadInt32LittleEndian(InternalRead(4));
public virtual int ReadInt32() => BinaryPrimitives.ReadInt32LittleEndian(InternalRead(stackalloc byte[sizeof(int)]));
[CLSCompliant(false)]
public virtual uint ReadUInt32() => BinaryPrimitives.ReadUInt32LittleEndian(InternalRead(4));
public virtual long ReadInt64() => BinaryPrimitives.ReadInt64LittleEndian(InternalRead(8));
public virtual uint ReadUInt32() => BinaryPrimitives.ReadUInt32LittleEndian(InternalRead(stackalloc byte[sizeof(uint)]));
public virtual long ReadInt64() => BinaryPrimitives.ReadInt64LittleEndian(InternalRead(stackalloc byte[sizeof(long)]));
[CLSCompliant(false)]
public virtual ulong ReadUInt64() => BinaryPrimitives.ReadUInt64LittleEndian(InternalRead(8));
public virtual Half ReadHalf() => BinaryPrimitives.ReadHalfLittleEndian(InternalRead(2));
public virtual float ReadSingle() => BinaryPrimitives.ReadSingleLittleEndian(InternalRead(4));
public virtual double ReadDouble() => BinaryPrimitives.ReadDoubleLittleEndian(InternalRead(8));
public virtual ulong ReadUInt64() => BinaryPrimitives.ReadUInt64LittleEndian(InternalRead(stackalloc byte[sizeof(ulong)]));
public virtual unsafe Half ReadHalf() => BinaryPrimitives.ReadHalfLittleEndian(InternalRead(stackalloc byte[sizeof(Half)]));
public virtual unsafe float ReadSingle() => BinaryPrimitives.ReadSingleLittleEndian(InternalRead(stackalloc byte[sizeof(float)]));
public virtual unsafe double ReadDouble() => BinaryPrimitives.ReadDoubleLittleEndian(InternalRead(stackalloc byte[sizeof(double)]));

public virtual decimal ReadDecimal()
{
ReadOnlySpan<byte> span = InternalRead(16);
ReadOnlySpan<byte> span = InternalRead(stackalloc byte[sizeof(decimal)]);
try
{
return decimal.ToDecimal(span);
Expand All @@ -253,14 +244,8 @@ public virtual string ReadString()
{
ThrowIfDisposed();

int currPos = 0;
int n;
int stringLength;
int readLength;
int charsRead;

// Length of the string in bytes, not chars
stringLength = Read7BitEncodedInt();
int stringLength = Read7BitEncodedInt();
if (stringLength < 0)
{
throw new IOException(SR.Format(SR.IO_InvalidStringLen_Len, stringLength));
Expand All @@ -271,27 +256,30 @@ public virtual string ReadString()
return string.Empty;
}

_charBytes ??= new byte[MaxCharBytesSize];
_charBuffer ??= new char[_maxCharsSize];
Span<byte> charBytes = stackalloc byte[MaxCharBytesSize];

int currPos = 0;
StringBuilder? sb = null;
do
{
readLength = ((stringLength - currPos) > MaxCharBytesSize) ? MaxCharBytesSize : (stringLength - currPos);
int readLength = Math.Min(MaxCharBytesSize, stringLength - currPos);

n = _stream.Read(_charBytes, 0, readLength);
int n = _stream.Read(charBytes[..readLength]);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
}

charsRead = _decoder.GetChars(_charBytes, 0, n, _charBuffer, 0);

if (currPos == 0 && n == stringLength)
{
return new string(_charBuffer, 0, charsRead);
return _encoding.GetString(charBytes[..n]);
teo-tsirpanis marked this conversation as resolved.
Show resolved Hide resolved
}

_decoder ??= _encoding.GetDecoder();
_charBuffer ??= new char[_maxCharsSize];

int charsRead = _decoder.GetChars(charBytes[..n], _charBuffer, flush: false);

// Since we could be reading from an untrusted data source, limit the initial size of the
// StringBuilder instance we're about to get or create. It'll expand automatically as needed.

Expand Down Expand Up @@ -328,7 +316,10 @@ private int InternalReadChars(Span<char> buffer)
{
Debug.Assert(!_disposed);

_decoder ??= _encoding.GetDecoder();

int totalCharsRead = 0;
Span<byte> charBytes = stackalloc byte[MaxCharBytesSize];

while (!buffer.IsEmpty)
{
Expand All @@ -349,10 +340,9 @@ private int InternalReadChars(Span<char> buffer)
// a custom replacement sequence may violate this assumption.
if (numBytes > 1)
{
DecoderNLS? decoder = _decoder as DecoderNLS;
// For internal decoders, we can check whether the decoder has any pending state.
// For custom decoders, assume that the decoder has pending state.
if (decoder == null || decoder.HasState)
if (_decoder is not DecoderNLS decoder || decoder.HasState)
{
numBytes--;

Expand All @@ -363,27 +353,25 @@ private int InternalReadChars(Span<char> buffer)
}
}

ReadOnlySpan<byte> byteBuffer;
scoped ReadOnlySpan<byte> byteBuffer;
if (_isMemoryStream)
{
Debug.Assert(_stream is MemoryStream);
MemoryStream mStream = (MemoryStream)_stream;
MemoryStream mStream = Unsafe.As<MemoryStream>(_stream);

int position = mStream.InternalGetPosition();
numBytes = mStream.InternalEmulateRead(numBytes);
byteBuffer = new ReadOnlySpan<byte>(mStream.InternalGetBuffer(), position, numBytes);
}
else
{
_charBytes ??= new byte[MaxCharBytesSize];

if (numBytes > MaxCharBytesSize)
{
numBytes = MaxCharBytesSize;
}

numBytes = _stream.Read(_charBytes, 0, numBytes);
byteBuffer = new ReadOnlySpan<byte>(_charBytes, 0, numBytes);
numBytes = _stream.Read(charBytes[..numBytes]);
byteBuffer = charBytes[..numBytes];
}

if (byteBuffer.IsEmpty)
Expand Down Expand Up @@ -416,9 +404,7 @@ public virtual char[] ReadChars(int count)
int n = InternalReadChars(new Span<char>(chars));
if (n != count)
{
char[] copy = new char[n];
Buffer.BlockCopy(chars, 0, copy, 0, 2 * n); // sizeof(char)
chars = copy;
chars = chars[..n];
}

return chars;
Expand Down Expand Up @@ -460,32 +446,30 @@ public virtual byte[] ReadBytes(int count)

if (numRead != result.Length)
{
// Trim array. This should happen on EOF & possibly net streams.
byte[] copy = new byte[numRead];
Buffer.BlockCopy(result, 0, copy, 0, numRead);
result = copy;
// Trim array. This should happen on EOF & possibly net streams.
result = result[..numRead];
}

return result;
}

private ReadOnlySpan<byte> InternalRead(int numBytes)
private ReadOnlySpan<byte> InternalRead(Span<byte> buffer)
{
Debug.Assert(numBytes >= 2 && numBytes <= 16, "value of 1 should use ReadByte. value > 16 requires to change the minimal _buffer size");
Debug.Assert(buffer.Length != 1, "length of 1 should use ReadByte.");

if (_isMemoryStream)
{
// read directly from MemoryStream buffer
Debug.Assert(_stream is MemoryStream);
return ((MemoryStream)_stream).InternalReadSpan(numBytes);
return Unsafe.As<MemoryStream>(_stream).InternalReadSpan(buffer.Length);
}
else
{
ThrowIfDisposed();

_stream.ReadExactly(_buffer.AsSpan(0, numBytes));
_stream.ReadExactly(buffer);

return _buffer;
return buffer;
}
}

Expand All @@ -495,40 +479,37 @@ private ReadOnlySpan<byte> InternalRead(int numBytes)
// reasons. More about the subject in: https://github.com/dotnet/coreclr/pull/22102
protected virtual void FillBuffer(int numBytes)
{
if (numBytes < 0 || numBytes > _buffer.Length)
{
throw new ArgumentOutOfRangeException(nameof(numBytes), SR.ArgumentOutOfRange_BinaryReaderFillBuffer);
teo-tsirpanis marked this conversation as resolved.
Show resolved Hide resolved
}
ArgumentOutOfRangeException.ThrowIfNegative(numBytes);

ThrowIfDisposed();

// Need to find a good threshold for calling ReadByte() repeatedly
// vs. calling Read(byte[], int, int) for both buffered & unbuffered
// streams.
if (numBytes == 1)
{
int n = _stream.ReadByte();
if (n == -1)
{
ThrowHelper.ThrowEndOfFileException();
}

_buffer[0] = (byte)n;
return;
}

if (numBytes > 0)
{
_stream.ReadExactly(_buffer.AsSpan(0, numBytes));
}
else
switch (numBytes)
{
// ReadExactly no-ops for empty buffers, so special case numBytes == 0 to preserve existing behavior.
int n = _stream.Read(_buffer, 0, 0);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
}
case 0:
// ReadExactly no-ops for empty buffers, so special case numBytes == 0 to preserve existing behavior.
int n = _stream.Read(Array.Empty<byte>(), 0, 0);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
}
break;
case 1:
n = _stream.ReadByte();
if (n == -1)
{
ThrowHelper.ThrowEndOfFileException();
}
break;
default:
if (_stream.CanSeek)
{
_stream.Seek(numBytes, SeekOrigin.Current);
return;
}
byte[] buffer = ArrayPool<byte>.Shared.Rent(numBytes);
_stream.ReadExactly(buffer.AsSpan(0, numBytes));
ArrayPool<byte>.Shared.Return(buffer);
break;
teo-tsirpanis marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down