-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
StreamReader sync/async optimizations: ArrayPool, IndexOfAny, ValueStringBuilder #62552
Changes from 16 commits
ccbec0f
d340760
e2e318f
7ac2d06
ff0ee6d
6f2af2d
03fae09
75cfdc6
04860a6
7d338a1
b8a91ff
d490b85
576e953
ba913a9
77a5827
4d46850
6d81c3b
dc93cbe
7526cad
ec33696
8060dc2
554ca18
c6d3eea
6325e8e
9922cf8
cc41cb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Buffers; | ||
using System.Collections.Generic; | ||
using System.Diagnostics; | ||
using System.Diagnostics.CodeAnalysis; | ||
using System.Text; | ||
|
@@ -413,10 +415,10 @@ public override string ReadToEnd() | |
CheckAsyncTaskInProgress(); | ||
|
||
// Call ReadBuffer, then pull data out of charBuffer. | ||
StringBuilder sb = new StringBuilder(_charLen - _charPos); | ||
using ValueStringBuilder sb = new(_charLen - _charPos); | ||
do | ||
{ | ||
sb.Append(_charBuffer, _charPos, _charLen - _charPos); | ||
sb.Append(_charBuffer.AsSpan(_charPos, _charLen - _charPos)); | ||
_charPos = _charLen; // Note we consumed these characters | ||
ReadBuffer(); | ||
} while (_charLen > 0); | ||
|
@@ -786,43 +788,39 @@ private int ReadBuffer(Span<char> userBuffer, out bool readToUserBuffer) | |
} | ||
} | ||
|
||
StringBuilder? sb = null; | ||
using ValueStringBuilder sb = new(stackalloc char[256]); | ||
danmoseley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
do | ||
{ | ||
int i = _charPos; | ||
do | ||
// Note the following common line feed chars: | ||
// \n - UNIX \r\n - DOS \r - Mac | ||
danmoseley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
int i = _charBuffer.AsSpan(_charPos, _charLen - _charPos).IndexOfAny('\r', '\n'); | ||
if (i >= 0) | ||
{ | ||
char ch = _charBuffer[i]; | ||
// Note the following common line feed chars: | ||
// \n - UNIX \r\n - DOS \r - Mac | ||
if (ch == '\r' || ch == '\n') | ||
string s; | ||
if (sb.Length > 0) | ||
{ | ||
string s; | ||
if (sb != null) | ||
{ | ||
sb.Append(_charBuffer, _charPos, i - _charPos); | ||
s = sb.ToString(); | ||
} | ||
else | ||
{ | ||
s = new string(_charBuffer, _charPos, i - _charPos); | ||
} | ||
_charPos = i + 1; | ||
if (ch == '\r' && (_charPos < _charLen || ReadBuffer() > 0)) | ||
sb.Append(_charBuffer.AsSpan(_charPos, i)); | ||
s = sb.ToString(); | ||
} | ||
else | ||
{ | ||
s = new string(_charBuffer, _charPos, i); | ||
} | ||
char ch = _charBuffer[_charPos + i]; | ||
_charPos += i + 1; | ||
if (ch == '\r' && (_charPos < _charLen || ReadBuffer() > 0)) | ||
{ | ||
if (_charBuffer[_charPos] == '\n') | ||
{ | ||
if (_charBuffer[_charPos] == '\n') | ||
{ | ||
_charPos++; | ||
} | ||
_charPos++; | ||
} | ||
return s; | ||
} | ||
i++; | ||
} while (i < _charLen); | ||
return s; | ||
} | ||
|
||
i = _charLen - _charPos; | ||
sb ??= new StringBuilder(i + 80); | ||
sb.Append(_charBuffer, _charPos, i); | ||
|
||
sb.Append(_charBuffer.AsSpan(_charPos, i)); | ||
} while (ReadBuffer() > 0); | ||
return sb.ToString(); | ||
} | ||
|
@@ -879,63 +877,89 @@ private int ReadBuffer(Span<char> userBuffer, out bool readToUserBuffer) | |
|
||
private async Task<string?> ReadLineAsyncInternal(CancellationToken cancellationToken) | ||
{ | ||
if (_charPos == _charLen && (await ReadBufferAsync(cancellationToken).ConfigureAwait(false)) == 0) | ||
static char[] ResizeOrPoolNewArray(char[]? array, int atLeastSpace) | ||
{ | ||
return null; | ||
if (array == null) return ArrayPool<char>.Shared.Rent(atLeastSpace); | ||
danmoseley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
char[] newArr = ArrayPool<char>.Shared.Rent(array.Length + atLeastSpace); | ||
try | ||
{ | ||
Array.Copy(array, newArr, array.Length); | ||
} | ||
catch | ||
{ | ||
ArrayPool<char>.Shared.Return(newArr); | ||
throw; | ||
} | ||
|
||
ArrayPool<char>.Shared.Return(array); | ||
danmoseley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return newArr; | ||
} | ||
static void Append(ref char[]? to, int destinationOffset, char[] @from, int offset, int length) | ||
{ | ||
if (to == null || (destinationOffset + length) > to.Length) | ||
{ | ||
to = ResizeOrPoolNewArray(to, destinationOffset + length + 80); | ||
} | ||
|
||
StringBuilder? sb = null; | ||
Array.Copy(@from, offset, to, destinationOffset, length); | ||
} | ||
|
||
do | ||
if (_charPos == _charLen && (await ReadBufferAsync(cancellationToken).ConfigureAwait(false)) == 0) | ||
{ | ||
char[] tmpCharBuffer = _charBuffer; | ||
int tmpCharLen = _charLen; | ||
int tmpCharPos = _charPos; | ||
int i = tmpCharPos; | ||
return null; | ||
} | ||
|
||
char[]? rentedArray = null; | ||
int lastWrittenIndex = 0; | ||
try | ||
{ | ||
do | ||
{ | ||
char ch = tmpCharBuffer[i]; | ||
|
||
// Note the following common line feed chars: | ||
// \n - UNIX \r\n - DOS \r - Mac | ||
if (ch == '\r' || ch == '\n') | ||
int i = _charBuffer.AsSpan(_charPos, _charLen - _charPos).IndexOfAny('\r', '\n'); | ||
danmoseley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (i >= 0) | ||
{ | ||
string s; | ||
|
||
if (sb != null) | ||
string? s; | ||
if (rentedArray != null) | ||
{ | ||
sb.Append(tmpCharBuffer, tmpCharPos, i - tmpCharPos); | ||
s = sb.ToString(); | ||
Append(ref rentedArray, lastWrittenIndex, _charBuffer, _charPos, i); | ||
lastWrittenIndex += i; | ||
s = new string(rentedArray!, 0, lastWrittenIndex); | ||
} | ||
else | ||
{ | ||
s = new string(tmpCharBuffer, tmpCharPos, i - tmpCharPos); | ||
s = new string(_charBuffer, _charPos, i); | ||
} | ||
|
||
_charPos = tmpCharPos = i + 1; | ||
|
||
if (ch == '\r' && (tmpCharPos < tmpCharLen || (await ReadBufferAsync(cancellationToken).ConfigureAwait(false)) > 0)) | ||
char ch = _charBuffer[_charPos + i]; | ||
_charPos += i + 1; | ||
if (ch == '\r' && (_charPos < _charLen || (await ReadBufferAsync(cancellationToken).ConfigureAwait(false)) > 0)) | ||
{ | ||
tmpCharPos = _charPos; | ||
if (_charBuffer[tmpCharPos] == '\n') | ||
if (_charBuffer[_charPos] == '\n') | ||
{ | ||
_charPos = ++tmpCharPos; | ||
_charPos++; | ||
} | ||
} | ||
|
||
return s; | ||
} | ||
|
||
i++; | ||
} while (i < tmpCharLen); | ||
i = _charLen - _charPos; | ||
if (i < 0) break; // a hack for System.Globalization.Tests (to check on CI if it fixes the problem) | ||
danmoseley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
i = tmpCharLen - tmpCharPos; | ||
sb ??= new StringBuilder(i + 80); | ||
sb.Append(tmpCharBuffer, tmpCharPos, i); | ||
} while (await ReadBufferAsync(cancellationToken).ConfigureAwait(false) > 0); | ||
|
||
return sb.ToString(); | ||
Append(ref rentedArray, lastWrittenIndex, _charBuffer, _charPos, i); | ||
lastWrittenIndex += i; | ||
} while (await ReadBufferAsync(cancellationToken).ConfigureAwait(false) > 0); | ||
return new string(rentedArray!, 0, lastWrittenIndex); | ||
} | ||
finally | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As a general rule, I don't believe we generally bother with a try/finally simply to return a pooled array. Hopefully exceptions are exceptional, and in this case failing to return an array is a small issue. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If something goes wrong then we wouldn’t be able to return to the shared pool. I don’t know but it seems like an issue to me, that’s why have a try/finally block. What do you suggest? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're correct, but if something goes wrong then we have a bigger problem than wasting an allocation. And so we prefer the simpler code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we really have a problem if a cancellation token triggered on an IO operation (ReadBufferAsync)? That's like a valid case where we wouldn't return to the shared pool (if we omit the finally block) but continue with the normal execution. I feel like I don't understand the suggestion or what you mean, sorry :( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The situation we're trying to avoid is something holding on to the buffer after the exception is thrown and returning into the pool prematurely. Lets say for example ReadBufferAsync failed in a way where the buffer was still being used by a background thread (a rogue Task.Run). This finally would run, return to the pool, then another piece of code would rent it and it would still be in used by the rogue Task.Run. To count this out, we don't bother returning to the pool in exceptional cases. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now I get it. But still There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My point is that it makes the code cleaner to not have the try/ finally. ArrayPool is all about stats, we’re assuming there will be far far more successes here than exceptions. |
||
{ | ||
if (rentedArray != null) | ||
{ | ||
ArrayPool<char>.Shared.Return(rentedArray); | ||
} | ||
} | ||
} | ||
|
||
public override Task<string> ReadToEndAsync() => ReadToEndAsync(default); | ||
|
@@ -985,17 +1009,50 @@ public override Task<string> ReadToEndAsync(CancellationToken cancellationToken) | |
|
||
private async Task<string> ReadToEndAsyncInternal(CancellationToken cancellationToken) | ||
{ | ||
// Call ReadBuffer, then pull data out of charBuffer. | ||
StringBuilder sb = new StringBuilder(_charLen - _charPos); | ||
danmoseley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
do | ||
static char[] Resize(char[] array, int atLeastSpace) | ||
{ | ||
int tmpCharPos = _charPos; | ||
sb.Append(_charBuffer, tmpCharPos, _charLen - tmpCharPos); | ||
_charPos = _charLen; // We consumed these characters | ||
await ReadBufferAsync(cancellationToken).ConfigureAwait(false); | ||
} while (_charLen > 0); | ||
char[] newArr = ArrayPool<char>.Shared.Rent(array.Length + atLeastSpace); | ||
try | ||
{ | ||
Array.Copy(array, newArr, array.Length); | ||
danmoseley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
catch | ||
{ | ||
ArrayPool<char>.Shared.Return(newArr); | ||
throw; | ||
} | ||
ArrayPool<char>.Shared.Return(array); | ||
return newArr; | ||
} | ||
static void Append(ref char[] to, int destinationOffset, char[] @from, int offset, int length) | ||
{ | ||
if ((destinationOffset + length) >= to.Length) | ||
{ | ||
to = Resize(to, destinationOffset + length + 80); | ||
} | ||
|
||
return sb.ToString(); | ||
Array.Copy(@from, offset, to, destinationOffset, length); | ||
} | ||
|
||
char[] rentedArray = ArrayPool<char>.Shared.Rent(_charLen - _charPos); | ||
int lastWrittenIndex = 0; | ||
try | ||
{ | ||
do | ||
{ | ||
// int tmpCharPos = _charPos; | ||
Append(ref rentedArray, lastWrittenIndex, _charBuffer, _charPos, _charLen - _charPos); | ||
lastWrittenIndex += _charLen - _charPos; | ||
_charPos = _charLen; // We consumed these characters | ||
await ReadBufferAsync(cancellationToken).ConfigureAwait(false); | ||
} while (_charLen > 0); | ||
|
||
return new string(rentedArray, 0, lastWrittenIndex); | ||
} | ||
finally | ||
{ | ||
ArrayPool<char>.Shared.Return(rentedArray); | ||
} | ||
} | ||
|
||
public override Task<int> ReadAsync(char[] buffer!!, int index, int count) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure we should be using ValueStringBuilder here. For ReadLine, I think it makes sense because it likely lines are relatively short. But the whole file could be very large, and with ValueStringBuilder and ArrayPool, that will require one large contiguous region, whereas StringBuilder will end up being chunked into essentially a linked list of buffers.
It might make sense to start with some stackalloc'd space and then graduate to a StringBuilder, or something like that. Might be best to revert the changes to ReadToEnd and keep this PR focused on ReadLine{Async}.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've run the benchmarks and it looks better with what we have in this PR (Might be the wrong benchmarks). With stackallock + going to stringbuilder it might be even better -> dotnet/performance#2177
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Trapov per your comment above -- did you compare stackalloc+StringBuilder vs what you have here? And now you have these great perf tests 😄 .
Instructions for running against local bits here but at this point you are already familiar.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@danmoseley you suggest running the tests again to see if something changed? I know we changed them (the perf tests) a little but it’s not that much. I can run them again but I feel like nothing would change
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ran the benchs. It's the last message in the PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Trapov sorry if I wasn't clear. I'm asking what the benchmark results are for stackalloc+StringBuilder (what @stephentoub suggested) vs the current code in main.
I also agree with him that I suggest you limit this PR to just ReadLine()/ReadLineAsync() and stackalloc+StringBuilder. For lines under ~128 characters the benchmarks should be a net win. if and when that is merged, we could look at the rest separately as it will need more thought and testing.
As you can probably guess, any changes to StreamReader need significant thought and testing of alternatives, as it's a fundamental type and performance sensitive.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I will remove ReadToEnd{Async} changes from this PR.