-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BinaryWriter perf and memory improvements #47316
Merged
GrabYourPitchforks
merged 8 commits into
dotnet:master
from
GrabYourPitchforks:binarywriter
Jan 29, 2021
+478
−163
Merged
Changes from 7 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
e5e9d20
First pass at BinaryWriter perf improvements
GrabYourPitchforks 02285ba
Quick cleanup + add unit tests
GrabYourPitchforks 05da1a2
Merge remote-tracking branch 'origin/master' into binarywriter
GrabYourPitchforks 2584f97
Further perf improvements
GrabYourPitchforks 97250f2
Merge remote-tracking branch 'origin/master' into binarywriter
GrabYourPitchforks e1d9cdb
PR feedback
GrabYourPitchforks 20f5326
Remove unused methods
GrabYourPitchforks 915e54a
Skip mem-heavy BinaryWriter test on Android
GrabYourPitchforks File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
44 changes: 44 additions & 0 deletions
44
src/libraries/Common/tests/TestUtilities/System/Runtime/InteropServices/SafeBufferUtil.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Runtime.CompilerServices; | ||
|
||
namespace System.Runtime.InteropServices | ||
{ | ||
public static class SafeBufferUtil | ||
{ | ||
/// <summary> | ||
/// Creates an unmanaged buffer of the specified length. | ||
/// </summary> | ||
public static SafeBuffer CreateSafeBuffer(nuint byteLength) | ||
{ | ||
return new AllocHGlobalSafeHandle(byteLength); | ||
} | ||
|
||
private sealed class AllocHGlobalSafeHandle : SafeBuffer | ||
{ | ||
public AllocHGlobalSafeHandle(nuint cb) : base(ownsHandle: true) | ||
{ | ||
#if !NETCOREAPP | ||
RuntimeHelpers.PrepareConstrainedRegions(); | ||
#endif | ||
try | ||
{ | ||
// intentionally empty to avoid ThreadAbortException in netfx runtimes | ||
} | ||
finally | ||
{ | ||
SetHandle(Marshal.AllocHGlobal((nint)cb)); | ||
} | ||
|
||
Initialize(cb); | ||
} | ||
|
||
protected override bool ReleaseHandle() | ||
{ | ||
Marshal.FreeHGlobal(handle); | ||
return true; | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
251 changes: 251 additions & 0 deletions
251
src/libraries/System.IO/tests/BinaryWriter/BinaryWriter.EncodingTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,251 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Numerics; | ||
using System.Reflection; | ||
using System.Runtime.InteropServices; | ||
using System.Text; | ||
using Xunit; | ||
|
||
namespace System.IO.Tests | ||
{ | ||
public class BinaryWriter_EncodingTests | ||
{ | ||
[Fact] | ||
public void Ctor_Default_UsesFastUtf8() | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream()); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_EncodingUtf8Singleton_UsesFastUtf8() | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), Encoding.UTF8); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Theory] | ||
[InlineData(true, true)] | ||
[InlineData(true, false)] | ||
[InlineData(false, true)] | ||
[InlineData(false, false)] | ||
public void Ctor_NewUtf8Encoding_UsesFastUtf8(bool emitIdentifier, bool throwOnInvalidBytes) | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UTF8Encoding(emitIdentifier, throwOnInvalidBytes)); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_Utf8EncodingWithSingleCharReplacementChar_UsesFastUtf8() | ||
{ | ||
Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("x"), DecoderFallback.ExceptionFallback); | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_Utf8EncodingWithMultiCharReplacementChar_DoesNotUseFastUtf8() | ||
{ | ||
Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("xx"), DecoderFallback.ExceptionFallback); | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding); | ||
Assert.False(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_NotUtf8EncodingType_DoesNotUseFastUtf8() | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UnicodeEncoding()); | ||
Assert.False(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_Utf8EncodingDerivedTypeWithWrongCodePage_DoesNotUseFastUtf8() | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), new NotActuallyUTF8Encoding()); | ||
Assert.False(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_Utf8EncodingDerivedTypeWithCorrectCodePage_DoesNotUseFastUtf8() | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), new MyCustomUTF8Encoding()); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Theory] | ||
[InlineData('x')] // 1 UTF-8 byte | ||
[InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes) | ||
[InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes) | ||
public void WriteSingleChar_FastUtf8(char ch) | ||
{ | ||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream); | ||
|
||
writer.Write(ch); | ||
|
||
Assert.Equal(Encoding.UTF8.GetBytes(new char[] { ch }), stream.ToArray()); | ||
} | ||
|
||
[Theory] | ||
[InlineData('x')] // 1 UTF-8 byte | ||
[InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes) | ||
[InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes) | ||
public void WriteSingleChar_NotUtf8NoArrayPoolRentalNeeded(char ch) | ||
{ | ||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */); | ||
|
||
writer.Write(ch); | ||
|
||
Assert.Equal(Encoding.Unicode.GetBytes(new char[] { ch }), stream.ToArray()); | ||
} | ||
|
||
[Fact] | ||
public void WriteSingleChar_ArrayPoolRentalNeeded() | ||
{ | ||
string replacementString = new string('v', 10_000); | ||
Encoding encoding = Encoding.GetEncoding("ascii", new EncoderReplacementFallback(replacementString), DecoderFallback.ExceptionFallback); | ||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream, encoding); | ||
|
||
writer.Write('\uFFFD'); // not ASCII | ||
|
||
Assert.Equal(Encoding.ASCII.GetBytes(replacementString), stream.ToArray()); | ||
} | ||
|
||
[Theory] | ||
[InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary | ||
[InlineData(32 * 1024)] // char count within 64k rental boundary, byte count not | ||
[InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary | ||
public void WriteChars_FastUtf8(int stringLengthInChars) | ||
{ | ||
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars); | ||
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite); | ||
|
||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream); | ||
|
||
writer.Write(stringToWrite.ToCharArray()); // writing a char buffer doesn't emit the length upfront | ||
Assert.Equal(expectedBytes, stream.GetBuffer()[..expectedBytes.Length]); | ||
} | ||
|
||
[Theory] | ||
[InlineData(24)] // within stackalloc path | ||
[InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary | ||
[InlineData(32 * 1024)] // char count within 64k rental boundary, byte count not | ||
[InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary | ||
public void WriteString_FastUtf8(int stringLengthInChars) | ||
{ | ||
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars); | ||
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite); | ||
|
||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream); | ||
|
||
writer.Write(stringToWrite); | ||
stream.Position = 0; | ||
|
||
Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt()); | ||
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]); | ||
} | ||
|
||
[Theory] | ||
[InlineData(127 / 3)] // within stackalloc fast path | ||
[InlineData(127 / 3 + 1)] // not within stackalloc fast path | ||
public void WriteString_FastUtf8_UsingThreeByteChars(int stringLengthInChars) | ||
{ | ||
string stringToWrite = new string('\u2023', stringLengthInChars); // TRIANGULAR BULLET | ||
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite); | ||
|
||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream); | ||
|
||
writer.Write(stringToWrite); | ||
stream.Position = 0; | ||
|
||
Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt()); | ||
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]); | ||
} | ||
|
||
[Theory] | ||
[InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary | ||
[InlineData(48 * 1024)] // char count within 64k rental boundary, byte count not | ||
[InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary | ||
public void WriteString_NotUtf8(int stringLengthInChars) | ||
{ | ||
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars); | ||
byte[] expectedBytes = Encoding.Unicode.GetBytes(stringToWrite); | ||
|
||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */); | ||
|
||
writer.Write(stringToWrite); | ||
stream.Position = 0; | ||
|
||
Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt()); | ||
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]); | ||
} | ||
|
||
[Fact] | ||
public unsafe void WriteChars_VeryLargeArray_DoesNotOverflow() | ||
{ | ||
const nuint INPUT_LEN_IN_CHARS = 1_500_000_000; | ||
const nuint OUTPUT_LEN_IN_BYTES = 3_500_000_000; // overallocate | ||
|
||
SafeBuffer unmanagedInputBuffer; | ||
SafeBuffer unmanagedOutputBufer; | ||
try | ||
{ | ||
unmanagedInputBuffer = SafeBufferUtil.CreateSafeBuffer(INPUT_LEN_IN_CHARS * sizeof(char)); | ||
unmanagedOutputBufer = SafeBufferUtil.CreateSafeBuffer(OUTPUT_LEN_IN_BYTES * sizeof(byte)); | ||
} | ||
catch (OutOfMemoryException) | ||
{ | ||
return; // skip test in low-mem conditions | ||
} | ||
|
||
using (unmanagedInputBuffer) | ||
using (unmanagedOutputBufer) | ||
{ | ||
Span<char> inputSpan = new Span<char>((char*)unmanagedInputBuffer.DangerousGetHandle(), (int)INPUT_LEN_IN_CHARS); | ||
inputSpan.Fill('\u0224'); // LATIN CAPITAL LETTER Z WITH HOOK | ||
Stream outStream = new UnmanagedMemoryStream(unmanagedOutputBufer, 0, (long)unmanagedOutputBufer.ByteLength, FileAccess.ReadWrite); | ||
BinaryWriter writer = new BinaryWriter(outStream); | ||
|
||
writer.Write(inputSpan); // will write 3 billion bytes to the output | ||
|
||
Assert.Equal(3_000_000_000, outStream.Position); | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thank you for writing all the tests! and especially covering this particular edge case! 👍 |
||
|
||
private static bool IsUsingFastUtf8(BinaryWriter writer) | ||
{ | ||
return (bool)writer.GetType().GetField("_useFastUtf8", BindingFlags.NonPublic | BindingFlags.Instance).GetValue(writer); | ||
} | ||
|
||
private static string GenerateLargeUnicodeString(int charCount) | ||
{ | ||
return string.Create(charCount, (object)null, static (buffer, _) => | ||
{ | ||
for (int i = 0; i < buffer.Length; i++) | ||
{ | ||
buffer[i] = (char)((i % 0xF00) + 0x100); // U+0100..U+0FFF (mix of 2-byte and 3-byte chars) | ||
} | ||
}); | ||
} | ||
|
||
private static int Get7BitEncodedIntByteLength(uint value) => (BitOperations.Log2(value) / 7) + 1; | ||
|
||
// subclasses UTF8Encoding, but returns a non-UTF8 code page | ||
private class NotActuallyUTF8Encoding : UTF8Encoding | ||
{ | ||
public override int CodePage => 65000; // UTF-7 code page | ||
} | ||
|
||
// subclasses UTF8Encoding, returns UTF-8 code page | ||
private class MyCustomUTF8Encoding : UTF8Encoding | ||
{ | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is just a test file... is this really necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. But everything is non-shipping code until it gets copied & pasted into a shipping product. :)