From 4e8339f0d025146a60b4fe8bba8d65dc787405c2 Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Sat, 24 Aug 2019 10:10:00 -0700 Subject: [PATCH] Fix BinaryReader.ReadChars for fragmented Streams (#26324) BinaryReader.ReadChars incorrectly read more than necessary from the underlying Stream when multi-byte characters straddled the read chunks. Fixes https://github.com/dotnet/corefx/issues/40455 --- .../shared/System/IO/BinaryReader.cs | 22 +++++++++++++++++++ .../shared/System/Text/DecoderNLS.cs | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs b/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs index fc2ab5fd0ff7..98e853b6190c 100644 --- a/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs +++ b/src/System.Private.CoreLib/shared/System/IO/BinaryReader.cs @@ -384,6 +384,28 @@ private int InternalReadChars(Span buffer) { numBytes <<= 1; } + + // We do not want to read even a single byte more than necessary. + // + // Subtract pending bytes that the decoder may be holding onto. This assumes that each + // decoded char corresponds to one or more bytes. Note that custom encodings or encodings with + // a custom replacement sequence may violate this assumption. + if (numBytes > 1) + { + DecoderNLS? decoder = _decoder as DecoderNLS; + // For internal decoders, we can check whether the decoder has any pending state. + // For custom decoders, assume that the decoder has pending state. + if (decoder == null || decoder.HasState) + { + numBytes -= 1; + + // The worst case is charsRemaining = 2 and UTF32Decoder holding onto 3 pending bytes. We need to read just + // one byte in this case. + if (_2BytesPerChar && numBytes > 2) + numBytes -= 2; + } + } + if (numBytes > MaxCharBytesSize) { numBytes = MaxCharBytesSize; diff --git a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs index 7d453f73daa9..499c0bad6a78 100644 --- a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs +++ b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs @@ -217,7 +217,7 @@ public override unsafe void Convert(byte* bytes, int byteCount, public bool MustFlush => _mustFlush; // Anything left in our decoder? - internal virtual bool HasState => false; + internal virtual bool HasState => _leftoverByteCount != 0; // Allow encoding to clear our must flush instead of throwing (in ThrowCharsOverflow) internal void ClearMustFlush()