From 1846fd044c28cff0caaac15870c10032a2ee9f2c Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Tue, 8 Jul 2025 09:44:21 +0200 Subject: [PATCH 1/7] Tar: fix handing of null terminated fields. The current implementation is trimming spaces and nulls from the end. Instead we need to find the terminating null from the start, and we shouldn't trim spaces at the end. --- .../src/System/Formats/Tar/TarHeader.Read.cs | 14 +++--- .../src/System/Formats/Tar/TarHelpers.cs | 45 ++++++------------- 2 files changed, 20 insertions(+), 39 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index 37570d0d33a5ea..57e3b71566762f 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -384,7 +384,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca // Continue with the rest of the fields that require no special checks TarHeader header = new(initialFormat, - name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), + name: TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), mode: TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), mTime: ParseAsTimestamp(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)), typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag]) @@ -393,7 +393,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca _size = size, _uid = TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), _gid = TarHelpers.ParseNumeric(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), - _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) + _linkName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) }; if (header._format == TarEntryFormat.Unknown) @@ -517,8 +517,8 @@ private void ReadVersionAttribute(ReadOnlySpan buffer) private void ReadPosixAndGnuSharedAttributes(ReadOnlySpan buffer) { // Convert the byte arrays - _uName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName)); - _gName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName)); + _uName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName)); + _gName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName)); // DevMajor and DevMinor only have values with character devices and block devices. // For all other typeflags, the values in these fields are irrelevant. @@ -560,7 +560,7 @@ private static DateTimeOffset ParseAsTimestamp(ReadOnlySpan buffer) // Throws if a conversion to an expected data type fails. private void ReadUstarAttributes(ReadOnlySpan buffer) { - _prefix = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); + _prefix = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); // In ustar, Prefix is used to store the *leading* path segments of // Name, if the full path did not fit in the Name byte array. @@ -631,7 +631,7 @@ void ThrowSizeFieldTooLarge() => // Returns a dictionary containing the extended attributes collected from the provided byte buffer. private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) { - buffer = TarHelpers.TrimEndingNullsAndSpaces(buffer); + buffer = TarHelpers.TrimNullTerminated(buffer); while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value)) { @@ -691,7 +691,7 @@ private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, Canc // Collects the GNU long path info from the buffer and sets it in the right field depending on the type flag. private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer) { - string longPath = TarHelpers.GetTrimmedUtf8String(buffer); + string longPath = TarHelpers.ParseUtf8String(buffer); if (_typeFlag == TarEntryType.LongLink) { diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index a2e037e615e730..77cfc47de0bf70 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -240,8 +240,10 @@ internal static T ParseNumeric(ReadOnlySpan buffer) where T : struct, I /// Parses a byte span that represents an ASCII string containing a number in octal base. internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INumber { - buffer = TrimEndingNullsAndSpaces(buffer); - buffer = TrimLeadingNullsAndSpaces(buffer); + buffer = TrimNullTerminated(buffer); + + // We ignore spaces because some archives seem to have them (even though they shouldn't). + buffer = buffer.Trim((byte)' '); if (buffer.Length == 0) { @@ -268,44 +270,23 @@ internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INu private static void ThrowInvalidNumber() => throw new InvalidDataException(SR.Format(SR.TarInvalidNumber)); - // Returns the string contained in the specified buffer of bytes, - // in the specified encoding, removing the trailing null or space chars. - private static string GetTrimmedString(ReadOnlySpan buffer, Encoding encoding) + // Returns the null-terminated UTF8 string contained in the specified buffer. + internal static string ParseUtf8String(ReadOnlySpan buffer) { - buffer = TrimEndingNullsAndSpaces(buffer); - return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer); - } - - internal static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer) - { - int trimmedLength = buffer.Length; - while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32) - { - trimmedLength--; - } - - return buffer.Slice(0, trimmedLength); + buffer = TrimNullTerminated(buffer); + return Encoding.UTF8.GetString(buffer); } - private static ReadOnlySpan TrimLeadingNullsAndSpaces(ReadOnlySpan buffer) + internal static ReadOnlySpan TrimNullTerminated(ReadOnlySpan buffer) { - int newStart = 0; - while (newStart < buffer.Length && buffer[newStart] is 0 or 32) + int i = buffer.IndexOf((byte)0); + if (i != -1) { - newStart++; + buffer = buffer[0..i]; } - - return buffer.Slice(newStart); + return buffer; } - // Returns the ASCII string contained in the specified buffer of bytes, - // removing the trailing null or space chars. - internal static string GetTrimmedAsciiString(ReadOnlySpan buffer) => GetTrimmedString(buffer, Encoding.ASCII); - - // Returns the UTF8 string contained in the specified buffer of bytes, - // removing the trailing null or space chars. - internal static string GetTrimmedUtf8String(ReadOnlySpan buffer) => GetTrimmedString(buffer, Encoding.UTF8); - // After the file contents, there may be zero or more null characters, // which exist to ensure the data is aligned to the record size. Skip them and // set the stream position to the first byte of the next entry. From ec10f03ae87358a1a565e5056a456dba7f1cb551 Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Wed, 9 Jul 2025 13:59:40 +0200 Subject: [PATCH 2/7] Fix not advancing over datastream with length 1, which caused the DataOffset_RegularFile_SecondEntry test to fail. --- .../src/System/Formats/Tar/SubReadStream.cs | 46 ++++++------------- .../src/System/Formats/Tar/TarReader.cs | 28 ++++------- 2 files changed, 23 insertions(+), 51 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs index bda8886efd5a8e..430a9c73013868 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs @@ -64,23 +64,14 @@ public override long Position public override bool CanWrite => false; - internal bool HasReachedEnd + internal long Remaining => _endInSuperStream - _positionInSuperStream; + + private int LimitByRemaining(int bufferSize) => Math.Min(bufferSize, (int)Math.Min(Remaining, int.MaxValue)); + + internal void SetReachedEnd() { - get - { - if (!_hasReachedEnd && _positionInSuperStream > _endInSuperStream) - { - _hasReachedEnd = true; - } - return _hasReachedEnd; - } - set - { - if (value) // Don't allow revert to false - { - _hasReachedEnd = true; - } - } + _positionInSuperStream = _endInSuperStream; + _hasReachedEnd = true; } protected void ThrowIfDisposed() @@ -90,7 +81,7 @@ protected void ThrowIfDisposed() private void ThrowIfBeyondEndOfStream() { - if (HasReachedEnd) + if (_hasReachedEnd) { throw new EndOfStreamException(); } @@ -107,21 +98,12 @@ public override int Read(Span destination) ThrowIfDisposed(); ThrowIfBeyondEndOfStream(); - // parameter validation sent to _superStream.Read - int origCount = destination.Length; - int count = destination.Length; + destination = destination[..LimitByRemaining(destination.Length)]; - if (_positionInSuperStream + count > _endInSuperStream) - { - count = (int)(_endInSuperStream - _positionInSuperStream); - } - - Debug.Assert(count >= 0); - Debug.Assert(count <= origCount); - - int ret = _superStream.Read(destination.Slice(0, count)); + int ret = _superStream.Read(destination); _positionInSuperStream += ret; + return ret; } @@ -158,14 +140,12 @@ protected async ValueTask ReadAsyncCore(Memory buffer, CancellationTo cancellationToken.ThrowIfCancellationRequested(); - if (_positionInSuperStream > _endInSuperStream - buffer.Length) - { - buffer = buffer.Slice(0, (int)(_endInSuperStream - _positionInSuperStream)); - } + buffer = buffer[..LimitByRemaining(buffer.Length)]; int ret = await _superStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false); _positionInSuperStream += ret; + return ret; } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index fc2e4001a2a661..b15c666516ec18 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -221,17 +221,13 @@ internal void AdvanceDataStreamIfNeeded() return; } - if (!dataStream.HasReachedEnd) + long remaining = dataStream.Remaining; + dataStream.SetReachedEnd(); + if (remaining > 0) { - // If the user did not advance the position, we need to make sure the position - // pointer is located at the beginning of the next header. - if (dataStream.Position < (_previouslyReadEntry._header._size - 1)) - { - long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position; - TarHelpers.AdvanceStream(_archiveStream, bytesToSkip); - dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw - } + TarHelpers.AdvanceStream(_archiveStream, remaining); } + TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size); } } @@ -263,17 +259,13 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel return; } - if (!dataStream.HasReachedEnd) + long remaining = dataStream.Remaining; + dataStream.SetReachedEnd(); + if (remaining > 0) { - // If the user did not advance the position, we need to make sure the position - // pointer is located at the beginning of the next header. - if (dataStream.Position < (_previouslyReadEntry._header._size - 1)) - { - long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position; - await TarHelpers.AdvanceStreamAsync(_archiveStream, bytesToSkip, cancellationToken).ConfigureAwait(false); - dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw - } + await TarHelpers.AdvanceStreamAsync(_archiveStream, remaining, cancellationToken).ConfigureAwait(false); } + await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false); } } From 4f09d8504120bfc21fdc6bab25b625271f1cf37d Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Wed, 9 Jul 2025 14:01:06 +0200 Subject: [PATCH 3/7] Skip test cases that depend on checksum being checked. --- .../tests/TarReader/TarReader.File.Async.Tests.cs | 2 +- .../System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs index e603e6e9aa4691..b63c8e9429d164 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs @@ -261,7 +261,7 @@ public async Task AllowSpacesInOctalFieldsAsync(string folderName, string testCa [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries [InlineData("neg-size")] // Garbage chars [InlineData("invalid-go17")] // Many octal fields are all zero chars - [InlineData("issue11169")] // Checksum with null in the middle + // [InlineData("issue11169")] // Checksum with null in the middle, https://github.com/dotnet/runtime/issues/117455 [InlineData("issue10968")] // Garbage chars public async Task Throw_ArchivesWithRandomCharsAsync(string testCaseName) { diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs index 955f29c841849f..6a8744460e4b26 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs @@ -269,7 +269,7 @@ public void AllowSpacesInOctalFields(string folderName, string testCaseName) [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries [InlineData("neg-size")] // Garbage chars [InlineData("invalid-go17")] // Many octal fields are all zero chars - [InlineData("issue11169")] // Checksum with null in the middle + // [InlineData("issue11169")] // Checksum with null in the middle, https://github.com/dotnet/runtime/issues/117455 [InlineData("issue10968")] // Garbage chars public void Throw_ArchivesWithRandomChars(string testCaseName) { From 56660df7a669047289b775a3786cc238751bce5c Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Wed, 9 Jul 2025 15:18:11 +0200 Subject: [PATCH 4/7] Simplify LimitByRemaining. --- .../System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs index 430a9c73013868..afef82d857a26f 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs @@ -66,7 +66,7 @@ public override long Position internal long Remaining => _endInSuperStream - _positionInSuperStream; - private int LimitByRemaining(int bufferSize) => Math.Min(bufferSize, (int)Math.Min(Remaining, int.MaxValue)); + private int LimitByRemaining(int bufferSize) => (int)Math.Min(Remaining, bufferSize); internal void SetReachedEnd() { From f8b6679307cac182cf7b21cc6d07b236b75172f3 Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Thu, 10 Jul 2025 07:21:29 +0200 Subject: [PATCH 5/7] Add test. --- .../TarReader/TarReader.File.Async.Tests.cs | 27 +++++++++++++++++++ .../tests/TarReader/TarReader.File.Tests.cs | 27 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs index b63c8e9429d164..aaaab4df3c15f3 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs @@ -6,6 +6,7 @@ using System.IO; using System.IO.Compression; using System.Linq; +using System.Text; using System.Threading.Tasks; using Xunit; @@ -308,6 +309,32 @@ public async Task SparseEntryNotSupportedAsync(string testFolderName, string tes await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); } + [Fact] + public async Task ReaderIgnoresFieldValueAfterTrailingNullAsync() + { + // Fields in the tar archives are terminated by a trailing null. + // When reading these fields the reader must ignore all bytes past that null. + + // Construct an archive that has a filename with some data after the trailing null. + const string FileName = " filename "; + const string FileNameWithDataPastTrailingNull = $"{FileName}\0nonesense"; + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + var entry = new UstarTarEntry(TarEntryType.RegularFile, FileNameWithDataPastTrailingNull); + writer.WriteEntry(entry); + } + ms.Position = 0; + // Check the writer serialized the complete name passed to the constructor. + bool archiveIsExpected = ms.ToArray().IndexOf(Encoding.UTF8.GetBytes(FileNameWithDataPastTrailingNull)) != -1; + Assert.True(archiveIsExpected); + + // Verify the reader doesn't return the data past the trailing null. + using TarReader reader = new(ms); + TarEntry firstEntry = await reader.GetNextEntryAsync(); + Assert.Equal(FileName, firstEntry.Name); + } + [Fact] public async Task DirectoryListRegularFileAndSparseAsync() { diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs index 6a8744460e4b26..969bcf29b9556c 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs @@ -6,6 +6,7 @@ using System.IO; using System.IO.Compression; using System.Linq; +using System.Text; using Xunit; using static System.Formats.Tar.Tests.TarTestsBase; @@ -316,6 +317,32 @@ public void SparseEntryNotSupported(string testFolderName, string testCaseName) Assert.Throws(() => reader.GetNextEntry()); } + [Fact] + public void ReaderIgnoresFieldValueAfterTrailingNull() + { + // Fields in the tar archives are terminated by a trailing null. + // When reading these fields the reader must ignore all bytes past that null. + + // Construct an archive that has a filename with some data after the trailing null. + const string FileName = " filename "; + const string FileNameWithDataPastTrailingNull = $"{FileName}\0nonesense"; + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + var entry = new UstarTarEntry(TarEntryType.RegularFile, FileNameWithDataPastTrailingNull); + writer.WriteEntry(entry); + } + ms.Position = 0; + // Check the writer serialized the complete name passed to the constructor. + bool archiveIsExpected = ms.ToArray().IndexOf(Encoding.UTF8.GetBytes(FileNameWithDataPastTrailingNull)) != -1; + Assert.True(archiveIsExpected); + + // Verify the reader doesn't return the data past the trailing null. + using TarReader reader = new(ms); + TarEntry firstEntry = reader.GetNextEntry(); + Assert.Equal(FileName, firstEntry.Name); + } + [Fact] public void DirectoryListRegularFileAndSparse() { From c37721f1b75b4607796e975f6c51bfe75b59e837 Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Fri, 11 Jul 2025 10:23:31 +0200 Subject: [PATCH 6/7] Refactor SetReachedEnd. --- .../src/System/Formats/Tar/SubReadStream.cs | 16 ++++++++++++++-- .../src/System/Formats/Tar/TarReader.cs | 14 ++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs index afef82d857a26f..efa933d02fa70b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs @@ -64,14 +64,26 @@ public override long Position public override bool CanWrite => false; - internal long Remaining => _endInSuperStream - _positionInSuperStream; + private long Remaining => _endInSuperStream - _positionInSuperStream; private int LimitByRemaining(int bufferSize) => (int)Math.Min(Remaining, bufferSize); - internal void SetReachedEnd() + internal ValueTask AdvanceToEndAsync(CancellationToken cancellationToken) { + _hasReachedEnd = true; + + long remaining = Remaining; _positionInSuperStream = _endInSuperStream; + return TarHelpers.AdvanceStreamAsync(_superStream, remaining, cancellationToken); + } + + internal void AdvanceToEnd() + { _hasReachedEnd = true; + + long remaining = Remaining; + _positionInSuperStream = _endInSuperStream; + TarHelpers.AdvanceStream(_superStream, remaining); } protected void ThrowIfDisposed() diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index b15c666516ec18..747cba3102fecc 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -221,12 +221,7 @@ internal void AdvanceDataStreamIfNeeded() return; } - long remaining = dataStream.Remaining; - dataStream.SetReachedEnd(); - if (remaining > 0) - { - TarHelpers.AdvanceStream(_archiveStream, remaining); - } + dataStream.AdvanceToEnd(); TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size); } @@ -259,12 +254,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel return; } - long remaining = dataStream.Remaining; - dataStream.SetReachedEnd(); - if (remaining > 0) - { - await TarHelpers.AdvanceStreamAsync(_archiveStream, remaining, cancellationToken).ConfigureAwait(false); - } + await dataStream.AdvanceToEndAsync(cancellationToken).ConfigureAwait(false); await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false); } From 164c1665709fe53c0020360ed6f1ef94aa76a700 Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Fri, 11 Jul 2025 10:59:07 +0200 Subject: [PATCH 7/7] Throw InvalidDataException when the extended header contains invalid records. --- .../src/Resources/Strings.resx | 3 +++ .../src/System/Formats/Tar/TarHeader.Read.cs | 25 ++++++++++++++----- .../TarReader/TarReader.File.Async.Tests.cs | 3 ++- .../tests/TarReader/TarReader.File.Tests.cs | 3 ++- .../System.Formats.Tar/tests/TarTestsBase.cs | 1 - 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index ac632535dfd180..1c595d0bc0409d 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -205,4 +205,7 @@ Cannot write the unseekable data stream of entry '{0}' into an unseekable archive stream. + + The extended header contains invalid records. + diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index 57e3b71566762f..509bcd02a24abf 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Globalization; using System.IO; using System.Text; using System.Threading; @@ -631,8 +632,6 @@ void ThrowSizeFieldTooLarge() => // Returns a dictionary containing the extended attributes collected from the provided byte buffer. private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) { - buffer = TarHelpers.TrimNullTerminated(buffer); - while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value)) { if (!ExtendedAttributes.TryAdd(key, value)) @@ -640,6 +639,11 @@ private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string throw new InvalidDataException(SR.Format(SR.TarDuplicateExtendedAttribute, name)); } } + + if (buffer.Length > 0) + { + throw new InvalidDataException(SR.Format(SR.ExtHeaderInvalidRecords)); + } } // Reads the long path found in the data section of a GNU entry of type 'K' or 'L' @@ -725,15 +729,21 @@ private static bool TryGetNextExtendedAttribute( } ReadOnlySpan line = buffer.Slice(0, newlinePos); - // Update buffer to point to the next line for the next call - buffer = buffer.Slice(newlinePos + 1); - - // Find the end of the length and remove everything up through it. + // Find the end of the length int spacePos = line.IndexOf((byte)' '); if (spacePos < 0) { return false; } + + // Check the length matches the line length + ReadOnlySpan length = buffer.Slice(0, spacePos); + if (!int.TryParse(length, NumberStyles.None, CultureInfo.InvariantCulture, out int lengthValue) || lengthValue != (line.Length + 1)) + { + return false; + } + + // Remove the length line = line.Slice(spacePos + 1); // Find the equal separator. @@ -749,6 +759,9 @@ private static bool TryGetNextExtendedAttribute( // Return the parsed key and value. key = Encoding.UTF8.GetString(keySlice); value = Encoding.UTF8.GetString(valueSlice); + + // Update buffer to point to the next line for the next call + buffer = buffer.Slice(newlinePos + 1); return true; } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs index aaaab4df3c15f3..9692dcca89756a 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs @@ -262,7 +262,8 @@ public async Task AllowSpacesInOctalFieldsAsync(string folderName, string testCa [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries [InlineData("neg-size")] // Garbage chars [InlineData("invalid-go17")] // Many octal fields are all zero chars - // [InlineData("issue11169")] // Checksum with null in the middle, https://github.com/dotnet/runtime/issues/117455 + [InlineData("issue11169")] // Extended header uses spaces instead of newlines to separate records + [InlineData("pax-bad-hdr-file")] // Extended header record is not terminated by newline [InlineData("issue10968")] // Garbage chars public async Task Throw_ArchivesWithRandomCharsAsync(string testCaseName) { diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs index 969bcf29b9556c..0d8557d62a22b2 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs @@ -270,7 +270,8 @@ public void AllowSpacesInOctalFields(string folderName, string testCaseName) [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries [InlineData("neg-size")] // Garbage chars [InlineData("invalid-go17")] // Many octal fields are all zero chars - // [InlineData("issue11169")] // Checksum with null in the middle, https://github.com/dotnet/runtime/issues/117455 + [InlineData("issue11169")] // Extended header uses spaces instead of newlines to separate records + [InlineData("pax-bad-hdr-file")] // Extended header record is not terminated by newline [InlineData("issue10968")] // Garbage chars public void Throw_ArchivesWithRandomChars(string testCaseName) { diff --git a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs index b68493996a095d..16352979db2217 100644 --- a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs +++ b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs @@ -137,7 +137,6 @@ public abstract partial class TarTestsBase : FileCleanupTestBase "gnu", "hardlink", "nil-uid", - "pax-bad-hdr-file", "pax-bad-mtime-file", "pax-global-records", "pax-nul-path",