From 35e86f97a22b6b4e6b3e02848f7a9f5f5919e14e Mon Sep 17 00:00:00 2001 From: Carlos Sanchez <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 23 Aug 2022 17:11:12 -0700 Subject: [PATCH] [release/7.0] Backport tar bug fixes and improvements (#74449) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve performance of Tar library (#74281) * Avoid unnecessary byte[] allocations * Remove unnecessary use of FileStreamOptions * Clean up Dispose{Async} implementations * Clean up unnecessary consts Not a perf thing, just readability. * Remove MemoryStream/Encoding.UTF8.GetBytes allocations, unnecessary async variants, and overhaul GenerateExtendedAttributesDataStream * Avoid string allocations in ReadMagicAttribute * Avoid allocation in WriteAsOctal * Improve handling of octal * Avoid allocation for version string * Removing boxing and char string allocation in GenerateExtendedAttributeName * Fix a couple unnecessary dictionary lookups * Replace Enum.HasFlag usage * Remove allocations from Write{Posix}Name * Replace ArrayPool use with string.Create * Replace more superfluous ArrayPool usage * Remove ArrayPool use from System.IO.Compression.ZipFile * Fix inverted condition * Use generic math to parse octal * Remove allocations from StringReader and string.Split * Remove magic string allocation for Ustar when not V7 * Remove file name and directory name allocation in GenerateExtendedAttributeName * fix tar strings (#74321) * Fix some corner cases in TarReader (#74329) * Fix a few Tar issues post perf improvements (#74338) * Fix a few Tar issues post perf improvements * Update src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs * Skip directory symlink recursion on TarFile archive creation (#74376) * Skip directory symlink recursion on TarFile archive creation * Add symlink verification * Address suggestions by danmoseley Co-authored-by: carlossanlop * SkipBlockAlignmentPadding must be executed for all entries (#74396) * Set modified timestamps on files being extracted from tar archives (#74400) * Add tests for exotic external tar asset archives, fix some more corner case bugs (#74412) * Remove unused _readFirstEntry. Remnant from before we created PaxGlobalExtendedAttributesEntry. * Set the position of the freshly copied data stream to 0, so the first user access of the DataStream property gives them a stream ready to read from the beginning. * Process a PAX actual entry's data block only after the extended attributes are analyzed, in case the size is found as an extended attribute and needs to be overriden. * Add tests to verify the entries of the new external tar assets can be read. Verify their DataStream if available. * Add copyData argument to recent alignment padding tests. * Throw an exception sooner and with a clearer message when a data section is unexpected for the entry type. * Allow trailing nulls and spaces in octal fields. Co-authored-by: @am11 Adeel Mujahid <3840695+am11@users.noreply.github.com> * Throw a clearer exception if the unsupported sparse file entry type is encountered. These entries have additional data that indicates the locations of sparse bytes, which cannot be read with just the size field. So to avoid accidentally offseting the reader, we throw. * Tests. * Rename to TrimLeadingNullsAndSpaces Co-authored-by: carlossanlop * Remove Compression changes, keep changes confined to Tar. * Fix build failure due to missing using in TarHelpers.Windows Co-authored-by: Stephen Toub Co-authored-by: Dan Moseley Co-authored-by: Adeel Mujahid <3840695+am11@users.noreply.github.com> Co-authored-by: carlossanlop Co-authored-by: David CantĂș --- .../src/Resources/Strings.resx | 6 + .../src/System/Formats/Tar/SubReadStream.cs | 5 +- .../src/System/Formats/Tar/TarEntry.cs | 18 +- .../src/System/Formats/Tar/TarFile.cs | 91 +++-- .../src/System/Formats/Tar/TarHeader.Read.cs | 270 ++++++++----- .../src/System/Formats/Tar/TarHeader.Write.cs | 364 +++++++++--------- .../src/System/Formats/Tar/TarHelpers.Unix.cs | 20 + .../System/Formats/Tar/TarHelpers.Windows.cs | 44 ++- .../src/System/Formats/Tar/TarHelpers.cs | 105 +++-- .../src/System/Formats/Tar/TarReader.cs | 128 +++--- .../src/System/Formats/Tar/TarWriter.Unix.cs | 11 +- .../System/Formats/Tar/TarWriter.Windows.cs | 18 +- .../src/System/Formats/Tar/TarWriter.cs | 150 +++----- .../TarFile.CreateFromDirectory.File.Tests.cs | 60 +++ ...ile.CreateFromDirectoryAsync.File.Tests.cs | 60 +++ .../TarFile.ExtractToDirectory.File.Tests.cs | 27 ++ ...TarFile.ExtractToDirectory.Stream.Tests.cs | 32 ++ ...File.ExtractToDirectoryAsync.File.Tests.cs | 27 ++ ...le.ExtractToDirectoryAsync.Stream.Tests.cs | 32 ++ .../TarReader/TarReader.File.Async.Tests.cs | 241 ++++++++++++ .../tests/TarReader/TarReader.File.Tests.cs | 241 ++++++++++++ .../TarReader/TarReader.GetNextEntry.Tests.cs | 41 ++ .../TarReader.GetNextEntryAsync.Tests.cs | 41 ++ .../System.Formats.Tar/tests/TarTestsBase.cs | 163 +++++++- 24 files changed, 1628 insertions(+), 567 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index 79e3188410c3c9..5308e9153c9791 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -205,6 +205,9 @@ The entry is a symbolic link or a hard link but the LinkName field is null or empty. + Entry type '{0}' not supported. + + Entry type '{0}' not supported in format '{1}'. @@ -255,4 +258,7 @@ An attempt was made to move the position before the beginning of the stream. + + Unable to parse number. + diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs index c6b9b6afc8d6db..014165939ac1c7 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs @@ -188,10 +188,7 @@ public override Task FlushAsync(CancellationToken cancellationToken) => // the substream is just 'a chunk' of the super-stream protected override void Dispose(bool disposing) { - if (disposing && !_isDisposed) - { - _isDisposed = true; - } + _isDisposed = true; base.Dispose(disposing); } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs index 27fe4af8701a92..9c2f0553eb39bf 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs @@ -465,7 +465,7 @@ private void VerifyPathsForEntryType(string filePath, string? linkTargetPath, bo // If the destination contains a directory segment, need to check that it exists if (!string.IsNullOrEmpty(directoryPath) && !Path.Exists(directoryPath)) { - throw new IOException(string.Format(SR.IO_PathNotFound_NoPathName, filePath)); + throw new IOException(string.Format(SR.IO_PathNotFound_Path, filePath)); } if (!Path.Exists(filePath)) @@ -529,7 +529,7 @@ private void ExtractAsRegularFile(string destinationFileName) DataStream?.CopyTo(fs); } - ArchivingUtils.AttemptSetLastWriteTime(destinationFileName, ModificationTime); + AttemptSetLastWriteTime(destinationFileName, ModificationTime); } // Asynchronously extracts the current entry as a regular file into the specified destination. @@ -551,7 +551,19 @@ private async Task ExtractAsRegularFileAsync(string destinationFileName, Cancell } } - ArchivingUtils.AttemptSetLastWriteTime(destinationFileName, ModificationTime); + AttemptSetLastWriteTime(destinationFileName, ModificationTime); + } + + private static void AttemptSetLastWriteTime(string destinationFileName, DateTimeOffset lastWriteTime) + { + try + { + File.SetLastWriteTime(destinationFileName, lastWriteTime.LocalDateTime); // SetLastWriteTime expects local time + } + catch + { + // Some OSes like Android might not support setting the last write time, the extraction should not fail because of that + } } private FileStreamOptions CreateFileStreamOptions(bool isAsync) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index a77cb2c36a65ea..8d8902f5806d59 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.IO.Enumeration; using System.Threading; using System.Threading.Tasks; @@ -15,11 +16,6 @@ namespace System.Formats.Tar /// public static class TarFile { - // Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely - // to be greater than the length of typical entry names from the file system, even - // on non-Windows platforms. The capacity will be increased, if needed. - private const int DefaultCapacity = 260; - /// /// Creates a tar stream that contains all the filesystem entries from the specified directory. /// @@ -222,7 +218,7 @@ public static void ExtractToDirectory(string sourceFileName, string destinationD if (!File.Exists(sourceFileName)) { - throw new FileNotFoundException(string.Format(SR.IO_FileNotFound, sourceFileName)); + throw new FileNotFoundException(string.Format(SR.IO_FileNotFound_FileName, sourceFileName)); } if (!Directory.Exists(destinationDirectoryName)) @@ -261,7 +257,7 @@ public static Task ExtractToDirectoryAsync(string sourceFileName, string destina if (!File.Exists(sourceFileName)) { - return Task.FromException(new FileNotFoundException(string.Format(SR.IO_FileNotFound, sourceFileName))); + return Task.FromException(new FileNotFoundException(string.Format(SR.IO_FileNotFound_FileName, sourceFileName))); } if (!Directory.Exists(destinationDirectoryName)) @@ -283,23 +279,22 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + bool skipBaseDirRecursion = false; + if (includeBaseDirectory) { - if (includeBaseDirectory) - { - writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer)); - } + writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name)); + skipBaseDirRecursion = (di.Attributes & FileAttributes.ReparsePoint) != 0; + } - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer)); - } + if (skipBaseDirRecursion) + { + // The base directory is a symlink, do not recurse into it + return; } - finally + + foreach (FileSystemInfo file in GetFileSystemEnumerationForCreation(sourceDirectoryName)) { - ArrayPool.Shared.Return(entryNameBuffer); + writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length)); } } } @@ -339,44 +334,58 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + bool skipBaseDirRecursion = false; + if (includeBaseDirectory) { - if (includeBaseDirectory) - { - await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer), cancellationToken).ConfigureAwait(false); - } + await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name), cancellationToken).ConfigureAwait(false); + skipBaseDirRecursion = (di.Attributes & FileAttributes.ReparsePoint) != 0; + } - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer), cancellationToken).ConfigureAwait(false); - } + if (skipBaseDirRecursion) + { + // The base directory is a symlink, do not recurse into it + return; } - finally + + foreach (FileSystemInfo file in GetFileSystemEnumerationForCreation(sourceDirectoryName)) { - ArrayPool.Shared.Return(entryNameBuffer); + await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false); } } } + // Generates a recursive enumeration of the filesystem entries inside the specified source directory, while + // making sure that directory symlinks do not get recursed. + private static IEnumerable GetFileSystemEnumerationForCreation(string sourceDirectoryName) + { + return new FileSystemEnumerable( + directory: sourceDirectoryName, + transform: (ref FileSystemEntry entry) => entry.ToFileSystemInfo(), + options: new EnumerationOptions() + { + RecurseSubdirectories = true + }) + { + ShouldRecursePredicate = IsNotADirectorySymlink + }; + + static bool IsNotADirectorySymlink(ref FileSystemEntry entry) => entry.IsDirectory && (entry.Attributes & FileAttributes.ReparsePoint) == 0; + } + // Determines what should be the base path for all the entries when creating an archive. private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool includeBaseDirectory) => includeBaseDirectory && di.Parent != null ? di.Parent.FullName : di.FullName; // Constructs the entry name used for a filesystem entry when creating an archive. - private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength, ref char[] entryNameBuffer) + private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength) { - int entryNameLength = file.FullName.Length - basePathLength; - Debug.Assert(entryNameLength > 0); - - bool isDirectory = file.Attributes.HasFlag(FileAttributes.Directory); - return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, ref entryNameBuffer, appendPathSeparator: isDirectory); + bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0; + return TarHelpers.EntryFromPath(file.FullName.AsSpan(basePathLength), appendPathSeparator: isDirectory); } - private static string GetEntryNameForBaseDirectory(string name, ref char[] entryNameBuffer) + private static string GetEntryNameForBaseDirectory(string name) { - return ArchivingUtils.EntryFromPath(name, 0, name.Length, ref entryNameBuffer, appendPathSeparator: true); + return TarHelpers.EntryFromPath(name, appendPathSeparator: true); } // Extracts an archive into the specified directory. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index e89c9d4579dd38..5578707d64eb18 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -20,7 +20,7 @@ internal sealed partial class TarHeader // Attempts to retrieve the next header from the specified tar archive stream. // Throws if end of stream is reached or if any data type conversion fails. // Returns a valid TarHeader object if the attributes were read successfully, null otherwise. - internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat) + internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock) { // The four supported formats have a header that fits in the default record size Span buffer = stackalloc byte[TarHelpers.RecordSize]; @@ -28,7 +28,7 @@ internal sealed partial class TarHeader archiveStream.ReadExactly(buffer); TarHeader? header = TryReadAttributes(initialFormat, buffer); - if (header != null) + if (header != null && processDataBlock) { header.ProcessDataBlock(archiveStream, copyData); } @@ -39,7 +39,7 @@ internal sealed partial class TarHeader // Asynchronously attempts read all the fields of the next header. // Throws if end of stream is reached or if any data type conversion fails. // Returns true if all the attributes were read successfully, false otherwise. - internal static async ValueTask TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, CancellationToken cancellationToken) + internal static async ValueTask TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); @@ -50,7 +50,7 @@ internal sealed partial class TarHeader await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); TarHeader? header = TryReadAttributes(initialFormat, buffer.Span); - if (header != null) + if (header != null && processDataBlock) { await header.ProcessDataBlockAsync(archiveStream, copyData, cancellationToken).ConfigureAwait(false); } @@ -180,7 +180,7 @@ internal void ReplaceNormalAttributesWithExtended(Dictionary? di // will get all the data section read and the stream pointer positioned at the beginning of the next header. // - Block, Character, Directory, Fifo, HardLink and SymbolicLink typeflag entries have no data section so the archive stream pointer will be positioned at the beginning of the next header. // - All other typeflag entries with a data section will generate a stream wrapping the data section: SeekableSubReadStream for seekable archive streams, and SubReadStream for unseekable archive streams. - private void ProcessDataBlock(Stream archiveStream, bool copyData) + internal void ProcessDataBlock(Stream archiveStream, bool copyData) { bool skipBlockAlignmentPadding = true; @@ -199,6 +199,10 @@ private void ProcessDataBlock(Stream archiveStream, bool copyData) case TarEntryType.HardLink: case TarEntryType.SymbolicLink: // No data section + if (_size > 0) + { + throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag)); + } break; case TarEntryType.RegularFile: case TarEntryType.V7RegularFile: // Treated as regular file @@ -257,6 +261,10 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca case TarEntryType.HardLink: case TarEntryType.SymbolicLink: // No data section + if (_size > 0) + { + throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag)); + } break; case TarEntryType.RegularFile: case TarEntryType.V7RegularFile: // Treated as regular file @@ -311,6 +319,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { MemoryStream copiedData = new MemoryStream(); TarHelpers.CopyBytes(archiveStream, copiedData, _size); + // Reset position pointer so the user can do the first DataStream read from the beginning + copiedData.Position = 0; return copiedData; } @@ -336,6 +346,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { MemoryStream copiedData = new MemoryStream(); await TarHelpers.CopyBytesAsync(archiveStream, copiedData, size, cancellationToken).ConfigureAwait(false); + // Reset position pointer so the user can do the first DataStream read from the beginning + copiedData.Position = 0; return copiedData; } @@ -357,14 +369,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { return null; } - int checksum = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(spanChecksum); + int checksum = (int)TarHelpers.ParseOctal(spanChecksum); // Zero checksum means the whole header is empty if (checksum == 0) { return null; } - long size = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); + long size = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); if (size < 0) { throw new FormatException(string.Format(SR.TarSizeFieldNegative)); @@ -373,14 +385,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca // Continue with the rest of the fields that require no special checks TarHeader header = new(initialFormat, name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), - mode: TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), - mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), + mode: (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), + mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch((long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag]) { _checksum = checksum, _size = size, - _uid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), - _gid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), + _uid = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), + _gid = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) }; @@ -396,12 +408,13 @@ TarEntryType.LongLink or TarEntryType.LongPath or TarEntryType.MultiVolume or TarEntryType.RenamedOrSymlinked or - TarEntryType.SparseFile or TarEntryType.TapeVolume => TarEntryFormat.Gnu, // V7 is the only one that uses 'V7RegularFile'. TarEntryType.V7RegularFile => TarEntryFormat.V7, + TarEntryType.SparseFile => throw new NotSupportedException(string.Format(SR.TarEntryTypeNotSupported, header._typeFlag)), + // We can quickly determine the *minimum* possible format if the entry type // is the POSIX 'RegularFile', although later we could upgrade it to PAX or GNU _ => (header._typeFlag == TarEntryType.RegularFile) ? TarEntryFormat.Ustar : TarEntryFormat.V7 @@ -425,16 +438,23 @@ private void ReadMagicAttribute(Span buffer) } // When the magic field is set, the archive is newer than v7. - _magic = Encoding.ASCII.GetString(magic); - - if (_magic == GnuMagic) + if (magic.SequenceEqual(GnuMagicBytes)) { + _magic = GnuMagic; _format = TarEntryFormat.Gnu; } - else if (_format == TarEntryFormat.V7 && _magic == UstarMagic) + else if (magic.SequenceEqual(UstarMagicBytes)) { - // Important: Only change to ustar if we had not changed the format to pax already - _format = TarEntryFormat.Ustar; + _magic = UstarMagic; + if (_format == TarEntryFormat.V7) + { + // Important: Only change to ustar if we had not changed the format to pax already + _format = TarEntryFormat.Ustar; + } + } + else + { + _magic = Encoding.ASCII.GetString(magic); } } @@ -448,19 +468,47 @@ private void ReadVersionAttribute(Span buffer) } Span version = buffer.Slice(FieldLocations.Version, FieldLengths.Version); + switch (_format) + { + case TarEntryFormat.Ustar or TarEntryFormat.Pax: + // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" + if (!version.SequenceEqual(UstarVersionBytes)) + { + // Check for gnu version header for mixed case + if (!version.SequenceEqual(GnuVersionBytes)) + { + throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); + } - _version = Encoding.ASCII.GetString(version); + _version = GnuVersion; + } + else + { + _version = UstarVersion; + } + break; - // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" - if ((_format is TarEntryFormat.Ustar or TarEntryFormat.Pax) && _version != UstarVersion) - { - throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); - } + case TarEntryFormat.Gnu: + // The GNU format has a Magic+Version 8 byte string "ustar \0" + if (!version.SequenceEqual(GnuVersionBytes)) + { + // Check for ustar or pax version header for mixed case + if (!version.SequenceEqual(UstarVersionBytes)) + { + throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + } - // The GNU format has a Magic+Version 8 byte string "ustar \0" - if (_format == TarEntryFormat.Gnu && _version != GnuVersion) - { - throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + _version = UstarVersion; + } + else + { + _version = GnuVersion; + } + break; + + default: + _version = Encoding.ASCII.GetString(version); + break; } } @@ -477,10 +525,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice) { // Major number for a character device or block device entry. - _devMajor = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); + _devMajor = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); // Minor number for a character device or block device entry. - _devMinor = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); + _devMinor = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } } @@ -489,10 +537,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) private void ReadGnuAttributes(Span buffer) { // Convert byte arrays - long aTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + long aTime = (long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); _aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime); - long cTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); + long cTime = (long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); _cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime); // TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230 @@ -518,11 +566,23 @@ private void ReadUstarAttributes(Span buffer) // Throws if end of stream is reached or if an attribute is malformed. private void ReadExtendedAttributesBlock(Stream archiveStream) { - byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); - if (buffer != null) + if (_size != 0) { - archiveStream.ReadExactly(buffer); - ReadExtendedAttributesFromBuffer(buffer, _name); + ValidateSize(); + + byte[]? buffer = null; + Span span = _size <= 256 ? + stackalloc byte[256] : + (buffer = ArrayPool.Shared.Rent((int)_size)); + span = span.Slice(0, (int)_size); + + archiveStream.ReadExactly(span); + ReadExtendedAttributesFromBuffer(span, _name); + + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } } } @@ -531,48 +591,43 @@ private void ReadExtendedAttributesBlock(Stream archiveStream) private async ValueTask ReadExtendedAttributesBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); - if (buffer != null) + + if (_size != 0) { - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - ReadExtendedAttributesFromBuffer(buffer, _name); - } - } + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Memory memory = buffer.AsMemory(0, (int)_size); - // Return a byte array if the size field has a valid value for extended attributes. Otherwise, return null, or throw. - private byte[]? CreateExtendedAttributesBufferIfSizeIsValid() - { - Debug.Assert(_typeFlag is TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes); + await archiveStream.ReadExactlyAsync(memory, cancellationToken).ConfigureAwait(false); + ReadExtendedAttributesFromBuffer(memory.Span, _name); - // It is not expected that the extended attributes data section will be longer than Array.MaxLength, considering - // the size field is 12 bytes long, which fits a number with a value under int.MaxValue. - if (_size > Array.MaxLength) - { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); + ArrayPool.Shared.Return(buffer); } + } - if (_size == 0) + private void ValidateSize() + { + if ((uint)_size > (uint)Array.MaxLength) { - return null; + ThrowSizeFieldTooLarge(); } - return new byte[(int)_size]; + [DoesNotReturn] + void ThrowSizeFieldTooLarge() => + throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); } // Returns a dictionary containing the extended attributes collected from the provided byte buffer. private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) { - string dataAsString = TarHelpers.GetTrimmedUtf8String(buffer); - - using StringReader reader = new(dataAsString); + buffer = TarHelpers.TrimEndingNullsAndSpaces(buffer); - while (TryGetNextExtendedAttribute(reader, out string? key, out string? value)) + while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value)) { - if (ExtendedAttributes.ContainsKey(key)) + if (!ExtendedAttributes.TryAdd(key, value)) { throw new FormatException(string.Format(SR.TarDuplicateExtendedAttribute, name)); } - ExtendedAttributes.Add(key, value); } } @@ -581,11 +636,23 @@ private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string // Throws if end of stream is reached. private void ReadGnuLongPathDataBlock(Stream archiveStream) { - byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); - if (buffer != null) + if (_size != 0) { - archiveStream.ReadExactly(buffer); - ReadGnuLongPathDataFromBuffer(buffer); + ValidateSize(); + + byte[]? buffer = null; + Span span = _size <= 256 ? + stackalloc byte[256] : + (buffer = ArrayPool.Shared.Rent((int)_size)); + span = span.Slice(0, (int)_size); + + archiveStream.ReadExactly(span); + ReadGnuLongPathDataFromBuffer(span); + + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } } } @@ -595,11 +662,17 @@ private void ReadGnuLongPathDataBlock(Stream archiveStream) private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); - if (buffer != null) + + if (_size != 0) { - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - ReadGnuLongPathDataFromBuffer(buffer); + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Memory memory = buffer.AsMemory(0, (int)_size); + + await archiveStream.ReadExactlyAsync(memory, cancellationToken).ConfigureAwait(false); + ReadGnuLongPathDataFromBuffer(memory.Span); + + ArrayPool.Shared.Return(buffer); } } @@ -618,61 +691,64 @@ private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer) } } - // Return a byte array if the size field has a valid value for GNU long metadata entry data. Otherwise, return null, or throw. - private byte[]? CreateGnuLongDataBufferIfSizeIsValid() - { - Debug.Assert(_typeFlag is TarEntryType.LongLink or TarEntryType.LongPath); - - if (_size > Array.MaxLength) - { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); - } - - if (_size == 0) - { - return null; - } - - return new byte[(int)_size]; - } - - // Tries to collect the next extended attribute from the string wrapped by the specified reader. + // Tries to collect the next extended attribute from the string. // Extended attributes are saved in the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html // "LENGTH KEY=VALUE\n" // Where LENGTH is the total number of bytes of that line, from LENGTH itself to the endline, inclusive. // Throws if end of stream is reached or if an attribute is malformed. private static bool TryGetNextExtendedAttribute( - StringReader reader, + ref ReadOnlySpan buffer, [NotNullWhen(returnValue: true)] out string? key, [NotNullWhen(returnValue: true)] out string? value) { key = null; value = null; - string? nextLine = reader.ReadLine(); - if (string.IsNullOrWhiteSpace(nextLine)) + // Slice off the next line. + int newlinePos = buffer.IndexOf((byte)'\n'); + if (newlinePos < 0) { return false; } + ReadOnlySpan line = buffer.Slice(0, newlinePos); + + // Update buffer to point to the next line for the next call + buffer = buffer.Slice(newlinePos + 1); - StringSplitOptions splitOptions = StringSplitOptions.RemoveEmptyEntries; + // Find the end of the length and remove everything up through it. + int spacePos = line.IndexOf((byte)' '); + if (spacePos < 0) + { + return false; + } + line = line.Slice(spacePos + 1).TrimStart((byte)' '); - string[] attributeArray = nextLine.Split(' ', 2, splitOptions); - if (attributeArray.Length != 2) + // If there are any more spaces, it's malformed. + if (line.IndexOf((byte)' ') >= 0) { return false; } - string[] keyAndValueArray = attributeArray[1].Split('=', 2, splitOptions); - if (keyAndValueArray.Length != 2) + // Find the equal separator. + int equalPos = line.IndexOf((byte)'='); + if (equalPos < 0) { return false; } - key = keyAndValueArray[0]; - value = keyAndValueArray[1]; + ReadOnlySpan keySlice = line.Slice(0, equalPos); + ReadOnlySpan valueSlice = line.Slice(equalPos + 1); + + // If the value contains an =, it's malformed. + if (valueSlice.IndexOf((byte)'=') >= 0) + { + return false; + } + // Return the parsed key and value. + key = Encoding.UTF8.GetString(keySlice); + value = Encoding.UTF8.GetString(valueSlice); return true; } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index ad5cdae22d0315..2afcfb8a9f23df 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -1,10 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; +using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Numerics; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -14,16 +17,12 @@ namespace System.Formats.Tar // Writes header attributes of a tar archive entry. internal sealed partial class TarHeader { - private static ReadOnlySpan PaxMagicBytes => "ustar\0"u8; - private static ReadOnlySpan PaxVersionBytes => "00"u8; + private static ReadOnlySpan UstarMagicBytes => "ustar\0"u8; + private static ReadOnlySpan UstarVersionBytes => "00"u8; private static ReadOnlySpan GnuMagicBytes => "ustar "u8; private static ReadOnlySpan GnuVersionBytes => " \0"u8; - // Extended Attribute entries have a special format in the Name field: - // "{dirName}/PaxHeaders.{processId}/{fileName}{trailingSeparator}" - private const string PaxHeadersFormat = "{0}/PaxHeaders.{1}/{2}{3}"; - // Predefined text for the Name field of a GNU long metadata entry. Applies for both LongPath ('L') and LongLink ('K'). private const string GnuLongMetadataName = "././@LongLink"; @@ -61,7 +60,7 @@ private long WriteV7FieldsToBuffer(Span buffer) long actualLength = GetTotalDataBytesToWrite(); TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); - int tmpChecksum = WriteName(buffer, out _); + int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, actualLength, actualEntryType); _checksum = WriteChecksum(tmpChecksum, buffer); @@ -215,7 +214,7 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C // Second, we determine if we need a preceding LongPath, and write it if needed if (_name.Length > FieldLengths.Name) { - TarHeader longPathHeader = await GetGnuLongMetadataHeaderAsync(TarEntryType.LongPath, _name, cancellationToken).ConfigureAwait(false); + TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name); await longPathHeader.WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); buffer.Span.Clear(); // Reset it to reuse it } @@ -227,34 +226,8 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C // Creates and returns a GNU long metadata header, with the specified long text written into its data stream. private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string longText) { - TarHeader longMetadataHeader = GetDefaultGnuLongMetadataHeader(longText.Length, entryType); - Debug.Assert(longMetadataHeader._dataStream != null); - - longMetadataHeader._dataStream.Write(Encoding.UTF8.GetBytes(longText)); - longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning - - return longMetadataHeader; - } - - // Asynchronously creates and returns a GNU long metadata header, with the specified long text written into its data stream. - private static async Task GetGnuLongMetadataHeaderAsync(TarEntryType entryType, string longText, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - - TarHeader longMetadataHeader = GetDefaultGnuLongMetadataHeader(longText.Length, entryType); - Debug.Assert(longMetadataHeader._dataStream != null); - - await longMetadataHeader._dataStream.WriteAsync(Encoding.UTF8.GetBytes(longText), cancellationToken).ConfigureAwait(false); - longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning - - return longMetadataHeader; - } - - // Constructs a GNU metadata header with default values for the specified entry type. - private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, TarEntryType entryType) - { - Debug.Assert((entryType is TarEntryType.LongPath && longTextLength > FieldLengths.Name) || - (entryType is TarEntryType.LongLink && longTextLength > FieldLengths.LinkName)); + Debug.Assert((entryType is TarEntryType.LongPath && longText.Length > FieldLengths.Name) || + (entryType is TarEntryType.LongLink && longText.Length > FieldLengths.LinkName)); TarHeader longMetadataHeader = new(TarEntryFormat.Gnu); @@ -264,7 +237,7 @@ private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, Tar longMetadataHeader._gid = 0; longMetadataHeader._mTime = DateTimeOffset.MinValue; // 0 longMetadataHeader._typeFlag = entryType; - longMetadataHeader._dataStream = new MemoryStream(); + longMetadataHeader._dataStream = new MemoryStream(Encoding.UTF8.GetBytes(longText)); return longMetadataHeader; } @@ -302,7 +275,7 @@ private void WriteAsGnuSharedInternal(Span buffer, out long actualLength) { actualLength = GetTotalDataBytesToWrite(); - int tmpChecksum = WriteName(buffer, out _); + int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, actualLength, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Gnu, _typeFlag)); tmpChecksum += WriteGnuMagicAndVersion(buffer); tmpChecksum += WritePosixAndGnuSharedFields(buffer); @@ -320,13 +293,13 @@ private void WriteAsPaxExtendedAttributes(Stream archiveStream, Span buffe } // Asynchronously writes the current header as a PAX Extended Attributes entry into the archive stream and returns the value of the final checksum. - private async Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) + private Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber); - _dataStream = await GenerateExtendedAttributesDataStreamAsync(extendedAttributes, cancellationToken).ConfigureAwait(false); - await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + _dataStream = GenerateExtendedAttributesDataStream(extendedAttributes); + return WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken); } // Initializes the name, mode and type flag of a PAX extended attributes entry. @@ -385,24 +358,33 @@ private void WriteAsPaxSharedInternal(Span buffer, out long actualLength) _checksum = WriteChecksum(tmpChecksum, buffer); } - // All formats save in the name byte array only the ASCII bytes that fit. The full string is returned in the out byte array. - private int WriteName(Span buffer, out byte[] fullNameBytes) + // All formats save in the name byte array only the ASCII bytes that fit. + private int WriteName(Span buffer) { - fullNameBytes = Encoding.ASCII.GetBytes(_name); - int nameBytesLength = Math.Min(fullNameBytes.Length, FieldLengths.Name); - int checksum = WriteLeftAlignedBytesAndGetChecksum(fullNameBytes.AsSpan(0, nameBytesLength), buffer.Slice(FieldLocations.Name, FieldLengths.Name)); - return checksum; + ReadOnlySpan src = _name.AsSpan(0, Math.Min(_name.Length, FieldLengths.Name)); + Span dest = buffer.Slice(FieldLocations.Name, FieldLengths.Name); + int encoded = Encoding.ASCII.GetBytes(src, dest); + return Checksum(dest.Slice(0, encoded)); } // Ustar and PAX save in the name byte array only the ASCII bytes that fit, and the rest of that string is saved in the prefix field. private int WritePosixName(Span buffer) { - int checksum = WriteName(buffer, out byte[] fullNameBytes); - if (fullNameBytes.Length > FieldLengths.Name) + int checksum = WriteName(buffer); + + if (_name.Length > FieldLengths.Name) { - int prefixBytesLength = Math.Min(fullNameBytes.Length - FieldLengths.Name, FieldLengths.Name); - checksum += WriteLeftAlignedBytesAndGetChecksum(fullNameBytes.AsSpan(FieldLengths.Name, prefixBytesLength), buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); + int prefixBytesLength = Math.Min(_name.Length - FieldLengths.Name, FieldLengths.Name); + Span remaining = prefixBytesLength <= 256 ? + stackalloc byte[prefixBytesLength] : + new byte[prefixBytesLength]; + + int encoded = Encoding.ASCII.GetBytes(_name.AsSpan(FieldLengths.Name), remaining); + Debug.Assert(encoded == remaining.Length); + + checksum += WriteLeftAlignedBytesAndGetChecksum(remaining, buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); } + return checksum; } @@ -413,27 +395,27 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (_mode > 0) { - checksum += WriteAsOctal(_mode, buffer, FieldLocations.Mode, FieldLengths.Mode); + checksum += FormatOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)); } if (_uid > 0) { - checksum += WriteAsOctal(_uid, buffer, FieldLocations.Uid, FieldLengths.Uid); + checksum += FormatOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)); } if (_gid > 0) { - checksum += WriteAsOctal(_gid, buffer, FieldLocations.Gid, FieldLengths.Gid); + checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)); } _size = actualLength; if (_size > 0) { - checksum += WriteAsOctal(_size, buffer, FieldLocations.Size, FieldLengths.Size); + checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size)); } - checksum += WriteAsTimestamp(_mTime, buffer, FieldLocations.MTime, FieldLengths.MTime); + checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)); char typeFlagChar = (char)actualEntryType; buffer[FieldLocations.TypeFlag] = (byte)typeFlagChar; @@ -441,7 +423,7 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (!string.IsNullOrEmpty(_linkName)) { - checksum += WriteAsAsciiString(_linkName, buffer, FieldLocations.LinkName, FieldLengths.LinkName); + checksum += WriteAsAsciiString(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)); } return checksum; @@ -465,8 +447,8 @@ private long GetTotalDataBytesToWrite() // Writes the magic and version fields of a ustar or pax entry into the specified spans. private static int WritePosixMagicAndVersion(Span buffer) { - int checksum = WriteLeftAlignedBytesAndGetChecksum(PaxMagicBytes, buffer.Slice(FieldLocations.Magic, FieldLengths.Magic)); - checksum += WriteLeftAlignedBytesAndGetChecksum(PaxVersionBytes, buffer.Slice(FieldLocations.Version, FieldLengths.Version)); + int checksum = WriteLeftAlignedBytesAndGetChecksum(UstarMagicBytes, buffer.Slice(FieldLocations.Magic, FieldLengths.Magic)); + checksum += WriteLeftAlignedBytesAndGetChecksum(UstarVersionBytes, buffer.Slice(FieldLocations.Version, FieldLengths.Version)); return checksum; } @@ -485,22 +467,22 @@ private int WritePosixAndGnuSharedFields(Span buffer) if (!string.IsNullOrEmpty(_uName)) { - checksum += WriteAsAsciiString(_uName, buffer, FieldLocations.UName, FieldLengths.UName); + checksum += WriteAsAsciiString(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName)); } if (!string.IsNullOrEmpty(_gName)) { - checksum += WriteAsAsciiString(_gName, buffer, FieldLocations.GName, FieldLengths.GName); + checksum += WriteAsAsciiString(_gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName)); } if (_devMajor > 0) { - checksum += WriteAsOctal(_devMajor, buffer, FieldLocations.DevMajor, FieldLengths.DevMajor); + checksum += FormatOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); } if (_devMinor > 0) { - checksum += WriteAsOctal(_devMinor, buffer, FieldLocations.DevMinor, FieldLengths.DevMinor); + checksum += FormatOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } return checksum; @@ -509,8 +491,8 @@ private int WritePosixAndGnuSharedFields(Span buffer) // Saves the gnu-specific fields into the specified spans. private int WriteGnuFields(Span buffer) { - int checksum = WriteAsTimestamp(_aTime, buffer, FieldLocations.ATime, FieldLengths.ATime); - checksum += WriteAsTimestamp(_cTime, buffer, FieldLocations.CTime, FieldLengths.CTime); + int checksum = WriteAsTimestamp(_aTime, buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + checksum += WriteAsTimestamp(_cTime, buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); if (_gnuUnusedBytes != null) { @@ -524,8 +506,18 @@ private int WriteGnuFields(Span buffer) private static void WriteData(Stream archiveStream, Stream dataStream, long actualLength) { dataStream.CopyTo(archiveStream); // The data gets copied from the current position + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); - archiveStream.Write(new byte[paddingAfterData]); + if (paddingAfterData != 0) + { + Debug.Assert(paddingAfterData <= TarHelpers.RecordSize); + + Span padding = stackalloc byte[TarHelpers.RecordSize]; + padding = padding.Slice(0, paddingAfterData); + padding.Clear(); + + archiveStream.Write(padding); + } } // Asynchronously writes the current header's data stream into the archive stream. @@ -534,44 +526,95 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream cancellationToken.ThrowIfCancellationRequested(); await dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); - await archiveStream.WriteAsync(new byte[paddingAfterData], cancellationToken).ConfigureAwait(false); + if (paddingAfterData != 0) + { + byte[] buffer = ArrayPool.Shared.Rent(paddingAfterData); + Array.Clear(buffer, 0, paddingAfterData); + + await archiveStream.WriteAsync(buffer.AsMemory(0, paddingAfterData), cancellationToken).ConfigureAwait(false); + + ArrayPool.Shared.Return(buffer); + } } // Dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. private static Stream? GenerateExtendedAttributesDataStream(Dictionary extendedAttributes) { MemoryStream? dataStream = null; + + byte[]? buffer = null; + Span span = stackalloc byte[512]; + if (extendedAttributes.Count > 0) { dataStream = new MemoryStream(); + foreach ((string attribute, string value) in extendedAttributes) { - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - dataStream.Write(entryBytes); + // Generates an extended attribute key value pair string saved into a byte array, following the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html + + // The format is: + // "XX attribute=value\n" + // where "XX" is the number of characters in the entry, including those required for the count itself. + // If prepending the length digits increases the number of digits, we need to expand. + int length = 3 + Encoding.UTF8.GetByteCount(attribute) + Encoding.UTF8.GetByteCount(value); + int originalDigitCount = CountDigits(length), newDigitCount; + length += originalDigitCount; + while ((newDigitCount = CountDigits(length)) != originalDigitCount) + { + length += newDigitCount - originalDigitCount; + originalDigitCount = newDigitCount; + } + Debug.Assert(length == CountDigits(length) + 3 + Encoding.UTF8.GetByteCount(attribute) + Encoding.UTF8.GetByteCount(value)); + + // Get a large enough buffer if we don't already have one. + if (span.Length < length) + { + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } + span = buffer = ArrayPool.Shared.Rent(length); + } + + // Format the contents. + bool formatted = Utf8Formatter.TryFormat(length, span, out int bytesWritten); + Debug.Assert(formatted); + span[bytesWritten++] = (byte)' '; + bytesWritten += Encoding.UTF8.GetBytes(attribute, span.Slice(bytesWritten)); + span[bytesWritten++] = (byte)'='; + bytesWritten += Encoding.UTF8.GetBytes(value, span.Slice(bytesWritten)); + span[bytesWritten++] = (byte)'\n'; + + // Write it to the stream. + dataStream.Write(span.Slice(0, bytesWritten)); } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning + + dataStream.Position = 0; // Ensure it gets written into the archive from the beginning } - return dataStream; - } - // Asynchronously dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. - private static async Task GenerateExtendedAttributesDataStreamAsync(Dictionary extendedAttributes, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } - MemoryStream? dataStream = null; - if (extendedAttributes.Count > 0) + return dataStream; + + static int CountDigits(int value) { - dataStream = new MemoryStream(); - foreach ((string attribute, string value) in extendedAttributes) + Debug.Assert(value >= 0); + int digits = 1; + while (true) { - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - await dataStream.WriteAsync(entryBytes, cancellationToken).ConfigureAwait(false); + value /= 10; + if (value == 0) break; + digits++; } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning + return digits; } - return dataStream; } // Some fields that have a reserved spot in the header, may not fit in such field anymore, but they can fit in the @@ -584,10 +627,12 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded() { ExtendedAttributes.Add(PaxEaMTime, TarHelpers.GetTimestampStringFromDateTimeOffset(_mTime)); } + if (!string.IsNullOrEmpty(_gName)) { TryAddStringField(ExtendedAttributes, PaxEaGName, _gName, FieldLengths.GName); } + if (!string.IsNullOrEmpty(_uName)) { TryAddStringField(ExtendedAttributes, PaxEaUName, _uName, FieldLengths.UName); @@ -603,7 +648,6 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded() ExtendedAttributes.Add(PaxEaSize, _size.ToString()); } - // Adds the specified string to the dictionary if it's longer than the specified max byte length. static void TryAddStringField(Dictionary extendedAttributes, string key, string value, int maxLength) { @@ -614,63 +658,24 @@ static void TryAddStringField(Dictionary extendedAttributes, str } } - // Generates an extended attribute key value pair string saved into a byte array, following the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. - // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html - private static byte[] GenerateExtendedAttributeKeyValuePairAsByteArray(byte[] keyBytes, byte[] valueBytes) - { - // Assuming key="ab" and value="cdef" - - // The " ab=cdef\n" attribute string has a length of 9 chars - int suffixByteCount = 3 + // leading space, equals sign and trailing newline - keyBytes.Length + valueBytes.Length; - - // The count string "9" has a length of 1 char - string suffixByteCountString = suffixByteCount.ToString(); - int firstTotalByteCount = Encoding.ASCII.GetByteCount(suffixByteCountString); - - // If we prepend the count string length to the attribute string, - // the total length increases to 10, which has one more digit - // "9 abc=def\n" - int firstPrefixAndSuffixByteCount = firstTotalByteCount + suffixByteCount; - - // The new count string "10" has an increased length of 2 chars - string prefixAndSuffixByteCountString = firstPrefixAndSuffixByteCount.ToString(); - int realTotalCharCount = Encoding.ASCII.GetByteCount(prefixAndSuffixByteCountString); - - byte[] finalTotalCharCountBytes = Encoding.ASCII.GetBytes(prefixAndSuffixByteCountString); - - // The final string should contain the correct total length now - List bytesList = new(); - - bytesList.AddRange(finalTotalCharCountBytes); - bytesList.Add(TarHelpers.SpaceChar); - bytesList.AddRange(keyBytes); - bytesList.Add(TarHelpers.EqualsChar); - bytesList.AddRange(valueBytes); - bytesList.Add(TarHelpers.NewLineChar); - - Debug.Assert(bytesList.Count == (realTotalCharCount + suffixByteCount)); - - return bytesList.ToArray(); - } - // The checksum accumulator first adds up the byte values of eight space chars, then the final number // is written on top of those spaces on the specified span as ascii. // At the end, it's saved in the header field and the final value returned. - internal int WriteChecksum(int checksum, Span buffer) + internal static int WriteChecksum(int checksum, Span buffer) { // The checksum field is also counted towards the total sum // but as an array filled with spaces - checksum += TarHelpers.SpaceChar * 8; + checksum += (byte)' ' * 8; Span converted = stackalloc byte[FieldLengths.Checksum]; - WriteAsOctal(checksum, converted, 0, converted.Length); + converted.Clear(); + FormatOctal(checksum, converted); Span destination = buffer.Slice(FieldLocations.Checksum, FieldLengths.Checksum); // Checksum field ends with a null and a space - destination[^1] = TarHelpers.SpaceChar; // ' ' - destination[^2] = 0; // '\0' + destination[^1] = (byte)' '; + destination[^2] = (byte)'\0'; int i = destination.Length - 3; int j = converted.Length - 1; @@ -684,7 +689,7 @@ internal int WriteChecksum(int checksum, Span buffer) } else { - destination[i] = TarHelpers.ZeroChar; // Leading zero chars '0' + destination[i] = (byte)'0'; // Leading zero chars } i--; } @@ -697,67 +702,75 @@ private static int WriteLeftAlignedBytesAndGetChecksum(ReadOnlySpan bytesT { Debug.Assert(destination.Length > 1); - int checksum = 0; - - for (int i = 0, j = 0; i < destination.Length && j < bytesToWrite.Length; i++, j++) - { - destination[i] = bytesToWrite[j]; - checksum += destination[i]; - } + // Copy as many bytes as will fit + int numToCopy = Math.Min(bytesToWrite.Length, destination.Length); + bytesToWrite = bytesToWrite.Slice(0, numToCopy); + bytesToWrite.CopyTo(destination); - return checksum; + return Checksum(bytesToWrite); } // Writes the specified bytes aligned to the right, filling all the leading bytes with the zero char 0x30, // ensuring a null terminator is included at the end of the specified span. private static int WriteRightAlignedBytesAndGetChecksum(ReadOnlySpan bytesToWrite, Span destination) { - int checksum = 0; - int i = destination.Length - 1; - int j = bytesToWrite.Length - 1; + Debug.Assert(destination.Length > 1); - while (i >= 0) + // Null terminated + destination[^1] = (byte)'\0'; + + // Copy as many input bytes as will fit + int numToCopy = Math.Min(bytesToWrite.Length, destination.Length - 1); + bytesToWrite = bytesToWrite.Slice(0, numToCopy); + int copyPos = destination.Length - 1 - bytesToWrite.Length; + bytesToWrite.CopyTo(destination.Slice(copyPos)); + + // Fill all leading bytes with zeros + destination.Slice(0, copyPos).Fill((byte)'0'); + + return Checksum(destination); + } + + private static int Checksum(ReadOnlySpan bytes) + { + int checksum = 0; + foreach (byte b in bytes) { - if (i == destination.Length - 1) - { - destination[i] = 0; // null terminated - } - else if (j >= 0) - { - destination[i] = bytesToWrite[j]; - j--; - } - else - { - destination[i] = TarHelpers.ZeroChar; // leading zeros - } - checksum += destination[i]; - i--; + checksum += b; } - return checksum; } // Writes the specified decimal number as a right-aligned octal number and returns its checksum. - internal static int WriteAsOctal(long tenBaseNumber, Span destination, int location, int length) + internal static int FormatOctal(long value, Span destination) { - long octal = TarHelpers.ConvertDecimalToOctal(tenBaseNumber); - byte[] bytes = Encoding.ASCII.GetBytes(octal.ToString()); - return WriteRightAlignedBytesAndGetChecksum(bytes.AsSpan(), destination.Slice(location, length)); + ulong remaining = (ulong)value; + Span digits = stackalloc byte[32]; // longer than any possible octal formatting of a ulong + + int i = digits.Length - 1; + while (true) + { + digits[i] = (byte)('0' + (remaining % 8)); + remaining /= 8; + if (remaining == 0) break; + i--; + } + + return WriteRightAlignedBytesAndGetChecksum(digits.Slice(i), destination); } // Writes the specified DateTimeOffset's Unix time seconds as a right-aligned octal number, and returns its checksum. - private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination, int location, int length) + private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination) { long unixTimeSeconds = timestamp.ToUnixTimeSeconds(); - return WriteAsOctal(unixTimeSeconds, destination, location, length); + return FormatOctal(unixTimeSeconds, destination); } // Writes the specified text as an ASCII string aligned to the left, and returns its checksum. - private static int WriteAsAsciiString(string str, Span buffer, int location, int length) + private static int WriteAsAsciiString(string str, Span buffer) { byte[] bytes = Encoding.ASCII.GetBytes(str); - return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer.Slice(location, length)); + return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer); } // Gets the special name for the 'name' field in an extended attribute entry. @@ -767,18 +780,15 @@ private static int WriteAsAsciiString(string str, Span buffer, int locatio // - %f: The filename of the file, equivalent to the result of the basename utility on the translated pathname. private string GenerateExtendedAttributeName() { - string? dirName = Path.GetDirectoryName(_name); - dirName = string.IsNullOrEmpty(dirName) ? "." : dirName; - - int processId = Environment.ProcessId; - - string? fileName = Path.GetFileName(_name); - fileName = string.IsNullOrEmpty(fileName) ? "." : fileName; + ReadOnlySpan dirName = Path.GetDirectoryName(_name.AsSpan()); + dirName = dirName.IsEmpty ? "." : dirName; - string trailingSeparator = (_typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList) ? - $"{Path.DirectorySeparatorChar}" : string.Empty; + ReadOnlySpan fileName = Path.GetFileName(_name.AsSpan()); + fileName = fileName.IsEmpty ? "." : fileName; - return string.Format(PaxHeadersFormat, dirName, processId, fileName, trailingSeparator); + return _typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList ? + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}{Path.DirectorySeparatorChar}" : + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}"; } // Gets the special name for the 'name' field in a global extended attribute entry. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs index 3e0a0b814484d4..613816904c9c4a 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs @@ -114,5 +114,25 @@ internal static void SetPendingModes(SortedDictionary? pen File.SetUnixFileMode(dir.Key, dir.Value & ~umask); } } + + internal static unsafe string EntryFromPath(ReadOnlySpan path, bool appendPathSeparator = false) + { + // Remove leading separators. + int nonSlash = path.IndexOfAnyExcept('/'); + if (nonSlash == -1) + { + nonSlash = path.Length; + } + path = path.Slice(nonSlash); + + // Append a separator if necessary. + return (path.IsEmpty, appendPathSeparator) switch + { + (false, false) => path.ToString(), + (false, true) => string.Concat(path, "/"), + (true, false) => string.Empty, + (true, true) => "/", + }; + } } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs index 6569ff237dbf97..e1025b7bdece68 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Windows.cs @@ -3,8 +3,8 @@ using System.Collections.Generic; using System.IO; -using System.Text; using System.Diagnostics; +using System.Runtime.InteropServices; namespace System.Formats.Tar { @@ -18,5 +18,47 @@ internal static void CreateDirectory(string fullPath, UnixFileMode? mode, Sorted internal static void SetPendingModes(SortedDictionary? pendingModes) => Debug.Assert(pendingModes is null); + + internal static unsafe string EntryFromPath(ReadOnlySpan path, bool appendPathSeparator = false) + { + // Remove leading separators. + int nonSlash = path.IndexOfAnyExcept('/', '\\'); + if (nonSlash == -1) + { + nonSlash = path.Length; + } + path = path.Slice(nonSlash); + + // Replace \ with /, and append a separator if necessary. + + if (path.IsEmpty) + { + return appendPathSeparator ? + "/" : + string.Empty; + } + + fixed (char* pathPtr = &MemoryMarshal.GetReference(path)) + { + return string.Create(appendPathSeparator ? path.Length + 1 : path.Length, (appendPathSeparator, (IntPtr)pathPtr, path.Length), static (dest, state) => + { + ReadOnlySpan path = new ReadOnlySpan((char*)state.Item2, state.Length); + path.CopyTo(dest); + if (state.appendPathSeparator) + { + dest[^1] = '/'; + } + + // To ensure tar files remain compatible with Unix, and per the ZIP File Format Specification 4.4.17.1, + // all slashes should be forward slashes. + int pos; + while ((pos = dest.IndexOf('\\')) >= 0) + { + dest[pos] = '/'; + dest = dest.Slice(pos + 1); + } + }); + } + } } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index d4592eb85d4b5f..c2a1b3b854c14b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -3,8 +3,10 @@ using System.Buffers; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; +using System.Numerics; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -17,11 +19,6 @@ internal static partial class TarHelpers internal const short RecordSize = 512; internal const int MaxBufferLength = 4096; - internal const int ZeroChar = 0x30; - internal const byte SpaceChar = 0x20; - internal const byte EqualsChar = 0x3d; - internal const byte NewLineChar = 0xa; - // Default mode for TarEntry created for a file-type. private const UnixFileMode DefaultFileMode = UnixFileMode.UserRead | UnixFileMode.UserWrite | @@ -117,36 +114,6 @@ internal static int CalculatePadding(long size) return padding; } - // Returns the specified 8-base number as a 10-base number. - internal static int ConvertDecimalToOctal(int value) - { - int multiplier = 1; - int accum = value; - int actual = 0; - while (accum != 0) - { - actual += (accum % 8) * multiplier; - accum /= 8; - multiplier *= 10; - } - return actual; - } - - // Returns the specified 10-base number as an 8-base number. - internal static long ConvertDecimalToOctal(long value) - { - long multiplier = 1; - long accum = value; - long actual = 0; - while (accum != 0) - { - actual += (accum % 8) * multiplier; - accum /= 8; - multiplier *= 10; - } - return actual; - } - // Returns true if all the bytes in the specified array are nulls, false otherwise. internal static bool IsAllNullBytes(Span buffer) => buffer.IndexOfAnyExcept((byte)0) < 0; @@ -191,9 +158,10 @@ internal static bool TryGetStringAsBaseTenInteger(IReadOnlyDictionary buffer) + /// Parses a byte span that represents an ASCII string containing a number in octal base. + internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INumber { - string str = GetTrimmedAsciiString(buffer); - return string.IsNullOrEmpty(str) ? 0 : Convert.ToInt32(str, fromBase: 8); - } + buffer = TrimEndingNullsAndSpaces(buffer); + buffer = TrimLeadingNullsAndSpaces(buffer); - // Receives a byte array that represents an ASCII string containing a number in octal base. - // Converts the array to an octal base number, then transforms it to ten base and returns it. - internal static long GetTenBaseLongFromOctalAsciiChars(Span buffer) - { - string str = GetTrimmedAsciiString(buffer); - return string.IsNullOrEmpty(str) ? 0 : Convert.ToInt64(str, fromBase: 8); + if (buffer.Length == 0) + { + return T.Zero; + } + + T octalFactor = T.CreateTruncating(8u); + T value = T.Zero; + foreach (byte b in buffer) + { + uint digit = (uint)(b - '0'); + if (digit >= 8) + { + ThrowInvalidNumber(); + } + + value = checked((value * octalFactor) + T.CreateTruncating(digit)); + } + + return value; } + [DoesNotReturn] + private static void ThrowInvalidNumber() => + throw new FormatException(SR.Format(SR.TarInvalidNumber)); + // Returns the string contained in the specified buffer of bytes, // in the specified encoding, removing the trailing null or space chars. private static string GetTrimmedString(ReadOnlySpan buffer, Encoding encoding) + { + buffer = TrimEndingNullsAndSpaces(buffer); + return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer); + } + + internal static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer) { int trimmedLength = buffer.Length; - while (trimmedLength > 0 && IsByteNullOrSpace(buffer[trimmedLength - 1])) + while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32) { trimmedLength--; } - return trimmedLength == 0 ? string.Empty : encoding.GetString(buffer.Slice(0, trimmedLength)); + return buffer.Slice(0, trimmedLength); + } + + private static ReadOnlySpan TrimLeadingNullsAndSpaces(ReadOnlySpan buffer) + { + int newStart = 0; + while (newStart < buffer.Length && buffer[newStart] is 0 or 32) + { + newStart++; + } - static bool IsByteNullOrSpace(byte c) => c is 0 or 32; + return buffer.Slice(newStart); } // Returns the ASCII string contained in the specified buffer of bytes, @@ -369,7 +368,7 @@ TarEntryType.RegularFile or throw new FormatException(string.Format(SR.TarInvalidFormat, archiveFormat)); } - throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupported, entryType, archiveFormat)); + throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupportedInFormat, entryType, archiveFormat)); } } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index 7e951c5243e35c..5328d61190280b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -19,7 +19,6 @@ public sealed class TarReader : IDisposable, IAsyncDisposable private readonly bool _leaveOpen; private TarEntry? _previouslyReadEntry; private List? _dataStreamsToDispose; - private bool _readFirstEntry; private bool _reachedEndMarkers; internal Stream _archiveStream; @@ -44,7 +43,6 @@ public TarReader(Stream archiveStream, bool leaveOpen = false) _previouslyReadEntry = null; _isDisposed = false; - _readFirstEntry = false; _reachedEndMarkers = false; } @@ -54,8 +52,18 @@ public TarReader(Stream archiveStream, bool leaveOpen = false) /// The property of any entry can be replaced with a new stream. If the user decides to replace it on a instance that was obtained using a , the underlying stream gets disposed immediately, freeing the of origin from the responsibility of having to dispose it. public void Dispose() { - Dispose(disposing: true); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) + { + foreach (Stream s in _dataStreamsToDispose) + { + s.Dispose(); + } + } + } } /// @@ -64,8 +72,18 @@ public void Dispose() /// The property of any entry can be replaced with a new stream. If the user decides to replace it on a instance that was obtained using a , the underlying stream gets disposed immediately, freeing the of origin from the responsibility of having to dispose it. public async ValueTask DisposeAsync() { - await DisposeAsync(disposing: true).ConfigureAwait(false); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) + { + foreach (Stream s in _dataStreamsToDispose) + { + await s.DisposeAsync().ConfigureAwait(false); + } + } + } } /// @@ -104,11 +122,6 @@ public async ValueTask DisposeAsync() TarHeader? header = TryGetNextEntryHeader(copyData); if (header != null) { - if (!_readFirstEntry) - { - _readFirstEntry = true; - } - TarEntry entry = header._format switch { TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ? @@ -118,6 +131,11 @@ public async ValueTask DisposeAsync() TarEntryFormat.V7 or TarEntryFormat.Unknown or _ => new V7TarEntry(header, this), }; + if (_archiveStream.CanSeek && _archiveStream.Length == _archiveStream.Position) + { + _reachedEndMarkers = true; + } + _previouslyReadEntry = entry; PreserveDataStreamForDisposalIfNeeded(entry); return entry; @@ -200,10 +218,10 @@ internal void AdvanceDataStreamIfNeeded() { long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position; TarHelpers.AdvanceStream(_archiveStream, bytesToSkip); - TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size); dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw } } + TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size); } } @@ -242,56 +260,10 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel { long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position; await TarHelpers.AdvanceStreamAsync(_archiveStream, bytesToSkip, cancellationToken).ConfigureAwait(false); - await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false); dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw } } - } - } - - // Disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private void Dispose(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) - { - foreach (Stream s in _dataStreamsToDispose) - { - s.Dispose(); - } - } - } - finally - { - _isDisposed = true; - } - } - } - - // Asynchronously disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private async ValueTask DisposeAsync(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) - { - foreach (Stream s in _dataStreamsToDispose) - { - await s.DisposeAsync().ConfigureAwait(false); - } - } - } - finally - { - _isDisposed = true; - } + await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false); } } @@ -303,11 +275,6 @@ private async ValueTask DisposeAsync(bool disposing) TarHeader? header = await TryGetNextEntryHeaderAsync(copyData, cancellationToken).ConfigureAwait(false); if (header != null) { - if (!_readFirstEntry) - { - _readFirstEntry = true; - } - TarEntry entry = header._format switch { TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ? @@ -317,6 +284,11 @@ private async ValueTask DisposeAsync(bool disposing) TarEntryFormat.V7 or TarEntryFormat.Unknown or _ => new V7TarEntry(header, this), }; + if (_archiveStream.CanSeek && _archiveStream.Length == _archiveStream.Position) + { + _reachedEndMarkers = true; + } + _previouslyReadEntry = entry; PreserveDataStreamForDisposalIfNeeded(entry); return entry; @@ -335,7 +307,7 @@ private async ValueTask DisposeAsync(bool disposing) { Debug.Assert(!_reachedEndMarkers); - TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown); + TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true); if (header == null) { @@ -377,7 +349,7 @@ private async ValueTask DisposeAsync(bool disposing) Debug.Assert(!_reachedEndMarkers); - TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, cancellationToken).ConfigureAwait(false); + TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true, cancellationToken).ConfigureAwait(false); if (header == null) { return null; @@ -413,9 +385,10 @@ private async ValueTask DisposeAsync(bool disposing) // and returns the actual entry with the processed extended attributes saved in the _extendedAttributes dictionary. private bool TryProcessExtendedAttributesHeader(TarHeader extendedAttributesHeader, bool copyData, [NotNullWhen(returnValue: true)] out TarHeader? actualHeader) { - actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax); + // Don't process the data block of the actual entry just yet, because there's a slim chance + // that the extended attributes contain a size that we need to override in the header + actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false); - // Now get the actual entry if (actualHeader == null) { return false; @@ -433,6 +406,9 @@ TarEntryType.LongLink or // Replace all the attributes representing standard fields with the extended ones, if any actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes); + // We retrieved the extended attributes, now we can read the data, and always with the right size + actualHeader.ProcessDataBlock(_archiveStream, copyData); + return true; } @@ -442,8 +418,9 @@ TarEntryType.LongLink or { cancellationToken.ThrowIfCancellationRequested(); - // Now get the actual entry - TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, cancellationToken).ConfigureAwait(false); + // Don't process the data block of the actual entry just yet, because there's a slim chance + // that the extended attributes contain a size that we need to override in the header + TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false, cancellationToken).ConfigureAwait(false); if (actualHeader == null) { return null; @@ -467,6 +444,9 @@ TarEntryType.LongLink or // Replace all the attributes representing standard fields with the extended ones, if any actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes); + // We retrieved the extended attributes, now we can read the data, and always with the right size + actualHeader.ProcessDataBlock(_archiveStream, copyData); + return actualHeader; } @@ -476,7 +456,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta { finalHeader = new(TarEntryFormat.Gnu); - TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu); + TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true); // Get the second entry, which is the actual entry if (secondHeader == null) @@ -494,7 +474,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta if ((header._typeFlag is TarEntryType.LongLink && secondHeader._typeFlag is TarEntryType.LongPath) || (header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink)) { - TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu); + TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true); // Get the third entry, which is the actual entry if (thirdHeader == null) @@ -553,7 +533,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta cancellationToken.ThrowIfCancellationRequested(); // Get the second entry, which is the actual entry - TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false); + TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false); if (secondHeader == null) { return null; @@ -572,7 +552,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta (header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink)) { // Get the third entry, which is the actual entry - TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false); + TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false); if (thirdHeader == null) { return null; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs index 7e678bcd918711..28eaf1e3323237 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs @@ -94,16 +94,7 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) { Debug.Assert(entry._header._dataStream == null); - - FileStreamOptions options = new() - { - Mode = FileMode.Open, - Access = FileAccess.Read, - Share = FileShare.Read, - Options = fileOptions - }; - - entry._header._dataStream = new FileStream(fullPath, options); + entry._header._dataStream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, fileOptions); } return entry; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs index bfb6cf17f11076..d3fa19a5b12eac 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs @@ -22,15 +22,15 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil FileAttributes attributes = File.GetAttributes(fullPath); TarEntryType entryType; - if (attributes.HasFlag(FileAttributes.ReparsePoint)) + if ((attributes & FileAttributes.ReparsePoint) != 0) { entryType = TarEntryType.SymbolicLink; } - else if (attributes.HasFlag(FileAttributes.Directory)) + else if ((attributes & FileAttributes.Directory) != 0) { entryType = TarEntryType.Directory; } - else if (attributes.HasFlag(FileAttributes.Normal) || attributes.HasFlag(FileAttributes.Archive)) + else if ((attributes & (FileAttributes.Normal | FileAttributes.Archive)) != 0) { entryType = Format is TarEntryFormat.V7 ? TarEntryType.V7RegularFile : TarEntryType.RegularFile; } @@ -48,7 +48,7 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil _ => throw new FormatException(string.Format(SR.TarInvalidFormat, Format)), }; - FileSystemInfo info = attributes.HasFlag(FileAttributes.Directory) ? new DirectoryInfo(fullPath) : new FileInfo(fullPath); + FileSystemInfo info = (attributes & FileAttributes.Directory) != 0 ? new DirectoryInfo(fullPath) : new FileInfo(fullPath); entry._header._mTime = info.LastWriteTimeUtc; entry._header._aTime = info.LastAccessTimeUtc; @@ -63,16 +63,8 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) { - FileStreamOptions options = new() - { - Mode = FileMode.Open, - Access = FileAccess.Read, - Share = FileShare.Read, - Options = fileOptions - }; - Debug.Assert(entry._header._dataStream == null); - entry._header._dataStream = new FileStream(fullPath, options); + entry._header._dataStream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, fileOptions); } return entry; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index 94692316840235..7d3977eda68df4 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -84,8 +84,20 @@ public TarWriter(Stream archiveStream, TarEntryFormat format = TarEntryFormat.Pa /// public void Dispose() { - Dispose(disposing: true); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (_wroteEntries) + { + WriteFinalRecords(); + } + + if (!_leaveOpen) + { + _archiveStream.Dispose(); + } + } } /// @@ -93,8 +105,20 @@ public void Dispose() /// public async ValueTask DisposeAsync() { - await DisposeAsync(disposing: true).ConfigureAwait(false); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (_wroteEntries) + { + await WriteFinalRecordsAsync().ConfigureAwait(false); + } + + if (!_leaveOpen) + { + await _archiveStream.DisposeAsync().ConfigureAwait(false); + } + } } /// @@ -241,95 +265,40 @@ public Task WriteEntryAsync(TarEntry entry, CancellationToken cancellationToken return WriteEntryAsyncInternal(entry, cancellationToken); } - // Disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private void Dispose(bool disposing) + // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. + private void WriteEntryInternal(TarEntry entry) { - if (disposing && !_isDisposed) + Span buffer = stackalloc byte[TarHelpers.RecordSize]; + buffer.Clear(); + + switch (entry.Format) { - try - { - if (_wroteEntries) - { - WriteFinalRecords(); - } + case TarEntryFormat.V7: + entry._header.WriteAsV7(_archiveStream, buffer); + break; + case TarEntryFormat.Ustar: + entry._header.WriteAsUstar(_archiveStream, buffer); + break; - if (!_leaveOpen) + case TarEntryFormat.Pax: + if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) { - _archiveStream.Dispose(); + entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); } - } - finally - { - _isDisposed = true; - } - } - } - - // Asynchronously disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private async ValueTask DisposeAsync(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (_wroteEntries) + else { - await WriteFinalRecordsAsync().ConfigureAwait(false); + entry._header.WriteAsPax(_archiveStream, buffer); } + break; + case TarEntryFormat.Gnu: + entry._header.WriteAsGnu(_archiveStream, buffer); + break; - if (!_leaveOpen) - { - await _archiveStream.DisposeAsync().ConfigureAwait(false); - } - } - finally - { - _isDisposed = true; - } - } - } - - // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. - private void WriteEntryInternal(TarEntry entry) - { - byte[] rented = ArrayPool.Shared.Rent(minimumLength: TarHelpers.RecordSize); - Span buffer = rented.AsSpan(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger - buffer.Clear(); // Rented arrays aren't clean - try - { - switch (entry.Format) - { - case TarEntryFormat.V7: - entry._header.WriteAsV7(_archiveStream, buffer); - break; - case TarEntryFormat.Ustar: - entry._header.WriteAsUstar(_archiveStream, buffer); - break; - case TarEntryFormat.Pax: - if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) - { - entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); - } - else - { - entry._header.WriteAsPax(_archiveStream, buffer); - } - break; - case TarEntryFormat.Gnu: - entry._header.WriteAsGnu(_archiveStream, buffer); - break; - default: - Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); - throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); - } - } - finally - { - ArrayPool.Shared.Return(rented); + default: + Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); + throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); } _wroteEntries = true; @@ -364,7 +333,9 @@ private async Task WriteEntryAsyncInternal(TarEntry entry, CancellationToken can // by two records consisting entirely of zero bytes. private void WriteFinalRecords() { - byte[] emptyRecord = new byte[TarHelpers.RecordSize]; + Span emptyRecord = stackalloc byte[TarHelpers.RecordSize]; + emptyRecord.Clear(); + _archiveStream.Write(emptyRecord); _archiveStream.Write(emptyRecord); } @@ -374,9 +345,14 @@ private void WriteFinalRecords() // This method is called from DisposeAsync, so we don't want to propagate a cancelled CancellationToken. private async ValueTask WriteFinalRecordsAsync() { - byte[] emptyRecord = new byte[TarHelpers.RecordSize]; - await _archiveStream.WriteAsync(emptyRecord, cancellationToken: default).ConfigureAwait(false); - await _archiveStream.WriteAsync(emptyRecord, cancellationToken: default).ConfigureAwait(false); + const int TwoRecordSize = TarHelpers.RecordSize * 2; + + byte[] twoEmptyRecords = ArrayPool.Shared.Rent(TwoRecordSize); + Array.Clear(twoEmptyRecords, 0, TwoRecordSize); + + await _archiveStream.WriteAsync(twoEmptyRecords.AsMemory(0, TwoRecordSize), cancellationToken: default).ConfigureAwait(false); + + ArrayPool.Shared.Return(twoEmptyRecords); } private (string, string) ValidateWriteEntryArguments(string fileName, string? entryName) diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs index 75a3495d94e5e5..308a9b68ba4def 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectory.File.Tests.cs @@ -193,5 +193,65 @@ public void IncludeAllSegmentsOfPath(bool includeBaseDirectory) Assert.Null(reader.GetNextEntry()); } + + [ConditionalFact(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks))] + public void SkipRecursionIntoDirectorySymlinks() + { + using TempDirectory root = new TempDirectory(); + + string destinationArchive = Path.Join(root.Path, "destination.tar"); + + string externalDirectory = Path.Join(root.Path, "externalDirectory"); + Directory.CreateDirectory(externalDirectory); + + File.Create(Path.Join(externalDirectory, "file.txt")).Dispose(); + + string sourceDirectoryName = Path.Join(root.Path, "baseDirectory"); + Directory.CreateDirectory(sourceDirectoryName); + + string subDirectory = Path.Join(sourceDirectoryName, "subDirectory"); + Directory.CreateSymbolicLink(subDirectory, externalDirectory); // Should not recurse here + + TarFile.CreateFromDirectory(sourceDirectoryName, destinationArchive, includeBaseDirectory: false); + + using FileStream archiveStream = File.OpenRead(destinationArchive); + using TarReader reader = new(archiveStream, leaveOpen: false); + + TarEntry entry = reader.GetNextEntry(); + Assert.NotNull(entry); + Assert.Equal("subDirectory/", entry.Name); + Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType); + + Assert.Null(reader.GetNextEntry()); // file.txt should not be found + } + + [ConditionalFact(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks))] + public void SkipRecursionIntoBaseDirectorySymlink() + { + using TempDirectory root = new TempDirectory(); + + string destinationArchive = Path.Join(root.Path, "destination.tar"); + + string externalDirectory = Path.Join(root.Path, "externalDirectory"); + Directory.CreateDirectory(externalDirectory); + + string subDirectory = Path.Join(externalDirectory, "subDirectory"); + Directory.CreateDirectory(subDirectory); + + string sourceDirectoryName = Path.Join(root.Path, "baseDirectory"); + Directory.CreateSymbolicLink(sourceDirectoryName, externalDirectory); + + TarFile.CreateFromDirectory(sourceDirectoryName, destinationArchive, includeBaseDirectory: true); // Base directory is a symlink, do not recurse + + using FileStream archiveStream = File.OpenRead(destinationArchive); + using TarReader reader = new(archiveStream, leaveOpen: false); + + TarEntry entry = reader.GetNextEntry(); + Assert.NotNull(entry); + Assert.Equal("baseDirectory/", entry.Name); + Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType); + + Assert.Null(reader.GetNextEntry()); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs index c9b4377cd03285..78b051ae33b08e 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.CreateFromDirectoryAsync.File.Tests.cs @@ -237,5 +237,65 @@ public async Task IncludeAllSegmentsOfPath_Async(bool includeBaseDirectory) } } } + + [ConditionalFact(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks))] + public async Task SkipRecursionIntoDirectorySymlinksAsync() + { + using TempDirectory root = new TempDirectory(); + + string destinationArchive = Path.Join(root.Path, "destination.tar"); + + string externalDirectory = Path.Join(root.Path, "externalDirectory"); + Directory.CreateDirectory(externalDirectory); + + File.Create(Path.Join(externalDirectory, "file.txt")).Dispose(); + + string sourceDirectoryName = Path.Join(root.Path, "baseDirectory"); + Directory.CreateDirectory(sourceDirectoryName); + + string subDirectory = Path.Join(sourceDirectoryName, "subDirectory"); + Directory.CreateSymbolicLink(subDirectory, externalDirectory); // Should not recurse here + + await TarFile.CreateFromDirectoryAsync(sourceDirectoryName, destinationArchive, includeBaseDirectory: false); + + await using FileStream archiveStream = File.OpenRead(destinationArchive); + await using TarReader reader = new(archiveStream, leaveOpen: false); + + TarEntry entry = await reader.GetNextEntryAsync(); + Assert.NotNull(entry); + Assert.Equal("subDirectory/", entry.Name); + Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType); + + Assert.Null(await reader.GetNextEntryAsync()); // file.txt should not be found + } + + [ConditionalFact(typeof(MountHelper), nameof(MountHelper.CanCreateSymbolicLinks))] + public async Task SkipRecursionIntoBaseDirectorySymlinkAsync() + { + using TempDirectory root = new TempDirectory(); + + string destinationArchive = Path.Join(root.Path, "destination.tar"); + + string externalDirectory = Path.Join(root.Path, "externalDirectory"); + Directory.CreateDirectory(externalDirectory); + + string subDirectory = Path.Join(externalDirectory, "subDirectory"); + Directory.CreateDirectory(subDirectory); + + string sourceDirectoryName = Path.Join(root.Path, "baseDirectory"); + Directory.CreateSymbolicLink(sourceDirectoryName, externalDirectory); + + await TarFile.CreateFromDirectoryAsync(sourceDirectoryName, destinationArchive, includeBaseDirectory: true); // Base directory is a symlink, do not recurse + + await using FileStream archiveStream = File.OpenRead(destinationArchive); + await using TarReader reader = new(archiveStream, leaveOpen: false); + + TarEntry entry = await reader.GetNextEntryAsync(); + Assert.NotNull(entry); + Assert.Equal("baseDirectory/", entry.Name); + Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType); + + Assert.Null(await reader.GetNextEntryAsync()); // subDirectory should not be found + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.File.Tests.cs index ee00ccfaf6ccb8..90c5eba6b9ff97 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.File.Tests.cs @@ -44,6 +44,33 @@ public void NonExistentDirectory_Throws() Assert.Throws(() => TarFile.ExtractToDirectory(sourceFileName: filePath, destinationDirectoryName: dirPath, overwriteFiles: false)); } + [Fact] + public void SetsLastModifiedTimeOnExtractedFiles() + { + using TempDirectory root = new TempDirectory(); + + string inDir = Path.Join(root.Path, "indir"); + string inFile = Path.Join(inDir, "file"); + + string tarFile = Path.Join(root.Path, "file.tar"); + + string outDir = Path.Join(root.Path, "outdir"); + string outFile = Path.Join(outDir, "file"); + + Directory.CreateDirectory(inDir); + File.Create(inFile).Dispose(); + var dt = new DateTime(2001, 1, 2, 3, 4, 5, DateTimeKind.Local); + File.SetLastWriteTime(inFile, dt); + + TarFile.CreateFromDirectory(sourceDirectoryName: inDir, destinationFileName: tarFile, includeBaseDirectory: false); + + Directory.CreateDirectory(outDir); + TarFile.ExtractToDirectory(sourceFileName: tarFile, destinationDirectoryName: outDir, overwriteFiles: false); + + Assert.True(File.Exists(outFile)); + Assert.InRange(File.GetLastWriteTime(outFile).Ticks, dt.AddSeconds(-3).Ticks, dt.AddSeconds(3).Ticks); // include some slop for filesystem granularity + } + [Theory] [InlineData(TestTarFormat.v7)] [InlineData(TestTarFormat.ustar)] diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs index 5a68d5a95d5d26..d53f354ee408d1 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectory.Stream.Tests.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; +using System.IO.Compression; using System.Linq; using Xunit; @@ -145,5 +146,36 @@ private void Extract_LinkEntry_TargetInsideDirectory_Internal(TarEntryType entry Assert.Equal(2, Directory.GetFileSystemEntries(baseDir).Count()); } + + [Theory] + [InlineData(512)] + [InlineData(512 + 1)] + [InlineData(512 + 512 - 1)] + public void Extract_UnseekableStream_BlockAlignmentPadding_DoesNotAffectNextEntries(int contentSize) + { + byte[] fileContents = new byte[contentSize]; + Array.Fill(fileContents, 0x1); + + using var archive = new MemoryStream(); + using (var compressor = new GZipStream(archive, CompressionMode.Compress, leaveOpen: true)) + { + using var writer = new TarWriter(compressor); + var entry1 = new PaxTarEntry(TarEntryType.RegularFile, "file"); + entry1.DataStream = new MemoryStream(fileContents); + writer.WriteEntry(entry1); + + var entry2 = new PaxTarEntry(TarEntryType.RegularFile, "next-file"); + writer.WriteEntry(entry2); + } + + archive.Position = 0; + using var decompressor = new GZipStream(archive, CompressionMode.Decompress); + using var reader = new TarReader(decompressor); + + using TempDirectory destination = new TempDirectory(); + TarFile.ExtractToDirectory(decompressor, destination.Path, overwriteFiles: true); + + Assert.Equal(2, Directory.GetFileSystemEntries(destination.Path, "*", SearchOption.AllDirectories).Count()); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.File.Tests.cs index 905e9b61bfb8e2..19a28d7f59ce39 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.File.Tests.cs @@ -56,6 +56,33 @@ public async Task NonExistentDirectory_Throws_Async() } } + [Fact] + public async Task SetsLastModifiedTimeOnExtractedFiles() + { + using TempDirectory root = new TempDirectory(); + + string inDir = Path.Join(root.Path, "indir"); + string inFile = Path.Join(inDir, "file"); + + string tarFile = Path.Join(root.Path, "file.tar"); + + string outDir = Path.Join(root.Path, "outdir"); + string outFile = Path.Join(outDir, "file"); + + Directory.CreateDirectory(inDir); + File.Create(inFile).Dispose(); + var dt = new DateTime(2001, 1, 2, 3, 4, 5, DateTimeKind.Local); + File.SetLastWriteTime(inFile, dt); + + await TarFile.CreateFromDirectoryAsync(sourceDirectoryName: inDir, destinationFileName: tarFile, includeBaseDirectory: false); + + Directory.CreateDirectory(outDir); + await TarFile.ExtractToDirectoryAsync(sourceFileName: tarFile, destinationDirectoryName: outDir, overwriteFiles: false); + + Assert.True(File.Exists(outFile)); + Assert.InRange(File.GetLastWriteTime(outFile).Ticks, dt.AddSeconds(-3).Ticks, dt.AddSeconds(3).Ticks); // include some slop for filesystem granularity + } + [Theory] [InlineData(TestTarFormat.v7)] [InlineData(TestTarFormat.ustar)] diff --git a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs index a35a22c1050f56..741ce8e102deb4 100644 --- a/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarFile/TarFile.ExtractToDirectoryAsync.Stream.Tests.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; +using System.IO.Compression; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -174,5 +175,36 @@ private async Task Extract_LinkEntry_TargetInsideDirectory_Internal_Async(TarEnt } } } + + [Theory] + [InlineData(512)] + [InlineData(512 + 1)] + [InlineData(512 + 512 - 1)] + public async Task Extract_UnseekableStream_BlockAlignmentPadding_DoesNotAffectNextEntries_Async(int contentSize) + { + byte[] fileContents = new byte[contentSize]; + Array.Fill(fileContents, 0x1); + + using var archive = new MemoryStream(); + using (var compressor = new GZipStream(archive, CompressionMode.Compress, leaveOpen: true)) + { + using var writer = new TarWriter(compressor); + var entry1 = new PaxTarEntry(TarEntryType.RegularFile, "file"); + entry1.DataStream = new MemoryStream(fileContents); + await writer.WriteEntryAsync(entry1); + + var entry2 = new PaxTarEntry(TarEntryType.RegularFile, "next-file"); + await writer.WriteEntryAsync(entry2); + } + + archive.Position = 0; + using var decompressor = new GZipStream(archive, CompressionMode.Decompress); + using var reader = new TarReader(decompressor); + + using TempDirectory destination = new TempDirectory(); + await TarFile.ExtractToDirectoryAsync(decompressor, destination.Path, overwriteFiles: true); + + Assert.Equal(2, Directory.GetFileSystemEntries(destination.Path, "*", SearchOption.AllDirectories).Count()); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs index 14087fb030d020..69a543a206d4b9 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Globalization; using System.IO; +using System.IO.Compression; using System.Linq; using System.Threading.Tasks; using Xunit; @@ -125,5 +126,245 @@ public Task Read_Archive_LongFileName_Over100_Under255_Async(TarEntryFormat form [InlineData(TarEntryFormat.Gnu, TestTarFormat.oldgnu)] public Task Read_Archive_LongPath_Over255_Async(TarEntryFormat format, TestTarFormat testFormat) => Read_Archive_LongPath_Over255_Async_Internal(format, testFormat); + + [Theory] + [MemberData(nameof(GetV7TestCaseNames))] + public Task ReadDataStreamOfTarGzV7Async(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.v7, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetUstarTestCaseNames))] + public Task ReadDataStreamOfTarGzUstarAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.ustar, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadDataStreamOfTarGzPaxAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.pax, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadDataStreamOfTarGzPaxGeaAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.pax_gea, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadDataStreamOfTarGzOldGnuAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.oldgnu, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadDataStreamOfTarGzGnuAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.gnu, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetV7TestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzV7Async(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.v7, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetUstarTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzUstarAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.ustar, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzPaxAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.pax, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzPaxGeaAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.pax_gea, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzOldGnuAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.oldgnu, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public Task ReadCopiedDataStreamOfTarGzGnuAsync(string testCaseName) => + VerifyDataStreamOfTarGzInternalAsync(TestTarFormat.gnu, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetGoLangTarTestCaseNames))] + public Task ReadDataStreamOfExternalAssetsGoLangAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("golang_tar", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetNodeTarTestCaseNames))] + public Task ReadDataStreamOfExternalAssetsNodeAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("node-tar", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetRsTarTestCaseNames))] + public Task ReadDataStreamOfExternalAssetsRsAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("tar-rs", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetGoLangTarTestCaseNames))] + public Task ReadCopiedDataStreamOfExternalAssetsGoLangAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("golang_tar", testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetNodeTarTestCaseNames))] + public Task ReadCopiedDataStreamOfExternalAssetsNodeAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("node-tar", testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetRsTarTestCaseNames))] + public Task ReadCopiedDataStreamOfExternalAssetsRsAsync(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternalAsync("tar-rs", testCaseName, copyData: true); + + [Fact] + public async Task Throw_FifoContainsNonZeroDataSectionAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "hdr-only"); + await using TarReader reader = new TarReader(archiveStream); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + Assert.NotNull(await reader.GetNextEntryAsync()); + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + [Fact] + public async Task Throw_SingleExtendedAttributesEntryWithNoActualEntryAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "pax-path-hdr"); + await using TarReader reader = new TarReader(archiveStream); + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + [Theory] + [InlineData("tar-rs", "spaces")] + [InlineData("golang_tar", "v7")] + public async Task AllowSpacesInOctalFieldsAsync(string folderName, string testCaseName) + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, folderName, testCaseName); + await using TarReader reader = new TarReader(archiveStream); + TarEntry entry; + while ((entry = await reader.GetNextEntryAsync()) != null) + { + AssertExtensions.GreaterThan(entry.Checksum, 0); + AssertExtensions.GreaterThan((int)entry.Mode, 0); + } + } + + [Theory] + [InlineData("pax-multi-hdrs")] // Multiple consecutive PAX metadata entries + [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries + [InlineData("neg-size")] // Garbage chars + [InlineData("invalid-go17")] // Many octal fields are all zero chars + [InlineData("issue11169")] // Checksum with null in the middle + [InlineData("issue10968")] // Garbage chars + [InlineData("writer-big")] // The size field contains an euro char + public async Task Throw_ArchivesWithRandomCharsAsync(string testCaseName) + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", testCaseName); + await using TarReader reader = new TarReader(archiveStream); + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + [Fact] + public async Task GarbageEntryChecksumZeroReturnNullAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "issue12435"); + await using TarReader reader = new TarReader(archiveStream); + Assert.Null(await reader.GetNextEntryAsync()); + } + + [Theory] + [InlineData("golang_tar", "gnu-nil-sparse-data")] + [InlineData("golang_tar", "gnu-nil-sparse-hole")] + [InlineData("golang_tar", "gnu-sparse-big")] + [InlineData("golang_tar", "sparse-formats")] + [InlineData("tar-rs", "sparse-1")] + [InlineData("tar-rs", "sparse")] + public async Task SparseEntryNotSupportedAsync(string testFolderName, string testCaseName) + { + // Currently sparse entries are not supported. + + // There are PAX archives archives in the golang folder that have extended attributes for treating a regular file as a sparse file. + // Sparse entries were created for the GNU format, so they are very rare entry types which are excluded from this test method: + // pax-nil-sparse-data, pax-nil-sparse-hole, pax-sparse-big + + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFolderName, testCaseName); + await using TarReader reader = new TarReader(archiveStream); + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + [Fact] + public async Task DirectoryListRegularFileAndSparseAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "gnu-incremental"); + await using TarReader reader = new TarReader(archiveStream); + TarEntry directoryList = await reader.GetNextEntryAsync(); + + Assert.Equal(TarEntryType.DirectoryList, directoryList.EntryType); + Assert.NotNull(directoryList.DataStream); + Assert.Equal(14, directoryList.Length); + + Assert.NotNull(await reader.GetNextEntryAsync()); // Just a regular file + + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); // Sparse + } + + [Fact] + public async Task PaxSizeLargerThanMaxAllowedByStreamAsync() + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "writer-big-long"); + await using TarReader reader = new TarReader(archiveStream); + // The extended attribute 'size' has the value 17179869184 + // Exception message: Stream length must be non-negative and less than 2^31 - 1 - origin + await Assert.ThrowsAsync(async () => await reader.GetNextEntryAsync()); + } + + private static async Task VerifyDataStreamOfTarUncompressedInternalAsync(string testFolderName, string testCaseName, bool copyData) + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFolderName, testCaseName); + await VerifyDataStreamOfTarInternalAsync(archiveStream, copyData); + } + + private static async Task VerifyDataStreamOfTarGzInternalAsync(TestTarFormat testTarFormat, string testCaseName, bool copyData) + { + await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.GZip, testTarFormat, testCaseName); + await using GZipStream decompressor = new GZipStream(archiveStream, CompressionMode.Decompress); + await VerifyDataStreamOfTarInternalAsync(decompressor, copyData); + } + + private static async Task VerifyDataStreamOfTarInternalAsync(Stream archiveStream, bool copyData) + { + await using TarReader reader = new TarReader(archiveStream); + + TarEntry entry; + + await using MemoryStream ms = new MemoryStream(); + while ((entry = await reader.GetNextEntryAsync(copyData)) != null) + { + if (entry.EntryType is TarEntryType.V7RegularFile or TarEntryType.RegularFile) + { + if (entry.Length == 0) + { + Assert.Null(entry.DataStream); + } + else + { + Assert.NotNull(entry.DataStream); + Assert.Equal(entry.DataStream.Length, entry.Length); + if (copyData) + { + Assert.True(entry.DataStream.CanSeek); + Assert.Equal(0, entry.DataStream.Position); + } + } + } + } + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs index ffc537917fc7af..298e3e8be47947 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Tests.cs @@ -4,8 +4,10 @@ using System.Collections.Generic; using System.Globalization; using System.IO; +using System.IO.Compression; using System.Linq; using Xunit; +using static System.Formats.Tar.Tests.TarTestsBase; namespace System.Formats.Tar.Tests { @@ -124,5 +126,244 @@ public void Read_Archive_LongFileName_Over100_Under255(TarEntryFormat format, Te [InlineData(TarEntryFormat.Gnu, TestTarFormat.oldgnu)] public void Read_Archive_LongPath_Over255(TarEntryFormat format, TestTarFormat testFormat) => Read_Archive_LongPath_Over255_Internal(format, testFormat); + + [Theory] + [MemberData(nameof(GetV7TestCaseNames))] + public void ReadDataStreamOfTarGzV7(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.v7, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetUstarTestCaseNames))] + public void ReadDataStreamOfTarGzUstar(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.ustar, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadDataStreamOfTarGzPax(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.pax, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadDataStreamOfTarGzPaxGea(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.pax_gea, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadDataStreamOfTarGzOldGnu(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.oldgnu, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadDataStreamOfTarGzGnu(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.gnu, testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetV7TestCaseNames))] + public void ReadCopiedDataStreamOfTarGzV7(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.v7, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetUstarTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzUstar(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.ustar, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzPax(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.pax, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzPaxGea(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.pax_gea, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzOldGnu(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.oldgnu, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetPaxAndGnuTestCaseNames))] + public void ReadCopiedDataStreamOfTarGzGnu(string testCaseName) => + VerifyDataStreamOfTarGzInternal(TestTarFormat.gnu, testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetGoLangTarTestCaseNames))] + public void ReadDataStreamOfExternalAssetsGoLang(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("golang_tar", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetNodeTarTestCaseNames))] + public void ReadDataStreamOfExternalAssetsNode(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("node-tar", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetRsTarTestCaseNames))] + public void ReadDataStreamOfExternalAssetsRs(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("tar-rs", testCaseName, copyData: false); + + [Theory] + [MemberData(nameof(GetGoLangTarTestCaseNames))] + public void ReadCopiedDataStreamOfExternalAssetsGoLang(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("golang_tar", testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetNodeTarTestCaseNames))] + public void ReadCopiedDataStreamOfExternalAssetsNode(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("node-tar", testCaseName, copyData: true); + + [Theory] + [MemberData(nameof(GetRsTarTestCaseNames))] + public void ReadCopiedDataStreamOfExternalAssetsRs(string testCaseName) => + VerifyDataStreamOfTarUncompressedInternal("tar-rs", testCaseName, copyData: true); + + [Fact] + public void Throw_FifoContainsNonZeroDataSection() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "hdr-only"); + using TarReader reader = new TarReader(archiveStream); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.NotNull(reader.GetNextEntry()); + Assert.Throws(() => reader.GetNextEntry()); + } + + [Fact] + public void Throw_SingleExtendedAttributesEntryWithNoActualEntry() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "pax-path-hdr"); + using TarReader reader = new TarReader(archiveStream); + Assert.Throws(() => reader.GetNextEntry()); + } + + [Theory] + [InlineData("tar-rs", "spaces")] + [InlineData("golang_tar", "v7")] + public void AllowSpacesInOctalFields(string folderName, string testCaseName) + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, folderName, testCaseName); + using TarReader reader = new TarReader(archiveStream); + TarEntry entry; + while ((entry = reader.GetNextEntry()) != null) + { + AssertExtensions.GreaterThan(entry.Checksum, 0); + AssertExtensions.GreaterThan((int)entry.Mode, 0); + } + } + + [Theory] + [InlineData("pax-multi-hdrs")] // Multiple consecutive PAX metadata entries + [InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries + [InlineData("neg-size")] // Garbage chars + [InlineData("invalid-go17")] // Many octal fields are all zero chars + [InlineData("issue11169")] // Checksum with null in the middle + [InlineData("issue10968")] // Garbage chars + [InlineData("writer-big")] // The size field contains an euro char + public void Throw_ArchivesWithRandomChars(string testCaseName) + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", testCaseName); + using TarReader reader = new TarReader(archiveStream); + Assert.Throws(() => reader.GetNextEntry()); + } + + [Fact] + public void GarbageEntryChecksumZeroReturnNull() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "issue12435"); + using TarReader reader = new TarReader(archiveStream); + Assert.Null(reader.GetNextEntry()); + } + + [Theory] + [InlineData("golang_tar", "gnu-nil-sparse-data")] + [InlineData("golang_tar", "gnu-nil-sparse-hole")] + [InlineData("golang_tar", "gnu-sparse-big")] + [InlineData("golang_tar", "sparse-formats")] + [InlineData("tar-rs", "sparse-1")] + [InlineData("tar-rs", "sparse")] + public void SparseEntryNotSupported(string testFolderName, string testCaseName) + { + // Currently sparse entries are not supported. + + // There are PAX archives archives in the golang folder that have extended attributes for treating a regular file as a sparse file. + // Sparse entries were created for the GNU format, so they are very rare entry types which are excluded from this test method: + // pax-nil-sparse-data, pax-nil-sparse-hole, pax-sparse-big + + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFolderName, testCaseName); + using TarReader reader = new TarReader(archiveStream); + Assert.Throws(() => reader.GetNextEntry()); + } + + [Fact] + public void DirectoryListRegularFileAndSparse() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "gnu-incremental"); + using TarReader reader = new TarReader(archiveStream); + TarEntry directoryList = reader.GetNextEntry(); + + Assert.Equal(TarEntryType.DirectoryList, directoryList.EntryType); + Assert.NotNull(directoryList.DataStream); + Assert.Equal(14, directoryList.Length); + + Assert.NotNull(reader.GetNextEntry()); // Just a regular file + + Assert.Throws(() => reader.GetNextEntry()); // Sparse + } + + [Fact] + public void PaxSizeLargerThanMaxAllowedByStream() + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", "writer-big-long"); + using TarReader reader = new TarReader(archiveStream); + // The extended attribute 'size' has the value 17179869184 + // Exception message: Stream length must be non-negative and less than 2^31 - 1 - origin + Assert.Throws(() => reader.GetNextEntry()); + } + + private static void VerifyDataStreamOfTarUncompressedInternal(string testFolderName, string testCaseName, bool copyData) + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, testFolderName, testCaseName); + VerifyDataStreamOfTarInternal(archiveStream, copyData); + } + + private static void VerifyDataStreamOfTarGzInternal(TestTarFormat testTarFormat, string testCaseName, bool copyData) + { + using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.GZip, testTarFormat, testCaseName); + using GZipStream decompressor = new GZipStream(archiveStream, CompressionMode.Decompress); + VerifyDataStreamOfTarInternal(decompressor, copyData); + } + + private static void VerifyDataStreamOfTarInternal(Stream archiveStream, bool copyData) + { + using TarReader reader = new TarReader(archiveStream); + + TarEntry entry; + + while ((entry = reader.GetNextEntry(copyData)) != null) + { + if (entry.EntryType is TarEntryType.V7RegularFile or TarEntryType.RegularFile) + { + if (entry.Length == 0) + { + Assert.Null(entry.DataStream); + } + else + { + Assert.NotNull(entry.DataStream); + Assert.Equal(entry.DataStream.Length, entry.Length); + if (copyData) + { + Assert.True(entry.DataStream.CanSeek); + Assert.Equal(0, entry.DataStream.Position); + } + } + } + } + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs index bad9bf8fa179bf..dda0cae56b69f8 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntry.Tests.cs @@ -250,5 +250,46 @@ public void GetNextEntry_UnseekableArchive_ReplaceDataStream_ExcludeFromDisposin Assert.Equal("Substituted", streamReader.ReadLine()); } } + + [Theory] + [InlineData(512, false)] + [InlineData(512, true)] + [InlineData(512 + 1, false)] + [InlineData(512 + 1, true)] + [InlineData(512 + 512 - 1, false)] + [InlineData(512 + 512 - 1, true)] + public void BlockAlignmentPadding_DoesNotAffectNextEntries(int contentSize, bool copyData) + { + byte[] fileContents = new byte[contentSize]; + Array.Fill(fileContents, 0x1); + + using var archive = new MemoryStream(); + using (var writer = new TarWriter(archive, leaveOpen: true)) + { + var entry1 = new PaxTarEntry(TarEntryType.RegularFile, "file"); + entry1.DataStream = new MemoryStream(fileContents); + writer.WriteEntry(entry1); + + var entry2 = new PaxTarEntry(TarEntryType.RegularFile, "next-file"); + writer.WriteEntry(entry2); + } + + archive.Position = 0; + using var unseekable = new WrappedStream(archive, archive.CanRead, archive.CanWrite, canSeek: false); + using var reader = new TarReader(unseekable); + + TarEntry e = reader.GetNextEntry(copyData); + Assert.Equal(contentSize, e.Length); + + byte[] buffer = new byte[contentSize]; + while (e.DataStream.Read(buffer) > 0) ; + AssertExtensions.SequenceEqual(fileContents, buffer); + + e = reader.GetNextEntry(copyData); + Assert.Equal(0, e.Length); + + e = reader.GetNextEntry(copyData); + Assert.Null(e); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs index 77ddf1f1320621..1dcd9326ee81ee 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.GetNextEntryAsync.Tests.cs @@ -288,5 +288,46 @@ public async Task GetNextEntry_UnseekableArchive_ReplaceDataStream_ExcludeFromDi } } } + + [Theory] + [InlineData(512, false)] + [InlineData(512, true)] + [InlineData(512 + 1, false)] + [InlineData(512 + 1, true)] + [InlineData(512 + 512 - 1, false)] + [InlineData(512 + 512 - 1, true)] + public async Task BlockAlignmentPadding_DoesNotAffectNextEntries_Async(int contentSize, bool copyData) + { + byte[] fileContents = new byte[contentSize]; + Array.Fill(fileContents, 0x1); + + using var archive = new MemoryStream(); + using (var writer = new TarWriter(archive, leaveOpen: true)) + { + var entry1 = new PaxTarEntry(TarEntryType.RegularFile, "file"); + entry1.DataStream = new MemoryStream(fileContents); + await writer.WriteEntryAsync(entry1); + + var entry2 = new PaxTarEntry(TarEntryType.RegularFile, "next-file"); + await writer.WriteEntryAsync(entry2); + } + + archive.Position = 0; + using var unseekable = new WrappedStream(archive, archive.CanRead, archive.CanWrite, canSeek: false); + using var reader = new TarReader(unseekable); + + TarEntry e = await reader.GetNextEntryAsync(copyData); + Assert.Equal(contentSize, e.Length); + + byte[] buffer = new byte[contentSize]; + while (e.DataStream.Read(buffer) > 0) ; + AssertExtensions.SequenceEqual(fileContents, buffer); + + e = await reader.GetNextEntryAsync(copyData); + Assert.Equal(0, e.Length); + + e = await reader.GetNextEntryAsync(copyData); + Assert.Null(e); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs index 04d034ce8764f8..23dc653c83a165 100644 --- a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs +++ b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs @@ -3,6 +3,8 @@ using System.Collections.Generic; using System.IO; +using System.Linq; +using System.Runtime.CompilerServices; using Microsoft.DotNet.RemoteExecutor; using Xunit; @@ -88,6 +90,94 @@ public abstract partial class TarTestsBase : FileCleanupTestBase protected const string PaxEaDevMajor = "devmajor"; protected const string PaxEaDevMinor = "devminor"; + private static readonly string[] V7TestCaseNames = new[] + { + "file", + "file_hardlink", + "file_symlink", + "folder_file", + "folder_file_utf8", + "folder_subfolder_file", + "foldersymlink_folder_subfolder_file", + "many_small_files" + }; + + private static readonly string[] UstarTestCaseNames = new[] + { + "longpath_splitable_under255", + "specialfiles" }; + + private static readonly string[] PaxAndGnuTestCaseNames = new[] + { + "file_longsymlink", + "longfilename_over100_under255", + "longpath_over255" + }; + + private static readonly string[] GoLangTestCaseNames = new[] + { + "empty", + "file-and-dir", + "gnu-long-nul", + "gnu-not-utf8", + "gnu-utf8", + "gnu", + "hardlink", + "nil-uid", + "pax-bad-hdr-file", + "pax-bad-mtime-file", + "pax-global-records", + "pax-nul-path", + "pax-nul-xattrs", + "pax-pos-size-file", + "pax-records", + "pax", + "star", + "trailing-slash", + "ustar-file-devs", + "ustar-file-reg", + "ustar", + "writer", + "xattrs" + }; + + private static readonly string[] NodeTarTestCaseNames = new[] + { + "bad-cksum", + "body-byte-counts", + "dir", + "emptypax", + "file", + "global-header", + "links-invalid", + "links-strip", + "links", + "long-paths", + "long-pax", + "next-file-has-long", + "null-byte", + "path-missing", + "trailing-slash-corner-case", + "utf8" + }; + + private static readonly string[] RsTarTestCaseNames = new[] + { + "7z_long_path", + "directory", + "duplicate_dirs", + "empty_filename", + "file_times", + "link", + "pax_size", + "pax", + "pax2", + "reading_files", + "simple_missing_last_header", + "simple", + "xattrs" + }; + protected enum CompressionMethod { // Archiving only, no compression @@ -121,30 +211,41 @@ public enum TestTarFormat protected TarTestsBase() { CreateDirectoryDefaultMode = Directory.CreateDirectory(GetRandomDirPath()).UnixFileMode; // '0777 & ~umask' - UMask = ~CreateDirectoryDefaultMode & (UnixFileMode)Convert.ToInt32("777", 8); + UMask = ~CreateDirectoryDefaultMode & (UnixFileMode)Convert.ToInt32("777", + 8); } protected static string GetTestCaseUnarchivedFolderPath(string testCaseName) => - Path.Join(Directory.GetCurrentDirectory(), "unarchived", testCaseName); + Path.Join(Directory.GetCurrentDirectory(), "unarchived", + testCaseName); protected static string GetTarFilePath(CompressionMethod compressionMethod, TestTarFormat format, string testCaseName) + => GetTarFilePath(compressionMethod, format.ToString(), testCaseName); + + protected static string GetTarFilePath(CompressionMethod compressionMethod, string testFolderName, string testCaseName) { (string compressionMethodFolder, string fileExtension) = compressionMethod switch { - CompressionMethod.Uncompressed => ("tar", ".tar"), - CompressionMethod.GZip => ("targz", ".tar.gz"), + CompressionMethod.Uncompressed => ("tar", + ".tar"), + CompressionMethod.GZip => ("targz", + ".tar.gz"), _ => throw new InvalidOperationException($"Unexpected compression method: {compressionMethod}"), }; - return Path.Join(Directory.GetCurrentDirectory(), compressionMethodFolder, format.ToString(), testCaseName + fileExtension); + return Path.Join(Directory.GetCurrentDirectory(), compressionMethodFolder, testFolderName, testCaseName + fileExtension); } // MemoryStream containing the copied contents of the specified file. Meant for reading and writing. protected static MemoryStream GetTarMemoryStream(CompressionMethod compressionMethod, TestTarFormat format, string testCaseName) => - GetMemoryStream(GetTarFilePath(compressionMethod, format, testCaseName)); + GetTarMemoryStream(compressionMethod, format.ToString(), testCaseName); + + protected static MemoryStream GetTarMemoryStream(CompressionMethod compressionMethod, string testFolderName, string testCaseName) => + GetMemoryStream(GetTarFilePath(compressionMethod, testFolderName, testCaseName)); protected static string GetStrangeTarFilePath(string testCaseName) => - Path.Join(Directory.GetCurrentDirectory(), "strange", testCaseName + ".tar"); + Path.Join(Directory.GetCurrentDirectory(), "strange", + testCaseName + ".tar"); protected static MemoryStream GetStrangeTarMemoryStream(string testCaseName) => GetMemoryStream(GetStrangeTarFilePath(testCaseName)); @@ -462,5 +563,53 @@ protected void Verify_Extract(string destination, TarEntry entry, TarEntryType e AssertFileModeEquals(destination, TestPermission1); } + + public static IEnumerable GetNodeTarTestCaseNames() + { + foreach (string name in NodeTarTestCaseNames) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetGoLangTarTestCaseNames() + { + foreach (string name in GoLangTestCaseNames) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetRsTarTestCaseNames() + { + foreach (string name in RsTarTestCaseNames) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetV7TestCaseNames() + { + foreach (string name in V7TestCaseNames) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetUstarTestCaseNames() + { + foreach (string name in UstarTestCaseNames.Concat(V7TestCaseNames)) + { + yield return new object[] { name }; + } + } + + public static IEnumerable GetPaxAndGnuTestCaseNames() + { + foreach (string name in UstarTestCaseNames.Concat(V7TestCaseNames).Concat(PaxAndGnuTestCaseNames)) + { + yield return new object[] { name }; + } + } } }