Skip to content

Commit

Permalink
Tar: set directory modification times while extracting. (#88231)
Browse files Browse the repository at this point in the history
* Tar: set directory modification times while extracting.

* Extend test and change entry write order.

* Get rid of some enumeration allocations.

* test: check DateTime using InRange.

* Extend comment explaining the order of entries.

* Apply suggestions from code review

Co-authored-by: Dan Moseley <danmose@microsoft.com>

* Try including AltDirectorySeparatorChar.

* Revert "Try including AltDirectorySeparatorChar."

This reverts commit 086aac4.

* Try using Directory.SetLastWriteTime instead of File's.

* Tie up some ends.

---------

Co-authored-by: Dan Moseley <danmose@microsoft.com>
  • Loading branch information
tmds and danmoseley authored Jul 5, 2023
1 parent 7b91fd4 commit 5e1608d
Show file tree
Hide file tree
Showing 7 changed files with 215 additions and 43 deletions.
22 changes: 12 additions & 10 deletions src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -285,42 +285,44 @@ public Stream? DataStream
internal abstract bool IsDataStreamSetterSupported();

// Extracts the current entry to a location relative to the specified directory.
// A directory entry is created and its modification time is handed to TarHelpers.UpdatePendingModificationTimes
// instead of being applied right away; any other entry gets its containing directory created and is then
// extracted through ExtractToFileInternal.
internal void ExtractRelativeToDirectory(string destinationDirectoryPath, bool overwrite, SortedDictionary<string, UnixFileMode>? pendingModes)
internal void ExtractRelativeToDirectory(string destinationDirectoryPath, bool overwrite, SortedDictionary<string, UnixFileMode>? pendingModes, Stack<(string, DateTimeOffset)> directoryModificationTimes)
{
// Resolve the destination of this entry and, when there is one, the link target.
(string fileDestinationPath, string? linkTargetPath) = GetDestinationAndLinkPaths(destinationDirectoryPath);
(string destinationFullPath, string? linkTargetPath) = GetDestinationAndLinkPaths(destinationDirectoryPath);

if (EntryType == TarEntryType.Directory)
{
TarHelpers.CreateDirectory(fileDestinationPath, Mode, pendingModes);
TarHelpers.CreateDirectory(destinationFullPath, Mode, pendingModes);
// Track the directory's modification time so it can be set later.
TarHelpers.UpdatePendingModificationTimes(directoryModificationTimes, destinationFullPath, ModificationTime);
}
else
{
// If it is a file, create containing directory.
TarHelpers.CreateDirectory(Path.GetDirectoryName(fileDestinationPath)!, mode: null, pendingModes);
TarHelpers.CreateDirectory(Path.GetDirectoryName(destinationFullPath)!, mode: null, pendingModes);
ExtractToFileInternal(fileDestinationPath, linkTargetPath, overwrite);
ExtractToFileInternal(destinationFullPath, linkTargetPath, overwrite);
}
}

// Asynchronously extracts the current entry to a location relative to the specified directory.
// Returns a canceled task when cancellation was already requested, a completed task for directory entries
// (which are created synchronously), and otherwise the task returned by ExtractToFileInternalAsync.
internal Task ExtractRelativeToDirectoryAsync(string destinationDirectoryPath, bool overwrite, SortedDictionary<string, UnixFileMode>? pendingModes, CancellationToken cancellationToken)
internal Task ExtractRelativeToDirectoryAsync(string destinationDirectoryPath, bool overwrite, SortedDictionary<string, UnixFileMode>? pendingModes, Stack<(string, DateTimeOffset)> directoryModificationTimes, CancellationToken cancellationToken)
{
// Report cancellation through the returned task rather than by throwing.
if (cancellationToken.IsCancellationRequested)
{
return Task.FromCanceled(cancellationToken);
}

// Resolve the destination of this entry and, when there is one, the link target.
(string fileDestinationPath, string? linkTargetPath) = GetDestinationAndLinkPaths(destinationDirectoryPath);
(string destinationFullPath, string? linkTargetPath) = GetDestinationAndLinkPaths(destinationDirectoryPath);

if (EntryType == TarEntryType.Directory)
{
TarHelpers.CreateDirectory(fileDestinationPath, Mode, pendingModes);
TarHelpers.CreateDirectory(destinationFullPath, Mode, pendingModes);
// Track the directory's modification time so it can be set later.
TarHelpers.UpdatePendingModificationTimes(directoryModificationTimes, destinationFullPath, ModificationTime);
return Task.CompletedTask;
}
else
{
// If it is a file, create containing directory.
TarHelpers.CreateDirectory(Path.GetDirectoryName(fileDestinationPath)!, mode: null, pendingModes);
TarHelpers.CreateDirectory(Path.GetDirectoryName(destinationFullPath)!, mode: null, pendingModes);
return ExtractToFileInternalAsync(fileDestinationPath, linkTargetPath, overwrite, cancellationToken);
return ExtractToFileInternalAsync(destinationFullPath, linkTargetPath, overwrite, cancellationToken);
}
}

Expand Down
74 changes: 43 additions & 31 deletions src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre
using (TarWriter writer = new TarWriter(destination, TarEntryFormat.Pax, leaveOpen))
{
DirectoryInfo di = new(sourceDirectoryName);
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);

bool skipBaseDirRecursion = false;
if (includeBaseDirectory)
Expand All @@ -345,9 +344,10 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre
return;
}

foreach (FileSystemInfo file in GetFileSystemEnumerationForCreation(sourceDirectoryName))
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);
foreach ((string fullpath, string entryname) in GetFilesForCreation(sourceDirectoryName, basePath.Length))
{
writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length));
writer.WriteEntry(fullpath, entryname);
}
}
}
Expand Down Expand Up @@ -385,7 +385,6 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector
await using (writer.ConfigureAwait(false))
{
DirectoryInfo di = new(sourceDirectoryName);
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);

bool skipBaseDirRecursion = false;
if (includeBaseDirectory)
Expand All @@ -400,72 +399,83 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector
return;
}

foreach (FileSystemInfo file in GetFileSystemEnumerationForCreation(sourceDirectoryName))
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);
foreach ((string fullpath, string entryname) in GetFilesForCreation(sourceDirectoryName, basePath.Length))
{
await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false);
await writer.WriteEntryAsync(fullpath, entryname, cancellationToken).ConfigureAwait(false);
}
}
}

// Generates a recursive enumeration of the filesystem entries inside the specified source directory, while
// making sure that directory symlinks do not get recursed.
// Each yielded pair holds the entry's full path and its archive entry name; the entry name is built from
// the part of the full path that starts at index basePathLength.
private static FileSystemEnumerable<FileSystemInfo> GetFileSystemEnumerationForCreation(string sourceDirectoryName)
private static IEnumerable<(string fullpath, string entryname)> GetFilesForCreation(string sourceDirectoryName, int basePathLength)
{
return new FileSystemEnumerable<FileSystemInfo>(
// The default order to write a tar archive is to recurse into subdirectories first.
// This order is expected by 'tar' to restore directory timestamps properly without the user explicitly specifying `--delay-directory-restore`.
// FileSystemEnumerable RecurseSubdirectories will first write further entries before recursing, so we don't use it here.

var fse = new FileSystemEnumerable<(string fullpath, string entryname, bool recurse)>(
directory: sourceDirectoryName,
transform: (ref FileSystemEntry entry) => entry.ToFileSystemInfo(),
options: new EnumerationOptions()
transform: (ref FileSystemEntry entry) =>
{
RecurseSubdirectories = true
})
// Only a directory that is not a reparse point is flagged for recursion and gets a trailing separator in its entry name.
string fullPath = entry.ToFullPath();
bool isRealDirectory = entry.IsDirectory && (entry.Attributes & FileAttributes.ReparsePoint) == 0; // not a symlink.
string entryName = ArchivingUtils.EntryFromPath(fullPath.AsSpan(basePathLength), appendPathSeparator: isRealDirectory);
return (fullPath, entryName, isRealDirectory);
});

foreach ((string fullpath, string entryname, bool recurse) in fse)
{
ShouldRecursePredicate = IsNotADirectorySymlink
};
// Emit the entry itself first; the contents of a real directory follow right after it.
yield return (fullpath, entryname);

static bool IsNotADirectorySymlink(ref FileSystemEntry entry) => entry.IsDirectory && (entry.Attributes & FileAttributes.ReparsePoint) == 0;
// Return entries for the subdirectory.
if (recurse)
{
foreach (var inner in GetFilesForCreation(fullpath, basePathLength))
{
yield return inner;
}
}
}
}

// Picks the path that every entry name is made relative to when creating an archive:
// the parent of the source directory when the base directory itself is included (and a parent exists),
// otherwise the source directory.
private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool includeBaseDirectory)
{
    if (!includeBaseDirectory)
    {
        return di.FullName;
    }

    // A directory without a parent falls back to its own full path.
    return di.Parent?.FullName ?? di.FullName;
}

// Builds the archive entry name for a filesystem entry from the part of its full path
// that starts at basePathLength; directories request a trailing path separator.
private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength)
{
    bool appendSeparator = file.Attributes.HasFlag(FileAttributes.Directory);
    var relativePath = file.FullName.AsSpan(basePathLength);
    return ArchivingUtils.EntryFromPath(relativePath, appendPathSeparator: appendSeparator);
}

// Builds the archive entry name for the base directory; a trailing path separator is requested.
private static string GetEntryNameForBaseDirectory(string name) =>
    ArchivingUtils.EntryFromPath(name, appendPathSeparator: true);

// Extracts an archive into the specified directory.
// It assumes the destination directory path is fully qualified, and allows choosing if the archive stream should be left open or not.
private static void ExtractToDirectoryInternal(Stream source, string destinationDirectoryPath, bool overwriteFiles, bool leaveOpen)
private static void ExtractToDirectoryInternal(Stream source, string destinationDirectoryFullPath, bool overwriteFiles, bool leaveOpen)
{
VerifyExtractToDirectoryArguments(source, destinationDirectoryPath);
VerifyExtractToDirectoryArguments(source, destinationDirectoryFullPath);

using TarReader reader = new TarReader(source, leaveOpen);

SortedDictionary<string, UnixFileMode>? pendingModes = TarHelpers.CreatePendingModesDictionary();
// Directory modification times collected while extracting; whatever is still tracked is applied after the loop.
var directoryModificationTimes = new Stack<(string, DateTimeOffset)>();
TarEntry? entry;
while ((entry = reader.GetNextEntry()) != null)
{
// Global extended attributes entries are not extracted.
if (entry.EntryType is not TarEntryType.GlobalExtendedAttributes)
{
entry.ExtractRelativeToDirectory(destinationDirectoryPath, overwriteFiles, pendingModes);
entry.ExtractRelativeToDirectory(destinationDirectoryFullPath, overwriteFiles, pendingModes, directoryModificationTimes);
}
}
// Apply the deferred modes first, then the directory modification times that are still pending.
TarHelpers.SetPendingModes(pendingModes);
TarHelpers.SetPendingModificationTimes(directoryModificationTimes);
}

// Asynchronously extracts the contents of a tar file into the specified directory.
private static async Task ExtractToDirectoryInternalAsync(string sourceFileName, string destinationDirectoryName, bool overwriteFiles, CancellationToken cancellationToken)
private static async Task ExtractToDirectoryInternalAsync(string sourceFileName, string destinationDirectoryFullPath, bool overwriteFiles, CancellationToken cancellationToken)
{
Debug.Assert(!string.IsNullOrEmpty(sourceFileName));
Debug.Assert(!string.IsNullOrEmpty(destinationDirectoryName));
Debug.Assert(!string.IsNullOrEmpty(destinationDirectoryFullPath));

cancellationToken.ThrowIfCancellationRequested();

Expand All @@ -478,18 +488,19 @@ private static async Task ExtractToDirectoryInternalAsync(string sourceFileName,
FileStream archive = new(sourceFileName, options);
await using (archive.ConfigureAwait(false))
{
await ExtractToDirectoryInternalAsync(archive, destinationDirectoryName, overwriteFiles, leaveOpen: false, cancellationToken).ConfigureAwait(false);
await ExtractToDirectoryInternalAsync(archive, destinationDirectoryFullPath, overwriteFiles, leaveOpen: false, cancellationToken).ConfigureAwait(false);
}
}

// Asynchronously extracts an archive into the specified directory.
// It assumes destinationDirectoryFullPath is a fully qualified path, and allows choosing if the archive stream should be left open or not.
private static async Task ExtractToDirectoryInternalAsync(Stream source, string destinationDirectoryPath, bool overwriteFiles, bool leaveOpen, CancellationToken cancellationToken)
private static async Task ExtractToDirectoryInternalAsync(Stream source, string destinationDirectoryFullPath, bool overwriteFiles, bool leaveOpen, CancellationToken cancellationToken)
{
VerifyExtractToDirectoryArguments(source, destinationDirectoryPath);
VerifyExtractToDirectoryArguments(source, destinationDirectoryFullPath);
cancellationToken.ThrowIfCancellationRequested();

SortedDictionary<string, UnixFileMode>? pendingModes = TarHelpers.CreatePendingModesDictionary();
var directoryModificationTimes = new Stack<(string, DateTimeOffset)>();
TarReader reader = new TarReader(source, leaveOpen);
await using (reader.ConfigureAwait(false))
{
Expand All @@ -498,11 +509,12 @@ private static async Task ExtractToDirectoryInternalAsync(Stream source, string
{
if (entry.EntryType is not TarEntryType.GlobalExtendedAttributes)
{
await entry.ExtractRelativeToDirectoryAsync(destinationDirectoryPath, overwriteFiles, pendingModes, cancellationToken).ConfigureAwait(false);
await entry.ExtractRelativeToDirectoryAsync(destinationDirectoryFullPath, overwriteFiles, pendingModes, directoryModificationTimes, cancellationToken).ConfigureAwait(false);
}
}
}
TarHelpers.SetPendingModes(pendingModes);
TarHelpers.SetPendingModificationTimes(directoryModificationTimes);
}

[Conditional("DEBUG")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,5 +386,77 @@ TarEntryType.RegularFile or

throw new ArgumentException(SR.Format(SR.TarEntryTypeNotSupportedInFormat, entryType, archiveFormat), paramName);
}

// Applies every modification time still tracked on the stack, emptying the stack in the process.
public static void SetPendingModificationTimes(Stack<(string, DateTimeOffset)> directoryModificationTimes)
{
    // Popping returns the entries ordered child to parent.
    while (directoryModificationTimes.Count > 0)
    {
        (string path, DateTimeOffset modified) = directoryModificationTimes.Pop();
        AttemptDirectorySetLastWriteTime(path, modified);
    }
}

// Starts tracking the modification time to apply to the directory at fullPath, first flushing
// the tracked directories that fullPath is not a child of.
public static void UpdatePendingModificationTimes(Stack<(string, DateTimeOffset)> directoryModificationTimes, string fullPath, DateTimeOffset modified)
{
    // The modification time can't be set when the directory is created: extracting entries into it
    // changes that time again. The times are therefore tracked and set later.
    //
    // Regular tar files are ordered, so as soon as a new directory is not a child of the directory
    // on top of the stack, that tracked directory can receive its timestamp and stop being tracked.
    // This avoids holding on to every directory entry until the entire archive has been extracted.
    while (directoryModificationTimes.Count > 0)
    {
        (string trackedPath, DateTimeOffset trackedModified) = directoryModificationTimes.Peek();
        if (IsChildPath(trackedPath, fullPath))
        {
            break;
        }

        directoryModificationTimes.Pop();
        AttemptDirectorySetLastWriteTime(trackedPath, trackedModified);
    }

    directoryModificationTimes.Push((fullPath, modified));
}

// Returns whether childFullPath starts with parentFullPath at a directory boundary and has
// at least one more character after that boundary for a name.
private static bool IsChildPath(string parentFullPath, string childFullPath)
{
    // Both paths may end with an additional separator.
    // AltDirectorySeparatorChar is not checked: full paths are normalized to DirectorySeparatorChar.
    int parentLength = parentFullPath.Length;
    bool parentEndsWithSeparator = parentFullPath[^1] == Path.DirectorySeparatorChar;

    bool hasBoundaryAndName;
    if (parentEndsWithSeparator)
    {
        // The parent already supplies the separator: the child only needs one extra char for the name.
        hasBoundaryAndName = childFullPath.Length > parentLength;
    }
    else
    {
        // The child needs at least 2 extra chars (separator + name), and the separator
        // has to sit exactly where the parent path ends.
        hasBoundaryAndName = childFullPath.Length >= parentLength + 2
            && childFullPath[parentLength] == Path.DirectorySeparatorChar;
    }

    return hasBoundaryAndName && childFullPath.StartsWith(parentFullPath, PathInternal.StringComparison);
}

// Best-effort: tries to set the last write time of the directory at fullPath and ignores any failure.
private static void AttemptDirectorySetLastWriteTime(string fullPath, DateTimeOffset lastWriteTime)
{
    try
    {
        // SetLastWriteTime expects local time.
        DateTime localLastWriteTime = lastWriteTime.LocalDateTime;
        Directory.SetLastWriteTime(fullPath, localLastWriteTime);
    }
    catch
    {
        // Some OSes like Android might not support setting the last write time, the extraction should not fail because of that
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ public void SkipRecursionIntoDirectorySymlinks()

TarEntry entry = reader.GetNextEntry();
Assert.NotNull(entry);
Assert.Equal("subDirectory/", entry.Name);
Assert.Equal("subDirectory", entry.Name);
Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType);

Assert.Null(reader.GetNextEntry()); // file.txt should not be found
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ public async Task SkipRecursionIntoDirectorySymlinksAsync()

TarEntry entry = await reader.GetNextEntryAsync();
Assert.NotNull(entry);
Assert.Equal("subDirectory/", entry.Name);
Assert.Equal("subDirectory", entry.Name);
Assert.Equal(TarEntryType.SymbolicLink, entry.EntryType);

Assert.Null(await reader.GetNextEntryAsync()); // file.txt should not be found
Expand Down
Loading

0 comments on commit 5e1608d

Please sign in to comment.