Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of Tar library #74281

Merged
merged 21 commits into from
Aug 21, 2022
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d52d836
Avoid unnecessary byte[] allocations
stephentoub Aug 19, 2022
1e5020e
Remove unnecessary use of FileStreamOptions
stephentoub Aug 19, 2022
6938c77
Clean up Dispose{Async} implementations
stephentoub Aug 19, 2022
5fa03e9
Clean up unnecessary consts
stephentoub Aug 19, 2022
8dd0ac1
Remove MemoryStream/Encoding.UTF8.GetBytes allocations, unnecessary a…
stephentoub Aug 19, 2022
5be57ad
Avoid string allocations in ReadMagicAttribute
stephentoub Aug 19, 2022
ab71e6c
Avoid allocation in WriteAsOctal
stephentoub Aug 19, 2022
df2d742
Improve handling of octal
stephentoub Aug 19, 2022
c6058bd
Avoid allocation for version string
stephentoub Aug 19, 2022
5756a8c
Removing boxing and char string allocation in GenerateExtendedAttribu…
stephentoub Aug 19, 2022
9539a4a
Fix a couple unnecessary dictionary lookups
stephentoub Aug 19, 2022
74bbc9c
Replace Enum.HasFlag usage
stephentoub Aug 19, 2022
46e0855
Remove allocations from Write{Posix}Name
stephentoub Aug 19, 2022
02ca7da
Replace ArrayPool use with string.Create
stephentoub Aug 19, 2022
f9eb99f
Replace more superfluous ArrayPool usage
stephentoub Aug 19, 2022
add6179
Remove ArrayPool use from System.IO.Compression.ZipFile
stephentoub Aug 20, 2022
6f8cb75
Fix inverted condition
stephentoub Aug 20, 2022
827a588
Use generic math to parse octal
stephentoub Aug 20, 2022
ae21478
Remove allocations from StringReader and string.Split
stephentoub Aug 20, 2022
d6b6727
Remove magic string allocation for Ustar when not V7
stephentoub Aug 20, 2022
480af5c
Remove file name and directory name allocation in GenerateExtendedAtt…
stephentoub Aug 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 25 additions & 16 deletions src/libraries/Common/src/System/IO/Archiving.Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ internal static partial class ArchivingUtils
private const char PathSeparatorChar = '/';
private const string PathSeparatorString = "/";

public static string EntryFromPath(string entry, int offset, int length, ref char[] buffer, bool appendPathSeparator = false)
public static string EntryFromPath(string entry, int offset, int length, bool appendPathSeparator = false)
{
Debug.Assert(length <= entry.Length - offset);
Debug.Assert(buffer != null);

// Remove any leading slashes from the entry name:
while (length > 0)
Expand All @@ -32,26 +31,36 @@ public static string EntryFromPath(string entry, int offset, int length, ref cha
}

if (length == 0)
{
return appendPathSeparator ? PathSeparatorString : string.Empty;
}

int resultLength = appendPathSeparator ? length + 1 : length;
EnsureCapacity(ref buffer, resultLength);
entry.CopyTo(offset, buffer, 0, length);

// '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux)
// We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is
// explicitly trying to standardize to '/'
for (int i = 0; i < length; i++)
if (appendPathSeparator)
{
char ch = buffer[i];
if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar)
buffer[i] = PathSeparatorChar;
length++;
}

if (appendPathSeparator)
buffer[length] = PathSeparatorChar;
return string.Create(length, (appendPathSeparator, offset, entry), static (dest, state) =>
{
state.entry.AsSpan(state.offset).CopyTo(dest);

// '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux)
// We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is
// explicitly trying to standardize to '/'
for (int i = 0; i < dest.Length; i++)
{
char ch = dest[i];
if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Unix, both of these constants are `/`:

internal const char DirectorySeparatorChar = '/';
internal const char AltDirectorySeparatorChar = '/';

So we could skip the loop entirely on Unix. On Windows, the loop would need only a single check, for `\`.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code is the same on both platforms.

@EgorBo just out of curiosity, would the JIT in theory be able to legally collapse such a thing? I.e., `if (ch == '/' || ch == '/') ...`

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code is the same on both platforms.

It makes no sense to replace / with / on Unix.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get that, but what are you proposing -- duplicate this method for Unix and Windows so they can be different? Is this code path that hot?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not super hot, but it can be improved. I need to push up a fix anyway, so I'll do so.

{
dest[i] = PathSeparatorChar;
}
}

return new string(buffer, 0, resultLength);
if (state.appendPathSeparator)
{
dest[^1] = PathSeparatorChar;
}
});
}

public static void EnsureCapacity(ref char[] buffer, int min)
Expand Down
3 changes: 3 additions & 0 deletions src/libraries/System.Formats.Tar/src/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -255,4 +255,7 @@
<data name="IO_SeekBeforeBegin" xml:space="preserve">
<value>An attempt was made to move the position before the beginning of the stream.</value>
</data>
<data name="TarInvalidNumber" xml:space="preserve">
<value>Unable to parse number.</value>
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,7 @@ public override Task FlushAsync(CancellationToken cancellationToken) =>
// the substream is just 'a chunk' of the super-stream
protected override void Dispose(bool disposing)
{
if (disposing && !_isDisposed)
{
_isDisposed = true;
}
_isDisposed = true;
base.Dispose(disposing);
}
}
Expand Down
53 changes: 15 additions & 38 deletions src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,6 @@ namespace System.Formats.Tar
/// </summary>
public static class TarFile
{
// Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely
// to be greater than the length of typical entry names from the file system, even
// on non-Windows platforms. The capacity will be increased, if needed.
private const int DefaultCapacity = 260;

/// <summary>
/// Creates a tar stream that contains all the filesystem entries from the specified directory.
/// </summary>
Expand Down Expand Up @@ -283,23 +278,14 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre
DirectoryInfo di = new(sourceDirectoryName);
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);

char[] entryNameBuffer = ArrayPool<char>.Shared.Rent(DefaultCapacity);

try
if (includeBaseDirectory)
{
if (includeBaseDirectory)
{
writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer));
}

foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
{
writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer));
}
writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name));
}
finally

foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
{
ArrayPool<char>.Shared.Return(entryNameBuffer);
writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length));
}
}
}
Expand Down Expand Up @@ -339,23 +325,14 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector
DirectoryInfo di = new(sourceDirectoryName);
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);

char[] entryNameBuffer = ArrayPool<char>.Shared.Rent(DefaultCapacity);

try
if (includeBaseDirectory)
{
if (includeBaseDirectory)
{
await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer), cancellationToken).ConfigureAwait(false);
}

foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
{
await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer), cancellationToken).ConfigureAwait(false);
}
await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name), cancellationToken).ConfigureAwait(false);
}
finally

foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
{
ArrayPool<char>.Shared.Return(entryNameBuffer);
await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false);
}
}
}
Expand All @@ -365,18 +342,18 @@ private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool i
includeBaseDirectory && di.Parent != null ? di.Parent.FullName : di.FullName;

// Constructs the entry name used for a filesystem entry when creating an archive.
private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength, ref char[] entryNameBuffer)
private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength)
{
int entryNameLength = file.FullName.Length - basePathLength;
Debug.Assert(entryNameLength > 0);

bool isDirectory = file.Attributes.HasFlag(FileAttributes.Directory);
return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, ref entryNameBuffer, appendPathSeparator: isDirectory);
bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0;
return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, appendPathSeparator: isDirectory);
}

private static string GetEntryNameForBaseDirectory(string name, ref char[] entryNameBuffer)
private static string GetEntryNameForBaseDirectory(string name)
{
return ArchivingUtils.EntryFromPath(name, 0, name.Length, ref entryNameBuffer, appendPathSeparator: true);
return ArchivingUtils.EntryFromPath(name, 0, name.Length, appendPathSeparator: true);
}

// Extracts an archive into the specified directory.
Expand Down
Loading