Skip to content

Commit c5e8f83

Browse files
authored
Tar: support GNU numeric format. (#101172)
The tar specification stores numeric fields using an octal representation. This limits the range of values that can be stored. To increase the supported range, a GNU extension defines that when the leading byte is 0xff/0x80, the remaining bytes are a negative/positive big-endian formatted value. When writing under the PAX format, we continue to use only the octal representation in the header fields. Values that do not fit are overridden using extended attributes.
1 parent afe6fbd commit c5e8f83

18 files changed

+539
-343
lines changed

src/libraries/System.Formats.Tar/src/Resources/Strings.resx

+2-2
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,8 @@
193193
<data name="TarEntryFieldExceedsMaxLength" xml:space="preserve">
194194
<value>The field '{0}' exceeds the maximum allowed length for this format.</value>
195195
</data>
196-
<data name="TarSizeFieldTooLargeForEntryFormat" xml:space="preserve">
197-
<value>The value of the size field for the current entry of format '{0}' is greater than the format allows.</value>
196+
<data name="TarFieldTooLargeForEntryFormat" xml:space="preserve">
197+
<value>The value of the field for the current entry of format '{0}' is greater than the format allows.</value>
198198
</data>
199199
<data name="TarExtAttrDisallowedKeyChar" xml:space="preserve">
200200
<value>The extended attribute key '{0}' contains a disallowed '{1}' character.</value>

src/libraries/System.Formats.Tar/src/System/Formats/Tar/GnuTarEntry.cs

-2
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ public DateTimeOffset AccessTime
9898
get => _header._aTime;
9999
set
100100
{
101-
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
102101
_header._aTime = value;
103102
}
104103
}
@@ -112,7 +111,6 @@ public DateTimeOffset ChangeTime
112111
get => _header._cTime;
113112
set
114113
{
115-
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
116114
_header._cTime = value;
117115
}
118116
}

src/libraries/System.Formats.Tar/src/System/Formats/Tar/PosixTarEntry.cs

+10-4
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ internal PosixTarEntry(TarEntry other, TarEntryFormat format)
5050
/// </summary>
5151
/// <remarks>Character and block devices are Unix-specific entry types.</remarks>
5252
/// <exception cref="InvalidOperationException">The entry does not represent a block device or a character device.</exception>
53-
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151.</exception>
53+
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151 when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
5454
public int DeviceMajor
5555
{
5656
get => _header._devMajor;
@@ -62,7 +62,10 @@ public int DeviceMajor
6262
}
6363

6464
ArgumentOutOfRangeException.ThrowIfNegative(value);
65-
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
65+
if (FormatIsOctalOnly)
66+
{
67+
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
68+
}
6669

6770
_header._devMajor = value;
6871
}
@@ -73,7 +76,7 @@ public int DeviceMajor
7376
/// </summary>
7477
/// <remarks>Character and block devices are Unix-specific entry types.</remarks>
7578
/// <exception cref="InvalidOperationException">The entry does not represent a block device or a character device.</exception>
76-
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151.</exception>
79+
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151 when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
7780
public int DeviceMinor
7881
{
7982
get => _header._devMinor;
@@ -85,7 +88,10 @@ public int DeviceMinor
8588
}
8689

8790
ArgumentOutOfRangeException.ThrowIfNegative(value);
88-
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
91+
if (FormatIsOctalOnly)
92+
{
93+
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
94+
}
8995

9096
_header._devMinor = value;
9197
}

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs

+8-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ public abstract partial class TarEntry
2020
// Used to access the data section of this entry in an unseekable file
2121
private TarReader? _readerOfOrigin;
2222

23+
// These formats have a limited numeric range due to the octal number representation.
24+
protected bool FormatIsOctalOnly => _header._format is TarEntryFormat.V7 or TarEntryFormat.Ustar;
25+
2326
// Constructor called when reading a TarEntry from a TarReader.
2427
internal TarEntry(TarHeader header, TarReader readerOfOrigin, TarEntryFormat format)
2528
{
@@ -92,13 +95,16 @@ public int Gid
9295
/// A timestamp that represents the last time the contents of the file represented by this entry were modified.
9396
/// </summary>
9497
/// <remarks>In Unix platforms, this timestamp is commonly known as <c>mtime</c>.</remarks>
95-
/// <exception cref="ArgumentOutOfRangeException">The specified value is larger than <see cref="DateTimeOffset.UnixEpoch"/>.</exception>
98+
/// <exception cref="ArgumentOutOfRangeException">The specified value is earlier than <see cref="DateTimeOffset.UnixEpoch"/> when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
9699
public DateTimeOffset ModificationTime
97100
{
98101
get => _header._mTime;
99102
set
100103
{
101-
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
104+
if (FormatIsOctalOnly)
105+
{
106+
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
107+
}
102108
_header._mTime = value;
103109
}
104110
}

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs

+9-10
Original file line numberDiff line numberDiff line change
@@ -374,8 +374,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
374374
return null;
375375
}
376376

377-
long size = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
378-
Debug.Assert(size <= TarHelpers.MaxSizeLength, "size exceeded the max value possible with 11 octal digits. Actual size " + size);
377+
long size = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
379378
if (size < 0)
380379
{
381380
throw new InvalidDataException(SR.Format(SR.TarSizeFieldNegative));
@@ -384,14 +383,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
384383
// Continue with the rest of the fields that require no special checks
385384
TarHeader header = new(initialFormat,
386385
name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)),
387-
mode: (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
388-
mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch((long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))),
386+
mode: TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
387+
mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))),
389388
typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag])
390389
{
391390
_checksum = checksum,
392391
_size = size,
393-
_uid = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
394-
_gid = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
392+
_uid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
393+
_gid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
395394
_linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName))
396395
};
397396

@@ -524,10 +523,10 @@ private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
524523
if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice)
525524
{
526525
// Major number for a character device or block device entry.
527-
_devMajor = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
526+
_devMajor = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
528527

529528
// Minor number for a character device or block device entry.
530-
_devMinor = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
529+
_devMinor = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
531530
}
532531
}
533532

@@ -536,10 +535,10 @@ private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
536535
private void ReadGnuAttributes(Span<byte> buffer)
537536
{
538537
// Convert byte arrays
539-
long aTime = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
538+
long aTime = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
540539
_aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime);
541540

542-
long cTime = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
541+
long cTime = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
543542
_cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime);
544543

545544
// TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs

+100-23
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Buffers;
5+
using System.Buffers.Binary;
56
using System.Buffers.Text;
67
using System.Collections.Generic;
78
using System.Diagnostics;
@@ -15,6 +16,9 @@ namespace System.Formats.Tar
1516
// Writes header attributes of a tar archive entry.
1617
internal sealed partial class TarHeader
1718
{
19+
private const long Octal12ByteFieldMaxValue = (1L << (3 * 11)) - 1; // Max value of 11 octal digits.
20+
private const int Octal8ByteFieldMaxValue = (1 << (3 * 7)) - 1; // Max value of 7 octal digits.
21+
1822
private static ReadOnlySpan<byte> UstarMagicBytes => "ustar\0"u8;
1923
private static ReadOnlySpan<byte> UstarVersionBytes => "00"u8;
2024

@@ -606,35 +610,22 @@ private int WriteCommonFields(Span<byte> buffer, TarEntryType actualEntryType)
606610

607611
if (_mode > 0)
608612
{
609-
checksum += FormatOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
613+
checksum += FormatNumeric(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
610614
}
611615

612616
if (_uid > 0)
613617
{
614-
checksum += FormatOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
618+
checksum += FormatNumeric(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
615619
}
616620

617621
if (_gid > 0)
618622
{
619-
checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
623+
checksum += FormatNumeric(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
620624
}
621625

622626
if (_size > 0)
623627
{
624-
if (_size <= TarHelpers.MaxSizeLength)
625-
{
626-
checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
627-
}
628-
else if (_format is not TarEntryFormat.Pax)
629-
{
630-
throw new ArgumentException(SR.Format(SR.TarSizeFieldTooLargeForEntryFormat, _format));
631-
}
632-
else
633-
{
634-
// No writing, just verifications
635-
Debug.Assert(_typeFlag is not TarEntryType.ExtendedAttributes and not TarEntryType.GlobalExtendedAttributes);
636-
Debug.Assert(Convert.ToInt64(ExtendedAttributes[PaxEaSize]) > TarHelpers.MaxSizeLength);
637-
}
628+
checksum += FormatNumeric(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
638629
}
639630

640631
checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime));
@@ -739,12 +730,12 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)
739730

740731
if (_devMajor > 0)
741732
{
742-
checksum += FormatOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
733+
checksum += FormatNumeric(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
743734
}
744735

745736
if (_devMinor > 0)
746737
{
747-
checksum += FormatOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
738+
checksum += FormatNumeric(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
748739
}
749740

750741
return checksum;
@@ -916,7 +907,7 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
916907
ExtendedAttributes[PaxEaLinkName] = _linkName;
917908
}
918909

919-
if (_size > TarHelpers.MaxSizeLength)
910+
if (_size > Octal12ByteFieldMaxValue)
920911
{
921912
ExtendedAttributes[PaxEaSize] = _size.ToString();
922913
}
@@ -925,6 +916,42 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
925916
ExtendedAttributes.Remove(PaxEaSize);
926917
}
927918

919+
if (_uid > Octal8ByteFieldMaxValue)
920+
{
921+
ExtendedAttributes[PaxEaUid] = _uid.ToString();
922+
}
923+
else
924+
{
925+
ExtendedAttributes.Remove(PaxEaUid);
926+
}
927+
928+
if (_gid > Octal8ByteFieldMaxValue)
929+
{
930+
ExtendedAttributes[PaxEaGid] = _gid.ToString();
931+
}
932+
else
933+
{
934+
ExtendedAttributes.Remove(PaxEaGid);
935+
}
936+
937+
if (_devMajor > Octal8ByteFieldMaxValue)
938+
{
939+
ExtendedAttributes[PaxEaDevMajor] = _devMajor.ToString();
940+
}
941+
else
942+
{
943+
ExtendedAttributes.Remove(PaxEaDevMajor);
944+
}
945+
946+
if (_devMinor > Octal8ByteFieldMaxValue)
947+
{
948+
ExtendedAttributes[PaxEaDevMinor] = _devMinor.ToString();
949+
}
950+
else
951+
{
952+
ExtendedAttributes.Remove(PaxEaDevMinor);
953+
}
954+
928955
// Sets the specified string to the dictionary if it's longer than the specified max byte length; otherwise, remove it.
929956
static void TryAddStringField(Dictionary<string, string> extendedAttributes, string key, string? value, int maxLength)
930957
{
@@ -1022,6 +1049,56 @@ private static int Checksum(ReadOnlySpan<byte> bytes)
10221049
return checksum;
10231050
}
10241051

1052+
private int FormatNumeric(int value, Span<byte> destination)
1053+
{
1054+
Debug.Assert(destination.Length == 8, "8 byte field expected.");
1055+
1056+
bool isOctalRange = value >= 0 && value <= Octal8ByteFieldMaxValue;
1057+
1058+
if (isOctalRange || _format == TarEntryFormat.Pax)
1059+
{
1060+
return FormatOctal(value, destination);
1061+
}
1062+
else if (_format == TarEntryFormat.Gnu)
1063+
{
1064+
// GNU format: store negative numbers in big endian format with leading '0xff' byte.
1065+
// store positive numbers in big endian format with leading '0x80' byte.
1066+
long destinationValue = value;
1067+
destinationValue |= 1L << 63;
1068+
BinaryPrimitives.WriteInt64BigEndian(destination, destinationValue);
1069+
return Checksum(destination);
1070+
}
1071+
else
1072+
{
1073+
throw new ArgumentException(SR.Format(SR.TarFieldTooLargeForEntryFormat, _format));
1074+
}
1075+
}
1076+
1077+
private int FormatNumeric(long value, Span<byte> destination)
1078+
{
1079+
Debug.Assert(destination.Length == 12, "12 byte field expected.");
1080+
const int Offset = 4; // 4 bytes before the long.
1081+
1082+
bool isOctalRange = value >= 0 && value <= Octal12ByteFieldMaxValue;
1083+
1084+
if (isOctalRange || _format == TarEntryFormat.Pax)
1085+
{
1086+
return FormatOctal(value, destination);
1087+
}
1088+
else if (_format == TarEntryFormat.Gnu)
1089+
{
1090+
// GNU format: store negative numbers in big endian format with leading '0xff' byte.
1091+
// store positive numbers in big endian format with leading '0x80' byte.
1092+
BinaryPrimitives.WriteUInt32BigEndian(destination, value < 0 ? 0xffffffff : 0x80000000);
1093+
BinaryPrimitives.WriteInt64BigEndian(destination.Slice(Offset), value);
1094+
return Checksum(destination);
1095+
}
1096+
else
1097+
{
1098+
throw new ArgumentException(SR.Format(SR.TarFieldTooLargeForEntryFormat, _format));
1099+
}
1100+
}
1101+
10251102
// Writes the specified decimal number as a right-aligned octal number and returns its checksum.
10261103
private static int FormatOctal(long value, Span<byte> destination)
10271104
{
@@ -1040,11 +1117,11 @@ private static int FormatOctal(long value, Span<byte> destination)
10401117
return WriteRightAlignedBytesAndGetChecksum(digits.Slice(i), destination);
10411118
}
10421119

1043-
// Writes the specified DateTimeOffset's Unix time seconds as a right-aligned octal number, and returns its checksum.
1044-
private static int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
1120+
// Writes the specified DateTimeOffset's Unix time seconds, and returns its checksum.
1121+
private int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
10451122
{
10461123
long unixTimeSeconds = timestamp.ToUnixTimeSeconds();
1047-
return FormatOctal(unixTimeSeconds, destination);
1124+
return FormatNumeric(unixTimeSeconds, destination);
10481125
}
10491126

10501127
// Writes the specified text as an UTF8 string aligned to the left, and returns its checksum.

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs

+23-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ internal static partial class TarHelpers
1919
{
2020
internal const short RecordSize = 512;
2121
internal const int MaxBufferLength = 4096;
22-
internal const long MaxSizeLength = (1L << 33) - 1; // Max value of 11 octal digits = 2^33 - 1 or 8 Gb.
2322

2423
internal const UnixFileMode ValidUnixFileModes =
2524
UnixFileMode.UserRead |
@@ -215,6 +214,29 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format,
215214
return entryType;
216215
}
217216

217+
/// <summary>Parses a numeric field.</summary>
218+
internal static T ParseNumeric<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>, IBinaryInteger<T>
219+
{
220+
// The tar standard specifies that numeric fields are stored using an octal representation.
221+
// This limits the range of values that can be stored in the fields.
222+
// To increase the supported range, a GNU extension defines that when the leading byte is
223+
// '0xff'/'0x80' the remaining bytes are a negative/positive big endian formatted value.
224+
// Like the 'tar' tool we are permissive when encountering this representation in non GNU formats.
225+
byte leadingByte = buffer[0];
226+
if (leadingByte == 0xff)
227+
{
228+
return T.ReadBigEndian(buffer, isUnsigned: false);
229+
}
230+
else if (leadingByte == 0x80)
231+
{
232+
return T.ReadBigEndian(buffer.Slice(1), isUnsigned: true);
233+
}
234+
else
235+
{
236+
return ParseOctal<T>(buffer);
237+
}
238+
}
239+
218240
/// <summary>Parses a byte span that represents an ASCII string containing a number in octal base.</summary>
219241
internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
220242
{

0 commit comments

Comments
 (0)