diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index 6bda76ec3f84ab..ffddaa2a3c9047 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -188,7 +188,7 @@ An attempt was made to move the position before the beginning of the stream. - Unable to parse number. + The TAR archive is corrupted or invalid. Checksum validation failed. The archive might be corrupted. @@ -211,4 +211,7 @@ The extended header contains invalid records. + + The file appears to be a {0} archive. TAR format expected. + diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index e3fd69857e500a..6d8f188043fec6 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -370,7 +370,20 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { return null; } - int checksum = (int)TarHelpers.ParseOctal(spanChecksum); + + int checksum; + try + { + checksum = (int)TarHelpers.ParseOctal(spanChecksum); + } + catch (InvalidDataException) + { + // Check if this might be a compressed file by looking at the buffer for compression magic numbers + ThrowIfCompressedArchive(buffer); + // If not a compressed file, re-throw the original parsing exception + throw; + } + // Zero checksum means the whole header is empty if (checksum == 0) { @@ -789,5 +802,85 @@ private static bool TryGetNextExtendedAttribute( buffer = buffer.Slice(newlinePos + 1); return true; } + + /// + /// Analyzes the buffer for known compression format magic numbers and throws an InvalidDataException + /// with a specific error message if a compression format is detected. + /// If no compression format is detected, the method returns without throwing. + /// + /// + /// Thrown if a compression format is detected. + /// + private static void ThrowIfCompressedArchive(ReadOnlySpan buffer) + { + if (buffer.Length < 2) + { + return; + } + + byte firstByte = buffer[0]; + switch (firstByte) + { + case 0x28: // Zstandard + if (buffer.Length >= 4 && + buffer[1] == 0xB5 && buffer[2] == 0x2F && buffer[3] == 0xFD) + { + throw new InvalidDataException(SR.Format(SR.TarCompressionArchiveDetected, "Zstandard")); + } + break; + + case 0x37: // 7-Zip + if (buffer.Length >= 6 && + buffer[1] == 0x7A && buffer[2] == 0xBC && + buffer[3] == 0xAF && buffer[4] == 0x27 && buffer[5] == 0x1C) + { + throw new InvalidDataException(SR.Format(SR.TarCompressionArchiveDetected, "7-Zip")); + } + break; + + case 0x50: // ZIP files start with "PK" + if (buffer.Length >= 2 && buffer[1] == 0x4B) + { + throw new InvalidDataException(SR.Format(SR.TarCompressionArchiveDetected, "ZIP")); + } + break; + + case 0x1F: // GZIP + if (buffer.Length >= 2 && buffer[1] == 0x8B) + { + throw new InvalidDataException(SR.Format(SR.TarCompressionArchiveDetected, "GZIP")); + } + break; + + case 0x42: // BZIP2 - "BZh" + if (buffer.Length >= 3 && buffer[1] == 0x5A && buffer[2] == 0x68) + { + throw new InvalidDataException(SR.Format(SR.TarCompressionArchiveDetected, "BZIP2")); + } + break; + + case 0xFD: // XZ + if (buffer.Length >= 6 && + buffer[1] == 0x37 && buffer[2] == 0x7A && + buffer[3] == 0x58 && buffer[4] == 0x5A && buffer[5] == 0x00) + { + throw new InvalidDataException(SR.Format(SR.TarCompressionArchiveDetected, "XZ")); + } + break; + + case 0x78: // ZLIB (deflate compression) + if (buffer.Length >= 2) + { + byte secondByte = buffer[1]; + if (secondByte == 0x01 || secondByte == 0x5E || secondByte == 0x9C || + secondByte == 0xDA || secondByte == 0x20 || secondByte == 0x7D || + secondByte == 0xBB || secondByte == 0xF9) + { + throw new InvalidDataException(SR.Format(SR.TarCompressionArchiveDetected, "ZLIB")); + } + } + break; + } + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.Tests.cs index 71fe33389598fd..8492be776ae965 100644 --- a/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarReader/TarReader.Tests.cs @@ -159,7 +159,7 @@ public void TarReader_InvalidChecksum_ThrowsException(bool corrupted) if (corrupted) { - Assert.Contains("parse", exception.Message); + Assert.Contains("corrupted", exception.Message); } else {