From 83e58a2239878d7f3c802ba7ab5f2f78727bf90a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman <r.h.p.vorderman@lumc.nl> Date: Mon, 18 Oct 2021 10:46:00 +0200 Subject: [PATCH 1/2] Throw EOF error when trailer is not present in gzip member This is to keep error compatibility with 3.10 and lower. --- Lib/gzip.py | 3 +++ Lib/test/test_gzip.py | 13 +++++++++++++ .../2021-10-18-11-20-24.bpo-45507.vWx2yS.rst | 2 ++ 3 files changed, 18 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst diff --git a/Lib/gzip.py b/Lib/gzip.py index 0dddb51553fabd..27211eef0d63a2 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -607,6 +607,9 @@ def decompress(data): do = zlib.decompressobj(wbits=-zlib.MAX_WBITS) # Read all the data except the header decompressed = do.decompress(data[fp.tell():]) + if not do.eof or len(do.unused_data) < 8: + raise EOFError("Compressed file ended before the end-of-stream " + "marker was reached") crc, length = struct.unpack("<II", do.unused_data[:8]) if crc != zlib.crc32(decompressed): raise BadGzipFile("CRC check failed") diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index f86e767ac0e59c..9c03614362f7e6 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -562,6 +562,19 @@ def test_decompress(self): datac = gzip.compress(data) self.assertEqual(gzip.decompress(datac), data) + def test_decompress_uncompressed_header(self): + truncated_headers = [ + b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00", # Missing OS byte + b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x00\xff", # FHRC, but no checksum + b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff", # FEXTRA, but no xlen + b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\xaa\x00", # FEXTRA, xlen, but no data + b"\x1f\x8b\x08\x08\x00\x00\x00\x00\x00\xff", # FNAME but no fname + b"\x1f\x8b\x08\x10\x00\x00\x00\x00\x00\xff", # FCOMMENT, but no fcomment + ] + for header in truncated_headers: + with self.subTest(header=header): + self.assertRaises(EOFError, gzip.decompress, header) 
+ def test_read_truncated(self): data = data1*50 # Drop the CRC (4 bytes) and file size (4 bytes). diff --git a/Misc/NEWS.d/next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst b/Misc/NEWS.d/next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst new file mode 100644 index 00000000000000..604fbbb697dbd8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst @@ -0,0 +1,2 @@ +Add regression tests for errors that are thrown when decompressing with the +``gzip`` module to ensure backwards-compatibility between Python versions. From 773c4f173f2a237d20727c0b78aa4f8233796865 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman <r.h.p.vorderman@lumc.nl> Date: Mon, 18 Oct 2021 11:44:03 +0200 Subject: [PATCH 2/2] Fix bug where missing NAME or COMMENT fields do not trigger an error --- Lib/gzip.py | 8 ++++---- Lib/test/test_gzip.py | 5 +++-- .../next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst | 5 +++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 27211eef0d63a2..57a08b8ea84dc7 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -442,14 +442,14 @@ def _read_gzip_header(fp): if flag & FNAME: # Read and discard a null-terminated string containing the filename while True: - s = fp.read(1) - if not s or s==b'\000': + s = _read_exact(fp, 1) + if s == b'\000': break if flag & FCOMMENT: # Read and discard a null-terminated string containing a comment while True: - s = fp.read(1) - if not s or s==b'\000': + s = _read_exact(fp, 1) + if s == b'\000': break if flag & FHCRC: _read_exact(fp, 2) # Read & discard the 16-bit header CRC diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 9c03614362f7e6..ae568f61439af2 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -562,7 +562,7 @@ def test_decompress(self): datac = gzip.compress(data) self.assertEqual(gzip.decompress(datac), data) - def test_decompress_uncompressed_header(self): + def test_truncated_header(self): truncated_headers = [ 
b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00", # Missing OS byte b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x00\xff", # FHRC, but no checksum @@ -573,7 +573,8 @@ def test_decompress_uncompressed_header(self): ] for header in truncated_headers: with self.subTest(header=header): - self.assertRaises(EOFError, gzip.decompress, header) + self.assertRaises(EOFError, gzip._read_gzip_header, + io.BytesIO(header)) def test_read_truncated(self): data = data1*50 diff --git a/Misc/NEWS.d/next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst b/Misc/NEWS.d/next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst index 604fbbb697dbd8..4ecafb0836a2d1 100644 --- a/Misc/NEWS.d/next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst +++ b/Misc/NEWS.d/next/Library/2021-10-18-11-20-24.bpo-45507.vWx2yS.rst @@ -1,2 +1,3 @@ -Add regression tests for errors that are thrown when decompressing with the -``gzip`` module to ensure backwards-compatibility between Python versions. +Make sure EOFerror is thrown when gzip headers have missing or truncated +NAME or COMMENT fields when FNAME or FCOMMENT flags are set. +