@@ -403,6 +403,59 @@ def __iter__(self):
403403 return self ._buffer .__iter__ ()
404404
405405
def _read_exact(fp, n):
    '''Return exactly *n* bytes read from `fp`.

    A single read() may hand back fewer bytes than requested when `fp` is
    unbuffered, so keep reading until the request is satisfied.

    Raises EOFError if `fp` runs out of data before *n* bytes were read.
    '''
    buf = fp.read(n)
    missing = n - len(buf)
    while missing > 0:
        chunk = fp.read(missing)
        if not chunk:
            # An empty read means end of file: the request cannot be met.
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")
        buf += chunk
        missing -= len(chunk)
    return buf
420+
421+
def _read_gzip_header(fp):
    '''Read a gzip header from `fp` and advance to the end of the header.

    Returns the mtime stored in the header, or None when `fp` yields no
    data at all (i.e. there is no header to read).

    Raises BadGzipFile if the magic number or the compression method is
    not the expected one, and EOFError (from _read_exact) if a fixed-size
    part of the header is cut short.
    '''
    magic = fp.read(2)
    if magic == b'':
        return None

    if magic != b'\037\213':
        raise BadGzipFile('Not a gzipped file (%r)' % magic)

    # One byte method, one byte flags, 4-byte little-endian mtime; the two
    # pad bytes ("xx") skip the remaining two bytes of the fixed header.
    (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
    if method != 8:
        raise BadGzipFile('Unknown compression method')

    if flag & FEXTRA:
        # Read & discard the extra field, if present
        extra_len, = struct.unpack("<H", _read_exact(fp, 2))
        _read_exact(fp, extra_len)
    # Read and discard the null-terminated filename and then the
    # null-terminated comment, each only when its flag is set.  Reaching
    # end of file before the terminator simply ends the skip.
    for field in (FNAME, FCOMMENT):
        if flag & field:
            while True:
                s = fp.read(1)
                if not s or s == b'\000':
                    break
    if flag & FHCRC:
        _read_exact(fp, 2)     # Read & discard the 16-bit header CRC
    return last_mtime
457+
458+
406459class _GzipReader (_compression .DecompressReader ):
407460 def __init__ (self , fp ):
408461 super ().__init__ (_PaddedFile (fp ), zlib .decompressobj ,
@@ -415,53 +468,11 @@ def _init_read(self):
415468 self ._crc = zlib .crc32 (b"" )
416469 self ._stream_size = 0 # Decompressed size of unconcatenated stream
417470
418- def _read_exact (self , n ):
419- '''Read exactly *n* bytes from `self._fp`
420-
421- This method is required because self._fp may be unbuffered,
422- i.e. return short reads.
423- '''
424-
425- data = self ._fp .read (n )
426- while len (data ) < n :
427- b = self ._fp .read (n - len (data ))
428- if not b :
429- raise EOFError ("Compressed file ended before the "
430- "end-of-stream marker was reached" )
431- data += b
432- return data
433-
def _read_gzip_header(self):
    # Method of _GzipReader: delegates the parsing to the module-level
    # _read_gzip_header() helper, which returns None when no header is
    # present.  The mtime is stored on the reader only when a header was
    # found, and the return value reports whether one was found.
    mtime = _read_gzip_header(self._fp)
    found = mtime is not None
    if found:
        self._last_mtime = mtime
    return found
466477
467478 def read (self , size = - 1 ):
@@ -524,7 +535,7 @@ def _read_eof(self):
524535 # We check that the computed CRC and size of the
525536 # uncompressed data matches the stored values. Note that the size
526537 # stored is the true file size mod 2**32.
527- crc32 , isize = struct .unpack ("<II" , self . _read_exact (8 ))
538+ crc32 , isize = struct .unpack ("<II" , _read_exact (self . _fp , 8 ))
528539 if crc32 != self ._crc :
529540 raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
530541 hex (self ._crc )))
@@ -544,21 +555,65 @@ def _rewind(self):
544555 super ()._rewind ()
545556 self ._new_member = True
546557
558+
def _create_simple_gzip_header(compresslevel: int,
                               mtime=None) -> bytes:
    """
    Build a minimal gzip header that carries no optional fields.
    :param compresslevel: Compression level; it only selects the xfl byte.
    :param mtime: The mtime (must support conversion to a 32-bit integer).
                  The current time is used when it is None.
    :return: A bytes object representing the gzip header.
    """
    timestamp = time.time() if mtime is None else mtime
    # xfl is 2 for the best-compression level, 4 for the fastest level and
    # 0 for every other level.
    xfl = (2 if compresslevel == _COMPRESS_LEVEL_BEST
           else 4 if compresslevel == _COMPRESS_LEVEL_FAST
           else 0)
    # Fields, in order: ID1 and ID2 magic bytes, method (8=deflate), header
    # flags (0: no optional fields), mtime, xfl and os (255 for unknown OS).
    fields = (0x1f, 0x8b, 8, 0, int(timestamp), xfl, 255)
    return struct.pack("<BBBBLBB", *fields)
578+
579+
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.

    compresslevel sets the compression level in range of 0-9.
    mtime can be used to set the modification time. The modification time is
    set to the current time by default.
    """
    if mtime == 0:
        # Use zlib as it creates the header with 0 mtime by default.
        # This is faster and with less overhead.
        return zlib.compress(data, level=compresslevel, wbits=31)
    header = _create_simple_gzip_header(compresslevel, mtime)
    # The trailer holds the CRC-32 of the uncompressed data followed by its
    # length modulo 2**32.
    trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
    # Wbits=-15 creates a raw deflate block.  The level must be passed
    # explicitly: otherwise zlib falls back to its default level and the
    # requested compresslevel only affects the xfl byte of the header.
    return (header + zlib.compress(data, level=compresslevel, wbits=-15)
            + trailer)
595+
555596
def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.

    All concatenated gzip members found in *data* are decompressed and
    joined; zero bytes between or after members are skipped.

    Raises BadGzipFile if a member has a bad header, CRC or length, and
    EOFError if a member is cut short before its 8-byte trailer is complete.
    """
    decompressed_members = []
    while True:
        fp = io.BytesIO(data)
        if _read_gzip_header(fp) is None:
            # No data left: every member has been processed.
            return b"".join(decompressed_members)
        # Use a zlib raw deflate decompressor
        do = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
        # Read all the data except the header
        decompressed = do.decompress(data[fp.tell():])
        # A truncated deflate stream never reaches its end, and a truncated
        # trailer leaves fewer than 8 bytes; report both as EOFError rather
        # than letting struct.unpack fail on a short buffer.
        if not do.eof or len(do.unused_data) < 8:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        crc, length = struct.unpack("<II", do.unused_data[:8])
        if crc != zlib.crc32(decompressed):
            raise BadGzipFile("CRC check failed")
        # The stored size is the true size modulo 2**32.
        if length != (len(decompressed) & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")
        decompressed_members.append(decompressed)
        # Skip the trailer and any zero padding before the next member.
        data = do.unused_data[8:].lstrip(b"\x00")
562617
563618
564619def main ():
0 commit comments