@@ -217,12 +217,15 @@ def __init__(self, filename=None, mode=None,
217
217
FutureWarning , 2 )
218
218
self .mode = WRITE
219
219
self ._init_write (filename )
220
+ if mtime is None :
221
+ mtime = int (time .time ())
220
222
self .compress = zlib .compressobj (compresslevel ,
221
223
zlib .DEFLATED ,
222
- - zlib .MAX_WBITS ,
224
+ 16 + zlib .MAX_WBITS ,
223
225
zlib .DEF_MEM_LEVEL ,
224
- 0 )
225
- self ._write_mtime = mtime
226
+ 0 ,
227
+ mtime = mtime ,
228
+ fname = self ._encode_fname ())
226
229
self ._buffer_size = _WRITE_BUFFER_SIZE
227
230
self ._buffer = io .BufferedWriter (_WriteBufferStream (self ),
228
231
buffer_size = self ._buffer_size )
@@ -231,9 +234,6 @@ def __init__(self, filename=None, mode=None,
231
234
232
235
self .fileobj = fileobj
233
236
234
- if self .mode == WRITE :
235
- self ._write_gzip_header (compresslevel )
236
-
237
237
@property
238
238
def mtime (self ):
239
239
"""Last modification time read from stream, or None"""
@@ -245,7 +245,6 @@ def __repr__(self):
245
245
246
246
def _init_write (self , filename ):
247
247
self .name = filename
248
- self .crc = zlib .crc32 (b"" )
249
248
self .size = 0
250
249
self .writebuf = []
251
250
self .bufsize = 0
@@ -256,9 +255,7 @@ def tell(self):
256
255
self ._buffer .flush ()
257
256
return super ().tell ()
258
257
259
- def _write_gzip_header (self , compresslevel ):
260
- self .fileobj .write (b'\037 \213 ' ) # magic header
261
- self .fileobj .write (b'\010 ' ) # compression method
258
+ def _encode_fname (self ):
262
259
try :
263
260
# RFC 1952 requires the FNAME field to be Latin-1. Do not
264
261
# include filenames that cannot be represented that way.
@@ -269,24 +266,7 @@ def _write_gzip_header(self, compresslevel):
269
266
fname = fname [:- 3 ]
270
267
except UnicodeEncodeError :
271
268
fname = b''
272
- flags = 0
273
- if fname :
274
- flags = FNAME
275
- self .fileobj .write (chr (flags ).encode ('latin-1' ))
276
- mtime = self ._write_mtime
277
- if mtime is None :
278
- mtime = time .time ()
279
- write32u (self .fileobj , int (mtime ))
280
- if compresslevel == _COMPRESS_LEVEL_BEST :
281
- xfl = b'\002 '
282
- elif compresslevel == _COMPRESS_LEVEL_FAST :
283
- xfl = b'\004 '
284
- else :
285
- xfl = b'\000 '
286
- self .fileobj .write (xfl )
287
- self .fileobj .write (b'\377 ' )
288
- if fname :
289
- self .fileobj .write (fname + b'\000 ' )
269
+ return fname
290
270
291
271
def write (self ,data ):
292
272
self ._check_not_closed ()
@@ -311,7 +291,6 @@ def _write_raw(self, data):
311
291
if length > 0 :
312
292
self .fileobj .write (self .compress .compress (data ))
313
293
self .size += length
314
- self .crc = zlib .crc32 (data , self .crc )
315
294
self .offset += length
316
295
317
296
return length
@@ -355,9 +334,6 @@ def close(self):
355
334
if self .mode == WRITE :
356
335
self ._buffer .flush ()
357
336
fileobj .write (self .compress .flush ())
358
- write32u (fileobj , self .crc )
359
- # self.size may exceed 2 GiB, or even 4 GiB
360
- write32u (fileobj , self .size & 0xffffffff )
361
337
elif self .mode == READ :
362
338
self ._buffer .close ()
363
339
finally :
@@ -424,78 +400,17 @@ def readline(self, size=-1):
424
400
return self ._buffer .readline (size )
425
401
426
402
427
- def _read_exact (fp , n ):
428
- '''Read exactly *n* bytes from `fp`
429
-
430
- This method is required because fp may be unbuffered,
431
- i.e. return short reads.
432
- '''
433
- data = fp .read (n )
434
- while len (data ) < n :
435
- b = fp .read (n - len (data ))
436
- if not b :
437
- raise EOFError ("Compressed file ended before the "
438
- "end-of-stream marker was reached" )
439
- data += b
440
- return data
441
-
442
-
443
- def _read_gzip_header (fp ):
444
- '''Read a gzip header from `fp` and progress to the end of the header.
445
-
446
- Returns last mtime if header was present or None otherwise.
447
- '''
448
- magic = fp .read (2 )
449
- if magic == b'' :
450
- return None
451
-
452
- if magic != b'\037 \213 ' :
453
- raise BadGzipFile ('Not a gzipped file (%r)' % magic )
454
-
455
- (method , flag , last_mtime ) = struct .unpack ("<BBIxx" , _read_exact (fp , 8 ))
456
- if method != 8 :
457
- raise BadGzipFile ('Unknown compression method' )
458
-
459
- if flag & FEXTRA :
460
- # Read & discard the extra field, if present
461
- extra_len , = struct .unpack ("<H" , _read_exact (fp , 2 ))
462
- _read_exact (fp , extra_len )
463
- if flag & FNAME :
464
- # Read and discard a null-terminated string containing the filename
465
- while True :
466
- s = fp .read (1 )
467
- if not s or s == b'\000 ' :
468
- break
469
- if flag & FCOMMENT :
470
- # Read and discard a null-terminated string containing a comment
471
- while True :
472
- s = fp .read (1 )
473
- if not s or s == b'\000 ' :
474
- break
475
- if flag & FHCRC :
476
- _read_exact (fp , 2 ) # Read & discard the 16-bit header CRC
477
- return last_mtime
478
-
479
-
480
403
class _GzipReader (_compression .DecompressReader ):
481
404
def __init__(self, fp):
    """Create a reader that decompresses gzip members read from *fp*.

    *fp* is wrapped in a _PaddedFile so that bytes the decompressor did
    not consume can be pushed back and read again.
    """
    # wbits = 16 + MAX_WBITS selects zlib's gzip container handling
    # instead of a raw deflate stream (negative wbits).
    super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor,
                     wbits=16 + zlib.MAX_WBITS)
    # Set flag indicating start of a new member
    self._new_member = True
    # No member header has been seen yet, so no mtime is known.
    self._last_mtime = None
487
410
488
411
def _init_read (self ):
489
- self ._crc = zlib .crc32 (b"" )
490
412
self ._stream_size = 0 # Decompressed size of unconcatenated stream
491
413
492
- def _read_gzip_header (self ):
493
- last_mtime = _read_gzip_header (self ._fp )
494
- if last_mtime is None :
495
- return False
496
- self ._last_mtime = last_mtime
497
- return True
498
-
499
414
def read (self , size = - 1 ):
500
415
if size < 0 :
501
416
return self .readall ()
@@ -509,33 +424,35 @@ def read(self, size=-1):
509
424
while True :
510
425
if self ._decompressor .eof :
511
426
# Ending case: we've come to the end of a member in the file,
512
- # so finish up this member, and read a new gzip header.
513
- # Check the CRC and file size, and set the flag so we read
514
- # a new member
427
+ # so finish up this member and set the flag, so that we read a
428
+ # new member
515
429
self ._read_eof ()
516
430
self ._new_member = True
517
431
self ._decompressor = self ._decomp_factory (
518
432
** self ._decomp_args )
519
433
520
- if self ._new_member :
521
- # If the _new_member flag is set, we have to
522
- # jump to the next member, if there is one.
523
- self ._init_read ()
524
- if not self ._read_gzip_header ():
525
- self ._size = self ._pos
526
- return b""
527
- self ._new_member = False
528
-
529
434
# Read a chunk of data from the file
530
435
if self ._decompressor .needs_input :
531
436
buf = self ._fp .read (READ_BUFFER_SIZE )
437
+ if self ._new_member :
438
+ # If the _new_member flag is set, we have to
439
+ # jump to the next member, if there is one.
440
+ self ._init_read ()
441
+ if not buf :
442
+ self ._size = self ._pos
443
+ return b""
444
+ self ._new_member = False
532
445
uncompress = self ._decompressor .decompress (buf , size )
533
446
else :
447
+ assert not self ._new_member
534
448
uncompress = self ._decompressor .decompress (b"" , size )
535
449
450
+ if self ._decompressor .gz_header_done :
451
+ self ._last_mtime = self ._decompressor .gz_header_mtime
452
+
536
453
if self ._decompressor .unused_data != b"" :
537
454
# Prepend the already read bytes to the fileobj so they can
538
- # be seen by _read_eof() and _read_gzip_header()
455
+ # be seen by _read_eof()
539
456
self ._fp .prepend (self ._decompressor .unused_data )
540
457
541
458
if uncompress != b"" :
@@ -544,23 +461,12 @@ def read(self, size=-1):
544
461
raise EOFError ("Compressed file ended before the "
545
462
"end-of-stream marker was reached" )
546
463
547
- self ._crc = zlib .crc32 (uncompress , self ._crc )
548
464
self ._stream_size += len (uncompress )
549
465
self ._pos += len (uncompress )
550
466
return uncompress
551
467
552
468
def _read_eof (self ):
553
469
# We've read to the end of the file
554
- # We check that the computed CRC and size of the
555
- # uncompressed data matches the stored values. Note that the size
556
- # stored is the true file size mod 2**32.
557
- crc32 , isize = struct .unpack ("<II" , _read_exact (self ._fp , 8 ))
558
- if crc32 != self ._crc :
559
- raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
560
- hex (self ._crc )))
561
- elif isize != (self ._stream_size & 0xffffffff ):
562
- raise BadGzipFile ("Incorrect length of data produced" )
563
-
564
470
# Gzip files can be padded with zeroes and still have archives.
565
471
# Consume all zero bytes and set the file position to the first
566
472
# non-zero byte. See http://www.gzip.org/#faq8
@@ -575,68 +481,32 @@ def _rewind(self):
575
481
self ._new_member = True
576
482
577
483
578
- def _create_simple_gzip_header (compresslevel : int ,
579
- mtime = None ) -> bytes :
580
- """
581
- Write a simple gzip header with no extra fields.
582
- :param compresslevel: Compresslevel used to determine the xfl bytes.
583
- :param mtime: The mtime (must support conversion to a 32-bit integer).
584
- :return: A bytes object representing the gzip header.
585
- """
586
- if mtime is None :
587
- mtime = time .time ()
588
- if compresslevel == _COMPRESS_LEVEL_BEST :
589
- xfl = 2
590
- elif compresslevel == _COMPRESS_LEVEL_FAST :
591
- xfl = 4
592
- else :
593
- xfl = 0
594
- # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
595
- # fields added to header), mtime, xfl and os (255 for unknown OS).
596
- return struct .pack ("<BBBBLBB" , 0x1f , 0x8b , 8 , 0 , int (mtime ), xfl , 255 )
597
-
598
-
599
484
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed bytes.

    compresslevel sets the compression level in the range 0-9.
    mtime sets the modification time stored in the gzip header.  Any value
    accepted by int() may be passed; it defaults to the current time.
    """
    # Always coerce to int: callers may hand in a float timestamp (for
    # example time.time() or an os.stat() result), which previous versions
    # of this function accepted.
    mtime = int(time.time() if mtime is None else mtime)
    # wbits=31 is 16 + zlib.MAX_WBITS, i.e. the gzip container.
    # NOTE(review): this relies on zlib.compress() accepting an ``mtime``
    # keyword, which is not part of the documented zlib API -- confirm the
    # accompanying zlib change is present.
    return zlib.compress(data, level=compresslevel, wbits=31, mtime=mtime)
615
494
616
495
617
496
def decompress(data):
    """Decompress a gzip compressed string in one shot.

    *data* may hold several concatenated gzip members, optionally separated
    or followed by zero padding; the decompressed members are joined and
    returned as a single bytes object.  Empty input yields b"".

    Raises BadGzipFile if a member does not start with the gzip magic
    number, and EOFError if the data ends before a member is complete.
    """
    decompressed_members = []
    while data:
        # zlib would reject a bad magic number with a generic zlib.error;
        # check it here so callers keep getting BadGzipFile for input that
        # is not gzip data at all.
        magic = bytes(data[:2])
        if magic != b'\037\213':
            raise BadGzipFile('Not a gzipped file (%r)' % magic)
        # wbits = 16 + MAX_WBITS selects zlib's gzip container handling,
        # so the member header and trailer are parsed by zlib.
        do = zlib.decompressobj(wbits=16 + zlib.MAX_WBITS)
        decompressed = do.decompress(data)
        if not do.eof:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        decompressed_members.append(decompressed)
        # Gzip members may be padded with zero bytes; skip the padding to
        # reach the next member, if any.
        data = do.unused_data.lstrip(b"\x00")
    return b"".join(decompressed_members)
640
510
641
511
642
512
def main ():
0 commit comments