@@ -353,38 +353,44 @@ def _validate_nrows(nrows):
353353 return nrows
354354
355355
356+ _compression_to_extension = {
357+ 'gzip' : '.gz' ,
358+ 'bz2' : '.bz2' ,
359+ 'zip' : '.zip' ,
360+ 'xz' : '.xz' ,
361+ }
362+
def _infer_compression(filepath_or_buffer):
    """
    Infer the compression method of a filepath or buffer.

    Inference is performed using the extension of the filename or URL.

    Parameters
    ----------
    filepath_or_buffer : object
        A path or URL given as a string, or any other object (for
        example an open buffer).

    Returns
    -------
    str or None
        The key of ``_compression_to_extension`` whose extension the
        string ends with. None when the argument is not a string, or
        when no known extension matches.
    """
    if not isinstance(filepath_or_buffer, compat.string_types):
        return None
    # Test the string as given instead of coercing it with str(): on
    # Python 2, str() of a unicode path holding non-ASCII characters raises
    # UnicodeEncodeError, while endswith() works on either string type.
    for compression, extension in _compression_to_extension.items():
        if filepath_or_buffer.endswith(extension):
            return compression
    return None
376+
356377def _read (filepath_or_buffer , kwds ):
357- "Generic reader of line files."
378+ """ Generic reader of line files."" "
358379 encoding = kwds .get ('encoding' , None )
359380 if encoding is not None :
360381 encoding = re .sub ('_' , '-' , encoding ).lower ()
361382 kwds ['encoding' ] = encoding
362383
363- # If the input could be a filename, check for a recognizable compression
364- # extension. If we're reading from a URL, the `get_filepath_or_buffer`
365- # will use header info to determine compression, so use what it finds in
366- # that case.
367- inferred_compression = kwds .get ('compression' )
368- if inferred_compression == 'infer' :
369- if isinstance (filepath_or_buffer , compat .string_types ):
370- if filepath_or_buffer .endswith ('.gz' ):
371- inferred_compression = 'gzip'
372- elif filepath_or_buffer .endswith ('.bz2' ):
373- inferred_compression = 'bz2'
374- elif filepath_or_buffer .endswith ('.zip' ):
375- inferred_compression = 'zip'
376- elif filepath_or_buffer .endswith ('.xz' ):
377- inferred_compression = 'xz'
378- else :
379- inferred_compression = None
380- else :
381- inferred_compression = None
384+ compression = kwds .get ('compression' )
385+ if compression not in set (_compression_to_extension ) | {None , 'infer' }:
386+ raise ValueError ('"{}" is not a valid compression' .format (compression ))
387+
388+ if compression == 'infer' :
389+ compression = _infer_compression (filepath_or_buffer )
382390
383391 filepath_or_buffer , _ , compression = get_filepath_or_buffer (
384- filepath_or_buffer , encoding ,
385- compression = kwds .get ('compression' , None ))
386- kwds ['compression' ] = (inferred_compression if compression == 'infer'
387- else compression )
392+ filepath_or_buffer , encoding , compression )
393+ kwds ['compression' ] = compression
388394
389395 if kwds .get ('date_parser' , None ) is not None :
390396 if isinstance (kwds ['parse_dates' ], bool ):
0 commit comments