Skip to content

Commit 4b80862

Browse files
Mahmoud Lababidi, dhimmel
Mahmoud Lababidi
authored and committed
Move compression code to io.common._get_handle
Closes #13340.
1 parent 14e4815 commit 4b80862

File tree

2 files changed

+51
-30
lines changed

2 files changed

+51
-30
lines changed

pandas/io/common.py

+47 -16
Original file line number | Diff line number | Diff line change
@@ -287,53 +287,84 @@ def ZipFile(*args, **kwargs):
287287
ZipFile = zipfile.ZipFile
288288

289289

290-
def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
290+
def _get_handle(source, mode, encoding=None, compression=None, memory_map=False):
291291
"""Gets file handle for given path and mode.
292292
"""
293-
if compression is not None:
294-
if encoding is not None and not compat.PY3:
293+
294+
f = source
295+
is_path = isinstance(source, compat.string_types)
296+
297+
# in Python 3, convert BytesIO or fileobjects passed with an encoding
298+
if compat.PY3 and isinstance(source, compat.BytesIO):
299+
from io import TextIOWrapper
300+
301+
return TextIOWrapper(source, encoding=encoding)
302+
303+
elif compression is not None:
304+
compression = compression.lower()
305+
if encoding is not None and not compat.PY3 and not is_path:
295306
msg = 'encoding + compression not yet supported in Python 2'
296307
raise ValueError(msg)
297308

309+
# GZ Compression
298310
if compression == 'gzip':
299311
import gzip
300-
f = gzip.GzipFile(path, mode)
312+
313+
f = gzip.GzipFile(source, mode) \
314+
if is_path else gzip.GzipFile(fileobj=source)
315+
316+
# BZ Compression
301317
elif compression == 'bz2':
302318
import bz2
303-
f = bz2.BZ2File(path, mode)
319+
320+
if is_path:
321+
f = bz2.BZ2File(source, mode)
322+
323+
else:
324+
f = bz2.BZ2File(source) if compat.PY3 else StringIO(
325+
bz2.decompress(source.read()))
326+
# Python 2's bz2 module can't take file objects, so have to
327+
# run through decompress manually
328+
329+
# ZIP Compression
304330
elif compression == 'zip':
305331
import zipfile
306-
zip_file = zipfile.ZipFile(path)
332+
zip_file = zipfile.ZipFile(source)
307333
zip_names = zip_file.namelist()
308334

309335
if len(zip_names) == 1:
310-
file_name = zip_names.pop()
311-
f = zip_file.open(file_name)
336+
f = zip_file.open(zip_names.pop())
312337
elif len(zip_names) == 0:
313338
raise ValueError('Zero files found in ZIP file {}'
314-
.format(path))
339+
.format(source))
315340
else:
316341
raise ValueError('Multiple files found in ZIP file.'
317342
' Only one file per ZIP :{}'
318343
.format(zip_names))
344+
345+
# XZ Compression
319346
elif compression == 'xz':
320347
lzma = compat.import_lzma()
321-
f = lzma.LZMAFile(path, mode)
348+
f = lzma.LZMAFile(source, mode)
349+
322350
else:
323-
raise ValueError('Unrecognized compression type: %s' %
324-
compression)
351+
raise ValueError('Unrecognized compression: %s' % compression)
352+
325353
if compat.PY3:
326354
from io import TextIOWrapper
355+
327356
f = TextIOWrapper(f, encoding=encoding)
357+
328358
return f
329-
else:
359+
360+
elif is_path:
330361
if compat.PY3:
331362
if encoding:
332-
f = open(path, mode, encoding=encoding)
363+
f = open(source, mode, encoding=encoding)
333364
else:
334-
f = open(path, mode, errors='replace')
365+
f = open(source, mode, errors='replace')
335366
else:
336-
f = open(path, mode)
367+
f = open(source, mode)
337368

338369
if memory_map and hasattr(f, 'fileno'):
339370
try:

pandas/io/parsers.py

+4 -14
Original file line number | Diff line number | Diff line change
@@ -1890,20 +1890,10 @@ def __init__(self, f, **kwds):
18901890
self.comment = kwds['comment']
18911891
self._comment_lines = []
18921892

1893-
if isinstance(f, compat.string_types):
1894-
f = _get_handle(f, 'r', encoding=self.encoding,
1895-
compression=self.compression,
1896-
memory_map=self.memory_map)
1897-
self.handles.append(f)
1898-
elif self.compression:
1899-
f = _wrap_compressed(f, self.compression, self.encoding)
1900-
self.handles.append(f)
1901-
# in Python 3, convert BytesIO or fileobjects passed with an encoding
1902-
elif compat.PY3 and isinstance(f, compat.BytesIO):
1903-
from io import TextIOWrapper
1904-
1905-
f = TextIOWrapper(f, encoding=self.encoding)
1906-
self.handles.append(f)
1893+
f = _get_handle(f, 'r', encoding=self.encoding,
1894+
compression=self.compression,
1895+
memory_map=self.memory_map)
1896+
self.handles.append(f)
19071897

19081898
# Set self.data to something that can read lines.
19091899
if hasattr(f, 'readline'):

0 commit comments

Comments
 (0)