From 06a70b1ffc46f339a7b2a4c247c9b96815cabdab Mon Sep 17 00:00:00 2001 From: Christopher Burr Date: Sat, 27 May 2017 02:34:47 +0200 Subject: [PATCH 1/4] TST: Add tests for trying to read non-existent files #15296 --- pandas/tests/io/test_common.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 30904593fedc4..b8864d468274b 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -125,6 +125,26 @@ def test_iterator(self): tm.assert_frame_equal(first, expected.iloc[[0]]) tm.assert_frame_equal(concat(it), expected.iloc[1:]) + @pytest.mark.parametrize('reader, module, error_class, fn_ext', [ + (pd.read_csv, 'os', pd.compat.FileNotFoundError, 'csv'), + (pd.read_table, 'os', pd.compat.FileNotFoundError, 'csv'), + (pd.read_fwf, 'os', pd.compat.FileNotFoundError, 'txt'), + (pd.read_excel, 'xlrd', ValueError, 'xlsx'), + (pd.read_feather, 'feather', ValueError, 'feather'), + (pd.read_hdf, 'tables', ValueError, 'h5'), + (pd.read_stata, 'os', pd.compat.FileNotFoundError, 'dta'), + (pd.read_sas, 'os', pd.compat.FileNotFoundError, 'sas7bdat'), + (pd.read_json, 'os', ValueError, 'json'), + (pd.read_msgpack, 'os', ValueError, 'mp'), + (pd.read_pickle, 'os', pd.compat.FileNotFoundError, 'pickle'), + ]) + def test_read_non_existant(self, reader, module, error_class, fn_ext): + pytest.importorskip(module) + + path = os.path.join(HERE, 'data', 'does_not_exist.' 
+ fn_ext) + with pytest.raises(error_class): + reader(path) + @pytest.mark.parametrize('reader, module, path', [ (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')), (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')), From 02b041cc08dc959a96f992efc5bfc79e64fbb25d Mon Sep 17 00:00:00 2001 From: Christopher Burr Date: Sun, 28 May 2017 11:52:14 +0200 Subject: [PATCH 2/4] BUG: Fix passing non-existent file to read_msgpack #15296 --- doc/source/whatsnew/v0.22.0.txt | 2 ++ pandas/io/packers.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index e85ba505887b4..4b2999416ffbe 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -86,6 +86,8 @@ Documentation Changes Bug Fixes ~~~~~~~~~ +- Bug in ``pd.read_msgpack()`` when a non-existent file is passed in Python 2 (:issue:`15296`) + Conversion ^^^^^^^^^^ diff --git a/pandas/io/packers.py b/pandas/io/packers.py index abd258034af99..ef65a3275060b 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -192,7 +192,6 @@ def read(fh): # see if we have an actual file if isinstance(path_or_buf, compat.string_types): - try: exists = os.path.exists(path_or_buf) except (TypeError, ValueError): @@ -202,18 +201,21 @@ def read(fh): with open(path_or_buf, 'rb') as fh: return read(fh) - # treat as a binary-like if isinstance(path_or_buf, compat.binary_type): + # treat as a binary-like fh = None try: - fh = compat.BytesIO(path_or_buf) - return read(fh) + # We can't distinguish between a path and a buffer of bytes in + # Python 2 so instead assume the first byte of a valid path is + # less than 0x80. 
+ if compat.PY3 or ord(path_or_buf[0]) >= 0x80: + fh = compat.BytesIO(path_or_buf) + return read(fh) finally: if fh is not None: fh.close() - - # a buffer like - if hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read): + elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read): + # treat as a buffer like return read(path_or_buf) raise ValueError('path_or_buf needs to be a string file path or file-like') From 6ea733cd5e7a0f83d5fed086d1cc20c5e2e8ef93 Mon Sep 17 00:00:00 2001 From: Christopher Burr Date: Sun, 28 May 2017 13:00:52 +0200 Subject: [PATCH 3/4] TST: Fix io.test_common.test_read_non_existant for external modules --- pandas/tests/io/test_common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b8864d468274b..51274e7228b70 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -129,9 +129,9 @@ def test_iterator(self): (pd.read_csv, 'os', pd.compat.FileNotFoundError, 'csv'), (pd.read_table, 'os', pd.compat.FileNotFoundError, 'csv'), (pd.read_fwf, 'os', pd.compat.FileNotFoundError, 'txt'), - (pd.read_excel, 'xlrd', ValueError, 'xlsx'), - (pd.read_feather, 'feather', ValueError, 'feather'), - (pd.read_hdf, 'tables', ValueError, 'h5'), + (pd.read_excel, 'xlrd', pd.compat.FileNotFoundError, 'xlsx'), + (pd.read_feather, 'feather', Exception, 'feather'), + (pd.read_hdf, 'tables', pd.compat.FileNotFoundError, 'h5'), (pd.read_stata, 'os', pd.compat.FileNotFoundError, 'dta'), (pd.read_sas, 'os', pd.compat.FileNotFoundError, 'sas7bdat'), (pd.read_json, 'os', ValueError, 'json'), From 9d0f3b67228fc1c76f7a6c3a92a0d2a1692ce7be Mon Sep 17 00:00:00 2001 From: Christopher Burr Date: Sat, 28 Oct 2017 10:14:16 +0100 Subject: [PATCH 4/4] CLN: Import FileNotFoundError in tests/io/test_common.py --- pandas/tests/io/test_common.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git 
a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 51274e7228b70..707580bfe9601 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -10,7 +10,7 @@ import pandas.util.testing as tm from pandas.io import common -from pandas.compat import is_platform_windows, StringIO +from pandas.compat import is_platform_windows, StringIO, FileNotFoundError from pandas import read_csv, concat @@ -126,17 +126,17 @@ def test_iterator(self): tm.assert_frame_equal(concat(it), expected.iloc[1:]) @pytest.mark.parametrize('reader, module, error_class, fn_ext', [ - (pd.read_csv, 'os', pd.compat.FileNotFoundError, 'csv'), - (pd.read_table, 'os', pd.compat.FileNotFoundError, 'csv'), - (pd.read_fwf, 'os', pd.compat.FileNotFoundError, 'txt'), - (pd.read_excel, 'xlrd', pd.compat.FileNotFoundError, 'xlsx'), + (pd.read_csv, 'os', FileNotFoundError, 'csv'), + (pd.read_table, 'os', FileNotFoundError, 'csv'), + (pd.read_fwf, 'os', FileNotFoundError, 'txt'), + (pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'), (pd.read_feather, 'feather', Exception, 'feather'), - (pd.read_hdf, 'tables', pd.compat.FileNotFoundError, 'h5'), - (pd.read_stata, 'os', pd.compat.FileNotFoundError, 'dta'), - (pd.read_sas, 'os', pd.compat.FileNotFoundError, 'sas7bdat'), + (pd.read_hdf, 'tables', FileNotFoundError, 'h5'), + (pd.read_stata, 'os', FileNotFoundError, 'dta'), + (pd.read_sas, 'os', FileNotFoundError, 'sas7bdat'), (pd.read_json, 'os', ValueError, 'json'), (pd.read_msgpack, 'os', ValueError, 'mp'), - (pd.read_pickle, 'os', pd.compat.FileNotFoundError, 'pickle'), + (pd.read_pickle, 'os', FileNotFoundError, 'pickle'), ]) def test_read_non_existant(self, reader, module, error_class, fn_ext): pytest.importorskip(module)