diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 58b60fb08920a..62fa6c80e690f 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -565,7 +565,7 @@ Bug Fixes - Bug in ``.plot`` potentially modifying the ``colors`` input when the number of columns didn't match the number of series provided (:issue:`12039`). - +- Bug in ``read_excel`` failing to read data with one column when ``squeeze=True`` (:issue:`12157`) - Bug in ``.groupby`` where a ``KeyError`` was not raised for a wrong column if there was only one row in the dataframe (:issue:`11741`) - Bug in ``.read_csv`` with dtype specified on empty data producing an error (:issue:`12048`) - Bug in building *pandas* with debugging symbols (:issue:`12123`) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 0642079cc5b34..2972e21f5f120 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -76,7 +76,7 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, converters=None, - engine=None, **kwds): + engine=None, squeeze=False, **kwds): """ Read an Excel table into a pandas DataFrame @@ -133,6 +133,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, * If list of ints then indicates list of column numbers to be parsed * If string then indicates comma separated list of column names and column ranges (e.g. 
"A:E" or "A,C,E:F") + squeeze : boolean, default False + If the parsed data contains only one column, return a Series instead of a DataFrame na_values : list-like, default None List of additional strings to recognize as NA/NaN thousands : str, default None @@ -171,7 +173,7 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, thousands=thousands, convert_float=convert_float, has_index_names=has_index_names, - skip_footer=skip_footer, converters=converters, **kwds) + skip_footer=skip_footer, converters=converters, squeeze=squeeze, **kwds) class ExcelFile(object): @@ -227,7 +229,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, - converters=None, **kwds): + converters=None, squeeze=False, **kwds): """ Parse specified sheet(s) into a DataFrame @@ -246,6 +248,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, skip_footer=skip_footer, convert_float=convert_float, converters=converters, + squeeze=squeeze, **kwds) def _should_parse(self, i, parse_cols): @@ -285,7 +288,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=None, has_index_names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, - verbose=False, **kwds): + verbose=False, squeeze=False, **kwds): skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: @@ -452,11 +455,13 @@ def _parse_cell(cell_contents, cell_typ): date_parser=date_parser, skiprows=skiprows, skip_footer=skip_footer, + squeeze=squeeze, **kwds) output[asheetname] = parser.read() - output[asheetname].columns = output[ - asheetname].columns.set_names(header_names) + if not squeeze or isinstance(output[asheetname], DataFrame):
output[asheetname].columns = output[ + asheetname].columns.set_names(header_names) if ret_dict: return output diff --git a/pandas/io/tests/data/test_squeeze.xls b/pandas/io/tests/data/test_squeeze.xls new file mode 100644 index 0000000000000..7261f4df13f08 Binary files /dev/null and b/pandas/io/tests/data/test_squeeze.xls differ diff --git a/pandas/io/tests/data/test_squeeze.xlsm b/pandas/io/tests/data/test_squeeze.xlsm new file mode 100644 index 0000000000000..d7fabe802ff52 Binary files /dev/null and b/pandas/io/tests/data/test_squeeze.xlsm differ diff --git a/pandas/io/tests/data/test_squeeze.xlsx b/pandas/io/tests/data/test_squeeze.xlsx new file mode 100644 index 0000000000000..89fc590cebcc7 Binary files /dev/null and b/pandas/io/tests/data/test_squeeze.xlsx differ diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 082a26df681a4..a6a189e4f4785 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -741,6 +741,24 @@ def test_read_excel_skiprows_list(self): 'skiprows_list', skiprows=np.array([0, 2])) tm.assert_frame_equal(actual, expected) + def test_read_excel_squeeze(self): + # GH 12157 + f = os.path.join(self.dirpath, 'test_squeeze' + self.ext) + + actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True) + expected = pd.Series([2, 3, 4], [4, 5, 6], name='b') + expected.index.name = 'a' + tm.assert_series_equal(actual, expected) + + actual = pd.read_excel(f, 'two_columns', squeeze=True) + expected = pd.DataFrame({'a': [4, 5, 6], + 'b': [2, 3, 4]}) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel(f, 'one_column', squeeze=True) + expected = pd.Series([1,2,3], name='a') + tm.assert_series_equal(actual, expected) + class XlsReaderTests(XlrdTests, tm.TestCase): ext = '.xls'