Skip to content

Commit ebab86c

Browse files
gfyoung authored and Krzysztof Chomski committed
DEPR: Deprecate tupleize_cols in read_csv (pandas-dev#17820)
xref pandas-dev#17060.
1 parent 2075a8c commit ebab86c

File tree

7 files changed

+56
-48
lines changed

7 files changed

+56
-48
lines changed

doc/source/io.rst

+4
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,10 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None``
343343
override values, a ParserWarning will be issued. See :class:`python:csv.Dialect`
344344
documentation for more details.
345345
tupleize_cols : boolean, default ``False``
346+
.. deprecated:: 0.21.0
347+
348+
This argument will be removed; column tuples will always be converted to a MultiIndex
349+
346350
Leave a list of tuples on columns as is (default is to convert to a MultiIndex
347351
on the columns).
348352

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,7 @@ Deprecations
717717

718718
- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`).
719719
- :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`)
720+
- :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`)
720721
- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`)
721722
- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`).
722723
- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`).

pandas/io/parsers.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,11 @@
260260
override values, a ParserWarning will be issued. See csv.Dialect
261261
documentation for more details.
262262
tupleize_cols : boolean, default False
263+
.. deprecated:: 0.21.0
264+
This argument will be removed and will always convert to MultiIndex
265+
263266
Leave a list of tuples on columns as is (default is to convert to
264-
a Multi Index on the columns)
267+
a MultiIndex on the columns)
265268
error_bad_lines : boolean, default True
266269
Lines with too many fields (e.g. a csv line with too many commas) will by
267270
default cause an exception to be raised, and no DataFrame will be returned.
@@ -510,6 +513,7 @@ def _read(filepath_or_buffer, kwds):
510513
'buffer_lines': None,
511514
'error_bad_lines': True,
512515
'warn_bad_lines': True,
516+
'tupleize_cols': False,
513517
'float_precision': None
514518
}
515519

@@ -529,6 +533,7 @@ def _read(filepath_or_buffer, kwds):
529533
'buffer_lines',
530534
'compact_ints',
531535
'use_unsigned',
536+
'tupleize_cols',
532537
}
533538

534539

@@ -962,6 +967,9 @@ def _clean_options(self, options, engine):
962967

963968
if arg == 'as_recarray':
964969
msg += ' Please call pd.to_csv(...).to_records() instead.'
970+
elif arg == 'tupleize_cols':
971+
msg += (' Column tuples will then '
972+
'always be converted to MultiIndex')
965973

966974
if result.get(arg, parser_default) != parser_default:
967975
depr_warning += msg + '\n\n'

pandas/tests/frame/test_to_csv.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -555,8 +555,12 @@ def _make_frame(names=None):
555555
# tupleize_cols=True and index=False
556556
df = _make_frame(True)
557557
df.to_csv(path, tupleize_cols=True, index=False)
558-
result = read_csv(
559-
path, header=0, tupleize_cols=True, index_col=None)
558+
559+
with tm.assert_produces_warning(FutureWarning,
560+
check_stacklevel=False):
561+
result = read_csv(path, header=0,
562+
tupleize_cols=True,
563+
index_col=None)
560564
result.columns = df.columns
561565
assert_frame_equal(df, result)
562566

@@ -576,8 +580,11 @@ def _make_frame(names=None):
576580
# column & index are multi-index (compatibility)
577581
df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
578582
df.to_csv(path, tupleize_cols=True)
579-
result = read_csv(path, header=0, index_col=[
580-
0, 1], tupleize_cols=True)
583+
584+
with tm.assert_produces_warning(FutureWarning,
585+
check_stacklevel=False):
586+
result = read_csv(path, header=0, index_col=[0, 1],
587+
tupleize_cols=True)
581588
result.columns = df.columns
582589
assert_frame_equal(df, result)
583590

pandas/tests/io/parser/header.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -105,13 +105,13 @@ def test_header_multi_index(self):
105105
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
106106
"""
107107

108-
df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[
109-
0, 1], tupleize_cols=False)
108+
df = self.read_csv(StringIO(data), header=[0, 1, 2, 3],
109+
index_col=[0, 1])
110110
tm.assert_frame_equal(df, expected)
111111

112112
# skipping lines in the header
113-
df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[
114-
0, 1], tupleize_cols=False)
113+
df = self.read_csv(StringIO(data), header=[0, 1, 2, 3],
114+
index_col=[0, 1])
115115
tm.assert_frame_equal(df, expected)
116116

117117
# INVALID OPTIONS
@@ -121,25 +121,22 @@ def test_header_multi_index(self):
121121
FutureWarning, check_stacklevel=False):
122122
pytest.raises(ValueError, self.read_csv,
123123
StringIO(data), header=[0, 1, 2, 3],
124-
index_col=[0, 1], as_recarray=True,
125-
tupleize_cols=False)
124+
index_col=[0, 1], as_recarray=True)
126125

127126
# names
128127
pytest.raises(ValueError, self.read_csv,
129128
StringIO(data), header=[0, 1, 2, 3],
130-
index_col=[0, 1], names=['foo', 'bar'],
131-
tupleize_cols=False)
129+
index_col=[0, 1], names=['foo', 'bar'])
132130

133131
# usecols
134132
pytest.raises(ValueError, self.read_csv,
135133
StringIO(data), header=[0, 1, 2, 3],
136-
index_col=[0, 1], usecols=['foo', 'bar'],
137-
tupleize_cols=False)
134+
index_col=[0, 1], usecols=['foo', 'bar'])
138135

139136
# non-numeric index_col
140137
pytest.raises(ValueError, self.read_csv,
141138
StringIO(data), header=[0, 1, 2, 3],
142-
index_col=['foo', 'bar'], tupleize_cols=False)
139+
index_col=['foo', 'bar'])
143140

144141
def test_header_multiindex_common_format(self):
145142

pandas/tests/io/parser/python_parser_only.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,7 @@ def test_none_delimiter(self):
232232
result = self.read_csv(StringIO(data), header=0,
233233
sep=None,
234234
error_bad_lines=False,
235-
warn_bad_lines=True,
236-
engine='python',
237-
tupleize_cols=True)
235+
warn_bad_lines=True)
238236
tm.assert_frame_equal(result, expected)
239237

240238
def test_skipfooter_bad_row(self):

pandas/tests/io/parser/test_unsupported.py

+22-29
Original file line numberDiff line numberDiff line change
@@ -127,32 +127,25 @@ def read(self):
127127

128128
class TestDeprecatedFeatures(object):
129129

130-
def test_deprecated_args(self):
131-
data = '1,2,3'
132-
133-
# deprecated arguments with non-default values
134-
deprecated = {
135-
'as_recarray': True,
136-
'buffer_lines': True,
137-
'compact_ints': True,
138-
'use_unsigned': True,
139-
'skip_footer': 1,
140-
}
141-
142-
engines = 'c', 'python'
143-
144-
for engine in engines:
145-
for arg, non_default_val in deprecated.items():
146-
if engine == 'c' and arg == 'skip_footer':
147-
# unsupported --> exception is raised
148-
continue
149-
150-
if engine == 'python' and arg == 'buffer_lines':
151-
# unsupported --> exception is raised
152-
continue
153-
154-
with tm.assert_produces_warning(
155-
FutureWarning, check_stacklevel=False):
156-
kwargs = {arg: non_default_val}
157-
read_csv(StringIO(data), engine=engine,
158-
**kwargs)
130+
@pytest.mark.parametrize("engine", ["c", "python"])
131+
@pytest.mark.parametrize("kwargs", [{"as_recarray": True},
132+
{"buffer_lines": True},
133+
{"compact_ints": True},
134+
{"use_unsigned": True},
135+
{"tupleize_cols": True},
136+
{"skip_footer": 1}])
137+
def test_deprecated_args(self, engine, kwargs):
138+
data = "1,2,3"
139+
arg, _ = list(kwargs.items())[0]
140+
141+
if engine == "c" and arg == "skip_footer":
142+
# unsupported --> exception is raised
143+
return
144+
145+
if engine == "python" and arg == "buffer_lines":
146+
# unsupported --> exception is raised
147+
return
148+
149+
with tm.assert_produces_warning(
150+
FutureWarning, check_stacklevel=False):
151+
read_csv(StringIO(data), engine=engine, **kwargs)

0 commit comments

Comments
 (0)