From 36fbc414fc03abfcc2103d934f395260134b490c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 13 Oct 2017 09:50:44 -0700 Subject: [PATCH] DEPR: Deprecate read_csv arguments fully Issue warnings on `read_csv` deprecated args in full, even if the normal defaults were passed in. Closes gh-17828. --- pandas/core/frame.py | 2 +- pandas/io/parsers.py | 29 +++++++++---- pandas/tests/frame/test_to_csv.py | 47 +++++++++++----------- pandas/tests/io/parser/test_unsupported.py | 5 +++ 4 files changed, 51 insertions(+), 32 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c7e8c0da75e2c..97943f153319b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1294,7 +1294,7 @@ def _from_arrays(cls, arrays, columns, index, dtype=None): @classmethod def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, - encoding=None, tupleize_cols=False, + encoding=None, tupleize_cols=None, infer_datetime_format=False): """ Read CSV file (DEPRECATED, please use :func:`pandas.read_csv` diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3c94871003dd0..1b6414ea974fa 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -529,6 +529,14 @@ def _read(filepath_or_buffer, kwds): 'buffer_lines', 'float_precision', } + +_deprecated_defaults = { + 'as_recarray': None, + 'buffer_lines': None, + 'compact_ints': None, + 'use_unsigned': None, + 'tupleize_cols': None +} _deprecated_args = { 'as_recarray', 'buffer_lines', @@ -594,7 +602,7 @@ def parser_f(filepath_or_buffer, comment=None, encoding=None, dialect=None, - tupleize_cols=False, + tupleize_cols=None, # Error Handling error_bad_lines=True, @@ -606,9 +614,9 @@ def parser_f(filepath_or_buffer, # Internal doublequote=True, delim_whitespace=False, - as_recarray=False, - compact_ints=False, - use_unsigned=False, + as_recarray=None, + compact_ints=None, + use_unsigned=None, low_memory=_c_parser_defaults['low_memory'], buffer_lines=None, memory_map=False, @@ -831,12 +839,14 @@ def _get_options_with_defaults(self, engine): if ('python' in engine and argname not in _python_unsupported): pass + elif value == _deprecated_defaults.get(argname, default): + pass else: raise ValueError( 'The %r option is not supported with the' ' %r engine' % (argname, engine)) else: - value = default + value = _deprecated_defaults.get(argname, default) options[argname] = value if engine == 'python-fwf': @@ -962,6 +972,8 @@ def _clean_options(self, options, engine): for arg in _deprecated_args: parser_default = _c_parser_defaults[arg] + depr_default = _deprecated_defaults[arg] + msg = ("The '{arg}' argument has been deprecated " "and will be removed in a future version." .format(arg=arg)) @@ -970,10 +982,13 @@ def _clean_options(self, options, engine): msg += ' Please call pd.to_csv(...).to_records() instead.' elif arg == 'tupleize_cols': msg += (' Column tuples will then ' - 'always be converted to MultiIndex') + 'always be converted to MultiIndex.') - if result.get(arg, parser_default) != parser_default: + if result.get(arg, depr_default) != depr_default: + # raise Exception(result.get(arg, depr_default), depr_default) depr_warning += msg + '\n\n' + else: + result[arg] = parser_default if depr_warning != '': warnings.warn(depr_warning, FutureWarning, stacklevel=2) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index a8449d6f874df..b0cc414861818 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -267,8 +267,8 @@ def _do_test(df, r_dtype=None, c_dtype=None, with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', - chunksize=chunksize, tupleize_cols=False) - recons = self.read_csv(path, tupleize_cols=False, **kwargs) + chunksize=chunksize) + recons = self.read_csv(path, **kwargs) else: kwargs['header'] = 0 @@ -542,35 +542,35 @@ def _make_frame(names=None): # column & index are multi-index df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[ - 0, 1], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], + index_col=[0, 1]) assert_frame_equal(df, result) # column is mi df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) + df.to_csv(path) result = read_csv( - path, header=[0, 1, 2, 3], index_col=0, tupleize_cols=False) + path, header=[0, 1, 2, 3], index_col=0) assert_frame_equal(df, result) # dup column names? df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[ - 0, 1, 2], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], + index_col=[0, 1, 2]) assert_frame_equal(df, result) # writing with no index df = _make_frame() - df.to_csv(path, tupleize_cols=False, index=False) - result = read_csv(path, header=[0, 1], tupleize_cols=False) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) assert_frame_equal(df, result) # we lose the names here df = _make_frame(True) - df.to_csv(path, tupleize_cols=False, index=False) - result = read_csv(path, header=[0, 1], tupleize_cols=False) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) assert _all_none(*result.columns.names) result.columns.names = df.columns.names assert_frame_equal(df, result) @@ -589,15 +589,15 @@ def _make_frame(names=None): # whatsnew example df = _make_frame() - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1], index_col=[ - 0], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1], + index_col=[0]) assert_frame_equal(df, result) df = _make_frame(True) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1], index_col=[ - 0], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1], + index_col=[0]) assert_frame_equal(df, result) # column & index are multi-index (compatibility) @@ -613,18 +613,17 @@ def _make_frame(names=None): # invalid options df = _make_frame(True) - df.to_csv(path, tupleize_cols=False) + df.to_csv(path) for i in [6, 7]: msg = 'len of {i}, but only 5 lines in file'.format(i=i) with tm.assert_raises_regex(ParserError, msg): - read_csv(path, tupleize_cols=False, - header=lrange(i), index_col=0) + read_csv(path, header=lrange(i), index_col=0) # write with cols with tm.assert_raises_regex(TypeError, 'cannot specify cols ' 'with a MultiIndex'): - df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar']) + df.to_csv(path, columns=['foo', 'bar']) with ensure_clean('__tmp_to_csv_multiindex__') as path: # empty diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 2e73ce6aa19b0..189a113bb6abb 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -129,10 +129,15 @@ class TestDeprecatedFeatures(object): @pytest.mark.parametrize("engine", ["c", "python"]) @pytest.mark.parametrize("kwargs", [{"as_recarray": True}, + {"as_recarray": False}, {"buffer_lines": True}, + {"buffer_lines": False}, {"compact_ints": True}, + {"compact_ints": False}, {"use_unsigned": True}, + {"use_unsigned": False}, {"tupleize_cols": True}, + {"tupleize_cols": False}, {"skip_footer": 1}]) def test_deprecated_args(self, engine, kwargs): data = "1,2,3"