diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d571c0f2d9620..95f5ed6916936 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1184,6 +1184,7 @@ I/O - Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) - Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`) - Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) +- Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 10f8c53987471..a968a2b9623d9 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1036,6 +1036,37 @@ def _evaluate_usecols(usecols, names): return usecols +def _validate_skipfooter_arg(skipfooter): + """ + Validate the 'skipfooter' parameter. + + Checks whether 'skipfooter' is a non-negative integer. + Raises a ValueError if that is not the case. + + Parameters + ---------- + skipfooter : non-negative integer + The number of rows to skip at the end of the file. + + Returns + ------- + validated_skipfooter : non-negative integer + The original input if the validation succeeds. + + Raises + ------ + ValueError : 'skipfooter' was not a non-negative integer. + """ + + if not is_integer(skipfooter): + raise ValueError("skipfooter must be an integer") + + if skipfooter < 0: + raise ValueError("skipfooter cannot be negative") + + return skipfooter + + def _validate_usecols_arg(usecols): """ Validate the 'usecols' parameter. @@ -1880,7 +1911,7 @@ def __init__(self, f, **kwds): else: self.skipfunc = lambda x: x in self.skiprows - self.skipfooter = kwds['skipfooter'] + self.skipfooter = _validate_skipfooter_arg(kwds['skipfooter']) self.delimiter = kwds['delimiter'] self.quotechar = kwds['quotechar'] @@ -2684,9 +2715,6 @@ def _get_index_name(self, columns): return index_name, orig_names, columns def _rows_to_cols(self, content): - if self.skipfooter < 0: - raise ValueError('skip footer cannot be negative') - col_len = self.num_original_columns if self._implicit_index: diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index ee0f00506cef3..ab30301e710a6 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -546,7 +546,7 @@ def test_iterator(self): if self.engine == 'python': # test bad parameter (skipfooter) reader = self.read_csv(StringIO(self.data1), index_col=0, - iterator=True, skipfooter=True) + iterator=True, skipfooter=1) self.assertRaises(ValueError, reader.read, 3) def test_pass_names_with_index(self): diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index 9a1eb94270e28..510e3c689649c 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -20,20 +20,22 @@ class PythonParserTests(object): - def test_negative_skipfooter_raises(self): - text = """#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -#foo,a,b,c -1/1/2000,1.,2.,3. -1/2/2000,4,5,6 -1/3/2000,7,8,9 -""" + def test_invalid_skipfooter(self): + text = "a\n1\n2" + + # see gh-15925 (comment) + msg = "skipfooter must be an integer" + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(text), skipfooter="foo") + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(text), skipfooter=1.5) + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(text), skipfooter=True) - with tm.assertRaisesRegexp( - ValueError, 'skip footer cannot be negative'): + msg = "skipfooter cannot be negative" + with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(StringIO(text), skipfooter=-1) def test_sniff_delimiter(self): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 14146a3ad1e9a..9637b449de6da 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -112,8 +112,8 @@ def test_deprecated_args(self): 'as_recarray': True, 'buffer_lines': True, 'compact_ints': True, - 'skip_footer': True, 'use_unsigned': True, + 'skip_footer': 1, } engines = 'c', 'python'