Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,7 @@ I/O
- Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
- Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
- Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
- Bug in :meth:`read_csv` with ``engine="python"`` where rows excluded by ``skiprows`` were not skipped before CSV parsing, so a malformed skipped row (e.g. one holding an unclosed quote) could raise ``EmptyDataError`` (:issue:`62739`)
- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
- Bug in :meth:`read_csv` for the ``c`` and ``python`` engines where parsing numbers with large exponents caused overflows. Now, numbers with large positive exponents are parsed as ``inf`` or ``-inf`` depending on the sign of the mantissa, while those with large negative exponents are parsed as ``0.0`` (:issue:`62617`, :issue:`38794`, :issue:`62740`)
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
Expand Down
9 changes: 9 additions & 0 deletions pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,15 @@ class MyDialect(csv.Dialect):

if sep is not None:
dia.delimiter = sep
# Skip rows at file level before csv.reader sees them
# prevents CSV parsing errors on lines that will be discarded
if self.skiprows is not None:
while self.skipfunc(self.pos):
self.pos += 1
try:
f.readline()
except (StopIteration, AttributeError):
break
else:
# attempt to sniff the delimiter from the first valid line,
# i.e. no comment line and not in skiprows
Expand Down
43 changes: 43 additions & 0 deletions pandas/tests/io/parser/test_python_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,3 +599,46 @@ def fixer(bad_line):
)

tm.assert_frame_equal(result, expected)


def test_read_csv_leading_quote_skip(python_parser_only):
    # GH 62739
    # The first row holds only an unclosed quote; because it is skipped,
    # the remaining rows should parse as an ordinary header + one record.
    data = """\
"
a b
1 3
"""
    expected = DataFrame({"a": [1], "b": [3]})

    result = python_parser_only.read_csv(StringIO(data), delimiter=" ", skiprows=1)

    tm.assert_frame_equal(result, expected)


def test_read_csv_unclosed_double_quote_in_data_still_errors(python_parser_only):
    # GH 62739
    # Only the first row is skipped here; the lone quote sits on a later row
    # that is kept, so the parser is still expected to reject the input.
    data = """\
comment line
a b
"
1 3
"""
    msg = "unexpected end of data"

    with pytest.raises(ParserError, match=msg):
        python_parser_only.read_csv(StringIO(data), delimiter=" ", skiprows=1)


def test_read_csv_skiprows_zero(python_parser_only):
    # GH 62739
    # With skiprows=0 nothing is discarded, so the lone quote on the first
    # row reaches the CSV reader and must still be reported as an error.
    tbl = """\
"
a b
1 3
"""
    parser = python_parser_only
    # No explicit ``engine`` argument: like the sibling tests, this relies on
    # the ``python_parser_only`` fixture to select the parser under test.
    with pytest.raises(ParserError, match="unexpected end of data"):
        parser.read_csv(StringIO(tbl), delimiter=" ", skiprows=0)
Loading