pandas-dev · georginas05 · Oct 26, 2025 · Oct 26, 2025 · Oct 26, 2025 · Oct 26, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -1106,6 +1106,7 @@ I/O
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
 - Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
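+- Bug in :meth:`read_csv` with ``engine="python"`` where rows excluded by ``skiprows`` were still passed to the CSV reader when ``sep`` was specified, so a malformed skipped line (e.g. one containing an unclosed quote) raised ``EmptyDataError`` (:issue:`62739`)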
 - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
 - Bug in :meth:`read_csv` for the ``c`` and ``python`` engines where parsing numbers with large exponents caused overflows. Now, numbers with large positive exponents are parsed as ``inf`` or ``-inf`` depending on the sign of the mantissa, while those with large negative exponents are parsed as ``0.0`` (:issue:`62617`, :issue:`38794`, :issue:`62740`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -218,6 +218,14 @@ class MyDialect(csv.Dialect):
 
             if sep is not None:
                 dia.delimiter = sep
+                # Skip rows at the file level, before csv.reader sees them; this
+                # prevents CSV parsing errors on lines that will be discarded anyway.
+                if self.skiprows is not None:
+                    while self.skipfunc(self.pos):
+                        line = f.readline()
+                        if not line:
+                            break
+                        self.pos += 1
             else:
                 # attempt to sniff the delimiter from the first valid line,
                 # i.e. no comment line and not in skiprows

diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
@@ -599,3 +599,45 @@ def fixer(bad_line):
         )
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_read_csv_leading_quote_skip(python_parser_only):
+    # GH 62739: the skipped first row holds a lone quote; the rest must parse
+    tbl = """\
+    "
+a b
+1 3
+"""
+    parser = python_parser_only
+    result = parser.read_csv(
+        StringIO(tbl),
+        delimiter=" ",
+        skiprows=1,
+    )
+    expected = DataFrame({"a": [1], "b": [3]})
+    tm.assert_frame_equal(result, expected)
+
+
+def test_read_csv_unclosed_double_quote_in_data_still_errors(python_parser_only):
+    # GH 62739: only the first row is skipped; the later lone quote must still raise
+    tbl = """\
+a b
+"
+1 3
+"""
+    parser = python_parser_only
+    with pytest.raises(ParserError, match="unexpected end of data"):
+        parser.read_csv(StringIO(tbl), delimiter=" ", skiprows=1)
+
+
+def test_read_csv_skiprows_zero(python_parser_only):
+    # GH 62739: with skiprows=0 the lone quote on the first row must still raise
+    tbl = """\
+"
+a b
+1 3
+"""
+    parser = python_parser_only
+    # don't skip anything; the fixture already selects the python engine
+    with pytest.raises(ParserError, match="unexpected end of data"):
+        parser.read_csv(StringIO(tbl), delimiter=" ", skiprows=0)