@@ -64,9 +64,11 @@ class ParserWarning(Warning):
6464 pass ``header=0`` to be able to replace existing names. The header can be
6565 a list of integers that specify row locations for a multi-index on the
6666 columns E.g. [0,1,3]. Intervening rows that are not specified will be
67- skipped. (E.g. 2 in this example are skipped)
67+ skipped (e.g. row 2 in this example is skipped). Note that this parameter
68+ ignores commented lines, so header=0 denotes the first line of
69+ data rather than the first line of the file.
6870skiprows : list-like or integer
69- Row numbers to skip (0-indexed) or number of rows to skip (int)
71+ Line numbers to skip (0-indexed) or number of lines to skip (int)
7072 at the start of the file
7173index_col : int or sequence or False, default None
7274 Column to use as the row labels of the DataFrame. If a sequence is given, a
@@ -106,8 +108,12 @@ class ParserWarning(Warning):
106108thousands : str, default None
107109 Thousands separator
108110comment : str, default None
109- Indicates remainder of line should not be parsed
110- Does not support line commenting (will return empty line)
111+ Indicates remainder of line should not be parsed. If found at the
112+ beginning of a line, the line will be ignored altogether. This parameter
113+ must be a single character. Also, fully commented lines
114+ are ignored by the parameter `header` but not by `skiprows`. For example,
115+ if comment='#', parsing '#empty\n1,2,3\na,b,c' with `header=0` will
116+ result in '1,2,3' being treated as the header.
111117decimal : str, default '.'
112118 Character to recognize as decimal point. E.g. use ',' for European data
113119nrows : int, default None
@@ -1313,6 +1319,7 @@ def __init__(self, f, **kwds):
13131319 self .data = None
13141320 self .buf = []
13151321 self .pos = 0
1322+ self .line_pos = 0
13161323
13171324 self .encoding = kwds ['encoding' ]
13181325 self .compression = kwds ['compression' ]
@@ -1459,6 +1466,7 @@ class MyDialect(csv.Dialect):
14591466 line = self ._check_comments ([line ])[0 ]
14601467
14611468 self .pos += 1
1469+ self .line_pos += 1
14621470 sniffed = csv .Sniffer ().sniff (line )
14631471 dia .delimiter = sniffed .delimiter
14641472 if self .encoding is not None :
@@ -1566,7 +1574,7 @@ def _infer_columns(self):
15661574 if self .header is not None :
15671575 header = self .header
15681576
1569- # we have a mi columns, so read and extra line
1577+ # we have a mi columns, so read an extra line
15701578 if isinstance (header , (list , tuple , np .ndarray )):
15711579 have_mi_columns = True
15721580 header = list (header ) + [header [- 1 ] + 1 ]
@@ -1578,9 +1586,8 @@ def _infer_columns(self):
15781586 for level , hr in enumerate (header ):
15791587 line = self ._buffered_line ()
15801588
1581- while self .pos <= hr :
1589+ while self .line_pos <= hr :
15821590 line = self ._next_line ()
1583-
15841591 unnamed_count = 0
15851592 this_columns = []
15861593 for i , c in enumerate (line ):
@@ -1705,25 +1712,36 @@ def _buffered_line(self):
17051712 else :
17061713 return self ._next_line ()
17071714
1715+ def _empty (self , line ):
1716+ return not line or all (not x for x in line )
1717+
def _next_line(self):
    """Fetch the next usable line, buffer it, and return it.

    Works on ``self.data`` either as a list (indexed by ``self.pos``) or
    as an iterator (advanced with ``next``).  Line numbers listed in
    ``self.skiprows`` are stepped over first.  After that, raw lines are
    consumed until one is found that was blank to begin with or that is
    still non-empty after ``self._check_comments`` has processed it.
    NOTE(review): ``_check_comments`` is defined outside this view --
    presumably it strips comment text so that fully commented lines come
    back empty; confirm against its definition.

    Side effects: ``self.pos`` is incremented for every raw line
    consumed or skipped, ``self.line_pos`` is incremented once per
    returned line, and the returned line is appended to ``self.buf``.

    Raises
    ------
    StopIteration
        When the list is exhausted, or propagated from ``next`` when the
        iterator is exhausted.
    """
    if isinstance(self.data, list):
        while self.pos in self.skiprows:
            self.pos += 1

        while True:
            # Only the index lookup may signal end-of-data.  Keeping
            # _check_comments outside the try stops an IndexError raised
            # inside that helper from being mistaken for exhaustion.
            try:
                orig_line = self.data[self.pos]
            except IndexError:
                raise StopIteration
            line = self._check_comments([orig_line])[0]
            self.pos += 1
            # either uncommented or blank to begin with
            if self._empty(orig_line) or line:
                break
    else:
        while self.pos in self.skiprows:
            next(self.data)
            self.pos += 1

        while True:
            orig_line = next(self.data)
            line = self._check_comments([orig_line])[0]
            self.pos += 1
            # either uncommented or blank to begin with
            if self._empty(orig_line) or line:
                break

    # line_pos counts returned lines only, unlike pos which counts every
    # raw line consumed.
    self.line_pos += 1
    self.buf.append(line)

    return line
0 commit comments