Skip to content

Commit 539f345

Browse files
Carter GreenCarter Green
Carter Green
authored and
Carter Green
committed
BUG: #7757 Fix CSV parsing of singleton list header
Fix header list manipulation resulting in NaN DataFrame. Write new test for the bug. Update what's new.
1 parent f9a552d commit 539f345

File tree

4 files changed

+24
-12
lines changed

4 files changed

+24
-12
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ I/O
283283
- Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`)
284284
- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`).
285285
- Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`).
286+
- Bug in :func:`read_csv` in which passing a single-element list as ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`)
286287
- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`)
287288
- Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`)
288289

pandas/_libs/parsers.pyx

+12-9
Original file line numberDiff line numberDiff line change
@@ -535,23 +535,26 @@ cdef class TextReader:
535535
self.parser_start = 0
536536
self.header = []
537537
else:
538-
if isinstance(header, list) and len(header):
539-
# need to artifically skip the final line
540-
# which is still a header line
541-
header = list(header)
542-
header.append(header[-1] + 1)
538+
if isinstance(header, list):
539+
if len(header) > 1:
540+
# need to artificially skip the final line
541+
# which is still a header line
542+
header = list(header)
543+
header.append(header[-1] + 1)
544+
self.parser.header_end = header[-1]
545+
self.has_mi_columns = 1
546+
else:
547+
self.parser.header_end = header[0]
543548

549+
self.parser_start = header[-1] + 1
544550
self.parser.header_start = header[0]
545-
self.parser.header_end = header[-1]
546551
self.parser.header = header[0]
547-
self.parser_start = header[-1] + 1
548-
self.has_mi_columns = 1
549552
self.header = header
550553
else:
551554
self.parser.header_start = header
552555
self.parser.header_end = header
553-
self.parser.header = header
554556
self.parser_start = header + 1
557+
self.parser.header = header
555558
self.header = [ header ]
556559

557560
self.names = names

pandas/io/parsers.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -2279,10 +2279,11 @@ def _infer_columns(self):
22792279
if self.header is not None:
22802280
header = self.header
22812281

2282-
# we have a mi columns, so read an extra line
22832282
if isinstance(header, (list, tuple, np.ndarray)):
2284-
have_mi_columns = True
2285-
header = list(header) + [header[-1] + 1]
2283+
have_mi_columns = len(header) > 1
2284+
# we have MultiIndex (mi) columns, so read an extra line
2285+
if have_mi_columns:
2286+
header = list(header) + [header[-1] + 1]
22862287
else:
22872288
have_mi_columns = False
22882289
header = [header]

pandas/tests/io/parser/header.py

+7
Original file line numberDiff line numberDiff line change
@@ -286,3 +286,10 @@ def test_non_int_header(self):
286286
self.read_csv(StringIO(data), sep=',', header=['a', 'b'])
287287
with tm.assert_raises_regex(ValueError, msg):
288288
self.read_csv(StringIO(data), sep=',', header='string_header')
289+
290+
def test_singleton_header(self):
291+
# See GH #7757
292+
data = """a,b,c\n0,1,2\n1,2,3"""
293+
df = self.read_csv(StringIO(data), header=[0])
294+
expected = self.read_csv(StringIO(data), header=0)
295+
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)