Skip to content

Commit 85809e8

Browse files
committed
Merge pull request #7029 from mcwitt/csv_mi_bug
BUG: fix reading multi-index data in python parser
2 parents b117b7b + 8651a43 commit 85809e8

File tree

3 files changed

+15
-6
lines changed

3 files changed

+15
-6
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,7 @@ Bug Fixes
457457
- accept ``TextFileReader`` in ``concat``, which was affecting a common user idiom (:issue:`6583`)
458458
- Bug in C parser with leading whitespace (:issue:`3374`)
459459
- Bug in C parser with ``delim_whitespace=True`` and ``\r``-delimited lines
460+
- Bug in Python parser when explicit multi-index names are given in the row following the column header (:issue:`6893`)
460461
- Bug in ``Series.rank`` and ``DataFrame.rank`` that caused small floats (<1e-13) to all receive the same rank (:issue:`6886`)
461462
- Bug in ``DataFrame.apply`` with functions that used ``*args`` or ``**kwargs`` and returned
462463
an empty result (:issue:`6952`)

pandas/io/parsers.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,7 +1383,7 @@ def __init__(self, f, **kwds):
13831383
# multiple date column thing turning into a real spaghetti factory
13841384
if not self._has_complex_date_col:
13851385
(index_names,
1386-
self.orig_names, columns_) = self._get_index_name(self.columns)
1386+
self.orig_names, self.columns) = self._get_index_name(self.columns)
13871387
self._name_processed = True
13881388
if self.index_names is None:
13891389
self.index_names = index_names
@@ -1811,8 +1811,9 @@ def _get_index_name(self, columns):
18111811
columns.insert(0, c)
18121812

18131813
# Update list of original names to include all indices.
1814-
self.num_original_columns = len(next_line)
1815-
return line, columns, orig_names
1814+
orig_names = list(columns)
1815+
self.num_original_columns = len(columns)
1816+
return line, orig_names, columns
18161817

18171818
if implicit_first_cols > 0:
18181819
# Case 1
@@ -1824,7 +1825,7 @@ def _get_index_name(self, columns):
18241825

18251826
else:
18261827
# Case 2
1827-
(index_name, columns,
1828+
(index_name, columns_,
18281829
self.index_col) = _clean_index_names(columns, self.index_col)
18291830

18301831
return index_name, orig_names, columns

pandas/io/tests/test_parsers.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,7 +1569,7 @@ def test_converter_return_string_bug(self):
15691569

15701570
def test_read_table_buglet_4x_multiindex(self):
15711571
# GH 6607
1572-
# Parsing multiindex columns currently causes an error in the C parser.
1572+
# Parsing multi-level index currently causes an error in the C parser.
15731573
# Temporarily copied to TestPythonParser.
15741574
# Here test that CParserError is raised:
15751575

@@ -2692,7 +2692,7 @@ def test_decompression_regex_sep(self):
26922692
def test_read_table_buglet_4x_multiindex(self):
26932693
# GH 6607
26942694
# This is a copy which should eventually be merged into ParserTests
2695-
# when the issue with multiindex columns is fixed in the C parser.
2695+
# when the issue with multi-level index is fixed in the C parser.
26962696

26972697
text = """ A B C D E
26982698
one two three four
@@ -2704,6 +2704,13 @@ def test_read_table_buglet_4x_multiindex(self):
27042704
df = self.read_table(StringIO(text), sep='\s+')
27052705
self.assertEquals(df.index.names, ('one', 'two', 'three', 'four'))
27062706

2707+
# GH 6893
2708+
data = ' A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9'
2709+
expected = DataFrame.from_records([(1,3,7,0,3,6), (3,1,4,1,5,9)],
2710+
columns=list('abcABC'), index=list('abc'))
2711+
actual = self.read_table(StringIO(data), sep='\s+')
2712+
tm.assert_frame_equal(actual, expected)
2713+
27072714
class TestFwfColspaceSniffing(tm.TestCase):
27082715
def test_full_file(self):
27092716
# File with all values

0 commit comments

Comments
 (0)