From cd8a55d9d1cb2c97aad4b96e38fd7d84d0c4b7d9 Mon Sep 17 00:00:00 2001 From: xiejxie Date: Tue, 25 Sep 2018 22:12:01 -0400 Subject: [PATCH 1/6] apply fix --- pandas/io/parsers.py | 13 ++++++++----- pandas/tests/io/parser/test_read_fwf.py | 9 +++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8d37bf4c84d5d..812f7405af453 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2301,7 +2301,6 @@ def read(self, rows=None): def _exclude_implicit_index(self, alldata): names = self._maybe_dedup_names(self.orig_names) - if self._implicit_index: excl_indices = self.index_col @@ -2382,7 +2381,6 @@ def _infer_columns(self): for level, hr in enumerate(header): try: line = self._buffered_line() - while self.line_pos <= hr: line = self._next_line() @@ -2552,7 +2550,8 @@ def _buffered_line(self): if len(self.buf) > 0: return self.buf[0] else: - return self._next_line() + s = self._next_line() + return s def _check_for_bom(self, first_row): """ @@ -2680,6 +2679,7 @@ def _next_line(self): self.line_pos += 1 self.buf.append(line) + return line def _alert_malformed(self, msg, row_num): @@ -2778,8 +2778,11 @@ def _remove_empty_lines(self, lines): ret = [] for l in lines: - # Remove empty lines and lines with only one whitespace value - if (len(l) > 1 or len(l) == 1 and + # Remove blank lines if they're not headers of the form ['', '', ... 
] + if not self.line_pos == 0 and ''.join([str(x) for x in l]).strip() != '': + ret.append(l) + # Remove header lines that are empty or with only one whitespace value + elif self.line_pos == 0 and (len(l) > 1 or len(l) == 1 and (not isinstance(l[0], compat.string_types) or l[0].strip())): ret.append(l) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index a60f2b5a4c946..25aeb0ac04e8c 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -223,6 +223,7 @@ def test_comment_fwf(self): [5, np.nan, 10.]]) df = read_fwf(StringIO(data), colspecs=[(0, 3), (4, 9), (9, 25)], comment='#') + print(df.values, expected) tm.assert_almost_equal(df.values, expected) def test_1000_fwf(self): @@ -434,3 +435,11 @@ def test_default_delimiter(self): header=None, skiprows=[0]) tm.assert_frame_equal(result, expected) + + def test_skip_blanklines(self): + data_expected = '''A,B + +C,D''' + expected = read_csv(StringIO(data_expected), header=None, skip_blank_lines=True) + result = read_fwf(StringIO(data_expected), colspecs=[(0, 1), (2, 3)], header=None, skip_blank_lines=True) + tm.assert_frame_equal(result, expected) From c9dc905da47866344e27cb78a89d44f59e4b97df Mon Sep 17 00:00:00 2001 From: xiejxie Date: Wed, 26 Sep 2018 19:30:44 -0400 Subject: [PATCH 2/6] Fix pep8 --- pandas/io/parsers.py | 18 ++++++++++++------ pandas/tests/io/parser/test_read_fwf.py | 6 ++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6cd5a250c1979..9ae6a1c88103d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2775,13 +2775,19 @@ def _remove_empty_lines(self, lines): ret = [] for l in lines: - # Remove blank lines if they're not headers of the form ['', '', ... ] - if not self.line_pos == 0 and ''.join([str(x) for x in l]).strip() != '': + # Remove blank lines if they're not headers of the + # form ['', '', ... 
] + if not self.line_pos == 0\ + and ''.join([str(x) for x in l]).strip() != '': ret.append(l) - # Remove header lines that are empty or with only one whitespace value - elif self.line_pos == 0 and (len(l) > 1 or len(l) == 1 and - (not isinstance(l[0], compat.string_types) or - l[0].strip())): + # Remove header lines that are empty or with only one + # whitespace value + elif self.line_pos == 0\ + and ( + len(l) > 1 or len(l) == 1 + and (not isinstance(l[0], + compat.string_types) or l[0].strip()) + ): ret.append(l) return ret diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 25aeb0ac04e8c..5067e4121d0c9 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -440,6 +440,8 @@ def test_skip_blanklines(self): data_expected = '''A,B C,D''' - expected = read_csv(StringIO(data_expected), header=None, skip_blank_lines=True) - result = read_fwf(StringIO(data_expected), colspecs=[(0, 1), (2, 3)], header=None, skip_blank_lines=True) + expected = read_csv(StringIO(data_expected), + header=None, skip_blank_lines=True) + result = read_fwf(StringIO(data_expected), colspecs=[(0, 1), (2, 3)], + header=None, skip_blank_lines=True) tm.assert_frame_equal(result, expected) From 39b8e5737b80ec6fa93218523781ed61ee72e4bd Mon Sep 17 00:00:00 2001 From: xiejxie Date: Wed, 26 Sep 2018 19:36:20 -0400 Subject: [PATCH 3/6] Fix unnecessary changes --- pandas/io/parsers.py | 6 +++--- pandas/tests/io/parser/test_read_fwf.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9ae6a1c88103d..bf6f2d24e546b 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2301,6 +2301,7 @@ def read(self, rows=None): def _exclude_implicit_index(self, alldata): names = self._maybe_dedup_names(self.orig_names) + if self._implicit_index: excl_indices = self.index_col @@ -2381,6 +2382,7 @@ def _infer_columns(self): for level, hr in 
enumerate(header): try: line = self._buffered_line() + while self.line_pos <= hr: line = self._next_line() @@ -2550,8 +2552,7 @@ def _buffered_line(self): if len(self.buf) > 0: return self.buf[0] else: - s = self._next_line() - return s + return self._next_line() def _check_for_bom(self, first_row): """ @@ -2679,7 +2680,6 @@ def _next_line(self): self.line_pos += 1 self.buf.append(line) - return line def _alert_malformed(self, msg, row_num): diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 5067e4121d0c9..908f60a3f697f 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -223,7 +223,6 @@ def test_comment_fwf(self): [5, np.nan, 10.]]) df = read_fwf(StringIO(data), colspecs=[(0, 3), (4, 9), (9, 25)], comment='#') - print(df.values, expected) tm.assert_almost_equal(df.values, expected) def test_1000_fwf(self): From 57c0904bd4da3d4f7212a4fef1e7a626dae661b3 Mon Sep 17 00:00:00 2001 From: xiejxie Date: Wed, 26 Sep 2018 19:42:16 -0400 Subject: [PATCH 4/6] Add new entry in whats new --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3b61fde77cb9f..2bc949976e24a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -763,6 +763,7 @@ I/O - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. 
(:issue:`21552`) - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) +- Fix :func:`read_fwf()` so that empty lines are skipped when ``skip_blank_lines=True`` (:issue:`22693`) Plotting ^^^^^^^^ From b59ea54e92ca035fd13f8282e9fa16d0580434b6 Mon Sep 17 00:00:00 2001 From: xiejxie Date: Wed, 26 Sep 2018 20:39:00 -0400 Subject: [PATCH 5/6] Move function --- pandas/io/parsers.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index bf6f2d24e546b..b3b6d4cad5a84 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2775,19 +2775,10 @@ def _remove_empty_lines(self, lines): ret = [] for l in lines: - # Remove blank lines if they're not headers of the - # form ['', '', ... ] - if not self.line_pos == 0\ - and ''.join([str(x) for x in l]).strip() != '': - ret.append(l) - # Remove header lines that are empty or with only one - # whitespace value - elif self.line_pos == 0\ - and ( - len(l) > 1 or len(l) == 1 - and (not isinstance(l[0], - compat.string_types) or l[0].strip()) - ): + # Remove empty lines and lines with only one whitespace value + if (len(l) > 1 or len(l) == 1 and + (not isinstance(l[0], compat.string_types) or + l[0].strip())): ret.append(l) return ret @@ -3477,3 +3468,22 @@ def __init__(self, f, **kwds): def _make_reader(self, f): self.data = FixedWidthReader(f, self.colspecs, self.delimiter, self.comment, self.skiprows) + + def _remove_empty_lines(self, lines): + ret = [] + for l in lines: + # Remove blank lines if they're not headers of the + # form ['', '', ... 
] + if not self.line_pos == 0\ + and ''.join([str(x) for x in l]).strip() != '': + ret.append(l) + # Remove header lines that are empty or with only one + # whitespace value + elif self.line_pos == 0 \ + and ( + len(l) > 1 or len(l) == 1 + and (not isinstance(l[0], + compat.string_types) or l[0].strip()) + ): + ret.append(l) + return ret From fafeb534c64084844757def6f712093897fc6637 Mon Sep 17 00:00:00 2001 From: xiejxie Date: Wed, 26 Sep 2018 21:11:03 -0400 Subject: [PATCH 6/6] pep8 --- pandas/io/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b3b6d4cad5a84..9a8d91b62d2b7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2777,8 +2777,8 @@ def _remove_empty_lines(self, lines): for l in lines: # Remove empty lines and lines with only one whitespace value if (len(l) > 1 or len(l) == 1 and - (not isinstance(l[0], compat.string_types) or - l[0].strip())): + (not isinstance(l[0], compat.string_types) or + l[0].strip())): ret.append(l) return ret