diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 481c31d2410a9..3da7c30a231df 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -767,6 +767,7 @@ I/O
 - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
 - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
 - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
+- Bug in :func:`read_fwf()` where blank lines were not skipped when ``skip_blank_lines=True`` (:issue:`22693`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index a4f1155117b12..9a8d91b62d2b7 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -3468,3 +3468,20 @@ def __init__(self, f, **kwds):
     def _make_reader(self, f):
         self.data = FixedWidthReader(f, self.colspecs, self.delimiter,
                                      self.comment, self.skiprows)
+
+    def _remove_empty_lines(self, lines):
+        """
+        Return ``lines`` without the blank rows.
+
+        With fixed-width fields a blank row shows up as a list of empty
+        or whitespace-only strings such as ``['', '', ...]`` rather than
+        as an empty list, so a row is kept only when at least one field
+        is not a string or is a string with non-whitespace content.
+        The same rule applies to every row, whatever its position.
+        """
+        return [
+            line for line in lines
+            if any(not isinstance(e, compat.string_types) or e.strip()
+                   for e in line)
+        ]
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index a60f2b5a4c946..908f60a3f697f 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -434,3 +434,14 @@ def test_default_delimiter(self):
                           header=None,
                           skiprows=[0])
         tm.assert_frame_equal(result, expected)
+
+    def test_skip_blanklines(self):
+        # see gh-22693
+        data_expected = '''A,B
+
+C,D'''
+        expected = read_csv(StringIO(data_expected),
+                            header=None, skip_blank_lines=True)
+        result = read_fwf(StringIO(data_expected), colspecs=[(0, 1), (2, 3)],
+                          header=None, skip_blank_lines=True)
+        tm.assert_frame_equal(result, expected)