From eef347bba3063abe0e9b688db967ced23e48b46e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 16 Nov 2020 23:47:10 -0500 Subject: [PATCH] use compression=None (again) to avoid inferring compression --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/common.py | 5 ++++- pandas/tests/io/parser/test_compression.py | 17 +++++++++++++++-- pandas/tests/io/parser/test_read_fwf.py | 2 +- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 62da3c0c5cddc..0a57435ea4300 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -630,6 +630,7 @@ I/O - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) - Bug in :meth:`read_fw` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`) +- :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other ``read_*`` functions (:issue:`37909`) Period ^^^^^^ diff --git a/pandas/io/common.py b/pandas/io/common.py index 695c1671abd61..8ec0a869c7042 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -468,8 +468,11 @@ def infer_compression( ------ ValueError on invalid compression specified. """ + if compression is None: + return None + # Infer compression - if compression in ("infer", None): + if compression == "infer": # Convert all path types (e.g. 
pathlib.Path) to strings filepath_or_buffer = stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, str): diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py index 6e957313d8de8..690d3133dae5e 100644 --- a/pandas/tests/io/parser/test_compression.py +++ b/pandas/tests/io/parser/test_compression.py @@ -4,11 +4,12 @@ """ import os +from pathlib import Path import zipfile import pytest -import pandas as pd +from pandas import DataFrame import pandas._testing as tm @@ -130,7 +131,7 @@ def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding path = os.path.join(csv_dir_path, f"utf{utf_value}_ex_small.zip") result = parser.read_csv(path, encoding=encoding, compression="zip", sep="\t") - expected = pd.DataFrame( + expected = DataFrame( { "Country": ["Venezuela", "Venezuela"], "Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."], @@ -149,3 +150,15 @@ def test_invalid_compression(all_parsers, invalid_compression): with pytest.raises(ValueError, match=msg): parser.read_csv("test_file.zip", **compress_kwargs) + + +def test_ignore_compression_extension(all_parsers): + parser = all_parsers + df = DataFrame({"a": [0, 1]}) + with tm.ensure_clean("test.csv") as path_csv: + with tm.ensure_clean("test.csv.zip") as path_zip: + # make sure to create un-compressed file with zip extension + df.to_csv(path_csv, index=False) + Path(path_zip).write_text(Path(path_csv).read_text()) + + tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 5e9609956183b..d684bb36c3911 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -638,7 +638,7 @@ def test_default_delimiter(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("infer", [True, False, None]) +@pytest.mark.parametrize("infer", [True, False]) def 
test_fwf_compression(compression_only, infer): data = """1111111111 2222222222