pandas-dev · mroeschke · Jul 7, 2023 · Jun 26, 2023 · Jun 26, 2023 · Jun 26, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -453,6 +453,7 @@ I/O
 - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
 - Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`)
 - Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`)
+- Bug where ``bz2`` was treated as a hard requirement (:issue:`53857`)
 
 Period
 ^^^^^^

diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import bz2
 import gzip
 import io
 import tarfile
@@ -11,7 +10,10 @@
 )
 import zipfile
 
-from pandas.compat import get_lzma_file
+from pandas.compat import (
+    get_bz2_file,
+    get_lzma_file,
+)
 from pandas.compat._optional import import_optional_dependency
 
 import pandas as pd
@@ -157,7 +159,7 @@ def write_to_compressed(compression, path, data, dest: str = "test"):
     elif compression == "gzip":
         compress_method = gzip.GzipFile
     elif compression == "bz2":
-        compress_method = bz2.BZ2File
+        compress_method = get_bz2_file()
     elif compression == "zstd":
         compress_method = import_optional_dependency("zstandard").open
     elif compression == "xz":

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -154,6 +154,29 @@ def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
     return pandas.compat.compressors.LZMAFile
 
 
+def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
+    """
+    Return the pandas `BZ2File` class, a subclass of `bz2.BZ2File`.
+
+    Returns
+    -------
+    class
+        The `BZ2File` class from `pandas.compat.compressors`.
+
+    Raises
+    ------
+    RuntimeError
+        If the `bz2` module could not be imported.
+    """
+    if not pandas.compat.compressors.has_bz2:
+        raise RuntimeError(
+            "bz2 module not available. "
+            "A Python re-install with the proper dependencies, "
+            "might be required to solve this issue."
+        )
+    return pandas.compat.compressors.BZ2File
+
+
 __all__ = [
     "is_numpy_dev",
     "pa_version_under7p0",

diff --git a/pandas/compat/compressors.py b/pandas/compat/compressors.py
@@ -4,11 +4,17 @@
 
 from __future__ import annotations
 
-import bz2
 from pickle import PickleBuffer
 
 from pandas.compat._constants import PY310
 
+try:
+    import bz2
+
+    has_bz2 = True
+except ImportError:
+    has_bz2 = False
+
 try:
     import lzma
 
@@ -41,17 +47,19 @@ def flatten_buffer(
         return memoryview(b).tobytes("A")
 
 
-class BZ2File(bz2.BZ2File):
-    if not PY310:
+if has_bz2:
 
-        def write(self, b) -> int:
-            # Workaround issue where `bz2.BZ2File` expects `len`
-            # to return the number of bytes in `b` by converting
-            # `b` into something that meets that constraint with
-            # minimal copying.
-            #
-            # Note: This is fixed in Python 3.10.
-            return super().write(flatten_buffer(b))
+    class BZ2File(bz2.BZ2File):
+        if not PY310:
+
+            def write(self, b) -> int:
+                # Workaround issue where `bz2.BZ2File` expects `len`
+                # to return the number of bytes in `b` by converting
+                # `b` into something that meets that constraint with
+                # minimal copying.
+                #
+                # Note: This is fixed in Python 3.10.
+                return super().write(flatten_buffer(b))
 
 
 if has_lzma:

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -57,9 +57,11 @@
     StorageOptions,
     WriteBuffer,
 )
-from pandas.compat import get_lzma_file
+from pandas.compat import (
+    get_bz2_file,
+    get_lzma_file,
+)
 from pandas.compat._optional import import_optional_dependency
-from pandas.compat.compressors import BZ2File as _BZ2File
 from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 
@@ -766,7 +768,7 @@ def get_handle(
         elif compression == "bz2":
             # Overload of "BZ2File" to handle pickle protocol 5
             # "Union[str, BaseBuffer]", "str", "Dict[str, Any]"
-            handle = _BZ2File(  # type: ignore[call-overload]
+            handle = get_bz2_file()(  # type: ignore[call-overload]
                 handle,
                 mode=ioargs.mode,
                 **compression_args,

diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
@@ -3,6 +3,7 @@
 import string
 import subprocess
 import sys
+import textwrap
 
 import numpy as np
 import pytest
@@ -245,3 +246,21 @@ def test_str_size():
     ]
     result = subprocess.check_output(call).decode()[-4:-1].strip("\n")
     assert int(result) == int(expected)
+
+
+@pytest.mark.single_cpu
+def test_bz2_missing_import():
+    # Check that pandas imports without bz2 and get_bz2_file raises (GH 53857)
+    code = """
+        import sys
+        sys.modules['bz2'] = None
+        import pytest
+        import pandas as pd
+        from pandas.compat import get_bz2_file
+        msg = 'bz2 module not available.'
+        with pytest.raises(RuntimeError, match=msg):
+            get_bz2_file()
+    """
+    code = textwrap.dedent(code)
+    call = [sys.executable, "-c", code]
+    subprocess.check_output(call)