diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index 2bf0be3822db5..5a79573447801 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -32,7 +32,7 @@ RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget
 RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
 
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
-RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.4.4' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib
+RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.4.4' scipy unittest-xml-reporting plotly>=4.8 sklearn 'mlflow>=1.0' coverage matplotlib openpyxl
 
 RUN add-apt-repository ppa:pypy/ppa
 RUN apt update
diff --git a/python/pyspark/pandas/tests/test_dataframe_conversion.py b/python/pyspark/pandas/tests/test_dataframe_conversion.py
index 0582f5db3721c..4e4c9ac2e7d9b 100644
--- a/python/pyspark/pandas/tests/test_dataframe_conversion.py
+++ b/python/pyspark/pandas/tests/test_dataframe_conversion.py
@@ -90,7 +90,6 @@ def get_excel_dfs(pandas_on_spark_location, pandas_location):
             "expected": pd.read_excel(pandas_location, index_col=0),
         }
 
-    @unittest.skip("openpyxl")
     def test_to_excel(self):
         with self.temp_dir() as dirpath:
             pandas_location = dirpath + "/" + "output1.xlsx"
diff --git a/python/pyspark/pandas/tests/test_dataframe_spark_io.py b/python/pyspark/pandas/tests/test_dataframe_spark_io.py
index d63ee659d3dec..6c9cc13e8250f 100644
--- a/python/pyspark/pandas/tests/test_dataframe_spark_io.py
+++ b/python/pyspark/pandas/tests/test_dataframe_spark_io.py
@@ -247,6 +247,7 @@ def test_spark_io(self):
             expected_idx.sort_values(by="f").to_spark().toPandas(),
         )
 
+    # TODO(SPARK-40353): Re-enable `test_read_excel`.
     @unittest.skip("openpyxl")
     def test_read_excel(self):
         with self.temp_dir() as tmp:
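
For context, a minimal sketch (not part of the patch) of the Excel round trip that the re-enabled `test_to_excel` exercises. It assumes pyspark and openpyxl are installed, and uses illustrative /tmp paths in place of the test's `temp_dir()`:

    import pandas as pd
    import pyspark.pandas as ps

    # The same small frame written once by plain pandas and once by
    # pandas-on-Spark; both writers use openpyxl for .xlsx output.
    pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
    psdf = ps.from_pandas(pdf)

    pdf.to_excel("/tmp/output1.xlsx")    # pandas reference output
    psdf.to_excel("/tmp/output2.xlsx")   # pandas-on-Spark output

    # Mirrors get_excel_dfs(): read both files back and compare.
    expected = pd.read_excel("/tmp/output1.xlsx", index_col=0)
    got = pd.read_excel("/tmp/output2.xlsx", index_col=0)
    assert got.equals(expected)

Without openpyxl in the image, both the write and the read-back fail at import time, which is why the test was previously skipped and why the Dockerfile change above adds the package.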