diff --git a/python/pyspark/pandas/tests/test_dataframe.py b/python/pyspark/pandas/tests/test_dataframe.py index b5466b467d84e..4e80c680b6ee9 100644 --- a/python/pyspark/pandas/tests/test_dataframe.py +++ b/python/pyspark/pandas/tests/test_dataframe.py @@ -6091,10 +6091,12 @@ def test_corrwith(self): def _test_corrwith(self, psdf, psobj): pdf = psdf._to_pandas() pobj = psobj._to_pandas() - # Regression in pandas 1.5.0 when other is Series and method is "pearson" or "spearman" + # There was a regression in pandas 1.5.0 + # when other is a Series and method is "pearson" or "spearman"; fixed in pandas 1.5.1. + # Therefore, only pandas 1.5.0 is tested in a different way. # See https://github.com/pandas-dev/pandas/issues/48826 for the reported issue, # and https://github.com/pandas-dev/pandas/pull/46174 for the initial PR that causes. - if LooseVersion(pd.__version__) >= LooseVersion("1.5.0") and isinstance(pobj, pd.Series): + if LooseVersion(pd.__version__) == LooseVersion("1.5.0") and isinstance(pobj, pd.Series): methods = ["kendall"] else: methods = ["pearson", "spearman", "kendall"] diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py index ce1ffb347655a..71c393dcf340e 100644 --- a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py @@ -1866,12 +1866,13 @@ def test_corrwith(self): self._test_corrwith((df1 + 1), df2.B) self._test_corrwith((df1 + 1), (df2.B + 2)) - # Regression in pandas 1.5.0 + # There was a regression in pandas 1.5.0, which was fixed in pandas 1.5.1. + # Therefore, only pandas 1.5.0 is tested in a different way. # See https://github.com/pandas-dev/pandas/issues/49141 for the reported issue, # and https://github.com/pandas-dev/pandas/pull/46174 for the initial PR that causes.
df_bool = ps.DataFrame({"A": [True, True, False, False], "B": [True, False, False, True]}) ser_bool = ps.Series([True, True, False, True]) - if LooseVersion(pd.__version__) >= LooseVersion("1.5.0"): + if LooseVersion(pd.__version__) == LooseVersion("1.5.0"): expected = ps.Series([0.5773502691896257, 0.5773502691896257], index=["B", "A"]) self.assert_eq(df_bool.corrwith(ser_bool), expected, almost=True) else: @@ -1883,10 +1884,11 @@ def test_corrwith(self): self._test_corrwith(self.psdf3, self.psdf4) self._test_corrwith(self.psdf1, self.psdf1.a) - # Regression in pandas 1.5.0 + # There was a regression in pandas 1.5.0, which was fixed in pandas 1.5.1. + # Therefore, only pandas 1.5.0 is tested in a different way. # See https://github.com/pandas-dev/pandas/issues/49141 for the reported issue, # and https://github.com/pandas-dev/pandas/pull/46174 for the initial PR that causes. - if LooseVersion(pd.__version__) >= LooseVersion("1.5.0"): + if LooseVersion(pd.__version__) == LooseVersion("1.5.0"): expected = ps.Series([-0.08827348295047496, 0.4413674147523748], index=["b", "a"]) self.assert_eq(self.psdf1.corrwith(self.psdf2.b), expected, almost=True) else: