diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py index 7feecf3db76e..a2a9e28a5ab5 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_as_type import AsTypeTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class AsTypeParityTests( - AsTypeTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + AsTypeTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py index 1623db58af38..c277f5ce0664 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py @@ -20,10 +20,7 @@ from pyspark.testing.connectutils import ReusedConnectTestCase -class BaseParityTests( - BaseTestsMixin, - ReusedConnectTestCase, -): +class BaseParityTests(BaseTestsMixin, ReusedConnectTestCase): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py index 9af064218965..29b13868e03f 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py @@ -17,14 +17,13 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_binary_ops import BinaryOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class BinaryOpsParityTests( - BinaryOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + BinaryOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py index 6804768585ae..9ad2aa0ad17a 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_boolean_ops import BooleanOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class BooleanOpsParityTests( - BooleanOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + BooleanOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py index 851aaaa171b8..1b4dabdb0453 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_categorical_ops import CategoricalOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class CategoricalOpsParityTests( - CategoricalOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + CategoricalOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py index 1877be058305..ef587578f4ae 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py @@ -17,14 +17,13 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_complex_ops import ComplexOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ComplexOpsParityTests( - ComplexOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + ComplexOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py index ddc1545590ef..baa3180baaa7 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_date_ops import DateOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class DateOpsParityTests( - DateOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + DateOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py index 2a7395248567..2641e3a32dcd 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_datetime_ops import DatetimeOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class DatetimeOpsParityTests( - DatetimeOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + DatetimeOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py index 5c27fa6d3e4e..5df4c791c98b 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py @@ -17,14 +17,13 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_null_ops import NullOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class NullOpsParityTests( - NullOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + NullOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py index aa98a3009dc4..6f5c294e4ad5 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_arithmetic import ArithmeticTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ArithmeticParityTests( - ArithmeticTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + ArithmeticTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py index d66a5f155f81..56eba708c945 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_ops import NumOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class NumOpsParityTests( - NumOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + NumOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py index 5983c9a2c4f5..4d322d8b9b06 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_reverse import ReverseTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ReverseParityTests( - ReverseTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + ReverseTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py index e80971e12121..f507756a7a48 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py @@ -16,17 +16,19 @@ # import unittest +from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_string_ops import StringOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class StringOpsParityTests( - StringOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + StringOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py index 3c5a3b217352..edd29fa1ed28 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py @@ -16,17 +16,19 @@ # import unittest +import pyspark.pandas as ps from pyspark.pandas.tests.data_type_ops.test_timedelta_ops import TimedeltaOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class TimedeltaOpsParityTests( - TimedeltaOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + TimedeltaOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): - pass + @property + def psdf(self): + return ps.from_pandas(self.pdf) if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py index 237079922222..70a79e4cd3f9 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py @@ -17,14 +17,13 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_udt_ops import UDTOpsTestsMixin -from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class UDTOpsParityTests( - UDTOpsTestsMixin, - OpsTestBase, - ReusedConnectTestCase, + UDTOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py new file mode 100644 index 000000000000..f1e36aecd194 --- /dev/null +++ b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py @@ -0,0 +1,211 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import datetime +import decimal + +import numpy as np +import pandas as pd + +import pyspark.pandas as ps +from pyspark.pandas.typedef.typehints import ( + extension_dtypes_available, + extension_float_dtypes_available, + extension_object_dtypes_available, +) + +if extension_dtypes_available: + from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype + +if extension_float_dtypes_available: + from pandas import Float32Dtype, Float64Dtype + +if extension_object_dtypes_available: + from pandas import BooleanDtype, StringDtype + + +class OpsTestBase: + """The test base for arithmetic operations of different data types.""" + + @property + def numeric_pdf(self): + dtypes = [np.int32, int, np.float32, float] + sers = [pd.Series([1, 2, 3], dtype=dtype) for dtype in dtypes] + sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)])) + sers.append(pd.Series([1, 2, np.nan], dtype=float)) + sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(np.nan)])) + pdf = pd.concat(sers, axis=1) + pdf.columns = [dtype.__name__ for dtype in dtypes] + [ + "decimal", + "float_nan", + "decimal_nan", + ] + return pdf + + @property + def numeric_psdf(self): + return ps.from_pandas(self.numeric_pdf) + + @property + def numeric_df_cols(self): + return self.numeric_pdf.columns + + @property + def integral_pdf(self): + return pd.DataFrame({"this": [1, 2, 3], "that": [2, 2, 1]}) + + @property + def integral_psdf(self): + return ps.from_pandas(self.integral_pdf) + + @property + def non_numeric_pdf(self): + psers = { + "string": pd.Series(["x", "y", "z"]), + "bool": pd.Series([True, True, False]), + "date": pd.Series( + [datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)] + ), + "datetime": pd.to_datetime(pd.Series([1, 2, 3])), + "timedelta": pd.Series( + [datetime.timedelta(1), datetime.timedelta(hours=2), datetime.timedelta(weeks=3)] + ), + "categorical": pd.Series(["a", "b", "a"], dtype="category"), + } + return pd.concat(psers, axis=1) + + @property + def non_numeric_psdf(self): + return ps.from_pandas(self.non_numeric_pdf) + + @property + def non_numeric_df_cols(self): + return self.non_numeric_pdf.columns + + @property + def pdf(self): + return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1) + + @property + def df_cols(self): + return self.pdf.columns + + @property + def numeric_psers(self): + dtypes = [np.float32, float, int, np.int32] + sers = [pd.Series([1, 2, 3], dtype=dtype) for dtype in dtypes] + sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)])) + return sers + + @property + def numeric_pssers(self): + return [ps.from_pandas(pser) for pser in self.numeric_psers] + + @property + def numeric_pser_psser_pairs(self): + return zip(self.numeric_psers, self.numeric_pssers) + + @property + def non_numeric_psers(self): + psers = { + "string": pd.Series(["x", "y", "z"]), + "datetime": pd.to_datetime(pd.Series([1, 2, 3])), + "bool": pd.Series([True, True, False]), + "date": pd.Series( + [datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)] + ), + "categorical": pd.Series(["a", "b", "a"], dtype="category"), + } + return psers + + @property + def non_numeric_pssers(self): + pssers = {} + + for k, v in self.non_numeric_psers.items(): + pssers[k] = ps.from_pandas(v) + return pssers + + @property + def non_numeric_pser_psser_pairs(self): + return zip(self.non_numeric_psers.values(), self.non_numeric_pssers.values()) + + @property + def pssers(self): + return self.numeric_pssers + list(self.non_numeric_pssers.values()) + + @property + def psers(self): + return self.numeric_psers + list(self.non_numeric_psers.values()) + + @property + def pser_psser_pairs(self): + return zip(self.psers, self.pssers) + + @property + def string_extension_dtype(self): + return ["string", StringDtype()] if extension_object_dtypes_available else [] + + @property + def object_extension_dtypes(self): + return ( + ["boolean", "string", BooleanDtype(), StringDtype()] + if extension_object_dtypes_available + else [] + ) + + @property + def fractional_extension_dtypes(self): + return ( + ["Float32", "Float64", Float32Dtype(), Float64Dtype()] + if extension_float_dtypes_available + else [] + ) + + @property + def integral_extension_dtypes(self): + return ( + [ + "Int8", + "Int16", + "Int32", + "Int64", + Int8Dtype(), + Int16Dtype(), + Int32Dtype(), + Int64Dtype(), + ] + if extension_dtypes_available + else [] + ) + + @property + def extension_dtypes(self): + return ( + self.object_extension_dtypes + + self.fractional_extension_dtypes + + self.integral_extension_dtypes + ) + + def check_extension(self, left, right): + """ + Compare `psser` and `pser` of numeric ExtensionDtypes. + + This utility is to adjust an issue for comparing numeric ExtensionDtypes in specific + pandas versions. Please refer to https://github.com/pandas-dev/pandas/issues/39410. + """ + self.assert_eq(left, right) diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py index 25eb97328d47..37a708948a80 100644 --- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py @@ -27,7 +27,7 @@ extension_float_dtypes_available, extension_object_dtypes_available, ) -from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.testing.pandasutils import ComparisonTestBase if extension_dtypes_available: from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype @@ -39,7 +39,7 @@ from pandas import BooleanDtype, StringDtype -class OpsTestBase(PandasOnSparkTestCase): +class OpsTestBase(ComparisonTestBase): """The test base for arithmetic operations of different data types.""" @property @@ -101,10 +101,6 @@ def non_numeric_df_cols(self): def pdf(self): return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1) - @property - def psdf(self): - return ps.from_pandas(self.pdf) - @property def df_cols(self): return self.pdf.columns