5 changes: 5 additions & 0 deletions python/pyspark/errors/error_classes.py
@@ -658,6 +658,11 @@
       "Expected <expected> values for `<item>`, got <actual>."
     ]
   },
+  "UDF_RETURN_TYPE" : {
+    "message" : [
+      "Return type of the user-defined function should be <expected>, but is <actual>."
+    ]
+  },
   "UDTF_EXEC_ERROR" : {
     "message" : [
       "User defined table function encountered an error in the '<method_name>' method: <error>"
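For context, a minimal sketch of how an error-class entry like this is typically surfaced: PySpark's error framework fills the <expected> and <actual> placeholders from message_parameters. The actual call site is not part of this diff; PySparkRuntimeError and the parameter values below are illustrative only.

from pyspark.errors import PySparkRuntimeError

try:
    raise PySparkRuntimeError(
        error_class="UDF_RETURN_TYPE",
        message_parameters={"expected": "pandas.DataFrame", "actual": "tuple"},
    )
except PySparkRuntimeError as e:
    # Rendered from the template in error_classes.py, roughly:
    # [UDF_RETURN_TYPE] Return type of the user-defined function should be pandas.DataFrame, but is tuple.
    print(e)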
2 changes: 1 addition & 1 deletion python/pyspark/pandas/frame.py
@@ -399,7 +399,7 @@ class DataFrame(Frame, Generic[T]):
        `compute.ops_on_diff_frames` should be turned on;
     2, when `data` is a local dataset (Pandas DataFrame/numpy ndarray/list/etc),
        it will first collect the `index` to driver if necessary, and then apply
-       the `Pandas.DataFrame(...)` creation internally;
+       the `pandas.DataFrame(...)` creation internally;

     Examples
     --------
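The corrected docstring describes the local-data construction path. A small sketch of that path, assuming a running SparkSession; the data values are made up:

import pandas as pd
import pyspark.pandas as ps

# Local pandas data: pandas-on-Spark handles the index on the driver and builds
# the frame via pandas.DataFrame(...) internally before distributing it.
pdf = pd.DataFrame({"a": [1, 2, 3]}, index=[10, 20, 30])
psdf = ps.DataFrame(pdf)
print(psdf.sort_index().to_pandas())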
2 changes: 1 addition & 1 deletion python/pyspark/sql/pandas/serializers.py
@@ -155,7 +155,7 @@ def wrap_and_init_stream():

 class ArrowStreamPandasSerializer(ArrowStreamSerializer):
     """
-    Serializes Pandas.Series as Arrow data with Arrow streaming format.
+    Serializes pandas.Series as Arrow data with Arrow streaming format.

     Parameters
     ----------
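As a rough illustration of what "pandas.Series as Arrow data" means, here is plain pyarrow doing the same conversion; this is not the serializer's actual code path, just the underlying idea:

import pandas as pd
import pyarrow as pa

s = pd.Series([1, 2, 3], name="v")
arr = pa.Array.from_pandas(s)                     # pandas.Series -> pyarrow.Array
batch = pa.RecordBatch.from_arrays([arr], ["v"])  # one-column batch, streamable
print(batch.num_rows, batch.schema)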
3 changes: 2 additions & 1 deletion python/pyspark/sql/tests/connect/test_parity_arrow_map.py
@@ -22,7 +22,8 @@


 class ArrowMapParityTests(MapInArrowTestsMixin, ReusedConnectTestCase):
-    pass
+    def test_other_than_recordbatch_iter(self):
+        self.check_other_than_recordbatch_iter()


 if __name__ == "__main__":
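The new parity test re-enables the mixin's check that mapInArrow rejects functions that do not yield pyarrow.RecordBatch objects. A hedged sketch of that misuse (the exact error text comes from the new error class and may differ slightly):

import pyarrow as pa
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(3)

def not_record_batches(iterator):
    for batch in iterator:
        yield batch.to_pydict()  # a dict, not a pyarrow.RecordBatch

# Expected to fail at runtime with the improved return-type error:
# df.mapInArrow(not_record_batches, df.schema).collect()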
23 changes: 21 additions & 2 deletions python/pyspark/sql/tests/connect/test_parity_pandas_map.py
@@ -14,16 +14,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import unittest


 from pyspark.sql.tests.pandas.test_pandas_map import MapInPandasTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase


 class MapInPandasParityTests(MapInPandasTestsMixin, ReusedConnectTestCase):
+    def test_other_than_dataframe_iter(self):
+        self.check_other_than_dataframe_iter()
+
+    def test_dataframes_with_other_column_names(self):
+        self.check_dataframes_with_other_column_names()
+
+    def test_dataframes_with_duplicate_column_names(self):
+        self.check_dataframes_with_duplicate_column_names()
+
+    def test_dataframes_with_less_columns(self):
+        self.check_dataframes_with_less_columns()
+
+    @unittest.skip("Fails in Spark Connect, should enable.")
+    def test_dataframes_with_incompatible_types(self):
+        self.check_dataframes_with_incompatible_types()
+
+    def test_empty_dataframes_with_less_columns(self):
+        self.check_empty_dataframes_with_less_columns()
+
-    def test_other_than_dataframe(self):
-        self.check_other_than_dataframe()
+    def test_empty_dataframes_with_other_columns(self):
+        self.check_empty_dataframes_with_other_columns()


 if __name__ == "__main__":
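These overrides simply run the mixin's checks under Spark Connect. For reference, a sketch of the kind of misuse that check_other_than_dataframe_iter covers; the DataFrame and schema below are illustrative:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(3)

def not_dataframes(iterator):
    for pdf in iterator:
        yield pdf.to_numpy()  # ndarray, not a pandas.DataFrame

# Expected to raise PythonException with the new, clearer return-type message:
# df.mapInPandas(not_dataframes, "id long").collect()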
4 changes: 1 addition & 3 deletions python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py
@@ -56,7 +56,6 @@ class CogroupedApplyInPandasTestsMixin:
     def data1(self):
         return (
             self.spark.range(10)
-            .toDF("id")
             .withColumn("ks", array([lit(i) for i in range(20, 30)]))
             .withColumn("k", explode(col("ks")))
             .withColumn("v", col("k") * 10)
@@ -67,7 +66,6 @@ def data1(self):
     def data2(self):
         return (
             self.spark.range(10)
-            .toDF("id")
             .withColumn("ks", array([lit(i) for i in range(20, 30)]))
             .withColumn("k", explode(col("ks")))
             .withColumn("v2", col("k") * 100)
@@ -168,7 +166,7 @@ def check_apply_in_pandas_not_returning_pandas_dataframe(self):
             fn=lambda lft, rgt: lft.size + rgt.size,
             error_class=PythonException,
             error_message_regex="Return type of the user-defined function "
-            "should be pandas.DataFrame, but is <class 'numpy.int64'>",
+            "should be pandas.DataFrame, but is int64.",
         )

     def test_apply_in_pandas_returning_column_names(self):
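The updated regex matches the shorter message now produced when a cogrouped applyInPandas UDF returns something other than a pandas.DataFrame. A sketch of that failure mode; the data and output schema below are made up:

import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
left = spark.createDataFrame([(1, 1.0), (2, 2.0)], ("id", "v"))
right = spark.createDataFrame([(1, "x"), (2, "y")], ("id", "w"))

def bad_merge(lft: pd.DataFrame, rgt: pd.DataFrame):
    return lft.size + rgt.size  # numpy.int64, not a pandas.DataFrame

cogrouped = left.groupby("id").cogroup(right.groupby("id"))
# Expected to fail with "... should be pandas.DataFrame, but is int64.":
# cogrouped.applyInPandas(bad_merge, "id long, v double, w string").collect()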
4 changes: 1 addition & 3 deletions python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py
@@ -79,7 +79,6 @@ class GroupedApplyInPandasTestsMixin:
     def data(self):
         return (
             self.spark.range(10)
-            .toDF("id")
             .withColumn("vs", array([lit(i) for i in range(20, 30)]))
             .withColumn("v", explode(col("vs")))
             .drop("vs")
@@ -287,8 +286,7 @@ def test_apply_in_pandas_not_returning_pandas_dataframe(self):
     def check_apply_in_pandas_not_returning_pandas_dataframe(self):
         with self.assertRaisesRegex(
             PythonException,
-            "Return type of the user-defined function should be pandas.DataFrame, "
-            "but is <class 'tuple'>",
+            "Return type of the user-defined function should be pandas.DataFrame, but is tuple.",
         ):
             self._test_apply_in_pandas(lambda key, pdf: key)

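For contrast with the failing case above, a sketch of the contract the grouped-map API expects, i.e. the UDF returning a pandas.DataFrame; the example data and schema are illustrative:

import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0)], ("id", "v"))

def normalize(pdf: pd.DataFrame) -> pd.DataFrame:
    v = pdf.v
    return pdf.assign(v=(v - v.mean()) / v.std())  # returns a pandas.DataFrame

df.groupby("id").applyInPandas(normalize, schema="id long, v double").show()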