Skip to content

Commit 9389c6b

Browse files
itholic authored and HyukjinKwon committed
[SPARK-46820][PYTHON] Fix error message regression by restoring new_msg
### What changes were proposed in this pull request? This PR proposes to fix error message regression by restoring `new_msg`. ### Why are the changes needed? In the past few PRs, we mistakenly remove `new_msg` which introduces error message regression. ### Does this PR introduce _any_ user-facing change? No API change, but the user-facing error message is improved **Before** ```python >>> from pyspark.sql.types import StructType, StructField, StringType, IntegerType >>> schema = StructType([ ... StructField("name", StringType(), nullable=True), ... StructField("age", IntegerType(), nullable=False) ... ]) >>> df = spark.createDataFrame([(["asd", None])], schema) pyspark.errors.exceptions.base.PySparkValueError: [CANNOT_BE_NONE] Argument `obj` cannot be None. ``` **After** ```python >>> from pyspark.sql.types import StructType, StructField, StringType, IntegerType >>> schema = StructType([ ... StructField("name", StringType(), nullable=True), ... StructField("age", IntegerType(), nullable=False) ... ]) >>> df = spark.createDataFrame([(["asd", None])], schema) pyspark.errors.exceptions.base.PySparkValueError: field age: This field is not nullable, but got None ``` ### How was this patch tested? The existing CI should pass ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44859 from itholic/SPARK-46820. Authored-by: Haejoon Lee <haejoon.lee@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
1 parent aa53c8e commit 9389c6b

File tree

4 files changed

+108
-29
lines changed

4 files changed

+108
-29
lines changed

python/pyspark/errors/error_classes.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,46 @@
286286
"An error occurred while calling <func_name>: <error_msg>."
287287
]
288288
},
289+
"FIELD_DATA_TYPE_UNACCEPTABLE": {
290+
"message": [
291+
"<data_type> can not accept object <obj> in type <obj_type>."
292+
]
293+
},
294+
"FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME": {
295+
"message": [
296+
"<field_name>: <data_type> can not accept object <obj> in type <obj_type>."
297+
]
298+
},
299+
"FIELD_NOT_NULLABLE": {
300+
"message": [
301+
"Field is not nullable, but got None."
302+
]
303+
},
304+
"FIELD_NOT_NULLABLE_WITH_NAME": {
305+
"message": [
306+
"<field_name>: This field is not nullable, but got None."
307+
]
308+
},
309+
"FIELD_STRUCT_LENGTH_MISMATCH": {
310+
"message": [
311+
"Length of object (<object_length>) does not match with length of fields (<field_length>)."
312+
]
313+
},
314+
"FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME": {
315+
"message": [
316+
"<field_name>: Length of object (<object_length>) does not match with length of fields (<field_length>)."
317+
]
318+
},
319+
"FIELD_TYPE_MISMATCH": {
320+
"message": [
321+
"<obj> is not an instance of type <data_type>."
322+
]
323+
},
324+
"FIELD_TYPE_MISMATCH_WITH_NAME": {
325+
"message": [
326+
"<field_name>: <obj> is not an instance of type <data_type>."
327+
]
328+
},
289329
"HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": {
290330
"message": [
291331
"Function `<func_name>` should return Column, got <return_type>."
@@ -612,11 +652,6 @@
612652
"<feature> is not implemented."
613653
]
614654
},
615-
"NOT_INSTANCE_OF": {
616-
"message": [
617-
"<value> is not an instance of type <type>."
618-
]
619-
},
620655
"NOT_INT": {
621656
"message": [
622657
"Argument `<arg_name>` should be an int, got <arg_type>."

python/pyspark/sql/tests/test_dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1271,13 +1271,13 @@ def test_toDF_with_schema_string(self):
12711271

12721272
# number of fields must match.
12731273
self.assertRaisesRegex(
1274-
Exception, "LENGTH_SHOULD_BE_THE_SAME", lambda: rdd.toDF("key: int").collect()
1274+
Exception, "FIELD_STRUCT_LENGTH_MISMATCH", lambda: rdd.toDF("key: int").collect()
12751275
)
12761276

12771277
# field types mismatch will cause exception at runtime.
12781278
self.assertRaisesRegex(
12791279
Exception,
1280-
"CANNOT_ACCEPT_OBJECT_IN_TYPE",
1280+
"FIELD_DATA_TYPE_UNACCEPTABLE",
12811281
lambda: rdd.toDF("key: float, value: string").collect(),
12821282
)
12831283

python/pyspark/sql/tests/test_types.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,9 +1458,9 @@ def test_verify_type_exception_msg(self):
14581458

14591459
self.check_error(
14601460
exception=pe.exception,
1461-
error_class="CANNOT_BE_NONE",
1461+
error_class="FIELD_NOT_NULLABLE_WITH_NAME",
14621462
message_parameters={
1463-
"arg_name": "obj",
1463+
"field_name": "test_name",
14641464
},
14651465
)
14661466

@@ -1470,11 +1470,12 @@ def test_verify_type_exception_msg(self):
14701470

14711471
self.check_error(
14721472
exception=pe.exception,
1473-
error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
1473+
error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
14741474
message_parameters={
14751475
"data_type": "IntegerType()",
1476-
"obj_name": "data",
1477-
"obj_type": "str",
1476+
"field_name": "field b in field a",
1477+
"obj": "'data'",
1478+
"obj_type": "<class 'str'>",
14781479
},
14791480
)
14801481

python/pyspark/sql/types.py

Lines changed: 60 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2196,9 +2196,16 @@ def verify_nullability(obj: Any) -> bool:
21962196
if nullable:
21972197
return True
21982198
else:
2199+
if name is not None:
2200+
raise PySparkValueError(
2201+
error_class="FIELD_NOT_NULLABLE_WITH_NAME",
2202+
message_parameters={
2203+
"field_name": str(name),
2204+
},
2205+
)
21992206
raise PySparkValueError(
2200-
error_class="CANNOT_BE_NONE",
2201-
message_parameters={"arg_name": "obj"},
2207+
error_class="FIELD_NOT_NULLABLE",
2208+
message_parameters={},
22022209
)
22032210
else:
22042211
return False
@@ -2213,12 +2220,22 @@ def assert_acceptable_types(obj: Any) -> None:
22132220
def verify_acceptable_types(obj: Any) -> None:
22142221
# subclass of them can not be fromInternal in JVM
22152222
if type(obj) not in _acceptable_types[_type]:
2223+
if name is not None:
2224+
raise PySparkTypeError(
2225+
error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
2226+
message_parameters={
2227+
"field_name": str(name),
2228+
"data_type": str(dataType),
2229+
"obj": repr(obj),
2230+
"obj_type": str(type(obj)),
2231+
},
2232+
)
22162233
raise PySparkTypeError(
2217-
error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
2234+
error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
22182235
message_parameters={
22192236
"data_type": str(dataType),
2220-
"obj_name": str(obj),
2221-
"obj_type": type(obj).__name__,
2237+
"obj": repr(obj),
2238+
"obj_type": str(type(obj)),
22222239
},
22232240
)
22242241

@@ -2232,11 +2249,20 @@ def verify_value(obj: Any) -> None:
22322249

22332250
def verify_udf(obj: Any) -> None:
22342251
if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType):
2252+
if name is not None:
2253+
raise PySparkValueError(
2254+
error_class="FIELD_TYPE_MISMATCH_WITH_NAME",
2255+
message_parameters={
2256+
"field_name": str(name),
2257+
"obj": str(obj),
2258+
"data_type": str(dataType),
2259+
},
2260+
)
22352261
raise PySparkValueError(
2236-
error_class="NOT_INSTANCE_OF",
2262+
error_class="FIELD_TYPE_MISMATCH",
22372263
message_parameters={
2238-
"value": str(obj),
2239-
"type": str(dataType),
2264+
"obj": str(obj),
2265+
"data_type": str(dataType),
22402266
},
22412267
)
22422268
verifier(dataType.toInternal(obj))
@@ -2365,13 +2391,20 @@ def verify_struct(obj: Any) -> None:
23652391
verifier(obj.get(f))
23662392
elif isinstance(obj, (tuple, list)):
23672393
if len(obj) != len(verifiers):
2394+
if name is not None:
2395+
raise PySparkValueError(
2396+
error_class="FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME",
2397+
message_parameters={
2398+
"field_name": str(name),
2399+
"object_length": str(len(obj)),
2400+
"field_length": str(len(verifiers)),
2401+
},
2402+
)
23682403
raise PySparkValueError(
2369-
error_class="LENGTH_SHOULD_BE_THE_SAME",
2404+
error_class="FIELD_STRUCT_LENGTH_MISMATCH",
23702405
message_parameters={
2371-
"arg1": "obj",
2372-
"arg2": "fields",
2373-
"arg1_length": str(len(obj)),
2374-
"arg2_length": str(len(verifiers)),
2406+
"object_length": str(len(obj)),
2407+
"field_length": str(len(verifiers)),
23752408
},
23762409
)
23772410
for v, (_, verifier) in zip(obj, verifiers):
@@ -2381,12 +2414,22 @@ def verify_struct(obj: Any) -> None:
23812414
for f, verifier in verifiers:
23822415
verifier(d.get(f))
23832416
else:
2417+
if name is not None:
2418+
raise PySparkTypeError(
2419+
error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
2420+
message_parameters={
2421+
"field_name": str(name),
2422+
"data_type": str(dataType),
2423+
"obj": repr(obj),
2424+
"obj_type": str(type(obj)),
2425+
},
2426+
)
23842427
raise PySparkTypeError(
2385-
error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
2428+
error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
23862429
message_parameters={
2387-
"data_type": "StructType",
2388-
"obj_name": str(obj),
2389-
"obj_type": type(obj).__name__,
2430+
"data_type": str(dataType),
2431+
"obj": repr(obj),
2432+
"obj_type": str(type(obj)),
23902433
},
23912434
)
23922435

0 commit comments

Comments
 (0)