747 changes: 376 additions & 371 deletions python/pyspark/errors/error_classes.py

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions python/pyspark/errors/exceptions/__init__.py
@@ -14,3 +14,43 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#


def _write_self() -> None:
    import json
    from pyspark.errors import error_classes

    with open("python/pyspark/errors/error_classes.py", "w") as f:
        error_class_py_file = """#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# NOTE: Automatically sort this file via
[Review comment on this line]

Contributor:
If error_classes.py is not meant to be edited manually, I would add a clear warning here so people don't mistakenly edit the file, similar to #44847.

Member (Author):
Oh, actually this case is slightly different: the file has to be manually edited first, and then reformatted via that code :-).

# - cd $SPARK_HOME
# - bin/pyspark
# - from pyspark.errors.exceptions import _write_self; _write_self()
import json
ERROR_CLASSES_JSON = '''
%s
'''
ERROR_CLASSES_MAP = json.loads(ERROR_CLASSES_JSON)
""" % json.dumps(
            error_classes.ERROR_CLASSES_MAP, sort_keys=True, indent=2
        )
        f.write(error_class_py_file)
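
For context, the regeneration flow described in the NOTE (and in the author's reply above) is: hand-edit ERROR_CLASSES_JSON in error_classes.py, then run _write_self() to rewrite the file in canonical form. A minimal sketch of that round trip, assuming a PySpark checkout with $SPARK_HOME as the working directory:

    # Run from $SPARK_HOME, e.g. inside bin/pyspark, after hand-editing
    # ERROR_CLASSES_JSON in python/pyspark/errors/error_classes.py.
    from pyspark.errors.exceptions import _write_self

    # Rewrites python/pyspark/errors/error_classes.py in canonical form:
    # keys sorted alphabetically, JSON re-indented with two spaces.
    _write_self()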
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/dataframe.py
@@ -1663,7 +1663,7 @@ def sampleBy(
                         "arg_name": "fractions",
                         "arg_type": type(fractions).__name__,
                         "allowed_types": "float, int, str",
-                        "return_type": type(k).__name__,
+                        "item_type": type(k).__name__,
                     },
                 )
             fractions[k] = float(v)
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/session.py
@@ -347,7 +347,7 @@ def createDataFrame(
         if isinstance(data, DataFrame):
             raise PySparkTypeError(
                 error_class="INVALID_TYPE",
-                message_parameters={"arg_name": "data", "data_type": "DataFrame"},
+                message_parameters={"arg_name": "data", "arg_type": "DataFrame"},
             )

         _schema: Optional[Union[AtomicType, StructType]] = None
6 changes: 4 additions & 2 deletions python/pyspark/sql/dataframe.py
@@ -1319,6 +1319,7 @@ def hint(
                     error_class="DISALLOWED_TYPE_FOR_CONTAINER",
                     message_parameters={
                         "arg_name": "parameters",
+                        "arg_type": type(parameters).__name__,
[Review comment on this line]

Contributor:
Hmm... this also seems to be problematic. It seems that an error should have occurred if a parameter defined in the template was actually missing. Let me investigate this too.

"allowed_types": allowed_types_repr,
"item_type": type(p).__name__,
},
Expand All @@ -1329,6 +1330,7 @@ def hint(
error_class="DISALLOWED_TYPE_FOR_CONTAINER",
message_parameters={
"arg_name": "parameters",
"arg_type": type(parameters).__name__,
"allowed_types": allowed_types_repr,
"item_type": type(p).__name__ + "[" + type(p[0]).__name__ + "]",
},
Expand Down Expand Up @@ -2385,7 +2387,7 @@ def sampleBy(
"arg_name": "fractions",
"arg_type": type(fractions).__name__,
"allowed_types": "float, int, str",
"return_type": type(k).__name__,
"item_type": type(k).__name__,
},
)
fractions[k] = float(v)
Expand Down Expand Up @@ -5839,7 +5841,7 @@ def approxQuantile(
"arg_name": "col",
"arg_type": type(col).__name__,
"allowed_types": "str",
"return_type": type(c).__name__,
"item_type": type(c).__name__,
},
)
col = _to_list(self._sc, cast(List["ColumnOrName"], col))
Expand Down
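
The two hint hunks above add the arg_type entry that the DISALLOWED_TYPE_FOR_CONTAINER template already references; as the reviewer's comment notes, its absence had gone undetected. A minimal repro sketch of that error path, assuming a local SparkSession bound to spark:

    from pyspark.errors import PySparkTypeError

    df = spark.range(10)
    try:
        # dict is not an allowed hint parameter type (str, float, int, Column, list)
        df.hint("broadcast", {"bad": "param"})
    except PySparkTypeError as e:
        print(e.getErrorClass())         # DISALLOWED_TYPE_FOR_CONTAINER
        print(e.getMessageParameters())  # now includes "arg_type": "tuple",
                                         # since *parameters arrives as a tuple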
2 changes: 1 addition & 1 deletion python/pyspark/sql/session.py
@@ -1421,7 +1421,7 @@ def createDataFrame(  # type: ignore[misc]
         if isinstance(data, DataFrame):
             raise PySparkTypeError(
                 error_class="INVALID_TYPE",
-                message_parameters={"arg_name": "data", "data_type": "DataFrame"},
+                message_parameters={"arg_name": "data", "arg_type": "DataFrame"},
             )

         if isinstance(schema, str):
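
Both the Spark Connect session (above) and the classic session here guard the same misuse: passing an existing DataFrame as the data argument. A quick repro sketch, assuming a running session spark:

    from pyspark.errors import PySparkTypeError

    df = spark.createDataFrame([(1, "a")], ["id", "value"])
    try:
        spark.createDataFrame(df)  # a DataFrame is not valid input data
    except PySparkTypeError as e:
        print(e.getErrorClass())         # INVALID_TYPE
        print(e.getMessageParameters())  # {'arg_name': 'data', 'arg_type': 'DataFrame'}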
2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -2282,7 +2282,7 @@ def test_stat_sample_by(self):
                 "arg_name": "fractions",
                 "arg_type": "dict",
                 "allowed_types": "float, int, str",
-                "return_type": "NoneType",
+                "item_type": "NoneType",
             },
         )
4 changes: 2 additions & 2 deletions python/pyspark/sql/tests/test_dataframe.py
@@ -1101,7 +1101,7 @@ def test_observe(self):
         # observation requires name (if given) to be non empty string
         with self.assertRaisesRegex(TypeError, "`name` should be a str, got int"):
             Observation(123)
-        with self.assertRaisesRegex(ValueError, "`name` must be a non empty string, got ''."):
+        with self.assertRaisesRegex(ValueError, "`name` must be a non-empty string, got ''."):
             Observation("")

         # dataframe.observe requires at least one expr
@@ -2034,7 +2034,7 @@ def test_invalid_argument_create_dataframe(self):
         self.check_error(
             exception=pe.exception,
             error_class="INVALID_TYPE",
-            message_parameters={"arg_name": "data", "data_type": "DataFrame"},
+            message_parameters={"arg_name": "data", "arg_type": "DataFrame"},
         )
2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/test_functions.py
@@ -190,7 +190,7 @@ def test_sampleby(self):
                 "arg_name": "fractions",
                 "arg_type": "dict",
                 "allowed_types": "float, int, str",
-                "return_type": "NoneType",
+                "item_type": "NoneType",
             },
         )
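
Both test updates assert the parameters users now see in the message. The failure they cover can be reproduced directly; a sketch, assuming a session spark:

    from pyspark.errors import PySparkTypeError

    df = spark.range(10)
    try:
        df.sampleBy("id", fractions={None: 0.5})  # a None key is not float/int/str
    except PySparkTypeError as e:
        print(e.getMessageParameters()["item_type"])  # NoneType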
2 changes: 1 addition & 1 deletion python/pyspark/sql/types.py
@@ -2236,7 +2236,7 @@ def verify_udf(obj: Any) -> None:
                     error_class="NOT_INSTANCE_OF",
                     message_parameters={
                         "value": str(obj),
-                        "data_type": str(dataType),
+                        "type": str(dataType),
                     },
                 )
             verifier(dataType.toInternal(obj))
2 changes: 1 addition & 1 deletion python/pyspark/worker.py
@@ -1175,7 +1175,7 @@ def verify_result(result):
             raise PySparkTypeError(
                 error_class="INVALID_ARROW_UDTF_RETURN_TYPE",
                 message_parameters={
-                    "type_name": type(result).__name__,
+                    "return_type": type(result).__name__,
                     "value": str(result),
                     "func": f.__name__,
                 },
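
Stepping back, each error_class names a message template in ERROR_CLASSES_MAP, and the renames in this PR make the call-site message_parameters keys match the placeholder names those templates actually use. A hypothetical consistency check (not part of this PR) that could have flagged such mismatches, assuming the templates use <name>-style placeholders:

    import re

    from pyspark.errors.error_classes import ERROR_CLASSES_MAP

    def template_params(error_class: str) -> set:
        # Collect <placeholder> names from the error class's message template.
        template = " ".join(ERROR_CLASSES_MAP[error_class]["message"])
        return set(re.findall(r"<(\w+)>", template))

    # The old sampleBy/approxQuantile call sites passed "return_type", but the
    # template expects "item_type", so that placeholder went unfilled:
    expected = template_params("DISALLOWED_TYPE_FOR_CONTAINER")
    provided = {"arg_name", "arg_type", "allowed_types", "return_type"}
    print(expected - provided)  # placeholders left unfilled, e.g. {'item_type'}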