diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py
index baf8dc82fd84a..4616943626122 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -1928,6 +1928,12 @@ def inline_outer(col: "ColumnOrName") -> Column:
 
 
 def json_tuple(col: "ColumnOrName", *fields: str) -> Column:
+    # Reject a call without any field name before the expression is built.
+    if not fields:
+        raise PySparkValueError(
+            error_class="CANNOT_BE_EMPTY",
+            message_parameters={"item": "field"},
+        )
     return _invoke_function("json_tuple", _to_col(col), *[lit(field) for field in fields])
 
 
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 0ff1ee2a7394a..286af684efb2b 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -14226,6 +14226,12 @@ def json_tuple(col: "ColumnOrName", *fields: str) -> Column:
     >>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect()
     [Row(key='1', c0='value1', c1='value2'), Row(key='2', c0='value12', c1=None)]
     """
+    # Reject a call without any field name before the expression is built.
+    if not fields:
+        raise PySparkValueError(
+            error_class="CANNOT_BE_EMPTY",
+            message_parameters={"item": "field"},
+        )
     sc = _get_active_spark_context()
     return _invoke_function("json_tuple", _to_java_column(col), _to_seq(sc, fields))
 
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index df1ddd0301ad1..aaf58136508a7 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -1452,6 +1452,21 @@ def test_current_timestamp(self):
         self.assertIsInstance(df.first()[0], datetime.datetime)
         self.assertEqual(df.schema.names[0], "now()")
 
+    def test_json_tuple_empty_fields(self):
+        """json_tuple called without field names raises PySparkValueError."""
+        df = self.spark.createDataFrame(
+            [
+                ("1", """{"f1": "value1", "f2": "value2"}"""),
+                ("2", """{"f1": "value12"}"""),
+            ],
+            ("key", "jstring"),
+        )
+        self.assertRaisesRegex(
+            PySparkValueError,
+            "At least one field must be specified",
+            lambda: df.select(F.json_tuple(df.jstring)),
+        )
+
 
 class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin):
     pass