diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index c6e70da1f8dc..b038cf6ce5ba 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -1579,6 +1579,13 @@ def test_row_without_field_sorting(self):
         self.assertEqual(r, expected)
         self.assertEqual(repr(r), "Row(b=1, a=2)")
 
+    def test_struct_field_from_json(self):
+        # SPARK-40820: fromJson with only name and type
+        json = {"name": "c1", "type": "string"}
+        struct_field = StructField.fromJson(json)
+
+        self.assertEqual(repr(struct_field), "StructField('c1', StringType(), True)")
+
 
 class TypesTests(TypesTestsMixin, ReusedSQLTestCase):
     pass
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 01db75b25003..84a4e7935415 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -769,8 +769,8 @@ def fromJson(cls, json: Dict[str, Any]) -> "StructField":
         return StructField(
             json["name"],
             _parse_datatype_json_value(json["type"]),
-            json["nullable"],
-            json["metadata"],
+            json.get("nullable", True),
+            json.get("metadata"),
         )
 
     def needConversion(self) -> bool:
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
index 2bd88d597563..5f563e3b7a8f 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -258,6 +258,11 @@ object DataType {
         ("nullable", JBool(nullable)),
         ("type", dataType: JValue)) =>
       StructField(name, parseDataType(dataType), nullable)
+    // Support reading schemas when 'nullable' and 'metadata' are missing.
+    case JSortedObject(
+        ("name", JString(name)),
+        ("type", dataType: JValue)) =>
+      StructField(name, parseDataType(dataType))
     case other =>
       throw new IllegalArgumentException(
         s"Failed to convert the JSON string '${compact(render(other))}' to a field.")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index 0e78f875ad7c..6daeda693002 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -317,6 +317,29 @@ class DataTypeSuite extends SparkFunSuite {
     assert(message.contains("Unrecognized token 'abcd'"))
   }
 
+  // SPARK-40820: fromJson with only name and type
+  test("Deserialize and serialize a schema without nullable or metadata") {
+    val schema =
+      """
+        |{
+        |  "type": "struct",
+        |  "fields": [
+        |    {
+        |      "name": "c1",
+        |      "type": "string"
+        |    }
+        |  ]
+        |}
+        |""".stripMargin
+    val dt = DataType.fromJson(schema)
+
+    assert(dt.simpleString === "struct<c1:string>")
+    assert(dt.json ===
+      """
+        |{"type":"struct","fields":[{"name":"c1","type":"string","nullable":true,"metadata":{}}]}
+        |""".stripMargin.trim)
+  }
+
   def checkDefaultSize(dataType: DataType, expectedDefaultSize: Int): Unit = {
     test(s"Check the default size of $dataType") {
       assert(dataType.defaultSize === expectedDefaultSize)
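
Note: a quick demonstration of the behavior this patch enables, as a minimal sketch; it assumes a PySpark build that includes the change, and the schema literal is illustrative.

    from pyspark.sql.types import StructField, StructType

    # Before this patch, StructField.fromJson raised a KeyError when
    # 'nullable' or 'metadata' was absent; with the patch they default
    # to True and {} respectively.
    field = StructField.fromJson({"name": "c1", "type": "string"})
    print(repr(field))  # StructField('c1', StringType(), True)

    # The same minimal field JSON is accepted inside a full struct schema,
    # since StructType.fromJson delegates to StructField.fromJson per field.
    schema = StructType.fromJson(
        {"type": "struct", "fields": [{"name": "c1", "type": "string"}]}
    )
    print(schema.simpleString())  # struct<c1:string>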