Skip to content

Commit 6f0c3af

Browse files
committed
[SPARK-54029][SQL] Add detailed error message for table metadata corruption to ease debugging
1 parent 06674ba commit 6f0c3af

File tree

2 files changed

+87
-1
lines changed

2 files changed

+87
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,17 @@ case class CatalogTable(
451451
*/
452452
def partitionSchema: StructType = {
453453
val partitionFields = schema.takeRight(partitionColumnNames.length)
454-
assert(partitionFields.map(_.name) == partitionColumnNames)
454+
val actualPartitionColumnNames = partitionFields.map(_.name)
455+
456+
assert(actualPartitionColumnNames == partitionColumnNames,
457+
"Corrupted table metadata detected for table " + identifier.unquotedString + ". " +
458+
"The partition column names in the table schema " +
459+
"do not match the declared partition columns. " +
460+
"Table schema columns: [" + schema.fieldNames.mkString(", ") + "] " +
461+
"Declared partition columns: [" + partitionColumnNames.mkString(", ") + "] " +
462+
"Actual partition columns from schema: " +
463+
"[" + actualPartitionColumnNames.mkString(", ") + "]. " +
464+
"This indicates corrupted table metadata that needs to be repaired.")
455465

456466
StructType(partitionFields)
457467
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,4 +1951,80 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually {
19511951
assert(catalog.getCachedTable(qualifiedName2) != null)
19521952
}
19531953
}
1954+
1955+
test("CatalogTable partitionSchema provides detailed error for corrupted metadata") {
1956+
// Test case 1: Partition columns don't match schema (wrong names)
1957+
val corruptedTable1 = CatalogTable(
1958+
identifier = TableIdentifier("corrupted_table1", Some("test_db")),
1959+
tableType = CatalogTableType.MANAGED,
1960+
storage = CatalogStorageFormat.empty,
1961+
schema = StructType(Seq(
1962+
StructField("id", IntegerType),
1963+
StructField("name", StringType),
1964+
StructField("year", IntegerType),
1965+
StructField("month", IntegerType)
1966+
)),
1967+
partitionColumnNames = Seq("year", "day") // "day" doesn't exist in schema
1968+
)
1969+
1970+
val exception1 = intercept[AssertionError] {
1971+
corruptedTable1.partitionSchema
1972+
}
1973+
1974+
val expectedMessage1 = "assertion failed: Corrupted table metadata detected " +
1975+
"for table test_db.corrupted_table1. " +
1976+
"The partition column names in the table schema " +
1977+
"do not match the declared partition columns. " +
1978+
"Table schema columns: [id, name, year, month] " +
1979+
"Declared partition columns: [year, day] " +
1980+
"Actual partition columns from schema: [year, month]. " +
1981+
"This indicates corrupted table metadata that needs to be repaired."
1982+
assert(exception1.getMessage === expectedMessage1)
1983+
1984+
// Test case 2: Wrong number of partition columns
1985+
val corruptedTable2 = CatalogTable(
1986+
identifier = TableIdentifier("corrupted_table2", Some("test_db")),
1987+
tableType = CatalogTableType.MANAGED,
1988+
storage = CatalogStorageFormat.empty,
1989+
schema = StructType(Seq(
1990+
StructField("id", IntegerType),
1991+
StructField("data", StringType),
1992+
StructField("partition_col", StringType)
1993+
)),
1994+
partitionColumnNames = Seq("partition_col", "extra_col") // Too many partition columns
1995+
)
1996+
1997+
val exception2 = intercept[AssertionError] {
1998+
corruptedTable2.partitionSchema
1999+
}
2000+
2001+
val expectedMessage2 = "assertion failed: Corrupted table metadata detected " +
2002+
"for table test_db.corrupted_table2. " +
2003+
"The partition column names in the table schema " +
2004+
"do not match the declared partition columns. " +
2005+
"Table schema columns: [id, data, partition_col] " +
2006+
"Declared partition columns: [partition_col, extra_col] " +
2007+
"Actual partition columns from schema: [data, partition_col]. " +
2008+
"This indicates corrupted table metadata that needs to be repaired."
2009+
assert(exception2.getMessage === expectedMessage2)
2010+
2011+
// Test case 3: Valid table should work without error
2012+
val validTable = CatalogTable(
2013+
identifier = TableIdentifier("valid_table", Some("test_db")),
2014+
tableType = CatalogTableType.MANAGED,
2015+
storage = CatalogStorageFormat.empty,
2016+
schema = StructType(Seq(
2017+
StructField("id", IntegerType),
2018+
StructField("name", StringType),
2019+
StructField("year", IntegerType),
2020+
StructField("month", IntegerType)
2021+
)),
2022+
partitionColumnNames = Seq("year", "month") // Matches schema
2023+
)
2024+
2025+
// This should not throw an exception
2026+
val partitionSchema = validTable.partitionSchema
2027+
assert(partitionSchema.fieldNames.toSeq == Seq("year", "month"))
2028+
assert(partitionSchema.fields.length == 2)
2029+
}
19542030
}

0 commit comments

Comments
 (0)