Skip to content

Commit 942366f

Browse files
committed
address comments.
1 parent 6c5c2d9 commit 942366f

File tree

3 files changed

+19
-19
lines changed

3 files changed

+19
-19
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,6 @@ object ParserUtils {
5050
}
5151
}
5252

53-
/** Check if duplicate exist in a set of column names. */
54-
def checkDuplicateNames[T](colNames: Seq[String], ctx: ParserRuleContext): Unit = {
55-
if (colNames.length != colNames.distinct.length) {
56-
val duplicateColumns = colNames.groupBy(identity).collect {
57-
case (x, ys) if ys.length > 1 => "\"" + x + "\""
58-
}
59-
throw new ParseException(s"Column repeated in partitioning column(s) or duplicate column " +
60-
s"name key in the table definition: ${duplicateColumns.mkString("[", ",", "]")}", ctx)
61-
}
62-
}
63-
6453
/** Get the code that creates the given node. */
6554
def source(ctx: ParserRuleContext): String = {
6655
val stream = ctx.getStart.getInputStream

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -880,14 +880,27 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
880880
val properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)
881881
val selectQuery = Option(ctx.query).map(plan)
882882

883+
// Ensure that no duplicate column name is used in the table definition
884+
val colNames = cols.map(_.name)
885+
if (colNames.length != colNames.distinct.length) {
886+
val duplicateColumns = colNames.groupBy(identity).collect {
887+
case (x, ys) if ys.length > 1 => "\"" + x + "\""
888+
}
889+
throw new ParseException(s"Duplicate column name key(s) in the table definition: " +
890+
duplicateColumns.mkString("[", ",", "]"), ctx)
891+
}
892+
893+
// Ensure that the existing columns are not reused as partition columns
894+
val partitionColsInTable = partitionCols.map(_.name).toSet.intersect(colNames.toSet)
895+
if (partitionColsInTable.nonEmpty) {
896+
throw new ParseException(s"Column repeated in partitioning column(s): " +
897+
partitionColsInTable.map("\"" + _ + "\"").mkString("[", ",", "]"), ctx)
898+
}
899+
883900
// Note: Hive requires partition columns to be distinct from the schema, so we need
884901
// to include the partition columns here explicitly
885902
val schema = cols ++ partitionCols
886903

887-
// Ensuring whether no duplicate name is used in table definition;
888-
// Also ensuring the existing columns are not used as partition columns
889-
checkDuplicateNames(colNames = schema.map(_.name), ctx)
890-
891904
// Storage format
892905
val defaultStorage: CatalogStorageFormat = {
893906
val defaultStorageType = conf.getConfString("hive.default.fileformat", "textfile")

sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -322,15 +322,13 @@ class DDLCommandSuite extends PlanTest {
322322
test("create table - column repeated in partitioning columns") {
323323
val query = "CREATE TABLE tab1 (key INT, value STRING) PARTITIONED BY (key INT, hr STRING)"
324324
val e = intercept[ParseException] { parser.parsePlan(query) }
325-
assert(e.getMessage.contains("Column repeated in partitioning column(s) or " +
326-
"duplicate column name key in the table definition: [\"key\"]"))
325+
assert(e.getMessage.contains("Column repeated in partitioning column(s): [\"key\"]"))
327326
}
328327

329328
test("create table - duplicate column name key in the table definition") {
330329
val query = "CREATE TABLE tab1 (key INT, key STRING)"
331330
val e = intercept[ParseException] { parser.parsePlan(query) }
332-
assert(e.getMessage.contains("Column repeated in partitioning column(s) or " +
333-
"duplicate column name key in the table definition: [\"key\"]"))
331+
assert(e.getMessage.contains("Duplicate column name key(s) in the table definition: [\"key\"]"))
334332
}
335333

336334
test("create table using - with partitioned by") {

0 commit comments

Comments
 (0)