From 18dd5b22cf8dfd58de4a5dd3a222228c3a3e19ed Mon Sep 17 00:00:00 2001
From: milastdbx <milan.stefanovic@databricks.com>
Date: Thu, 16 Nov 2023 14:32:49 +0100
Subject: [PATCH 1/6] introducing `select * except` syntax

---
 .../sql/catalyst/parser/SqlBaseParser.g4      |   6 +
 .../sql/catalyst/analysis/unresolved.scala    | 163 ++++++++++-
 .../sql/catalyst/parser/AstBuilder.scala      |  12 +
 .../analyzer-results/selectExcept.sql.out     | 252 ++++++++++++++++++
 .../sql-tests/inputs/selectExcept.sql         |  45 ++++
 .../sql-tests/results/selectExcept.sql.out    | 247 +++++++++++++++++
 .../org/apache/spark/sql/SQLQuerySuite.scala  |  99 +++++++
 7 files changed, 819 insertions(+), 5 deletions(-)
 create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
 create mode 100644 sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
 create mode 100644 sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out

diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 609bd72e21935..198a5266ca3b2 100644
--- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -587,6 +587,10 @@ notMatchedBySourceAction
     | UPDATE SET assignmentList
     ;
 
+exceptClause
+    : EXCEPT '(' exceptCols=multipartIdentifierList ')'
+    ;
+
 assignmentList
     : assignment (COMMA assignment)*
     ;
@@ -969,6 +973,8 @@ primaryExpression
     | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN                                   #last
     | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN            #position
     | constant                                                                                 #constantDefault
+    | ASTERISK EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN                #starExcept
+    | qualifiedName DOT ASTERISK EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN   #starExcept
     | ASTERISK                                                                                 #star
     | qualifiedName DOT ASTERISK                                                               #star
     | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN                          #rowConstructor
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index e232dfc05faa8..82667ac23e7e2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -358,10 +358,10 @@ abstract class Star extends LeafExpression with NamedExpression {
   def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression]
 }
 
-
 /**
  * Represents all of the input attributes to a given relational operator, for example in
  * "SELECT * FROM ...".
+ * "SELECT * FROM ..." or "SELECT * EXCEPT(...) FROM ..."
  *
  * This is also used to expand structs. For example:
  * "SELECT record.* from (SELECT struct(a,b,c) as record ...)
@@ -369,9 +369,12 @@ abstract class Star extends LeafExpression with NamedExpression {
  * @param target an optional name that should be the target of the expansion.  If omitted all
  *              targets' columns are produced. This can either be a table name or struct name. This
  *              is a list of identifiers that is the path of the expansion.
- */
-case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevaluable {
-
+ *
+ * This class provides the shared behavior between the classes for SELECT * ([[UnresolvedStar]])
+ * and SELECT * EXCEPT ([[UnresolvedStarExcept]]). [[UnresolvedStar]] is just a case class of this,
+ * while [[UnresolvedStarExcept]] adds some additional logic to the expand method.
+  */
+abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star with Unevaluable {
   /**
    * Returns true if the nameParts is a subset of the last elements of qualifier of the attribute.
    *
@@ -383,7 +386,7 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu
    *   - `SELECT t.* FROM ns1.ns2.t` where nameParts is Seq("t") and
    *     qualifier is Seq("ns1", "ns2", "t").
    */
-  private def matchedQualifier(
+  protected def matchedQualifier(
       attribute: Attribute,
       nameParts: Seq[String],
       resolver: Resolver): Boolean = {
@@ -444,6 +447,156 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu
   override def toString: String = target.map(_.mkString("", ".", ".")).getOrElse("") + "*"
 }
 
+/**
+ * Represents some of the input attributes to a given relational operator, for example in
+ * "SELECT * EXCEPT(a) FROM ...".
+ *
+ * @param target an optional name that should be the target of the expansion. If omitted all
+ *              targets' columns are produced. This can only be a table name. This
+ *              is a list of identifiers that is the path of the expansion.
+ *
+ * @param excepts a list of names that should be excluded from the expansion.
+ *
+ */
+case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[String]])
+  extends UnresolvedStarBase(target) {
+
+  /**
+   * We expand the * EXCEPT by the following three steps:
+   * 1. use the original .expand() to get top-level column list or struct expansion
+   * 2. resolve excepts (with respect to the Seq[NamedExpression] returned from (1))
+   * 3. filter the expanded columns with the resolved except list. recursively apply filtering in
+   *    case of nested columns in the except list (in order to rewrite structs)
+   */
+  override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = {
+    // Use the UnresolvedStarBase expand method to get a seq of NamedExpressions corresponding to
+    // the star expansion. This will yield a list of top-level columns from the logical plan's
+    // output, or in the case of struct expansion (e.g. target=`x` for SELECT x.*) it will give
+    // a seq of NamedExpressions corresponding to struct fields.
+    val expandedCols = super.expand(input, resolver)
+
+    // resolve except list with respect to the expandedCols
+    val resolvedExcepts = excepts.map { exceptParts =>
+      AttributeSeq(expandedCols.map(_.toAttribute)).resolve(exceptParts, resolver).getOrElse {
+        val orderedCandidates = StringUtils.orderSuggestedIdentifiersBySimilarity(
+          UnresolvedAttribute(exceptParts).name, expandedCols.map(a => a.qualifier :+ a.name))
+        // if target is defined and expandedCols does not include any Attributes, it must be struct
+        // expansion; give message suggesting to use unqualified names of nested fields.
+        if (target.isDefined && !expandedCols.exists(_.isInstanceOf[Attribute])) {
+            throw new AnalysisException(
+              errorClass = "EXCEPT_UNRESOLVED_COLUMN_IN_STRUCT_EXPANSION",
+              messageParameters = Map(
+                "objectName" -> UnresolvedAttribute(exceptParts).sql,
+                "objectList" -> orderedCandidates.mkString(", ")))
+        } else {
+            throw QueryCompilationErrors
+              .unresolvedColumnError(UnresolvedAttribute(exceptParts).name, orderedCandidates)
+        }
+      }
+    }
+
+    // Convert each resolved except into a pair of (col: Attribute, nestedColumn) representing the
+    // top level column in expandedCols that we must 'filter' based on nestedColumn.
+    @scala.annotation.tailrec
+    def getRootStruct(expr: Expression, nestedColumn: Seq[String] = Nil)
+      : (NamedExpression, Seq[String]) = expr match {
+      case GetStructField(fieldExpr, _, Some(fieldName)) =>
+        getRootStruct(fieldExpr, fieldName +: nestedColumn)
+      case e: NamedExpression => e -> nestedColumn
+      case other: ExtractValue => throw new AnalysisException(
+        errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE",
+        messageParameters = Map("columnName" -> other.sql, "dataType" -> other.dataType.toString))
+    }
+    // An exceptPair represents the column in expandedCols along with the path of a nestedColumn
+    // that should be except-ed. Consider two examples:
+    // 1. excepting the entire col1 = (col1, Seq())
+    // 2. excepting a nested field in col2, col2.a.b = (col2, Seq(a, b))
+    // INVARIANT: we rely on the structure of the resolved except being an Alias of GetStructField
+    // in the case of nested columns.
+    val exceptPairs = resolvedExcepts.map {
+      case Alias(exceptExpr, name) => getRootStruct(exceptExpr)
+      case except: NamedExpression => except -> Seq.empty
+    }
+
+    // Filter columns which correspond to ones listed in the except list and return a new list of
+    // columns which exclude the columns in the except list. The 'filtering' manifests as either
+    // dropping the column from the list of columns we return, or rewriting the projected column in
+    // order to remove excepts that refer to nested columns. For example, with the example above:
+    // excepts = Seq(
+    //   (col1, Seq()),    => filter col1 from the output
+    //   (col2, Seq(a, b)) => rewrite col2 in the output so that it doesn't include the nested field
+    // )                      corresponding to col2.a.b
+    //
+    // This occurs in two steps:
+    // 1. group the excepts by the column they refer to (groupedExcepts)
+    // 2. filter/rewrite input columns based on four cases:
+    //    a. column doesn't match any groupedExcepts => column unchanged
+    //    b. column exists in groupedExcepts and:
+    //       i.   none of remainingExcepts are empty => recursively apply filterColumns over the
+    //            struct fields in order to rewrite the struct
+    //       ii.  a remainingExcept is empty, but there are multiple remainingExcepts => we must
+    //            have duplicate/overlapping excepts - throw an error
+    //       iii. [otherwise] remainingExcept is exactly Seq(Seq()) => this is the base 'filtering'
+    //            case. we omit the column from the output (this is a column we would like to
+    //            except). NOTE: this case isn't explicitly listed in the `collect` below since we
+    //            'collect' columns which match the cases above and omit ones that fall into this
+    //            remaining case.
+    def filterColumns(columns: Seq[NamedExpression], excepts: Seq[(NamedExpression, Seq[String])])
+      : Seq[NamedExpression] = {
+      // group the except pairs by the column they refer to. NOTE: no groupMap until scala 2.13
+      val groupedExcepts: AttributeMap[Seq[Seq[String]]] =
+        AttributeMap(excepts.groupBy(_._1.toAttribute).mapValues(v => v.map(_._2)))
+
+      // map input columns while searching for the except entry corresponding to the current column
+      columns.map(col => col -> groupedExcepts.get(col.toAttribute)).collect {
+        // pass through columns that don't match anything in groupedExcepts
+        case (col, None) => col
+        // found a match but nestedExcepts has remaining excepts - recurse to rewrite the struct
+        case (col, Some(nestedExcepts)) if !nestedExcepts.exists(_.isEmpty) =>
+          val fields = col.dataType match {
+            case s: StructType => s.fields
+            // we shouldn't be here since we throw the same error above (in getRootStruct), but
+            // nonetheless just throw the same error
+            case _ => throw new AnalysisException(
+              errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE",
+              messageParameters =
+                Map("columnName" -> col.qualifiedName, "dataType" -> col.dataType.toString))
+          }
+          val extractedFields = fields.zipWithIndex.map { case (f, i) =>
+            Alias(GetStructField(col, i), f.name)()
+          }
+          val newExcepts = nestedExcepts.map { nestedExcept =>
+            // INVARIANT: we cannot have duplicate column names in nested columns, thus, this `head`
+            // will find the one and only column corresponding to the correct extractedField.
+            extractedFields.collectFirst { case col if resolver(col.name, nestedExcept.head) =>
+              col.toAttribute -> nestedExcept.tail
+            }.get
+          }
+          Alias(CreateStruct(filterColumns(extractedFields.toSeq, newExcepts)), col.name)()
+        // if there are multiple nestedExcepts but one is empty we must have overlapping except
+        // columns. throw an error.
+        case (col, Some(nestedExcepts)) if nestedExcepts.size > 1 =>
+          throw new AnalysisException(errorClass = "EXCEPT_OVERLAPPING_COLUMNS", Map.empty)
+      }
+    }
+
+    filterColumns(expandedCols, exceptPairs)
+  }
+}
+
+/**
+ * Represents all of the input attributes to a given relational operator, for example in
+ * "SELECT * FROM ...".
+ *
+ * This is also used to expand structs. For example:
+ * "SELECT record.* from (SELECT struct(a,b,c) as record ...)
+ *
+ * @param target an optional name that should be the target of the expansion.  If omitted all
+ *              targets' columns are produced. This can either be a table name or struct name. This
+ *              is a list of identifiers that is the path of the expansion.
+ */
+case class UnresolvedStar(target: Option[Seq[String]]) extends UnresolvedStarBase(target)
+
 /**
  * Represents all of the input attributes to a given relational operator, for example in
  * "SELECT `(id)?+.+` FROM ...".
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 09161d8f8da53..81f40739dea30 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1789,6 +1789,18 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
     UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq))
   }
 
+  /**
+   * Create a star-except (i.e. all - except list) expression; this selects all elements in the
+   * specified object except those in the except list.
+   * Both un-targeted (global) and targeted aliases are supported.
+   */
+  override def visitStarExcept(ctx: StarExceptContext): Expression = withOrigin(ctx) {
+    val exceptCols = ctx.exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]])
+    UnresolvedStarExcept(
+      Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq),
+      exceptCols.toSeq)
+  }
+
   /**
    * Create an aliased expression if an alias is specified. Both single and multi-aliases are
    * supported.
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
new file mode 100644
index 0000000000000..5c7c888b6b36c
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -0,0 +1,252 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
+  (10, "name1"),
+  (20, "name2"),
+  (30, "name3"),
+  (40, "name4"),
+  (50, "name5"),
+  (70, "name7")
+AS tbl_view(id, name)
+-- !query analysis
+CreateViewCommand `tbl_view`, SELECT * FROM VALUES
+  (10, "name1"),
+  (20, "name2"),
+  (30, "name3"),
+  (40, "name4"),
+  (50, "name5"),
+  (70, "name7")
+AS tbl_view(id, name), false, false, LocalTempView, true
+   +- Project [id#x, name#x]
+      +- SubqueryAlias tbl_view
+         +- LocalRelation [id#x, name#x]
+
+
+-- !query
+CREATE TABLE ids (id INT) USING CSV
+-- !query analysis
+org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
+{
+  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
+  "sqlState" : "42P07",
+  "messageParameters" : {
+    "relationName" : "`spark_catalog`.`default`.`ids`"
+  }
+}
+
+
+-- !query
+SELECT * FROM tbl_view
+-- !query analysis
+Project [id#x, name#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
+         +- Project [id#x, name#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x]
+
+
+-- !query
+SELECT * EXCEPT (id) FROM tbl_view
+-- !query analysis
+Project [name#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
+         +- Project [id#x, name#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x]
+
+
+-- !query
+SELECT * EXCEPT (name) FROM tbl_view
+-- !query analysis
+Project [id#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
+         +- Project [id#x, name#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x]
+
+
+-- !query
+SELECT * EXCEPT (id, name) FROM tbl_view
+-- !query analysis
+Project
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
+         +- Project [id#x, name#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x]
+
+
+-- !query
+CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
+{
+  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
+  "sqlState" : "42P07",
+  "messageParameters" : {
+    "relationName" : "`spark_catalog`.`default`.`namesTbl`"
+  }
+}
+
+
+-- !query
+SELECT * FROM namesTbl
+-- !query analysis
+Project [name#x]
++- SubqueryAlias spark_catalog.default.namestbl
+   +- Relation spark_catalog.default.namestbl[name#x] csv
+
+
+-- !query
+SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`)
+-- !query analysis
+Project [a_b_c#x]
++- SubqueryAlias __auto_generated_subquery_name
+   +- Project [1 AS a_b_c#x, 2 AS a-b-c#x]
+      +- OneRowRelation
+
+
+-- !query
+SELECT tbl_view.* EXCEPT (name) FROM tbl_view
+-- !query analysis
+Project [id#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
+         +- Project [id#x, name#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x]
+
+
+-- !query
+INSERT INTO ids
+SELECT * EXCEPT (name) FROM tbl_view
+-- !query analysis
+InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/ids, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/ids], Append, `spark_catalog`.`default`.`ids`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/ids), [id]
++- Project [id#x]
+   +- SubqueryAlias tbl_view
+      +- View (`tbl_view`, [id#x,name#x])
+         +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
+            +- Project [id#x, name#x]
+               +- SubqueryAlias tbl_view
+                  +- LocalRelation [id#x, name#x]
+
+
+-- !query
+SELECT * FROM ids
+-- !query analysis
+Project [id#x]
++- SubqueryAlias spark_catalog.default.ids
+   +- Relation spark_catalog.default.ids[id#x] csv
+
+
+-- !query
+SELECT * EXCEPT (ids.id) FROM ids
+-- !query analysis
+Project
++- SubqueryAlias spark_catalog.default.ids
+   +- Relation spark_catalog.default.ids[id#x] csv
+
+
+-- !query
+SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view)
+-- !query analysis
+Project [id#x]
++- SubqueryAlias __auto_generated_subquery_name
+   +- Project [id#x]
+      +- SubqueryAlias tbl_view
+         +- View (`tbl_view`, [id#x,name#x])
+            +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
+               +- Project [id#x, name#x]
+                  +- SubqueryAlias tbl_view
+                     +- LocalRelation [id#x, name#x]
+
+
+-- !query
+SELECT * EXCEPT name FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "'name'",
+    "hint" : ""
+  }
+}
+
+
+-- !query
+SELECT * EXCEPT() name FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "')'",
+    "hint" : ""
+  }
+}
+
+
+-- !query
+SELECT * EXCEPT(invalid_column) FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`invalid_column`",
+    "proposal" : "`id`, `name`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 31,
+    "fragment" : "* EXCEPT(invalid_column)"
+  } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(id, invalid_column) FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`invalid_column`",
+    "proposal" : "`id`, `name`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 35,
+    "fragment" : "* EXCEPT(id, invalid_column)"
+  } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(id, id) FROM tbl_view
+-- !query analysis
+org.apache.spark.SparkException
+{
+  "errorClass" : "INTERNAL_ERROR",
+  "sqlState" : "XX000",
+  "messageParameters" : {
+    "message" : "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'"
+  }
+}
diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
new file mode 100644
index 0000000000000..15ce07ec4cbc5
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
@@ -0,0 +1,45 @@
+CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
+  (10, "name1"),
+  (20, "name2"),
+  (30, "name3"),
+  (40, "name4"),
+  (50, "name5"),
+  (70, "name7")
+AS tbl_view(id, name);
+
+CREATE TABLE ids (id INT) USING CSV;
+
+-- Happy path
+-- EXCEPT basic scenario
+SELECT * FROM tbl_view;
+SELECT * EXCEPT (id) FROM tbl_view;
+SELECT * EXCEPT (name) FROM tbl_view;
+-- EXCEPT all columns
+SELECT * EXCEPT (id, name) FROM tbl_view;
+-- CREATE TABLE with SELECT
+CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view;
+SELECT * FROM namesTbl;
+-- EXCEPT special character names
+SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`);
+-- EXCEPT qualified star
+SELECT tbl_view.* EXCEPT (name) FROM tbl_view;
+-- EXCEPT insert into select
+INSERT INTO ids
+SELECT * EXCEPT (name) FROM tbl_view;
+SELECT * FROM ids;
+-- EXCEPT qualified columns
+SELECT * EXCEPT (ids.id) FROM ids;
+-- EXCEPT nested
+SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view);
+
+-- Errors
+-- EXCEPT missing brackets
+SELECT * EXCEPT name FROM tbl_view;
+-- EXCEPT no columns
+SELECT * EXCEPT() name FROM tbl_view;
+-- EXCEPT invalid column
+SELECT * EXCEPT(invalid_column) FROM tbl_view;
+-- EXCEPT find invalid column
+SELECT * EXCEPT(id, invalid_column) FROM tbl_view;
+-- EXCEPT duplicate column
+SELECT * EXCEPT(id, id) FROM tbl_view;
diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
new file mode 100644
index 0000000000000..2ec1d0e2603ce
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
@@ -0,0 +1,247 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
+  (10, "name1"),
+  (20, "name2"),
+  (30, "name3"),
+  (40, "name4"),
+  (50, "name5"),
+  (70, "name7")
+AS tbl_view(id, name)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE ids (id INT) USING CSV
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM tbl_view
+-- !query schema
+struct<id:int,name:string>
+-- !query output
+10	name1
+20	name2
+30	name3
+40	name4
+50	name5
+70	name7
+
+
+-- !query
+SELECT * EXCEPT (id) FROM tbl_view
+-- !query schema
+struct<name:string>
+-- !query output
+name1
+name2
+name3
+name4
+name5
+name7
+
+
+-- !query
+SELECT * EXCEPT (name) FROM tbl_view
+-- !query schema
+struct<id:int>
+-- !query output
+10
+20
+30
+40
+50
+70
+
+
+-- !query
+SELECT * EXCEPT (id, name) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM namesTbl
+-- !query schema
+struct<name:string>
+-- !query output
+name1
+name2
+name3
+name4
+name5
+name7
+
+
+-- !query
+SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`)
+-- !query schema
+struct<a_b_c:int>
+-- !query output
+1
+
+
+-- !query
+SELECT tbl_view.* EXCEPT (name) FROM tbl_view
+-- !query schema
+struct<id:int>
+-- !query output
+10
+20
+30
+40
+50
+70
+
+
+-- !query
+INSERT INTO ids
+SELECT * EXCEPT (name) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM ids
+-- !query schema
+struct<id:int>
+-- !query output
+10
+20
+30
+40
+50
+70
+
+
+-- !query
+SELECT * EXCEPT (ids.id) FROM ids
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view)
+-- !query schema
+struct<id:int>
+-- !query output
+10
+20
+30
+40
+50
+70
+
+
+-- !query
+SELECT * EXCEPT name FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "'name'",
+    "hint" : ""
+  }
+}
+
+
+-- !query
+SELECT * EXCEPT() name FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "')'",
+    "hint" : ""
+  }
+}
+
+
+-- !query
+SELECT * EXCEPT(invalid_column) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`invalid_column`",
+    "proposal" : "`id`, `name`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 31,
+    "fragment" : "* EXCEPT(invalid_column)"
+  } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(id, invalid_column) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`invalid_column`",
+    "proposal" : "`id`, `name`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 35,
+    "fragment" : "* EXCEPT(id, invalid_column)"
+  } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(id, id) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+{
+  "errorClass" : "INTERNAL_ERROR",
+  "sqlState" : "XX000",
+  "messageParameters" : {
+    "message" : "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'"
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index b7201c2d96d77..053d4e98bf26c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -168,6 +168,105 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       Row(1, 1) :: Row(1, 2) :: Row(2, 1) :: Row(2, 2) :: Row(3, 1) :: Row(3, 2) :: Nil)
   }
 
+  test("support star except") {
+    checkAnswer(
+      sql(
+        """
+          |SELECT * EXCEPT (b)
+          |FROM testData2
+        """.stripMargin),
+      Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil)
+  }
+
+  test("support table.star except") {
+    checkAnswer(
+      sql(
+        """
+          |SELECT r.* EXCEPT (r.b)
+          |FROM testData l join testData2 r on (l.key = r.a)
+        """.stripMargin),
+      Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil)
+  }
+
+  test("support table.star except all columns") {
+    checkAnswer(
+      sql(
+        """
+          |SELECT r.* EXCEPT (r.b, r.a)
+          |FROM testData l join testData2 r on (l.key = r.a)
+        """.stripMargin),
+      Row() :: Row() :: Row() :: Row() :: Row() :: Row() :: Nil)
+  }
+
+  test("support table.star except nested") {
+    checkAnswer(
+      sql(
+        """
+          |SELECT *
+          |FROM (SELECT * EXCEPT (b) FROM testData2)
+        """.stripMargin),
+      Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil)
+  }
+
+  test("except errors") {
+    // Try star expanding a scalar. This should fail.
+    checkError(
+      exception = intercept[ParseException]{
+        sql("select * EXCEPT () from testData2")
+      },
+      errorClass = "PARSE_SYNTAX_ERROR",
+      sqlState = Option("42601"),
+      parameters = Map("error" -> "')'", "hint" -> "")
+    )
+
+    // Try star expanding a scalar. This should fail.
+    checkError(
+      exception = intercept[ParseException] {
+        sql("select * EXCEPT b testData2")
+      },
+      errorClass = "PARSE_SYNTAX_ERROR",
+      sqlState = Option("42601"),
+      parameters = Map("error" -> "'b'", "hint" -> "")
+    )
+
+    // Try star expanding a scalar. This should fail.
+    checkError(
+      exception = intercept[SparkException] {
+        sql("select * EXCEPT(b, b) from testData2")
+      },
+      errorClass = "INTERNAL_ERROR",
+      parameters = Map("message" -> "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'")
+    )
+
+    // Try star expanding a scalar. This should fail.
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("select * EXCEPT(invalid_column) from testData2")
+      },
+      errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+      sqlState = Option("42703"),
+      parameters = Map("objectName" -> "`invalid_column`", "proposal" -> "`a`, `b`"),
+      matchPVals = true,
+      queryContext = Array(
+        ExpectedContext(fragment = "* EXCEPT(invalid_column)", start = 7, stop = 30)
+      )
+    )
+
+    // Try star expanding a scalar. This should fail.
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("select * EXCEPT(a, invalid_column) from testData2")
+      },
+      errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+      sqlState = Option("42703"),
+      parameters = Map("objectName" -> "`invalid_column`", "proposal" -> "`a`, `b`"),
+      matchPVals = true,
+      queryContext = Array(
+        ExpectedContext(fragment = "* EXCEPT(a, invalid_column)", start = 7, stop = 33)
+      )
+    )
+  }
+
   test("self join with alias in agg") {
     withTempView("df") {
       Seq(1, 2, 3)

From bc31ce217b3e9c84ee3ce44de6e66065bf20b009 Mon Sep 17 00:00:00 2001
From: milastdbx <milan.stefanovic@databricks.com>
Date: Tue, 21 Nov 2023 13:04:01 +0100
Subject: [PATCH 2/6] addressing comments

---
 .../main/resources/error/error-classes.json   |  12 ++
 .../sql/catalyst/parser/SqlBaseParser.g4      |   8 +-
 .../sql/catalyst/analysis/unresolved.scala    |  28 +--
 .../sql/catalyst/parser/AstBuilder.scala      |  12 +-
 .../analyzer-results/selectExcept.sql.out     | 193 ++++++++++++------
 .../sql-tests/inputs/selectExcept.sql         |  28 ++-
 .../sql-tests/results/selectExcept.sql.out    | 183 ++++++++++++-----
 .../org/apache/spark/sql/SQLQuerySuite.scala  |  99 ---------
 8 files changed, 318 insertions(+), 245 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index afcd841a2ce04..694127e655c4d 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -987,6 +987,18 @@
     ],
     "sqlState" : "42K09"
   },
+  "EXCEPT_NESTED_COLUMN_INVALID_TYPE" : {
+    "message" : [
+      "EXCEPT column <columnName> was resolved and expected to be StructType, but found type <dataType>."
+    ],
+    "sqlState" : "428H2"
+  },
+  "EXCEPT_OVERLAPPING_COLUMNS" : {
+    "message" : [
+      "Columns in an EXCEPT list must be distinct and non-overlapping."
+    ],
+    "sqlState" : "42702"
+  },
   "EXCEED_LIMIT_LENGTH" : {
     "message" : [
       "Exceeds char/varchar type length limitation: <limit>."
diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 198a5266ca3b2..439a12c301817 100644
--- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -588,7 +588,7 @@ notMatchedBySourceAction
     ;
 
 exceptClause
-    : EXCEPT '(' exceptCols=multipartIdentifierList ')'
+    : EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN
     ;
 
 assignmentList
@@ -973,10 +973,8 @@ primaryExpression
     | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN                                   #last
     | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN            #position
     | constant                                                                                 #constantDefault
-    | ASTERISK EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN                #starExcept
-    | qualifiedName DOT ASTERISK EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN   #starExcept
-    | ASTERISK                                                                                 #star
-    | qualifiedName DOT ASTERISK                                                               #star
+    | ASTERISK exceptClause?                                                                   #star
+    | qualifiedName DOT ASTERISK exceptClause?                                                 #star
     | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN                          #rowConstructor
     | LEFT_PAREN query RIGHT_PAREN                                                             #subqueryExpression
     | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index 82667ac23e7e2..4dc67839d8683 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier}
 import org.apache.spark.sql.catalyst.expressions._
@@ -482,26 +483,18 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St
           UnresolvedAttribute(exceptParts).name, expandedCols.map(a => a.qualifier :+ a.name))
         // if target is defined and expandedCols does not include any Attributes, it must be struct
         // expansion; give message suggesting to use unqualified names of nested fields.
-        if (target.isDefined && !expandedCols.exists(_.isInstanceOf[Attribute])) {
-            throw new AnalysisException(
-              errorClass = "EXCEPT_UNRESOLVED_COLUMN_IN_STRUCT_EXPANSION",
-              messageParameters = Map(
-                "objectName" -> UnresolvedAttribute(exceptParts).sql,
-                "objectList" -> orderedCandidates.mkString(", ")))
-        } else {
-            throw QueryCompilationErrors
-              .unresolvedColumnError(UnresolvedAttribute(exceptParts).name, orderedCandidates)
-        }
+        throw QueryCompilationErrors
+          .unresolvedColumnError(UnresolvedAttribute(exceptParts).name, orderedCandidates)
       }
     }
 
     // Convert each resolved except into a pair of (col: Attribute, nestedColumn) representing the
     // top level column in expandedCols that we must 'filter' based on nestedColumn.
     @scala.annotation.tailrec
-    def getRootStruct(expr: Expression, nestedColumn: Seq[String] = Nil)
+    def getRootColumn(expr: Expression, nestedColumn: Seq[String] = Nil)
       : (NamedExpression, Seq[String]) = expr match {
       case GetStructField(fieldExpr, _, Some(fieldName)) =>
-        getRootStruct(fieldExpr, fieldName +: nestedColumn)
+        getRootColumn(fieldExpr, fieldName +: nestedColumn)
       case e: NamedExpression => e -> nestedColumn
       case other: ExtractValue => throw new AnalysisException(
         errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE",
@@ -514,7 +507,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St
     // INVARIANT: we rely on the structure of the resolved except being an Alias of GetStructField
     // in the case of nested columns.
     val exceptPairs = resolvedExcepts.map {
-      case Alias(exceptExpr, name) => getRootStruct(exceptExpr)
+      case Alias(exceptExpr, name) => getRootColumn(exceptExpr)
       case except: NamedExpression => except -> Seq.empty
     }
 
@@ -555,12 +548,9 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St
         case (col, Some(nestedExcepts)) if !nestedExcepts.exists(_.isEmpty) =>
           val fields = col.dataType match {
             case s: StructType => s.fields
-            // we shouldn't be here since we throw the same error above (in getRootStruct), but
-            // nonetheless just throw the same error
-            case _ => throw new AnalysisException(
-              errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE",
-              messageParameters =
-                Map("columnName" -> col.qualifiedName, "dataType" -> col.dataType.toString))
+            // we shouldn't be here since we EXCEPT_NEXTED_COLUMN_INVALID_TYPE in getRootColumn
+            // for this case - just throw internal error
+            case _ => throw SparkException.internalError("Invalid column type")
           }
           val extractedFields = fields.zipWithIndex.map { case (f, i) =>
             Alias(GetStructField(col, i), f.name)()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 81f40739dea30..00d3059ec5fad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1786,7 +1786,12 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
    * Both un-targeted (global) and targeted aliases are supported.
    */
   override def visitStar(ctx: StarContext): Expression = withOrigin(ctx) {
-    UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq))
+    if (ctx.exceptClause != null) {
+      visitStarExcept(ctx)
+    }
+    else {
+      UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq))
+    }
   }
 
   /**
@@ -1794,8 +1799,9 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
    * specified object except those in the except list.
    * Both un-targeted (global) and targeted aliases are supported.
    */
-  override def visitStarExcept(ctx: StarExceptContext): Expression = withOrigin(ctx) {
-    val exceptCols = ctx.exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]])
+  def visitStarExcept(ctx: StarContext): Expression = withOrigin(ctx) {
+    val exceptCols = ctx.exceptClause
+      .exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]])
     UnresolvedStarExcept(
       Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq),
       exceptCols.toSeq)
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
index 5c7c888b6b36c..38658ae5a71fc 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -1,25 +1,27 @@
 -- Automatically generated by SQLQueryTestSuite
 -- !query
 CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
-  (10, "name1"),
-  (20, "name2"),
-  (30, "name3"),
-  (40, "name4"),
-  (50, "name5"),
-  (70, "name7")
-AS tbl_view(id, name)
+  (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))),
+  (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))),
+  (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))),
+  (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))),
+  (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))),
+  (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))),
+  (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g")))
+AS tbl_view(id, name, data)
 -- !query analysis
 CreateViewCommand `tbl_view`, SELECT * FROM VALUES
-  (10, "name1"),
-  (20, "name2"),
-  (30, "name3"),
-  (40, "name4"),
-  (50, "name5"),
-  (70, "name7")
-AS tbl_view(id, name), false, false, LocalTempView, true
-   +- Project [id#x, name#x]
+  (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))),
+  (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))),
+  (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))),
+  (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))),
+  (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))),
+  (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))),
+  (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g")))
+AS tbl_view(id, name, data), false, false, LocalTempView, true
+   +- Project [id#x, name#x, data#x]
       +- SubqueryAlias tbl_view
-         +- LocalRelation [id#x, name#x]
+         +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
@@ -38,53 +40,101 @@ org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 -- !query
 SELECT * FROM tbl_view
 -- !query analysis
-Project [id#x, name#x]
+Project [id#x, name#x, data#x]
 +- SubqueryAlias tbl_view
-   +- View (`tbl_view`, [id#x,name#x])
-      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
-         +- Project [id#x, name#x]
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
             +- SubqueryAlias tbl_view
-               +- LocalRelation [id#x, name#x]
+               +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
 SELECT * EXCEPT (id) FROM tbl_view
 -- !query analysis
-Project [name#x]
+Project [name#x, data#x]
 +- SubqueryAlias tbl_view
-   +- View (`tbl_view`, [id#x,name#x])
-      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
-         +- Project [id#x, name#x]
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
             +- SubqueryAlias tbl_view
-               +- LocalRelation [id#x, name#x]
+               +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
 SELECT * EXCEPT (name) FROM tbl_view
 -- !query analysis
-Project [id#x]
+Project [id#x, data#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (data) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (data.f1) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x, named_struct(s2, data#x.s2) AS data#x]
 +- SubqueryAlias tbl_view
-   +- View (`tbl_view`, [id#x,name#x])
-      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
-         +- Project [id#x, name#x]
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
             +- SubqueryAlias tbl_view
-               +- LocalRelation [id#x, name#x]
+               +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
-SELECT * EXCEPT (id, name) FROM tbl_view
+SELECT * EXCEPT (data.s2) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x, named_struct(f1, data#x.f1) AS data#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (data.s2.f2) FROM tbl_view
+-- !query analysis
+Project [id#x, name#x, named_struct(f1, data#x.f1, s2, named_struct(f3, data#x.s2.f3)) AS data#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT * EXCEPT (id, name, data) FROM tbl_view
 -- !query analysis
 Project
 +- SubqueryAlias tbl_view
-   +- View (`tbl_view`, [id#x,name#x])
-      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
-         +- Project [id#x, name#x]
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
             +- SubqueryAlias tbl_view
-               +- LocalRelation [id#x, name#x]
+               +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
-CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view
+CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view
 -- !query analysis
 org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 {
@@ -116,27 +166,27 @@ Project [a_b_c#x]
 -- !query
 SELECT tbl_view.* EXCEPT (name) FROM tbl_view
 -- !query analysis
-Project [id#x]
+Project [id#x, data#x]
 +- SubqueryAlias tbl_view
-   +- View (`tbl_view`, [id#x,name#x])
-      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
-         +- Project [id#x, name#x]
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
             +- SubqueryAlias tbl_view
-               +- LocalRelation [id#x, name#x]
+               +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
 INSERT INTO ids
-SELECT * EXCEPT (name) FROM tbl_view
+SELECT * EXCEPT (name, data) FROM tbl_view
 -- !query analysis
 InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/ids, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/ids], Append, `spark_catalog`.`default`.`ids`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/ids), [id]
 +- Project [id#x]
    +- SubqueryAlias tbl_view
-      +- View (`tbl_view`, [id#x,name#x])
-         +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
-            +- Project [id#x, name#x]
+      +- View (`tbl_view`, [id#x,name#x,data#x])
+         +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+            +- Project [id#x, name#x, data#x]
                +- SubqueryAlias tbl_view
-                  +- LocalRelation [id#x, name#x]
+                  +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
@@ -158,15 +208,15 @@ Project
 -- !query
 SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view)
 -- !query analysis
-Project [id#x]
+Project [id#x, data#x]
 +- SubqueryAlias __auto_generated_subquery_name
-   +- Project [id#x]
+   +- Project [id#x, data#x]
       +- SubqueryAlias tbl_view
-         +- View (`tbl_view`, [id#x,name#x])
-            +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x]
-               +- Project [id#x, name#x]
+         +- View (`tbl_view`, [id#x,name#x,data#x])
+            +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+               +- Project [id#x, name#x, data#x]
                   +- SubqueryAlias tbl_view
-                     +- LocalRelation [id#x, name#x]
+                     +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
@@ -206,7 +256,7 @@ org.apache.spark.sql.AnalysisException
   "sqlState" : "42703",
   "messageParameters" : {
     "objectName" : "`invalid_column`",
-    "proposal" : "`id`, `name`"
+    "proposal" : "`id`, `name`, `data`"
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -227,7 +277,7 @@ org.apache.spark.sql.AnalysisException
   "sqlState" : "42703",
   "messageParameters" : {
     "objectName" : "`invalid_column`",
-    "proposal" : "`id`, `name`"
+    "proposal" : "`id`, `name`, `data`"
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -242,11 +292,32 @@ org.apache.spark.sql.AnalysisException
 -- !query
 SELECT * EXCEPT(id, id) FROM tbl_view
 -- !query analysis
-org.apache.spark.SparkException
+org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "INTERNAL_ERROR",
-  "sqlState" : "XX000",
-  "messageParameters" : {
-    "message" : "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'"
-  }
+  "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+  "sqlState" : "42702",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 23,
+    "fragment" : "* EXCEPT(id, id)"
+  } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+  "sqlState" : "42702",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 36,
+    "fragment" : "* EXCEPT(data.s2, data.s2.f2)"
+  } ]
 }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
index 15ce07ec4cbc5..995267b8378aa 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
@@ -1,11 +1,12 @@
 CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
-  (10, "name1"),
-  (20, "name2"),
-  (30, "name3"),
-  (40, "name4"),
-  (50, "name5"),
-  (70, "name7")
-AS tbl_view(id, name);
+  (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))),
+  (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))),
+  (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))),
+  (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))),
+  (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))),
+  (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))),
+  (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g")))
+AS tbl_view(id, name, data);
 
 CREATE TABLE ids (id INT) USING CSV;
 
@@ -14,10 +15,15 @@ CREATE TABLE ids (id INT) USING CSV;
 SELECT * FROM tbl_view;
 SELECT * EXCEPT (id) FROM tbl_view;
 SELECT * EXCEPT (name) FROM tbl_view;
+-- EXCEPT named structs
+SELECT * EXCEPT (data) FROM tbl_view;
+SELECT * EXCEPT (data.f1) FROM tbl_view;
+SELECT * EXCEPT (data.s2) FROM tbl_view;
+SELECT * EXCEPT (data.s2.f2) FROM tbl_view;
 -- EXCEPT all columns
-SELECT * EXCEPT (id, name) FROM tbl_view;
+SELECT * EXCEPT (id, name, data) FROM tbl_view;
 -- CREATE TABLE with SELECT
-CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view;
+CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view;
 SELECT * FROM namesTbl;
 -- EXCEPT special character names
 SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`);
@@ -25,7 +31,7 @@ SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`);
 SELECT tbl_view.* EXCEPT (name) FROM tbl_view;
 -- EXCEPT insert into select
 INSERT INTO ids
-SELECT * EXCEPT (name) FROM tbl_view;
+SELECT * EXCEPT (name, data) FROM tbl_view;
 SELECT * FROM ids;
 -- EXCEPT qualified columns
 SELECT * EXCEPT (ids.id) FROM ids;
@@ -43,3 +49,5 @@ SELECT * EXCEPT(invalid_column) FROM tbl_view;
 SELECT * EXCEPT(id, invalid_column) FROM tbl_view;
 -- EXCEPT duplicate column
 SELECT * EXCEPT(id, id) FROM tbl_view;
+-- EXCEPT overlapping columns
+SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view;
diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
index 2ec1d0e2603ce..9dbcdc9eedb33 100644
--- a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
@@ -1,13 +1,14 @@
 -- Automatically generated by SQLQueryTestSuite
 -- !query
 CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES
-  (10, "name1"),
-  (20, "name2"),
-  (30, "name3"),
-  (40, "name4"),
-  (50, "name5"),
-  (70, "name7")
-AS tbl_view(id, name)
+  (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))),
+  (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))),
+  (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))),
+  (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))),
+  (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))),
+  (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))),
+  (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g")))
+AS tbl_view(id, name, data)
 -- !query schema
 struct<>
 -- !query output
@@ -25,6 +26,48 @@ struct<>
 -- !query
 SELECT * FROM tbl_view
 -- !query schema
+struct<id:int,name:string,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
+-- !query output
+10	name1	{"f1":1,"s2":{"f2":101,"f3":"a"}}
+20	name2	{"f1":2,"s2":{"f2":202,"f3":"b"}}
+30	name3	{"f1":3,"s2":{"f2":303,"f3":"c"}}
+40	name4	{"f1":4,"s2":{"f2":404,"f3":"d"}}
+50	name5	{"f1":5,"s2":{"f2":505,"f3":"e"}}
+60	name6	{"f1":6,"s2":{"f2":606,"f3":"f"}}
+70	name7	{"f1":7,"s2":{"f2":707,"f3":"g"}}
+
+
+-- !query
+SELECT * EXCEPT (id) FROM tbl_view
+-- !query schema
+struct<name:string,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
+-- !query output
+name1	{"f1":1,"s2":{"f2":101,"f3":"a"}}
+name2	{"f1":2,"s2":{"f2":202,"f3":"b"}}
+name3	{"f1":3,"s2":{"f2":303,"f3":"c"}}
+name4	{"f1":4,"s2":{"f2":404,"f3":"d"}}
+name5	{"f1":5,"s2":{"f2":505,"f3":"e"}}
+name6	{"f1":6,"s2":{"f2":606,"f3":"f"}}
+name7	{"f1":7,"s2":{"f2":707,"f3":"g"}}
+
+
+-- !query
+SELECT * EXCEPT (name) FROM tbl_view
+-- !query schema
+struct<id:int,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
+-- !query output
+10	{"f1":1,"s2":{"f2":101,"f3":"a"}}
+20	{"f1":2,"s2":{"f2":202,"f3":"b"}}
+30	{"f1":3,"s2":{"f2":303,"f3":"c"}}
+40	{"f1":4,"s2":{"f2":404,"f3":"d"}}
+50	{"f1":5,"s2":{"f2":505,"f3":"e"}}
+60	{"f1":6,"s2":{"f2":606,"f3":"f"}}
+70	{"f1":7,"s2":{"f2":707,"f3":"g"}}
+
+
+-- !query
+SELECT * EXCEPT (data) FROM tbl_view
+-- !query schema
 struct<id:int,name:string>
 -- !query output
 10	name1
@@ -32,37 +75,54 @@ struct<id:int,name:string>
 30	name3
 40	name4
 50	name5
+60	name6
 70	name7
 
 
 -- !query
-SELECT * EXCEPT (id) FROM tbl_view
+SELECT * EXCEPT (data.f1) FROM tbl_view
 -- !query schema
-struct<name:string>
+struct<id:int,name:string,data:struct<s2:struct<f2:int,f3:string>>>
 -- !query output
-name1
-name2
-name3
-name4
-name5
-name7
+10	name1	{"s2":{"f2":101,"f3":"a"}}
+20	name2	{"s2":{"f2":202,"f3":"b"}}
+30	name3	{"s2":{"f2":303,"f3":"c"}}
+40	name4	{"s2":{"f2":404,"f3":"d"}}
+50	name5	{"s2":{"f2":505,"f3":"e"}}
+60	name6	{"s2":{"f2":606,"f3":"f"}}
+70	name7	{"s2":{"f2":707,"f3":"g"}}
 
 
 -- !query
-SELECT * EXCEPT (name) FROM tbl_view
+SELECT * EXCEPT (data.s2) FROM tbl_view
 -- !query schema
-struct<id:int>
+struct<id:int,name:string,data:struct<f1:int>>
 -- !query output
-10
-20
-30
-40
-50
-70
+10	name1	{"f1":1}
+20	name2	{"f1":2}
+30	name3	{"f1":3}
+40	name4	{"f1":4}
+50	name5	{"f1":5}
+60	name6	{"f1":6}
+70	name7	{"f1":7}
+
+
+-- !query
+SELECT * EXCEPT (data.s2.f2) FROM tbl_view
+-- !query schema
+struct<id:int,name:string,data:struct<f1:int,s2:struct<f3:string>>>
+-- !query output
+10	name1	{"f1":1,"s2":{"f3":"a"}}
+20	name2	{"f1":2,"s2":{"f3":"b"}}
+30	name3	{"f1":3,"s2":{"f3":"c"}}
+40	name4	{"f1":4,"s2":{"f3":"d"}}
+50	name5	{"f1":5,"s2":{"f3":"e"}}
+60	name6	{"f1":6,"s2":{"f3":"f"}}
+70	name7	{"f1":7,"s2":{"f3":"g"}}
 
 
 -- !query
-SELECT * EXCEPT (id, name) FROM tbl_view
+SELECT * EXCEPT (id, name, data) FROM tbl_view
 -- !query schema
 struct<>
 -- !query output
@@ -70,7 +130,7 @@ struct<>
 
 
 -- !query
-CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view
+CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view
 -- !query schema
 struct<>
 -- !query output
@@ -87,6 +147,7 @@ name2
 name3
 name4
 name5
+name6
 name7
 
 
@@ -101,19 +162,20 @@ struct<a_b_c:int>
 -- !query
 SELECT tbl_view.* EXCEPT (name) FROM tbl_view
 -- !query schema
-struct<id:int>
+struct<id:int,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
 -- !query output
-10
-20
-30
-40
-50
-70
+10	{"f1":1,"s2":{"f2":101,"f3":"a"}}
+20	{"f1":2,"s2":{"f2":202,"f3":"b"}}
+30	{"f1":3,"s2":{"f2":303,"f3":"c"}}
+40	{"f1":4,"s2":{"f2":404,"f3":"d"}}
+50	{"f1":5,"s2":{"f2":505,"f3":"e"}}
+60	{"f1":6,"s2":{"f2":606,"f3":"f"}}
+70	{"f1":7,"s2":{"f2":707,"f3":"g"}}
 
 
 -- !query
 INSERT INTO ids
-SELECT * EXCEPT (name) FROM tbl_view
+SELECT * EXCEPT (name, data) FROM tbl_view
 -- !query schema
 struct<>
 -- !query output
@@ -130,6 +192,7 @@ struct<id:int>
 30
 40
 50
+60
 70
 
 
@@ -144,14 +207,15 @@ struct<>
 -- !query
 SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view)
 -- !query schema
-struct<id:int>
+struct<id:int,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
 -- !query output
-10
-20
-30
-40
-50
-70
+10	{"f1":1,"s2":{"f2":101,"f3":"a"}}
+20	{"f1":2,"s2":{"f2":202,"f3":"b"}}
+30	{"f1":3,"s2":{"f2":303,"f3":"c"}}
+40	{"f1":4,"s2":{"f2":404,"f3":"d"}}
+50	{"f1":5,"s2":{"f2":505,"f3":"e"}}
+60	{"f1":6,"s2":{"f2":606,"f3":"f"}}
+70	{"f1":7,"s2":{"f2":707,"f3":"g"}}
 
 
 -- !query
@@ -197,7 +261,7 @@ org.apache.spark.sql.AnalysisException
   "sqlState" : "42703",
   "messageParameters" : {
     "objectName" : "`invalid_column`",
-    "proposal" : "`id`, `name`"
+    "proposal" : "`id`, `name`, `data`"
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -220,7 +284,7 @@ org.apache.spark.sql.AnalysisException
   "sqlState" : "42703",
   "messageParameters" : {
     "objectName" : "`invalid_column`",
-    "proposal" : "`id`, `name`"
+    "proposal" : "`id`, `name`, `data`"
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -237,11 +301,34 @@ SELECT * EXCEPT(id, id) FROM tbl_view
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.SparkException
+org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "INTERNAL_ERROR",
-  "sqlState" : "XX000",
-  "messageParameters" : {
-    "message" : "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'"
-  }
+  "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+  "sqlState" : "42702",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 23,
+    "fragment" : "* EXCEPT(id, id)"
+  } ]
+}
+
+
+-- !query
+SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+  "sqlState" : "42702",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 36,
+    "fragment" : "* EXCEPT(data.s2, data.s2.f2)"
+  } ]
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 053d4e98bf26c..b7201c2d96d77 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -168,105 +168,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       Row(1, 1) :: Row(1, 2) :: Row(2, 1) :: Row(2, 2) :: Row(3, 1) :: Row(3, 2) :: Nil)
   }
 
-  test("support star except") {
-    checkAnswer(
-      sql(
-        """
-          |SELECT * EXCEPT (b)
-          |FROM testData2
-        """.stripMargin),
-      Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil)
-  }
-
-  test("support table.star except") {
-    checkAnswer(
-      sql(
-        """
-          |SELECT r.* EXCEPT (r.b)
-          |FROM testData l join testData2 r on (l.key = r.a)
-        """.stripMargin),
-      Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil)
-  }
-
-  test("support table.star except all columns") {
-    checkAnswer(
-      sql(
-        """
-          |SELECT r.* EXCEPT (r.b, r.a)
-          |FROM testData l join testData2 r on (l.key = r.a)
-        """.stripMargin),
-      Row() :: Row() :: Row() :: Row() :: Row() :: Row() :: Nil)
-  }
-
-  test("support table.star except nested") {
-    checkAnswer(
-      sql(
-        """
-          |SELECT *
-          |FROM (SELECT * EXCEPT (b) FROM testData2)
-        """.stripMargin),
-      Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil)
-  }
-
-  test("except errors") {
-    // Try star expanding a scalar. This should fail.
-    checkError(
-      exception = intercept[ParseException]{
-        sql("select * EXCEPT () from testData2")
-      },
-      errorClass = "PARSE_SYNTAX_ERROR",
-      sqlState = Option("42601"),
-      parameters = Map("error" -> "')'", "hint" -> "")
-    )
-
-    // Try star expanding a scalar. This should fail.
-    checkError(
-      exception = intercept[ParseException] {
-        sql("select * EXCEPT b testData2")
-      },
-      errorClass = "PARSE_SYNTAX_ERROR",
-      sqlState = Option("42601"),
-      parameters = Map("error" -> "'b'", "hint" -> "")
-    )
-
-    // Try star expanding a scalar. This should fail.
-    checkError(
-      exception = intercept[SparkException] {
-        sql("select * EXCEPT(b, b) from testData2")
-      },
-      errorClass = "INTERNAL_ERROR",
-      parameters = Map("message" -> "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'")
-    )
-
-    // Try star expanding a scalar. This should fail.
-    checkError(
-      exception = intercept[AnalysisException] {
-        sql("select * EXCEPT(invalid_column) from testData2")
-      },
-      errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
-      sqlState = Option("42703"),
-      parameters = Map("objectName" -> "`invalid_column`", "proposal" -> "`a`, `b`"),
-      matchPVals = true,
-      queryContext = Array(
-        ExpectedContext(fragment = "* EXCEPT(invalid_column)", start = 7, stop = 30)
-      )
-    )
-
-    // Try star expanding a scalar. This should fail.
-    checkError(
-      exception = intercept[AnalysisException] {
-        sql("select * EXCEPT(a, invalid_column) from testData2")
-      },
-      errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
-      sqlState = Option("42703"),
-      parameters = Map("objectName" -> "`invalid_column`", "proposal" -> "`a`, `b`"),
-      matchPVals = true,
-      queryContext = Array(
-        ExpectedContext(fragment = "* EXCEPT(a, invalid_column)", start = 7, stop = 33)
-      )
-    )
-  }
-
   test("self join with alias in agg") {
     withTempView("df") {
       Seq(1, 2, 3)

From 64e2663bac2e3e9d86ffbf85cd68dbc8e32ef442 Mon Sep 17 00:00:00 2001
From: milastdbx <milan.stefanovic@databricks.com>
Date: Wed, 22 Nov 2023 16:10:39 +0100
Subject: [PATCH 3/6] addressing comments

---
 .../main/resources/error/error-classes.json   |  2 +-
 .../sql/catalyst/analysis/unresolved.scala    |  9 +-
 .../sql/catalyst/parser/AstBuilder.scala      | 10 ++-
 .../analyzer-results/selectExcept.sql.out     | 80 ++++++++---------
 .../sql-tests/inputs/selectExcept.sql         | 10 +--
 .../sql-tests/results/selectExcept.sql.out    | 85 +++++++++----------
 6 files changed, 96 insertions(+), 100 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index 694127e655c4d..89c61318732e4 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -995,7 +995,7 @@
   },
   "EXCEPT_OVERLAPPING_COLUMNS" : {
     "message" : [
-      "Columns in an EXCEPT list must be distinct and non-overlapping."
+      "Columns in an EXCEPT list must be distinct and non-overlapping, but got (<columns>)."
     ],
     "sqlState" : "42702"
   },
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index 4dc67839d8683..003342bbff69a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -473,7 +473,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St
     // Use the UnresolvedStarBase expand method to get a seq of NamedExpressions corresponding to
     // the star expansion. This will yield a list of top-level columns from the logical plan's
     // output, or in the case of struct expansion (e.g. target=`x` for SELECT x.*) it will give
-    // a seq of NamedExpressions corresponding to struct fields.
+    // a seq of Alias wrapping the struct field extraction.
     val expandedCols = super.expand(input, resolver)
 
     // resolve except list with respect to the expandedCols
@@ -545,7 +545,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St
         // pass through columns that don't match anything in groupedExcepts
         case (col, None) => col
         // found a match but nestedExcepts has remaining excepts - recurse to rewrite the struct
-        case (col, Some(nestedExcepts)) if !nestedExcepts.exists(_.isEmpty) =>
+        case (col, Some(nestedExcepts)) if nestedExcepts.exists(_.nonEmpty) =>
           val fields = col.dataType match {
             case s: StructType => s.fields
             // we shouldn't be here since we EXCEPT_NEXTED_COLUMN_INVALID_TYPE in getRootColumn
@@ -566,7 +566,10 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St
         // if there are multiple nestedExcepts but one is empty we must have overlapping except
         // columns. throw an error.
         case (col, Some(nestedExcepts)) if nestedExcepts.size > 1 =>
-          throw new AnalysisException(errorClass = "EXCEPT_OVERLAPPING_COLUMNS", Map.empty)
+          throw new AnalysisException(
+            errorClass = "EXCEPT_OVERLAPPING_COLUMNS",
+            messageParameters = Map(
+              "columns" -> this.excepts.map(_.mkString(".")).mkString(", ")))
       }
     }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 00d3059ec5fad..9f62c8cb32396 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1786,11 +1786,13 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
    * Both un-targeted (global) and targeted aliases are supported.
    */
   override def visitStar(ctx: StarContext): Expression = withOrigin(ctx) {
+    var target = Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq)
+
     if (ctx.exceptClause != null) {
-      visitStarExcept(ctx)
+      visitStarExcept(ctx, target)
     }
     else {
-      UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq))
+      UnresolvedStar(target)
     }
   }
 
@@ -1799,11 +1801,11 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
    * specified object except those in the except list.
    * Both un-targeted (global) and targeted aliases are supported.
    */
-  def visitStarExcept(ctx: StarContext): Expression = withOrigin(ctx) {
+  def visitStarExcept(ctx: StarContext, target: Option[Seq[String]]): Expression = withOrigin(ctx) {
     val exceptCols = ctx.exceptClause
       .exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]])
     UnresolvedStarExcept(
-      Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq),
+      target,
       exceptCols.toSeq)
   }
 
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
index 38658ae5a71fc..49fe96caef2cd 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -133,27 +133,6 @@ Project
                +- LocalRelation [id#x, name#x, data#x]
 
 
--- !query
-CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view
--- !query analysis
-org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
-{
-  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
-  "sqlState" : "42P07",
-  "messageParameters" : {
-    "relationName" : "`spark_catalog`.`default`.`namesTbl`"
-  }
-}
-
-
--- !query
-SELECT * FROM namesTbl
--- !query analysis
-Project [name#x]
-+- SubqueryAlias spark_catalog.default.namestbl
-   +- Relation spark_catalog.default.namestbl[name#x] csv
-
-
 -- !query
 SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`)
 -- !query analysis
@@ -206,17 +185,39 @@ Project
 
 
 -- !query
-SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view)
+SELECT data.* EXCEPT (s2) FROM tbl_view
 -- !query analysis
-Project [id#x, data#x]
-+- SubqueryAlias __auto_generated_subquery_name
-   +- Project [id#x, data#x]
-      +- SubqueryAlias tbl_view
-         +- View (`tbl_view`, [id#x,name#x,data#x])
-            +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
-               +- Project [id#x, name#x, data#x]
-                  +- SubqueryAlias tbl_view
-                     +- LocalRelation [id#x, name#x, data#x]
+Project [data#x.f1 AS f1#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT data.* EXCEPT (s2.f2) FROM tbl_view
+-- !query analysis
+Project [data#x.f1 AS f1#x, named_struct(f3, data#x.s2.f3) AS s2#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x, data#x]
+
+
+-- !query
+SELECT data.s2.* EXCEPT (f2) FROM tbl_view
+-- !query analysis
+Project [data#x.s2.f3 AS f3#x]
++- SubqueryAlias tbl_view
+   +- View (`tbl_view`, [id#x,name#x,data#x])
+      +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct<f1:int,s2:struct<f2:int,f3:string>>) AS data#x]
+         +- Project [id#x, name#x, data#x]
+            +- SubqueryAlias tbl_view
+               +- LocalRelation [id#x, name#x, data#x]
 
 
 -- !query
@@ -296,6 +297,9 @@ org.apache.spark.sql.AnalysisException
 {
   "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
   "sqlState" : "42702",
+  "messageParameters" : {
+    "columns" : "id, id"
+  },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
@@ -309,15 +313,5 @@ org.apache.spark.sql.AnalysisException
 -- !query
 SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view
 -- !query analysis
-org.apache.spark.sql.AnalysisException
-{
-  "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
-  "sqlState" : "42702",
-  "queryContext" : [ {
-    "objectType" : "",
-    "objectName" : "",
-    "startIndex" : 8,
-    "stopIndex" : 36,
-    "fragment" : "* EXCEPT(data.s2, data.s2.f2)"
-  } ]
-}
+java.util.NoSuchElementException
+head of empty list
diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
index 995267b8378aa..e07e4f1117c29 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql
@@ -22,21 +22,19 @@ SELECT * EXCEPT (data.s2) FROM tbl_view;
 SELECT * EXCEPT (data.s2.f2) FROM tbl_view;
 -- EXCEPT all columns
 SELECT * EXCEPT (id, name, data) FROM tbl_view;
--- CREATE TABLE with SELECT
-CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view;
-SELECT * FROM namesTbl;
 -- EXCEPT special character names
 SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`);
 -- EXCEPT qualified star
 SELECT tbl_view.* EXCEPT (name) FROM tbl_view;
--- EXCEPT insert into select
 INSERT INTO ids
 SELECT * EXCEPT (name, data) FROM tbl_view;
 SELECT * FROM ids;
 -- EXCEPT qualified columns
 SELECT * EXCEPT (ids.id) FROM ids;
--- EXCEPT nested
-SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view);
+-- EXCEPT structs
+SELECT data.* EXCEPT (s2) FROM tbl_view;
+SELECT data.* EXCEPT (s2.f2) FROM tbl_view;
+SELECT data.s2.* EXCEPT (f2) FROM tbl_view;
 
 -- Errors
 -- EXCEPT missing brackets
diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
index 9dbcdc9eedb33..314c628a894e8 100644
--- a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
@@ -129,28 +129,6 @@ struct<>
 
 
 
--- !query
-CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-SELECT * FROM namesTbl
--- !query schema
-struct<name:string>
--- !query output
-name1
-name2
-name3
-name4
-name5
-name6
-name7
-
-
 -- !query
 SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`)
 -- !query schema
@@ -205,17 +183,45 @@ struct<>
 
 
 -- !query
-SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view)
+SELECT data.* EXCEPT (s2) FROM tbl_view
 -- !query schema
-struct<id:int,data:struct<f1:int,s2:struct<f2:int,f3:string>>>
+struct<f1:int>
 -- !query output
-10	{"f1":1,"s2":{"f2":101,"f3":"a"}}
-20	{"f1":2,"s2":{"f2":202,"f3":"b"}}
-30	{"f1":3,"s2":{"f2":303,"f3":"c"}}
-40	{"f1":4,"s2":{"f2":404,"f3":"d"}}
-50	{"f1":5,"s2":{"f2":505,"f3":"e"}}
-60	{"f1":6,"s2":{"f2":606,"f3":"f"}}
-70	{"f1":7,"s2":{"f2":707,"f3":"g"}}
+1
+2
+3
+4
+5
+6
+7
+
+
+-- !query
+SELECT data.* EXCEPT (s2.f2) FROM tbl_view
+-- !query schema
+struct<f1:int,s2:struct<f3:string>>
+-- !query output
+1	{"f3":"a"}
+2	{"f3":"b"}
+3	{"f3":"c"}
+4	{"f3":"d"}
+5	{"f3":"e"}
+6	{"f3":"f"}
+7	{"f3":"g"}
+
+
+-- !query
+SELECT data.s2.* EXCEPT (f2) FROM tbl_view
+-- !query schema
+struct<f3:string>
+-- !query output
+a
+b
+c
+d
+e
+f
+g
 
 
 -- !query
@@ -305,6 +311,9 @@ org.apache.spark.sql.AnalysisException
 {
   "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
   "sqlState" : "42702",
+  "messageParameters" : {
+    "columns" : "id, id"
+  },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
@@ -320,15 +329,5 @@ SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.AnalysisException
-{
-  "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
-  "sqlState" : "42702",
-  "queryContext" : [ {
-    "objectType" : "",
-    "objectName" : "",
-    "startIndex" : 8,
-    "stopIndex" : 36,
-    "fragment" : "* EXCEPT(data.s2, data.s2.f2)"
-  } ]
-}
+java.util.NoSuchElementException
+head of empty list

From 3f3c6352071d5e63ad6a3487fb633fcbd2c7c4f9 Mon Sep 17 00:00:00 2001
From: milastdbx <milan.stefanovic@databricks.com>
Date: Wed, 22 Nov 2023 17:39:14 +0100
Subject: [PATCH 4/6] fixed bug

---
 .../sql/catalyst/analysis/unresolved.scala      |  2 +-
 .../analyzer-results/selectExcept.sql.out       | 17 +++++++++++++++--
 .../sql-tests/results/selectExcept.sql.out      | 17 +++++++++++++++--
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index 003342bbff69a..7c98946638d40 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -545,7 +545,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St
         // pass through columns that don't match anything in groupedExcepts
         case (col, None) => col
         // found a match but nestedExcepts has remaining excepts - recurse to rewrite the struct
-        case (col, Some(nestedExcepts)) if nestedExcepts.exists(_.nonEmpty) =>
+        case (col, Some(nestedExcepts)) if nestedExcepts.forall(_.nonEmpty) =>
           val fields = col.dataType match {
             case s: StructType => s.fields
             // we shouldn't be here since we EXCEPT_NEXTED_COLUMN_INVALID_TYPE in getRootColumn
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
index 49fe96caef2cd..8643d40b886bd 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out
@@ -313,5 +313,18 @@ org.apache.spark.sql.AnalysisException
 -- !query
 SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view
 -- !query analysis
-java.util.NoSuchElementException
-head of empty list
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+  "sqlState" : "42702",
+  "messageParameters" : {
+    "columns" : "data.s2, data.s2.f2"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 36,
+    "fragment" : "* EXCEPT(data.s2, data.s2.f2)"
+  } ]
+}
diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
index 314c628a894e8..6f6ba9097342a 100644
--- a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out
@@ -329,5 +329,18 @@ SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view
 -- !query schema
 struct<>
 -- !query output
-java.util.NoSuchElementException
-head of empty list
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS",
+  "sqlState" : "42702",
+  "messageParameters" : {
+    "columns" : "data.s2, data.s2.f2"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 36,
+    "fragment" : "* EXCEPT(data.s2, data.s2.f2)"
+  } ]
+}

From 2e0587c27ffa1623fcddd5db6b77f72089274ecd Mon Sep 17 00:00:00 2001
From: milastdbx <milan.stefanovic@databricks.com>
Date: Wed, 22 Nov 2023 23:28:28 +0100
Subject: [PATCH 5/6] deprecated method replacement

---
 .../org/apache/spark/sql/catalyst/analysis/unresolved.scala     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index 7c98946638d40..60d1a89959da4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -538,7 +538,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St
       : Seq[NamedExpression] = {
       // group the except pairs by the column they refer to. NOTE: no groupMap until scala 2.13
       val groupedExcepts: AttributeMap[Seq[Seq[String]]] =
-        AttributeMap(excepts.groupBy(_._1.toAttribute).mapValues(v => v.map(_._2)))
+        AttributeMap(excepts.groupBy(_._1.toAttribute).view.mapValues(v => v.map(_._2)))
 
       // map input columns while searching for the except entry corresponding to the current column
       columns.map(col => col -> groupedExcepts.get(col.toAttribute)).collect {

From 470e9277f1a9dcc9c3bb58a76d19b179bae6b34c Mon Sep 17 00:00:00 2001
From: milastdbx <milan.stefanovic@databricks.com>
Date: Thu, 23 Nov 2023 11:11:51 +0100
Subject: [PATCH 6/6] generated error class documents

---
 .../src/main/resources/error/error-classes.json      | 12 ++++++------
 docs/sql-error-conditions.md                         | 12 ++++++++++++
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index 89c61318732e4..f8e520bb8615c 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -987,6 +987,12 @@
     ],
     "sqlState" : "42K09"
   },
+  "EXCEED_LIMIT_LENGTH" : {
+    "message" : [
+      "Exceeds char/varchar type length limitation: <limit>."
+    ],
+    "sqlState" : "54006"
+  },
   "EXCEPT_NESTED_COLUMN_INVALID_TYPE" : {
     "message" : [
       "EXCEPT column <columnName> was resolved and expected to be StructType, but found type <dataType>."
@@ -999,12 +1005,6 @@
     ],
     "sqlState" : "42702"
   },
-  "EXCEED_LIMIT_LENGTH" : {
-    "message" : [
-      "Exceeds char/varchar type length limitation: <limit>."
-    ],
-    "sqlState" : "54006"
-  },
   "EXPECT_PERMANENT_VIEW_NOT_TEMP" : {
     "message" : [
       "'<operation>' expects a permanent view but <viewName> is a temp view."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index cba6a24b86999..eb5a120aca72f 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -587,6 +587,18 @@ The event time `<eventName>` has the invalid type `<eventType>`, but expected "T
 
 Exceeds char/varchar type length limitation: `<limit>`.
 
+### EXCEPT_NESTED_COLUMN_INVALID_TYPE
+
+[SQLSTATE: 428H2](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
+
+EXCEPT column `<columnName>` was resolved and expected to be StructType, but found type `<dataType>`.
+
+### EXCEPT_OVERLAPPING_COLUMNS
+
+[SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
+
+Columns in an EXCEPT list must be distinct and non-overlapping, but got (`<columns>`).
+
 ### EXPECT_PERMANENT_VIEW_NOT_TEMP
 
 [SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)