From 18dd5b22cf8dfd58de4a5dd3a222228c3a3e19ed Mon Sep 17 00:00:00 2001 From: milastdbx Date: Thu, 16 Nov 2023 14:32:49 +0100 Subject: [PATCH 1/6] introducing `select * except` syntax --- .../sql/catalyst/parser/SqlBaseParser.g4 | 6 + .../sql/catalyst/analysis/unresolved.scala | 163 ++++++++++- .../sql/catalyst/parser/AstBuilder.scala | 12 + .../analyzer-results/selectExcept.sql.out | 252 ++++++++++++++++++ .../sql-tests/inputs/selectExcept.sql | 45 ++++ .../sql-tests/results/selectExcept.sql.out | 247 +++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 99 +++++++ 7 files changed, 819 insertions(+), 5 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 609bd72e21935..198a5266ca3b2 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -587,6 +587,10 @@ notMatchedBySourceAction | UPDATE SET assignmentList ; +exceptClause + : EXCEPT '(' exceptCols=multipartIdentifierList ')' + ; + assignmentList : assignment (COMMA assignment)* ; @@ -969,6 +973,8 @@ primaryExpression | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position | constant #constantDefault + | ASTERISK EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN #starExcept + | qualifiedName DOT ASTERISK EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN #starExcept | ASTERISK #star | qualifiedName DOT ASTERISK #star | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index e232dfc05faa8..82667ac23e7e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -358,10 +358,10 @@ abstract class Star extends LeafExpression with NamedExpression { def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] } - /** * Represents all of the input attributes to a given relational operator, for example in * "SELECT * FROM ...". + * "SELECT * FROM ..." or "SELECT * EXCEPT(...) FROM ..." * * This is also used to expand structs. For example: * "SELECT record.* from (SELECT struct(a,b,c) as record ...) @@ -369,9 +369,12 @@ abstract class Star extends LeafExpression with NamedExpression { * @param target an optional name that should be the target of the expansion. If omitted all * targets' columns are produced. This can either be a table name or struct name. This * is a list of identifiers that is the path of the expansion. - */ -case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevaluable { - + * + * This class provides the shared behavior between the classes for SELECT * ([[UnresolvedStar]]) + * and SELECT * EXCEPT ([[UnresolvedStarExcept]]). [[UnresolvedStar]] is just a case class of this, + * while [[UnresolvedStarExcept]] adds some additional logic to the expand method. + */ +abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star with Unevaluable { /** * Returns true if the nameParts is a subset of the last elements of qualifier of the attribute. * @@ -383,7 +386,7 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu * - `SELECT t.* FROM ns1.ns2.t` where nameParts is Seq("t") and * qualifier is Seq("ns1", "ns2", "t"). */ - private def matchedQualifier( + protected def matchedQualifier( attribute: Attribute, nameParts: Seq[String], resolver: Resolver): Boolean = { @@ -444,6 +447,156 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu override def toString: String = target.map(_.mkString("", ".", ".")).getOrElse("") + "*" } +/** + * Represents some of the input attributes to a given relational operator, for example in + * "SELECT * EXCEPT(a) FROM ...". + * + * @param target an optional name that should be the target of the expansion. If omitted all + * targets' columns are produced. This can only be a table name. This + * is a list of identifiers that is the path of the expansion. + * + * @param excepts a list of names that should be excluded from the expansion. + * + */ +case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[String]]) + extends UnresolvedStarBase(target) { + + /** + * We expand the * EXCEPT by the following three steps: + * 1. use the original .expand() to get top-level column list or struct expansion + * 2. resolve excepts (with respect to the Seq[NamedExpression] returned from (1)) + * 3. filter the expanded columns with the resolved except list. recursively apply filtering in + * case of nested columns in the except list (in order to rewrite structs) + */ + override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = { + // Use the UnresolvedStarBase expand method to get a seq of NamedExpressions corresponding to + // the star expansion. This will yield a list of top-level columns from the logical plan's + // output, or in the case of struct expansion (e.g. target=`x` for SELECT x.*) it will give + // a seq of NamedExpressions corresponding to struct fields. + val expandedCols = super.expand(input, resolver) + + // resolve except list with respect to the expandedCols + val resolvedExcepts = excepts.map { exceptParts => + AttributeSeq(expandedCols.map(_.toAttribute)).resolve(exceptParts, resolver).getOrElse { + val orderedCandidates = StringUtils.orderSuggestedIdentifiersBySimilarity( + UnresolvedAttribute(exceptParts).name, expandedCols.map(a => a.qualifier :+ a.name)) + // if target is defined and expandedCols does not include any Attributes, it must be struct + // expansion; give message suggesting to use unqualified names of nested fields. + if (target.isDefined && !expandedCols.exists(_.isInstanceOf[Attribute])) { + throw new AnalysisException( + errorClass = "EXCEPT_UNRESOLVED_COLUMN_IN_STRUCT_EXPANSION", + messageParameters = Map( + "objectName" -> UnresolvedAttribute(exceptParts).sql, + "objectList" -> orderedCandidates.mkString(", "))) + } else { + throw QueryCompilationErrors + .unresolvedColumnError(UnresolvedAttribute(exceptParts).name, orderedCandidates) + } + } + } + + // Convert each resolved except into a pair of (col: Attribute, nestedColumn) representing the + // top level column in expandedCols that we must 'filter' based on nestedColumn. + @scala.annotation.tailrec + def getRootStruct(expr: Expression, nestedColumn: Seq[String] = Nil) + : (NamedExpression, Seq[String]) = expr match { + case GetStructField(fieldExpr, _, Some(fieldName)) => + getRootStruct(fieldExpr, fieldName +: nestedColumn) + case e: NamedExpression => e -> nestedColumn + case other: ExtractValue => throw new AnalysisException( + errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE", + messageParameters = Map("columnName" -> other.sql, "dataType" -> other.dataType.toString)) + } + // An exceptPair represents the column in expandedCols along with the path of a nestedColumn + // that should be except-ed. Consider two examples: + // 1. excepting the entire col1 = (col1, Seq()) + // 2. excepting a nested field in col2, col2.a.b = (col2, Seq(a, b)) + // INVARIANT: we rely on the structure of the resolved except being an Alias of GetStructField + // in the case of nested columns. + val exceptPairs = resolvedExcepts.map { + case Alias(exceptExpr, name) => getRootStruct(exceptExpr) + case except: NamedExpression => except -> Seq.empty + } + + // Filter columns which correspond to ones listed in the except list and return a new list of + // columns which exclude the columns in the except list. The 'filtering' manifests as either + // dropping the column from the list of columns we return, or rewriting the projected column in + // order to remove excepts that refer to nested columns. For example, with the example above: + // excepts = Seq( + // (col1, Seq()), => filter col1 from the output + // (col2, Seq(a, b)) => rewrite col2 in the output so that it doesn't include the nested field + // ) corresponding to col2.a.b + // + // This occurs in two steps: + // 1. group the excepts by the column they refer to (groupedExcepts) + // 2. filter/rewrite input columns based on four cases: + // a. column doesn't match any groupedExcepts => column unchanged + // b. column exists in groupedExcepts and: + // i. none of remainingExcepts are empty => recursively apply filterColumns over the + // struct fields in order to rewrite the struct + // ii. a remainingExcept is empty, but there are multiple remainingExcepts => we must + // have duplicate/overlapping excepts - throw an error + // iii. [otherwise] remainingExcept is exactly Seq(Seq()) => this is the base 'filtering' + // case. we omit the column from the output (this is a column we would like to + // except). NOTE: this case isn't explicitly listed in the `collect` below since we + // 'collect' columns which match the cases above and omit ones that fall into this + // remaining case. + def filterColumns(columns: Seq[NamedExpression], excepts: Seq[(NamedExpression, Seq[String])]) + : Seq[NamedExpression] = { + // group the except pairs by the column they refer to. NOTE: no groupMap until scala 2.13 + val groupedExcepts: AttributeMap[Seq[Seq[String]]] = + AttributeMap(excepts.groupBy(_._1.toAttribute).mapValues(v => v.map(_._2))) + + // map input columns while searching for the except entry corresponding to the current column + columns.map(col => col -> groupedExcepts.get(col.toAttribute)).collect { + // pass through columns that don't match anything in groupedExcepts + case (col, None) => col + // found a match but nestedExcepts has remaining excepts - recurse to rewrite the struct + case (col, Some(nestedExcepts)) if !nestedExcepts.exists(_.isEmpty) => + val fields = col.dataType match { + case s: StructType => s.fields + // we shouldn't be here since we throw the same error above (in getRootStruct), but + // nonetheless just throw the same error + case _ => throw new AnalysisException( + errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE", + messageParameters = + Map("columnName" -> col.qualifiedName, "dataType" -> col.dataType.toString)) + } + val extractedFields = fields.zipWithIndex.map { case (f, i) => + Alias(GetStructField(col, i), f.name)() + } + val newExcepts = nestedExcepts.map { nestedExcept => + // INVARIANT: we cannot have duplicate column names in nested columns, thus, this `head` + // will find the one and only column corresponding to the correct extractedField. + extractedFields.collectFirst { case col if resolver(col.name, nestedExcept.head) => + col.toAttribute -> nestedExcept.tail + }.get + } + Alias(CreateStruct(filterColumns(extractedFields.toSeq, newExcepts)), col.name)() + // if there are multiple nestedExcepts but one is empty we must have overlapping except + // columns. throw an error. + case (col, Some(nestedExcepts)) if nestedExcepts.size > 1 => + throw new AnalysisException(errorClass = "EXCEPT_OVERLAPPING_COLUMNS", Map.empty) + } + } + + filterColumns(expandedCols, exceptPairs) + } +} + +/** + * Represents all of the input attributes to a given relational operator, for example in + * "SELECT * FROM ...". + * + * This is also used to expand structs. For example: + * "SELECT record.* from (SELECT struct(a,b,c) as record ...) + * + * @param target an optional name that should be the target of the expansion. If omitted all + * targets' columns are produced. This can either be a table name or struct name. This + * is a list of identifiers that is the path of the expansion. + */ +case class UnresolvedStar(target: Option[Seq[String]]) extends UnresolvedStarBase(target) + /** * Represents all of the input attributes to a given relational operator, for example in * "SELECT `(id)?+.+` FROM ...". diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 09161d8f8da53..81f40739dea30 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1789,6 +1789,18 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq)) } + /** + * Create a star-except (i.e. all - except list) expression; this selects all elements in the + * specified object except those in the except list. + * Both un-targeted (global) and targeted aliases are supported. + */ + override def visitStarExcept(ctx: StarExceptContext): Expression = withOrigin(ctx) { + val exceptCols = ctx.exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]]) + UnresolvedStarExcept( + Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq), + exceptCols.toSeq) + } + /** * Create an aliased expression if an alias is specified. Both single and multi-aliases are * supported. diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out new file mode 100644 index 0000000000000..5c7c888b6b36c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out @@ -0,0 +1,252 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES + (10, "name1"), + (20, "name2"), + (30, "name3"), + (40, "name4"), + (50, "name5"), + (70, "name7") +AS tbl_view(id, name) +-- !query analysis +CreateViewCommand `tbl_view`, SELECT * FROM VALUES + (10, "name1"), + (20, "name2"), + (30, "name3"), + (40, "name4"), + (50, "name5"), + (70, "name7") +AS tbl_view(id, name), false, false, LocalTempView, true + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x] + + +-- !query +CREATE TABLE ids (id INT) USING CSV +-- !query analysis +org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +{ + "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS", + "sqlState" : "42P07", + "messageParameters" : { + "relationName" : "`spark_catalog`.`default`.`ids`" + } +} + + +-- !query +SELECT * FROM tbl_view +-- !query analysis +Project [id#x, name#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x] + + +-- !query +SELECT * EXCEPT (id) FROM tbl_view +-- !query analysis +Project [name#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x] + + +-- !query +SELECT * EXCEPT (name) FROM tbl_view +-- !query analysis +Project [id#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x] + + +-- !query +SELECT * EXCEPT (id, name) FROM tbl_view +-- !query analysis +Project ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x] + + +-- !query +CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view +-- !query analysis +org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +{ + "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS", + "sqlState" : "42P07", + "messageParameters" : { + "relationName" : "`spark_catalog`.`default`.`namesTbl`" + } +} + + +-- !query +SELECT * FROM namesTbl +-- !query analysis +Project [name#x] ++- SubqueryAlias spark_catalog.default.namestbl + +- Relation spark_catalog.default.namestbl[name#x] csv + + +-- !query +SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`) +-- !query analysis +Project [a_b_c#x] ++- SubqueryAlias __auto_generated_subquery_name + +- Project [1 AS a_b_c#x, 2 AS a-b-c#x] + +- OneRowRelation + + +-- !query +SELECT tbl_view.* EXCEPT (name) FROM tbl_view +-- !query analysis +Project [id#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x] + + +-- !query +INSERT INTO ids +SELECT * EXCEPT (name) FROM tbl_view +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/ids, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/ids], Append, `spark_catalog`.`default`.`ids`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/ids), [id] ++- Project [id#x] + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x] + + +-- !query +SELECT * FROM ids +-- !query analysis +Project [id#x] ++- SubqueryAlias spark_catalog.default.ids + +- Relation spark_catalog.default.ids[id#x] csv + + +-- !query +SELECT * EXCEPT (ids.id) FROM ids +-- !query analysis +Project ++- SubqueryAlias spark_catalog.default.ids + +- Relation spark_catalog.default.ids[id#x] csv + + +-- !query +SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view) +-- !query analysis +Project [id#x] ++- SubqueryAlias __auto_generated_subquery_name + +- Project [id#x] + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x] + + +-- !query +SELECT * EXCEPT name FROM tbl_view +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'name'", + "hint" : "" + } +} + + +-- !query +SELECT * EXCEPT() name FROM tbl_view +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "')'", + "hint" : "" + } +} + + +-- !query +SELECT * EXCEPT(invalid_column) FROM tbl_view +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`invalid_column`", + "proposal" : "`id`, `name`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 31, + "fragment" : "* EXCEPT(invalid_column)" + } ] +} + + +-- !query +SELECT * EXCEPT(id, invalid_column) FROM tbl_view +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`invalid_column`", + "proposal" : "`id`, `name`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 35, + "fragment" : "* EXCEPT(id, invalid_column)" + } ] +} + + +-- !query +SELECT * EXCEPT(id, id) FROM tbl_view +-- !query analysis +org.apache.spark.SparkException +{ + "errorClass" : "INTERNAL_ERROR", + "sqlState" : "XX000", + "messageParameters" : { + "message" : "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'" + } +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql new file mode 100644 index 0000000000000..15ce07ec4cbc5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql @@ -0,0 +1,45 @@ +CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES + (10, "name1"), + (20, "name2"), + (30, "name3"), + (40, "name4"), + (50, "name5"), + (70, "name7") +AS tbl_view(id, name); + +CREATE TABLE ids (id INT) USING CSV; + +-- Happy path +-- EXCEPT basic scenario +SELECT * FROM tbl_view; +SELECT * EXCEPT (id) FROM tbl_view; +SELECT * EXCEPT (name) FROM tbl_view; +-- EXCEPT all columns +SELECT * EXCEPT (id, name) FROM tbl_view; +-- CREATE TABLE with SELECT +CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view; +SELECT * FROM namesTbl; +-- EXCEPT special character names +SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`); +-- EXCEPT qualified star +SELECT tbl_view.* EXCEPT (name) FROM tbl_view; +-- EXCEPT insert into select +INSERT INTO ids +SELECT * EXCEPT (name) FROM tbl_view; +SELECT * FROM ids; +-- EXCEPT qualified columns +SELECT * EXCEPT (ids.id) FROM ids; +-- EXCEPT nested +SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view); + +-- Errors +-- EXCEPT missing brackets +SELECT * EXCEPT name FROM tbl_view; +-- EXCEPT no columns +SELECT * EXCEPT() name FROM tbl_view; +-- EXCEPT invalid column +SELECT * EXCEPT(invalid_column) FROM tbl_view; +-- EXCEPT find invalid column +SELECT * EXCEPT(id, invalid_column) FROM tbl_view; +-- EXCEPT duplicate column +SELECT * EXCEPT(id, id) FROM tbl_view; diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out new file mode 100644 index 0000000000000..2ec1d0e2603ce --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out @@ -0,0 +1,247 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES + (10, "name1"), + (20, "name2"), + (30, "name3"), + (40, "name4"), + (50, "name5"), + (70, "name7") +AS tbl_view(id, name) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE ids (id INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM tbl_view +-- !query schema +struct +-- !query output +10 name1 +20 name2 +30 name3 +40 name4 +50 name5 +70 name7 + + +-- !query +SELECT * EXCEPT (id) FROM tbl_view +-- !query schema +struct +-- !query output +name1 +name2 +name3 +name4 +name5 +name7 + + +-- !query +SELECT * EXCEPT (name) FROM tbl_view +-- !query schema +struct +-- !query output +10 +20 +30 +40 +50 +70 + + +-- !query +SELECT * EXCEPT (id, name) FROM tbl_view +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM namesTbl +-- !query schema +struct +-- !query output +name1 +name2 +name3 +name4 +name5 +name7 + + +-- !query +SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT tbl_view.* EXCEPT (name) FROM tbl_view +-- !query schema +struct +-- !query output +10 +20 +30 +40 +50 +70 + + +-- !query +INSERT INTO ids +SELECT * EXCEPT (name) FROM tbl_view +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM ids +-- !query schema +struct +-- !query output +10 +20 +30 +40 +50 +70 + + +-- !query +SELECT * EXCEPT (ids.id) FROM ids +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view) +-- !query schema +struct +-- !query output +10 +20 +30 +40 +50 +70 + + +-- !query +SELECT * EXCEPT name FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'name'", + "hint" : "" + } +} + + +-- !query +SELECT * EXCEPT() name FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "')'", + "hint" : "" + } +} + + +-- !query +SELECT * EXCEPT(invalid_column) FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`invalid_column`", + "proposal" : "`id`, `name`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 31, + "fragment" : "* EXCEPT(invalid_column)" + } ] +} + + +-- !query +SELECT * EXCEPT(id, invalid_column) FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`invalid_column`", + "proposal" : "`id`, `name`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 35, + "fragment" : "* EXCEPT(id, invalid_column)" + } ] +} + + +-- !query +SELECT * EXCEPT(id, id) FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkException +{ + "errorClass" : "INTERNAL_ERROR", + "sqlState" : "XX000", + "messageParameters" : { + "message" : "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'" + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b7201c2d96d77..053d4e98bf26c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -168,6 +168,105 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(1, 1) :: Row(1, 2) :: Row(2, 1) :: Row(2, 2) :: Row(3, 1) :: Row(3, 2) :: Nil) } + test("support star except") { + checkAnswer( + sql( + """ + |SELECT * EXCEPT (b) + |FROM testData2 + """.stripMargin), + Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil) + } + + test("support table.star except") { + checkAnswer( + sql( + """ + |SELECT r.* EXCEPT (r.b) + |FROM testData l join testData2 r on (l.key = r.a) + """.stripMargin), + Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil) + } + + test("support table.star except all columns") { + checkAnswer( + sql( + """ + |SELECT r.* EXCEPT (r.b, r.a) + |FROM testData l join testData2 r on (l.key = r.a) + """.stripMargin), + Row() :: Row() :: Row() :: Row() :: Row() :: Row() :: Nil) + } + + test("support table.star except nested") { + checkAnswer( + sql( + """ + |SELECT * + |FROM (SELECT * EXCEPT (b) FROM testData2) + """.stripMargin), + Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil) + } + + test("except errors") { + // Try star expanding a scalar. This should fail. + checkError( + exception = intercept[ParseException]{ + sql("select * EXCEPT () from testData2") + }, + errorClass = "PARSE_SYNTAX_ERROR", + sqlState = Option("42601"), + parameters = Map("error" -> "')'", "hint" -> "") + ) + + // Try star expanding a scalar. This should fail. + checkError( + exception = intercept[ParseException] { + sql("select * EXCEPT b testData2") + }, + errorClass = "PARSE_SYNTAX_ERROR", + sqlState = Option("42601"), + parameters = Map("error" -> "'b'", "hint" -> "") + ) + + // Try star expanding a scalar. This should fail. + checkError( + exception = intercept[SparkException] { + sql("select * EXCEPT(b, b) from testData2") + }, + errorClass = "INTERNAL_ERROR", + parameters = Map("message" -> "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'") + ) + + // Try star expanding a scalar. This should fail. + checkError( + exception = intercept[AnalysisException] { + sql("select * EXCEPT(invalid_column) from testData2") + }, + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + sqlState = Option("42703"), + parameters = Map("objectName" -> "`invalid_column`", "proposal" -> "`a`, `b`"), + matchPVals = true, + queryContext = Array( + ExpectedContext(fragment = "* EXCEPT(invalid_column)", start = 7, stop = 30) + ) + ) + + // Try star expanding a scalar. This should fail. + checkError( + exception = intercept[AnalysisException] { + sql("select * EXCEPT(a, invalid_column) from testData2") + }, + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + sqlState = Option("42703"), + parameters = Map("objectName" -> "`invalid_column`", "proposal" -> "`a`, `b`"), + matchPVals = true, + queryContext = Array( + ExpectedContext(fragment = "* EXCEPT(a, invalid_column)", start = 7, stop = 33) + ) + ) + } + test("self join with alias in agg") { withTempView("df") { Seq(1, 2, 3) From bc31ce217b3e9c84ee3ce44de6e66065bf20b009 Mon Sep 17 00:00:00 2001 From: milastdbx Date: Tue, 21 Nov 2023 13:04:01 +0100 Subject: [PATCH 2/6] addressing comments --- .../main/resources/error/error-classes.json | 12 ++ .../sql/catalyst/parser/SqlBaseParser.g4 | 8 +- .../sql/catalyst/analysis/unresolved.scala | 28 +-- .../sql/catalyst/parser/AstBuilder.scala | 12 +- .../analyzer-results/selectExcept.sql.out | 193 ++++++++++++------ .../sql-tests/inputs/selectExcept.sql | 28 ++- .../sql-tests/results/selectExcept.sql.out | 183 ++++++++++++----- .../org/apache/spark/sql/SQLQuerySuite.scala | 99 --------- 8 files changed, 318 insertions(+), 245 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index afcd841a2ce04..694127e655c4d 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -987,6 +987,18 @@ ], "sqlState" : "42K09" }, + "EXCEPT_NESTED_COLUMN_INVALID_TYPE" : { + "message" : [ + "EXCEPT column was resolved and expected to be StructType, but found type ." + ], + "sqlState" : "428H2" + }, + "EXCEPT_OVERLAPPING_COLUMNS" : { + "message" : [ + "Columns in an EXCEPT list must be distinct and non-overlapping." + ], + "sqlState" : "42702" + }, "EXCEED_LIMIT_LENGTH" : { "message" : [ "Exceeds char/varchar type length limitation: ." diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 198a5266ca3b2..439a12c301817 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -588,7 +588,7 @@ notMatchedBySourceAction ; exceptClause - : EXCEPT '(' exceptCols=multipartIdentifierList ')' + : EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN ; assignmentList @@ -973,10 +973,8 @@ primaryExpression | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position | constant #constantDefault - | ASTERISK EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN #starExcept - | qualifiedName DOT ASTERISK EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN #starExcept - | ASTERISK #star - | qualifiedName DOT ASTERISK #star + | ASTERISK exceptClause? #star + | qualifiedName DOT ASTERISK exceptClause? #star | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor | LEFT_PAREN query RIGHT_PAREN #subqueryExpression | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 82667ac23e7e2..4dc67839d8683 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.analysis +import org.apache.spark.SparkException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.expressions._ @@ -482,26 +483,18 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St UnresolvedAttribute(exceptParts).name, expandedCols.map(a => a.qualifier :+ a.name)) // if target is defined and expandedCols does not include any Attributes, it must be struct // expansion; give message suggesting to use unqualified names of nested fields. - if (target.isDefined && !expandedCols.exists(_.isInstanceOf[Attribute])) { - throw new AnalysisException( - errorClass = "EXCEPT_UNRESOLVED_COLUMN_IN_STRUCT_EXPANSION", - messageParameters = Map( - "objectName" -> UnresolvedAttribute(exceptParts).sql, - "objectList" -> orderedCandidates.mkString(", "))) - } else { - throw QueryCompilationErrors - .unresolvedColumnError(UnresolvedAttribute(exceptParts).name, orderedCandidates) - } + throw QueryCompilationErrors + .unresolvedColumnError(UnresolvedAttribute(exceptParts).name, orderedCandidates) } } // Convert each resolved except into a pair of (col: Attribute, nestedColumn) representing the // top level column in expandedCols that we must 'filter' based on nestedColumn. @scala.annotation.tailrec - def getRootStruct(expr: Expression, nestedColumn: Seq[String] = Nil) + def getRootColumn(expr: Expression, nestedColumn: Seq[String] = Nil) : (NamedExpression, Seq[String]) = expr match { case GetStructField(fieldExpr, _, Some(fieldName)) => - getRootStruct(fieldExpr, fieldName +: nestedColumn) + getRootColumn(fieldExpr, fieldName +: nestedColumn) case e: NamedExpression => e -> nestedColumn case other: ExtractValue => throw new AnalysisException( errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE", @@ -514,7 +507,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St // INVARIANT: we rely on the structure of the resolved except being an Alias of GetStructField // in the case of nested columns. val exceptPairs = resolvedExcepts.map { - case Alias(exceptExpr, name) => getRootStruct(exceptExpr) + case Alias(exceptExpr, name) => getRootColumn(exceptExpr) case except: NamedExpression => except -> Seq.empty } @@ -555,12 +548,9 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St case (col, Some(nestedExcepts)) if !nestedExcepts.exists(_.isEmpty) => val fields = col.dataType match { case s: StructType => s.fields - // we shouldn't be here since we throw the same error above (in getRootStruct), but - // nonetheless just throw the same error - case _ => throw new AnalysisException( - errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE", - messageParameters = - Map("columnName" -> col.qualifiedName, "dataType" -> col.dataType.toString)) + // we shouldn't be here since we EXCEPT_NEXTED_COLUMN_INVALID_TYPE in getRootColumn + // for this case - just throw internal error + case _ => throw SparkException.internalError("Invalid column type") } val extractedFields = fields.zipWithIndex.map { case (f, i) => Alias(GetStructField(col, i), f.name)() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 81f40739dea30..00d3059ec5fad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1786,7 +1786,12 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { * Both un-targeted (global) and targeted aliases are supported. */ override def visitStar(ctx: StarContext): Expression = withOrigin(ctx) { - UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq)) + if (ctx.exceptClause != null) { + visitStarExcept(ctx) + } + else { + UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq)) + } } /** @@ -1794,8 +1799,9 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { * specified object except those in the except list. * Both un-targeted (global) and targeted aliases are supported. */ - override def visitStarExcept(ctx: StarExceptContext): Expression = withOrigin(ctx) { - val exceptCols = ctx.exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]]) + def visitStarExcept(ctx: StarContext): Expression = withOrigin(ctx) { + val exceptCols = ctx.exceptClause + .exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]]) UnresolvedStarExcept( Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq), exceptCols.toSeq) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out index 5c7c888b6b36c..38658ae5a71fc 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out @@ -1,25 +1,27 @@ -- Automatically generated by SQLQueryTestSuite -- !query CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES - (10, "name1"), - (20, "name2"), - (30, "name3"), - (40, "name4"), - (50, "name5"), - (70, "name7") -AS tbl_view(id, name) + (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))), + (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))), + (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))), + (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))), + (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), + (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), + (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) +AS tbl_view(id, name, data) -- !query analysis CreateViewCommand `tbl_view`, SELECT * FROM VALUES - (10, "name1"), - (20, "name2"), - (30, "name3"), - (40, "name4"), - (50, "name5"), - (70, "name7") -AS tbl_view(id, name), false, false, LocalTempView, true - +- Project [id#x, name#x] + (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))), + (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))), + (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))), + (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))), + (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), + (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), + (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) +AS tbl_view(id, name, data), false, false, LocalTempView, true + +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x] + +- LocalRelation [id#x, name#x, data#x] -- !query @@ -38,53 +40,101 @@ org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException -- !query SELECT * FROM tbl_view -- !query analysis -Project [id#x, name#x] +Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x,name#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] - +- Project [id#x, name#x] + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x] + +- LocalRelation [id#x, name#x, data#x] -- !query SELECT * EXCEPT (id) FROM tbl_view -- !query analysis -Project [name#x] +Project [name#x, data#x] +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x,name#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] - +- Project [id#x, name#x] + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x] + +- LocalRelation [id#x, name#x, data#x] -- !query SELECT * EXCEPT (name) FROM tbl_view -- !query analysis -Project [id#x] +Project [id#x, data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (data) FROM tbl_view +-- !query analysis +Project [id#x, name#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (data.f1) FROM tbl_view +-- !query analysis +Project [id#x, name#x, named_struct(s2, data#x.s2) AS data#x] +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x,name#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] - +- Project [id#x, name#x] + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x] + +- LocalRelation [id#x, name#x, data#x] -- !query -SELECT * EXCEPT (id, name) FROM tbl_view +SELECT * EXCEPT (data.s2) FROM tbl_view +-- !query analysis +Project [id#x, name#x, named_struct(f1, data#x.f1) AS data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (data.s2.f2) FROM tbl_view +-- !query analysis +Project [id#x, name#x, named_struct(f1, data#x.f1, s2, named_struct(f3, data#x.s2.f3)) AS data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (id, name, data) FROM tbl_view -- !query analysis Project +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x,name#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] - +- Project [id#x, name#x] + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x] + +- LocalRelation [id#x, name#x, data#x] -- !query -CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view +CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view -- !query analysis org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException { @@ -116,27 +166,27 @@ Project [a_b_c#x] -- !query SELECT tbl_view.* EXCEPT (name) FROM tbl_view -- !query analysis -Project [id#x] +Project [id#x, data#x] +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x,name#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] - +- Project [id#x, name#x] + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x] + +- LocalRelation [id#x, name#x, data#x] -- !query INSERT INTO ids -SELECT * EXCEPT (name) FROM tbl_view +SELECT * EXCEPT (name, data) FROM tbl_view -- !query analysis InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/ids, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/ids], Append, `spark_catalog`.`default`.`ids`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/ids), [id] +- Project [id#x] +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x,name#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] - +- Project [id#x, name#x] + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x] + +- LocalRelation [id#x, name#x, data#x] -- !query @@ -158,15 +208,15 @@ Project -- !query SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view) -- !query analysis -Project [id#x] +Project [id#x, data#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [id#x] + +- Project [id#x, data#x] +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x,name#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] - +- Project [id#x, name#x] + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x] + +- LocalRelation [id#x, name#x, data#x] -- !query @@ -206,7 +256,7 @@ org.apache.spark.sql.AnalysisException "sqlState" : "42703", "messageParameters" : { "objectName" : "`invalid_column`", - "proposal" : "`id`, `name`" + "proposal" : "`id`, `name`, `data`" }, "queryContext" : [ { "objectType" : "", @@ -227,7 +277,7 @@ org.apache.spark.sql.AnalysisException "sqlState" : "42703", "messageParameters" : { "objectName" : "`invalid_column`", - "proposal" : "`id`, `name`" + "proposal" : "`id`, `name`, `data`" }, "queryContext" : [ { "objectType" : "", @@ -242,11 +292,32 @@ org.apache.spark.sql.AnalysisException -- !query SELECT * EXCEPT(id, id) FROM tbl_view -- !query analysis -org.apache.spark.SparkException +org.apache.spark.sql.AnalysisException { - "errorClass" : "INTERNAL_ERROR", - "sqlState" : "XX000", - "messageParameters" : { - "message" : "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'" - } + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 23, + "fragment" : "* EXCEPT(id, id)" + } ] +} + + +-- !query +SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "* EXCEPT(data.s2, data.s2.f2)" + } ] } diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql index 15ce07ec4cbc5..995267b8378aa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql @@ -1,11 +1,12 @@ CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES - (10, "name1"), - (20, "name2"), - (30, "name3"), - (40, "name4"), - (50, "name5"), - (70, "name7") -AS tbl_view(id, name); + (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))), + (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))), + (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))), + (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))), + (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), + (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), + (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) +AS tbl_view(id, name, data); CREATE TABLE ids (id INT) USING CSV; @@ -14,10 +15,15 @@ CREATE TABLE ids (id INT) USING CSV; SELECT * FROM tbl_view; SELECT * EXCEPT (id) FROM tbl_view; SELECT * EXCEPT (name) FROM tbl_view; +-- EXCEPT named structs +SELECT * EXCEPT (data) FROM tbl_view; +SELECT * EXCEPT (data.f1) FROM tbl_view; +SELECT * EXCEPT (data.s2) FROM tbl_view; +SELECT * EXCEPT (data.s2.f2) FROM tbl_view; -- EXCEPT all columns -SELECT * EXCEPT (id, name) FROM tbl_view; +SELECT * EXCEPT (id, name, data) FROM tbl_view; -- CREATE TABLE with SELECT -CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view; +CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view; SELECT * FROM namesTbl; -- EXCEPT special character names SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`); @@ -25,7 +31,7 @@ SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`); SELECT tbl_view.* EXCEPT (name) FROM tbl_view; -- EXCEPT insert into select INSERT INTO ids -SELECT * EXCEPT (name) FROM tbl_view; +SELECT * EXCEPT (name, data) FROM tbl_view; SELECT * FROM ids; -- EXCEPT qualified columns SELECT * EXCEPT (ids.id) FROM ids; @@ -43,3 +49,5 @@ SELECT * EXCEPT(invalid_column) FROM tbl_view; SELECT * EXCEPT(id, invalid_column) FROM tbl_view; -- EXCEPT duplicate column SELECT * EXCEPT(id, id) FROM tbl_view; +-- EXCEPT overlapping columns +SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view; diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out index 2ec1d0e2603ce..9dbcdc9eedb33 100644 --- a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out @@ -1,13 +1,14 @@ -- Automatically generated by SQLQueryTestSuite -- !query CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES - (10, "name1"), - (20, "name2"), - (30, "name3"), - (40, "name4"), - (50, "name5"), - (70, "name7") -AS tbl_view(id, name) + (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))), + (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))), + (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))), + (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))), + (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), + (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), + (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) +AS tbl_view(id, name, data) -- !query schema struct<> -- !query output @@ -25,6 +26,48 @@ struct<> -- !query SELECT * FROM tbl_view -- !query schema +struct>> +-- !query output +10 name1 {"f1":1,"s2":{"f2":101,"f3":"a"}} +20 name2 {"f1":2,"s2":{"f2":202,"f3":"b"}} +30 name3 {"f1":3,"s2":{"f2":303,"f3":"c"}} +40 name4 {"f1":4,"s2":{"f2":404,"f3":"d"}} +50 name5 {"f1":5,"s2":{"f2":505,"f3":"e"}} +60 name6 {"f1":6,"s2":{"f2":606,"f3":"f"}} +70 name7 {"f1":7,"s2":{"f2":707,"f3":"g"}} + + +-- !query +SELECT * EXCEPT (id) FROM tbl_view +-- !query schema +struct>> +-- !query output +name1 {"f1":1,"s2":{"f2":101,"f3":"a"}} +name2 {"f1":2,"s2":{"f2":202,"f3":"b"}} +name3 {"f1":3,"s2":{"f2":303,"f3":"c"}} +name4 {"f1":4,"s2":{"f2":404,"f3":"d"}} +name5 {"f1":5,"s2":{"f2":505,"f3":"e"}} +name6 {"f1":6,"s2":{"f2":606,"f3":"f"}} +name7 {"f1":7,"s2":{"f2":707,"f3":"g"}} + + +-- !query +SELECT * EXCEPT (name) FROM tbl_view +-- !query schema +struct>> +-- !query output +10 {"f1":1,"s2":{"f2":101,"f3":"a"}} +20 {"f1":2,"s2":{"f2":202,"f3":"b"}} +30 {"f1":3,"s2":{"f2":303,"f3":"c"}} +40 {"f1":4,"s2":{"f2":404,"f3":"d"}} +50 {"f1":5,"s2":{"f2":505,"f3":"e"}} +60 {"f1":6,"s2":{"f2":606,"f3":"f"}} +70 {"f1":7,"s2":{"f2":707,"f3":"g"}} + + +-- !query +SELECT * EXCEPT (data) FROM tbl_view +-- !query schema struct -- !query output 10 name1 @@ -32,37 +75,54 @@ struct 30 name3 40 name4 50 name5 +60 name6 70 name7 -- !query -SELECT * EXCEPT (id) FROM tbl_view +SELECT * EXCEPT (data.f1) FROM tbl_view -- !query schema -struct +struct>> -- !query output -name1 -name2 -name3 -name4 -name5 -name7 +10 name1 {"s2":{"f2":101,"f3":"a"}} +20 name2 {"s2":{"f2":202,"f3":"b"}} +30 name3 {"s2":{"f2":303,"f3":"c"}} +40 name4 {"s2":{"f2":404,"f3":"d"}} +50 name5 {"s2":{"f2":505,"f3":"e"}} +60 name6 {"s2":{"f2":606,"f3":"f"}} +70 name7 {"s2":{"f2":707,"f3":"g"}} -- !query -SELECT * EXCEPT (name) FROM tbl_view +SELECT * EXCEPT (data.s2) FROM tbl_view -- !query schema -struct +struct> -- !query output -10 -20 -30 -40 -50 -70 +10 name1 {"f1":1} +20 name2 {"f1":2} +30 name3 {"f1":3} +40 name4 {"f1":4} +50 name5 {"f1":5} +60 name6 {"f1":6} +70 name7 {"f1":7} + + +-- !query +SELECT * EXCEPT (data.s2.f2) FROM tbl_view +-- !query schema +struct>> +-- !query output +10 name1 {"f1":1,"s2":{"f3":"a"}} +20 name2 {"f1":2,"s2":{"f3":"b"}} +30 name3 {"f1":3,"s2":{"f3":"c"}} +40 name4 {"f1":4,"s2":{"f3":"d"}} +50 name5 {"f1":5,"s2":{"f3":"e"}} +60 name6 {"f1":6,"s2":{"f3":"f"}} +70 name7 {"f1":7,"s2":{"f3":"g"}} -- !query -SELECT * EXCEPT (id, name) FROM tbl_view +SELECT * EXCEPT (id, name, data) FROM tbl_view -- !query schema struct<> -- !query output @@ -70,7 +130,7 @@ struct<> -- !query -CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id) FROM tbl_view +CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view -- !query schema struct<> -- !query output @@ -87,6 +147,7 @@ name2 name3 name4 name5 +name6 name7 @@ -101,19 +162,20 @@ struct -- !query SELECT tbl_view.* EXCEPT (name) FROM tbl_view -- !query schema -struct +struct>> -- !query output -10 -20 -30 -40 -50 -70 +10 {"f1":1,"s2":{"f2":101,"f3":"a"}} +20 {"f1":2,"s2":{"f2":202,"f3":"b"}} +30 {"f1":3,"s2":{"f2":303,"f3":"c"}} +40 {"f1":4,"s2":{"f2":404,"f3":"d"}} +50 {"f1":5,"s2":{"f2":505,"f3":"e"}} +60 {"f1":6,"s2":{"f2":606,"f3":"f"}} +70 {"f1":7,"s2":{"f2":707,"f3":"g"}} -- !query INSERT INTO ids -SELECT * EXCEPT (name) FROM tbl_view +SELECT * EXCEPT (name, data) FROM tbl_view -- !query schema struct<> -- !query output @@ -130,6 +192,7 @@ struct 30 40 50 +60 70 @@ -144,14 +207,15 @@ struct<> -- !query SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view) -- !query schema -struct +struct>> -- !query output -10 -20 -30 -40 -50 -70 +10 {"f1":1,"s2":{"f2":101,"f3":"a"}} +20 {"f1":2,"s2":{"f2":202,"f3":"b"}} +30 {"f1":3,"s2":{"f2":303,"f3":"c"}} +40 {"f1":4,"s2":{"f2":404,"f3":"d"}} +50 {"f1":5,"s2":{"f2":505,"f3":"e"}} +60 {"f1":6,"s2":{"f2":606,"f3":"f"}} +70 {"f1":7,"s2":{"f2":707,"f3":"g"}} -- !query @@ -197,7 +261,7 @@ org.apache.spark.sql.AnalysisException "sqlState" : "42703", "messageParameters" : { "objectName" : "`invalid_column`", - "proposal" : "`id`, `name`" + "proposal" : "`id`, `name`, `data`" }, "queryContext" : [ { "objectType" : "", @@ -220,7 +284,7 @@ org.apache.spark.sql.AnalysisException "sqlState" : "42703", "messageParameters" : { "objectName" : "`invalid_column`", - "proposal" : "`id`, `name`" + "proposal" : "`id`, `name`, `data`" }, "queryContext" : [ { "objectType" : "", @@ -237,11 +301,34 @@ SELECT * EXCEPT(id, id) FROM tbl_view -- !query schema struct<> -- !query output -org.apache.spark.SparkException +org.apache.spark.sql.AnalysisException { - "errorClass" : "INTERNAL_ERROR", - "sqlState" : "XX000", - "messageParameters" : { - "message" : "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'" - } + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 23, + "fragment" : "* EXCEPT(id, id)" + } ] +} + + +-- !query +SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "* EXCEPT(data.s2, data.s2.f2)" + } ] } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 053d4e98bf26c..b7201c2d96d77 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -168,105 +168,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(1, 1) :: Row(1, 2) :: Row(2, 1) :: Row(2, 2) :: Row(3, 1) :: Row(3, 2) :: Nil) } - test("support star except") { - checkAnswer( - sql( - """ - |SELECT * EXCEPT (b) - |FROM testData2 - """.stripMargin), - Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil) - } - - test("support table.star except") { - checkAnswer( - sql( - """ - |SELECT r.* EXCEPT (r.b) - |FROM testData l join testData2 r on (l.key = r.a) - """.stripMargin), - Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil) - } - - test("support table.star except all columns") { - checkAnswer( - sql( - """ - |SELECT r.* EXCEPT (r.b, r.a) - |FROM testData l join testData2 r on (l.key = r.a) - """.stripMargin), - Row() :: Row() :: Row() :: Row() :: Row() :: Row() :: Nil) - } - - test("support table.star except nested") { - checkAnswer( - sql( - """ - |SELECT * - |FROM (SELECT * EXCEPT (b) FROM testData2) - """.stripMargin), - Row(1) :: Row(1) :: Row(2) :: Row(2) :: Row(3) :: Row(3) :: Nil) - } - - test("except errors") { - // Try star expanding a scalar. This should fail. - checkError( - exception = intercept[ParseException]{ - sql("select * EXCEPT () from testData2") - }, - errorClass = "PARSE_SYNTAX_ERROR", - sqlState = Option("42601"), - parameters = Map("error" -> "')'", "hint" -> "") - ) - - // Try star expanding a scalar. This should fail. - checkError( - exception = intercept[ParseException] { - sql("select * EXCEPT b testData2") - }, - errorClass = "PARSE_SYNTAX_ERROR", - sqlState = Option("42601"), - parameters = Map("error" -> "'b'", "hint" -> "") - ) - - // Try star expanding a scalar. This should fail. - checkError( - exception = intercept[SparkException] { - sql("select * EXCEPT(b, b) from testData2") - }, - errorClass = "INTERNAL_ERROR", - parameters = Map("message" -> "Cannot find main error class 'EXCEPT_OVERLAPPING_COLUMNS'") - ) - - // Try star expanding a scalar. This should fail. - checkError( - exception = intercept[AnalysisException] { - sql("select * EXCEPT(invalid_column) from testData2") - }, - errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", - sqlState = Option("42703"), - parameters = Map("objectName" -> "`invalid_column`", "proposal" -> "`a`, `b`"), - matchPVals = true, - queryContext = Array( - ExpectedContext(fragment = "* EXCEPT(invalid_column)", start = 7, stop = 30) - ) - ) - - // Try star expanding a scalar. This should fail. - checkError( - exception = intercept[AnalysisException] { - sql("select * EXCEPT(a, invalid_column) from testData2") - }, - errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", - sqlState = Option("42703"), - parameters = Map("objectName" -> "`invalid_column`", "proposal" -> "`a`, `b`"), - matchPVals = true, - queryContext = Array( - ExpectedContext(fragment = "* EXCEPT(a, invalid_column)", start = 7, stop = 33) - ) - ) - } - test("self join with alias in agg") { withTempView("df") { Seq(1, 2, 3) From 64e2663bac2e3e9d86ffbf85cd68dbc8e32ef442 Mon Sep 17 00:00:00 2001 From: milastdbx Date: Wed, 22 Nov 2023 16:10:39 +0100 Subject: [PATCH 3/6] addressing comments --- .../main/resources/error/error-classes.json | 2 +- .../sql/catalyst/analysis/unresolved.scala | 9 +- .../sql/catalyst/parser/AstBuilder.scala | 10 ++- .../analyzer-results/selectExcept.sql.out | 80 ++++++++--------- .../sql-tests/inputs/selectExcept.sql | 10 +-- .../sql-tests/results/selectExcept.sql.out | 85 +++++++++---------- 6 files changed, 96 insertions(+), 100 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 694127e655c4d..89c61318732e4 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -995,7 +995,7 @@ }, "EXCEPT_OVERLAPPING_COLUMNS" : { "message" : [ - "Columns in an EXCEPT list must be distinct and non-overlapping." + "Columns in an EXCEPT list must be distinct and non-overlapping, but got ()." ], "sqlState" : "42702" }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 4dc67839d8683..003342bbff69a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -473,7 +473,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St // Use the UnresolvedStarBase expand method to get a seq of NamedExpressions corresponding to // the star expansion. This will yield a list of top-level columns from the logical plan's // output, or in the case of struct expansion (e.g. target=`x` for SELECT x.*) it will give - // a seq of NamedExpressions corresponding to struct fields. + // a seq of Alias wrapping the struct field extraction. val expandedCols = super.expand(input, resolver) // resolve except list with respect to the expandedCols @@ -545,7 +545,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St // pass through columns that don't match anything in groupedExcepts case (col, None) => col // found a match but nestedExcepts has remaining excepts - recurse to rewrite the struct - case (col, Some(nestedExcepts)) if !nestedExcepts.exists(_.isEmpty) => + case (col, Some(nestedExcepts)) if nestedExcepts.exists(_.nonEmpty) => val fields = col.dataType match { case s: StructType => s.fields // we shouldn't be here since we EXCEPT_NEXTED_COLUMN_INVALID_TYPE in getRootColumn @@ -566,7 +566,10 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St // if there are multiple nestedExcepts but one is empty we must have overlapping except // columns. throw an error. case (col, Some(nestedExcepts)) if nestedExcepts.size > 1 => - throw new AnalysisException(errorClass = "EXCEPT_OVERLAPPING_COLUMNS", Map.empty) + throw new AnalysisException( + errorClass = "EXCEPT_OVERLAPPING_COLUMNS", + messageParameters = Map( + "columns" -> this.excepts.map(_.mkString(".")).mkString(", "))) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 00d3059ec5fad..9f62c8cb32396 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1786,11 +1786,13 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { * Both un-targeted (global) and targeted aliases are supported. */ override def visitStar(ctx: StarContext): Expression = withOrigin(ctx) { + var target = Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq) + if (ctx.exceptClause != null) { - visitStarExcept(ctx) + visitStarExcept(ctx, target) } else { - UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq)) + UnresolvedStar(target) } } @@ -1799,11 +1801,11 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { * specified object except those in the except list. * Both un-targeted (global) and targeted aliases are supported. */ - def visitStarExcept(ctx: StarContext): Expression = withOrigin(ctx) { + def visitStarExcept(ctx: StarContext, target: Option[Seq[String]]): Expression = withOrigin(ctx) { val exceptCols = ctx.exceptClause .exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]]) UnresolvedStarExcept( - Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq), + target, exceptCols.toSeq) } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out index 38658ae5a71fc..49fe96caef2cd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out @@ -133,27 +133,6 @@ Project +- LocalRelation [id#x, name#x, data#x] --- !query -CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view --- !query analysis -org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException -{ - "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS", - "sqlState" : "42P07", - "messageParameters" : { - "relationName" : "`spark_catalog`.`default`.`namesTbl`" - } -} - - --- !query -SELECT * FROM namesTbl --- !query analysis -Project [name#x] -+- SubqueryAlias spark_catalog.default.namestbl - +- Relation spark_catalog.default.namestbl[name#x] csv - - -- !query SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`) -- !query analysis @@ -206,17 +185,39 @@ Project -- !query -SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view) +SELECT data.* EXCEPT (s2) FROM tbl_view -- !query analysis -Project [id#x, data#x] -+- SubqueryAlias __auto_generated_subquery_name - +- Project [id#x, data#x] - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x,name#x,data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] +Project [data#x.f1 AS f1#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT data.* EXCEPT (s2.f2) FROM tbl_view +-- !query analysis +Project [data#x.f1 AS f1#x, named_struct(f3, data#x.s2.f3) AS s2#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT data.s2.* EXCEPT (f2) FROM tbl_view +-- !query analysis +Project [data#x.s2.f3 AS f3#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] -- !query @@ -296,6 +297,9 @@ org.apache.spark.sql.AnalysisException { "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", "sqlState" : "42702", + "messageParameters" : { + "columns" : "id, id" + }, "queryContext" : [ { "objectType" : "", "objectName" : "", @@ -309,15 +313,5 @@ org.apache.spark.sql.AnalysisException -- !query SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view -- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", - "sqlState" : "42702", - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 36, - "fragment" : "* EXCEPT(data.s2, data.s2.f2)" - } ] -} +java.util.NoSuchElementException +head of empty list diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql index 995267b8378aa..e07e4f1117c29 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql @@ -22,21 +22,19 @@ SELECT * EXCEPT (data.s2) FROM tbl_view; SELECT * EXCEPT (data.s2.f2) FROM tbl_view; -- EXCEPT all columns SELECT * EXCEPT (id, name, data) FROM tbl_view; --- CREATE TABLE with SELECT -CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view; -SELECT * FROM namesTbl; -- EXCEPT special character names SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`); -- EXCEPT qualified star SELECT tbl_view.* EXCEPT (name) FROM tbl_view; --- EXCEPT insert into select INSERT INTO ids SELECT * EXCEPT (name, data) FROM tbl_view; SELECT * FROM ids; -- EXCEPT qualified columns SELECT * EXCEPT (ids.id) FROM ids; --- EXCEPT nested -SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view); +-- EXCEPT structs +SELECT data.* EXCEPT (s2) FROM tbl_view; +SELECT data.* EXCEPT (s2.f2) FROM tbl_view; +SELECT data.s2.* EXCEPT (f2) FROM tbl_view; -- Errors -- EXCEPT missing brackets diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out index 9dbcdc9eedb33..314c628a894e8 100644 --- a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out @@ -129,28 +129,6 @@ struct<> --- !query -CREATE TABLE namesTbl USING CSV AS SELECT * EXCEPT (id, data) FROM tbl_view --- !query schema -struct<> --- !query output - - - --- !query -SELECT * FROM namesTbl --- !query schema -struct --- !query output -name1 -name2 -name3 -name4 -name5 -name6 -name7 - - -- !query SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`) -- !query schema @@ -205,17 +183,45 @@ struct<> -- !query -SELECT * FROM (SELECT * EXCEPT (name) FROM tbl_view) +SELECT data.* EXCEPT (s2) FROM tbl_view -- !query schema -struct>> +struct -- !query output -10 {"f1":1,"s2":{"f2":101,"f3":"a"}} -20 {"f1":2,"s2":{"f2":202,"f3":"b"}} -30 {"f1":3,"s2":{"f2":303,"f3":"c"}} -40 {"f1":4,"s2":{"f2":404,"f3":"d"}} -50 {"f1":5,"s2":{"f2":505,"f3":"e"}} -60 {"f1":6,"s2":{"f2":606,"f3":"f"}} -70 {"f1":7,"s2":{"f2":707,"f3":"g"}} +1 +2 +3 +4 +5 +6 +7 + + +-- !query +SELECT data.* EXCEPT (s2.f2) FROM tbl_view +-- !query schema +struct> +-- !query output +1 {"f3":"a"} +2 {"f3":"b"} +3 {"f3":"c"} +4 {"f3":"d"} +5 {"f3":"e"} +6 {"f3":"f"} +7 {"f3":"g"} + + +-- !query +SELECT data.s2.* EXCEPT (f2) FROM tbl_view +-- !query schema +struct +-- !query output +a +b +c +d +e +f +g -- !query @@ -305,6 +311,9 @@ org.apache.spark.sql.AnalysisException { "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", "sqlState" : "42702", + "messageParameters" : { + "columns" : "id, id" + }, "queryContext" : [ { "objectType" : "", "objectName" : "", @@ -320,15 +329,5 @@ SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", - "sqlState" : "42702", - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 36, - "fragment" : "* EXCEPT(data.s2, data.s2.f2)" - } ] -} +java.util.NoSuchElementException +head of empty list From 3f3c6352071d5e63ad6a3487fb633fcbd2c7c4f9 Mon Sep 17 00:00:00 2001 From: milastdbx Date: Wed, 22 Nov 2023 17:39:14 +0100 Subject: [PATCH 4/6] fixed bug --- .../sql/catalyst/analysis/unresolved.scala | 2 +- .../analyzer-results/selectExcept.sql.out | 17 +++++++++++++++-- .../sql-tests/results/selectExcept.sql.out | 17 +++++++++++++++-- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 003342bbff69a..7c98946638d40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -545,7 +545,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St // pass through columns that don't match anything in groupedExcepts case (col, None) => col // found a match but nestedExcepts has remaining excepts - recurse to rewrite the struct - case (col, Some(nestedExcepts)) if nestedExcepts.exists(_.nonEmpty) => + case (col, Some(nestedExcepts)) if nestedExcepts.forall(_.nonEmpty) => val fields = col.dataType match { case s: StructType => s.fields // we shouldn't be here since we EXCEPT_NEXTED_COLUMN_INVALID_TYPE in getRootColumn diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out index 49fe96caef2cd..8643d40b886bd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out @@ -313,5 +313,18 @@ org.apache.spark.sql.AnalysisException -- !query SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view -- !query analysis -java.util.NoSuchElementException -head of empty list +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "messageParameters" : { + "columns" : "data.s2, data.s2.f2" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "* EXCEPT(data.s2, data.s2.f2)" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out index 314c628a894e8..6f6ba9097342a 100644 --- a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out @@ -329,5 +329,18 @@ SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view -- !query schema struct<> -- !query output -java.util.NoSuchElementException -head of empty list +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "messageParameters" : { + "columns" : "data.s2, data.s2.f2" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "* EXCEPT(data.s2, data.s2.f2)" + } ] +} From 2e0587c27ffa1623fcddd5db6b77f72089274ecd Mon Sep 17 00:00:00 2001 From: milastdbx Date: Wed, 22 Nov 2023 23:28:28 +0100 Subject: [PATCH 5/6] deprecated method replacement --- .../org/apache/spark/sql/catalyst/analysis/unresolved.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 7c98946638d40..60d1a89959da4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -538,7 +538,7 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St : Seq[NamedExpression] = { // group the except pairs by the column they refer to. NOTE: no groupMap until scala 2.13 val groupedExcepts: AttributeMap[Seq[Seq[String]]] = - AttributeMap(excepts.groupBy(_._1.toAttribute).mapValues(v => v.map(_._2))) + AttributeMap(excepts.groupBy(_._1.toAttribute).view.mapValues(v => v.map(_._2))) // map input columns while searching for the except entry corresponding to the current column columns.map(col => col -> groupedExcepts.get(col.toAttribute)).collect { From 470e9277f1a9dcc9c3bb58a76d19b179bae6b34c Mon Sep 17 00:00:00 2001 From: milastdbx Date: Thu, 23 Nov 2023 11:11:51 +0100 Subject: [PATCH 6/6] generated error class documents --- .../src/main/resources/error/error-classes.json | 12 ++++++------ docs/sql-error-conditions.md | 12 ++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 89c61318732e4..f8e520bb8615c 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -987,6 +987,12 @@ ], "sqlState" : "42K09" }, + "EXCEED_LIMIT_LENGTH" : { + "message" : [ + "Exceeds char/varchar type length limitation: ." + ], + "sqlState" : "54006" + }, "EXCEPT_NESTED_COLUMN_INVALID_TYPE" : { "message" : [ "EXCEPT column was resolved and expected to be StructType, but found type ." @@ -999,12 +1005,6 @@ ], "sqlState" : "42702" }, - "EXCEED_LIMIT_LENGTH" : { - "message" : [ - "Exceeds char/varchar type length limitation: ." - ], - "sqlState" : "54006" - }, "EXPECT_PERMANENT_VIEW_NOT_TEMP" : { "message" : [ "'' expects a permanent view but is a temp view." diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index cba6a24b86999..eb5a120aca72f 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -587,6 +587,18 @@ The event time `` has the invalid type ``, but expected "T Exceeds char/varchar type length limitation: ``. +### EXCEPT_NESTED_COLUMN_INVALID_TYPE + +[SQLSTATE: 428H2](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +EXCEPT column `` was resolved and expected to be StructType, but found type ``. + +### EXCEPT_OVERLAPPING_COLUMNS + +[SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Columns in an EXCEPT list must be distinct and non-overlapping, but got (``). + ### EXPECT_PERMANENT_VIEW_NOT_TEMP [SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)