diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index afcd841a2ce04..f8e520bb8615c 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -993,6 +993,18 @@ ], "sqlState" : "54006" }, + "EXCEPT_NESTED_COLUMN_INVALID_TYPE" : { + "message" : [ + "EXCEPT column was resolved and expected to be StructType, but found type ." + ], + "sqlState" : "428H2" + }, + "EXCEPT_OVERLAPPING_COLUMNS" : { + "message" : [ + "Columns in an EXCEPT list must be distinct and non-overlapping, but got ()." + ], + "sqlState" : "42702" + }, "EXPECT_PERMANENT_VIEW_NOT_TEMP" : { "message" : [ "'' expects a permanent view but is a temp view." diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index cba6a24b86999..eb5a120aca72f 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -587,6 +587,18 @@ The event time `` has the invalid type ``, but expected "T Exceeds char/varchar type length limitation: ``. +### EXCEPT_NESTED_COLUMN_INVALID_TYPE + +[SQLSTATE: 428H2](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +EXCEPT column `` was resolved and expected to be StructType, but found type ``. + +### EXCEPT_OVERLAPPING_COLUMNS + +[SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Columns in an EXCEPT list must be distinct and non-overlapping, but got (``). + ### EXPECT_PERMANENT_VIEW_NOT_TEMP [SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 609bd72e21935..439a12c301817 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -587,6 +587,10 @@ notMatchedBySourceAction | UPDATE SET assignmentList ; +exceptClause + : EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN + ; + assignmentList : assignment (COMMA assignment)* ; @@ -969,8 +973,8 @@ primaryExpression | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position | constant #constantDefault - | ASTERISK #star - | qualifiedName DOT ASTERISK #star + | ASTERISK exceptClause? #star + | qualifiedName DOT ASTERISK exceptClause? #star | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor | LEFT_PAREN query RIGHT_PAREN #subqueryExpression | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index e232dfc05faa8..60d1a89959da4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.analysis +import org.apache.spark.SparkException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.expressions._ @@ -358,10 +359,10 @@ abstract class Star extends LeafExpression with NamedExpression { def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] } - /** * Represents all of the input attributes to a given relational operator, for example in * "SELECT * FROM ...". + * "SELECT * FROM ..." or "SELECT * EXCEPT(...) FROM ..." * * This is also used to expand structs. For example: * "SELECT record.* from (SELECT struct(a,b,c) as record ...) @@ -369,9 +370,12 @@ abstract class Star extends LeafExpression with NamedExpression { * @param target an optional name that should be the target of the expansion. If omitted all * targets' columns are produced. This can either be a table name or struct name. This * is a list of identifiers that is the path of the expansion. - */ -case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevaluable { - + * + * This class provides the shared behavior between the classes for SELECT * ([[UnresolvedStar]]) + * and SELECT * EXCEPT ([[UnresolvedStarExcept]]). [[UnresolvedStar]] is just a case class of this, + * while [[UnresolvedStarExcept]] adds some additional logic to the expand method. + */ +abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star with Unevaluable { /** * Returns true if the nameParts is a subset of the last elements of qualifier of the attribute. * @@ -383,7 +387,7 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu * - `SELECT t.* FROM ns1.ns2.t` where nameParts is Seq("t") and * qualifier is Seq("ns1", "ns2", "t"). */ - private def matchedQualifier( + protected def matchedQualifier( attribute: Attribute, nameParts: Seq[String], resolver: Resolver): Boolean = { @@ -444,6 +448,148 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu override def toString: String = target.map(_.mkString("", ".", ".")).getOrElse("") + "*" } +/** + * Represents some of the input attributes to a given relational operator, for example in + * "SELECT * EXCEPT(a) FROM ...". + * + * @param target an optional name that should be the target of the expansion. If omitted all + * targets' columns are produced. This can only be a table name. This + * is a list of identifiers that is the path of the expansion. + * + * @param excepts a list of names that should be excluded from the expansion. + * + */ +case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[String]]) + extends UnresolvedStarBase(target) { + + /** + * We expand the * EXCEPT by the following three steps: + * 1. use the original .expand() to get top-level column list or struct expansion + * 2. resolve excepts (with respect to the Seq[NamedExpression] returned from (1)) + * 3. filter the expanded columns with the resolved except list. recursively apply filtering in + * case of nested columns in the except list (in order to rewrite structs) + */ + override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = { + // Use the UnresolvedStarBase expand method to get a seq of NamedExpressions corresponding to + // the star expansion. This will yield a list of top-level columns from the logical plan's + // output, or in the case of struct expansion (e.g. target=`x` for SELECT x.*) it will give + // a seq of Alias wrapping the struct field extraction. + val expandedCols = super.expand(input, resolver) + + // resolve except list with respect to the expandedCols + val resolvedExcepts = excepts.map { exceptParts => + AttributeSeq(expandedCols.map(_.toAttribute)).resolve(exceptParts, resolver).getOrElse { + val orderedCandidates = StringUtils.orderSuggestedIdentifiersBySimilarity( + UnresolvedAttribute(exceptParts).name, expandedCols.map(a => a.qualifier :+ a.name)) + // if target is defined and expandedCols does not include any Attributes, it must be struct + // expansion; give message suggesting to use unqualified names of nested fields. + throw QueryCompilationErrors + .unresolvedColumnError(UnresolvedAttribute(exceptParts).name, orderedCandidates) + } + } + + // Convert each resolved except into a pair of (col: Attribute, nestedColumn) representing the + // top level column in expandedCols that we must 'filter' based on nestedColumn. + @scala.annotation.tailrec + def getRootColumn(expr: Expression, nestedColumn: Seq[String] = Nil) + : (NamedExpression, Seq[String]) = expr match { + case GetStructField(fieldExpr, _, Some(fieldName)) => + getRootColumn(fieldExpr, fieldName +: nestedColumn) + case e: NamedExpression => e -> nestedColumn + case other: ExtractValue => throw new AnalysisException( + errorClass = "EXCEPT_NESTED_COLUMN_INVALID_TYPE", + messageParameters = Map("columnName" -> other.sql, "dataType" -> other.dataType.toString)) + } + // An exceptPair represents the column in expandedCols along with the path of a nestedColumn + // that should be except-ed. Consider two examples: + // 1. excepting the entire col1 = (col1, Seq()) + // 2. excepting a nested field in col2, col2.a.b = (col2, Seq(a, b)) + // INVARIANT: we rely on the structure of the resolved except being an Alias of GetStructField + // in the case of nested columns. + val exceptPairs = resolvedExcepts.map { + case Alias(exceptExpr, name) => getRootColumn(exceptExpr) + case except: NamedExpression => except -> Seq.empty + } + + // Filter columns which correspond to ones listed in the except list and return a new list of + // columns which exclude the columns in the except list. The 'filtering' manifests as either + // dropping the column from the list of columns we return, or rewriting the projected column in + // order to remove excepts that refer to nested columns. For example, with the example above: + // excepts = Seq( + // (col1, Seq()), => filter col1 from the output + // (col2, Seq(a, b)) => rewrite col2 in the output so that it doesn't include the nested field + // ) corresponding to col2.a.b + // + // This occurs in two steps: + // 1. group the excepts by the column they refer to (groupedExcepts) + // 2. filter/rewrite input columns based on four cases: + // a. column doesn't match any groupedExcepts => column unchanged + // b. column exists in groupedExcepts and: + // i. none of remainingExcepts are empty => recursively apply filterColumns over the + // struct fields in order to rewrite the struct + // ii. a remainingExcept is empty, but there are multiple remainingExcepts => we must + // have duplicate/overlapping excepts - throw an error + // iii. [otherwise] remainingExcept is exactly Seq(Seq()) => this is the base 'filtering' + // case. we omit the column from the output (this is a column we would like to + // except). NOTE: this case isn't explicitly listed in the `collect` below since we + // 'collect' columns which match the cases above and omit ones that fall into this + // remaining case. + def filterColumns(columns: Seq[NamedExpression], excepts: Seq[(NamedExpression, Seq[String])]) + : Seq[NamedExpression] = { + // group the except pairs by the column they refer to. NOTE: no groupMap until scala 2.13 + val groupedExcepts: AttributeMap[Seq[Seq[String]]] = + AttributeMap(excepts.groupBy(_._1.toAttribute).view.mapValues(v => v.map(_._2))) + + // map input columns while searching for the except entry corresponding to the current column + columns.map(col => col -> groupedExcepts.get(col.toAttribute)).collect { + // pass through columns that don't match anything in groupedExcepts + case (col, None) => col + // found a match but nestedExcepts has remaining excepts - recurse to rewrite the struct + case (col, Some(nestedExcepts)) if nestedExcepts.forall(_.nonEmpty) => + val fields = col.dataType match { + case s: StructType => s.fields + // we shouldn't be here since we EXCEPT_NEXTED_COLUMN_INVALID_TYPE in getRootColumn + // for this case - just throw internal error + case _ => throw SparkException.internalError("Invalid column type") + } + val extractedFields = fields.zipWithIndex.map { case (f, i) => + Alias(GetStructField(col, i), f.name)() + } + val newExcepts = nestedExcepts.map { nestedExcept => + // INVARIANT: we cannot have duplicate column names in nested columns, thus, this `head` + // will find the one and only column corresponding to the correct extractedField. + extractedFields.collectFirst { case col if resolver(col.name, nestedExcept.head) => + col.toAttribute -> nestedExcept.tail + }.get + } + Alias(CreateStruct(filterColumns(extractedFields.toSeq, newExcepts)), col.name)() + // if there are multiple nestedExcepts but one is empty we must have overlapping except + // columns. throw an error. + case (col, Some(nestedExcepts)) if nestedExcepts.size > 1 => + throw new AnalysisException( + errorClass = "EXCEPT_OVERLAPPING_COLUMNS", + messageParameters = Map( + "columns" -> this.excepts.map(_.mkString(".")).mkString(", "))) + } + } + + filterColumns(expandedCols, exceptPairs) + } +} + +/** + * Represents all of the input attributes to a given relational operator, for example in + * "SELECT * FROM ...". + * + * This is also used to expand structs. For example: + * "SELECT record.* from (SELECT struct(a,b,c) as record ...) + * + * @param target an optional name that should be the target of the expansion. If omitted all + * targets' columns are produced. This can either be a table name or struct name. This + * is a list of identifiers that is the path of the expansion. + */ +case class UnresolvedStar(target: Option[Seq[String]]) extends UnresolvedStarBase(target) + /** * Represents all of the input attributes to a given relational operator, for example in * "SELECT `(id)?+.+` FROM ...". diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 09161d8f8da53..9f62c8cb32396 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1786,7 +1786,27 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { * Both un-targeted (global) and targeted aliases are supported. */ override def visitStar(ctx: StarContext): Expression = withOrigin(ctx) { - UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq)) + var target = Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq) + + if (ctx.exceptClause != null) { + visitStarExcept(ctx, target) + } + else { + UnresolvedStar(target) + } + } + + /** + * Create a star-except (i.e. all - except list) expression; this selects all elements in the + * specified object except those in the except list. + * Both un-targeted (global) and targeted aliases are supported. + */ + def visitStarExcept(ctx: StarContext, target: Option[Seq[String]]): Expression = withOrigin(ctx) { + val exceptCols = ctx.exceptClause + .exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]]) + UnresolvedStarExcept( + target, + exceptCols.toSeq) } /** diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out new file mode 100644 index 0000000000000..8643d40b886bd --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out @@ -0,0 +1,330 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES + (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))), + (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))), + (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))), + (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))), + (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), + (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), + (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) +AS tbl_view(id, name, data) +-- !query analysis +CreateViewCommand `tbl_view`, SELECT * FROM VALUES + (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))), + (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))), + (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))), + (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))), + (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), + (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), + (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) +AS tbl_view(id, name, data), false, false, LocalTempView, true + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +CREATE TABLE ids (id INT) USING CSV +-- !query analysis +org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +{ + "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS", + "sqlState" : "42P07", + "messageParameters" : { + "relationName" : "`spark_catalog`.`default`.`ids`" + } +} + + +-- !query +SELECT * FROM tbl_view +-- !query analysis +Project [id#x, name#x, data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (id) FROM tbl_view +-- !query analysis +Project [name#x, data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (name) FROM tbl_view +-- !query analysis +Project [id#x, data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (data) FROM tbl_view +-- !query analysis +Project [id#x, name#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (data.f1) FROM tbl_view +-- !query analysis +Project [id#x, name#x, named_struct(s2, data#x.s2) AS data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (data.s2) FROM tbl_view +-- !query analysis +Project [id#x, name#x, named_struct(f1, data#x.f1) AS data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (data.s2.f2) FROM tbl_view +-- !query analysis +Project [id#x, name#x, named_struct(f1, data#x.f1, s2, named_struct(f3, data#x.s2.f3)) AS data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (id, name, data) FROM tbl_view +-- !query analysis +Project ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`) +-- !query analysis +Project [a_b_c#x] ++- SubqueryAlias __auto_generated_subquery_name + +- Project [1 AS a_b_c#x, 2 AS a-b-c#x] + +- OneRowRelation + + +-- !query +SELECT tbl_view.* EXCEPT (name) FROM tbl_view +-- !query analysis +Project [id#x, data#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +INSERT INTO ids +SELECT * EXCEPT (name, data) FROM tbl_view +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/ids, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/ids], Append, `spark_catalog`.`default`.`ids`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/ids), [id] ++- Project [id#x] + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * FROM ids +-- !query analysis +Project [id#x] ++- SubqueryAlias spark_catalog.default.ids + +- Relation spark_catalog.default.ids[id#x] csv + + +-- !query +SELECT * EXCEPT (ids.id) FROM ids +-- !query analysis +Project ++- SubqueryAlias spark_catalog.default.ids + +- Relation spark_catalog.default.ids[id#x] csv + + +-- !query +SELECT data.* EXCEPT (s2) FROM tbl_view +-- !query analysis +Project [data#x.f1 AS f1#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT data.* EXCEPT (s2.f2) FROM tbl_view +-- !query analysis +Project [data#x.f1 AS f1#x, named_struct(f3, data#x.s2.f3) AS s2#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT data.s2.* EXCEPT (f2) FROM tbl_view +-- !query analysis +Project [data#x.s2.f3 AS f3#x] ++- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x,name#x,data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT * EXCEPT name FROM tbl_view +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'name'", + "hint" : "" + } +} + + +-- !query +SELECT * EXCEPT() name FROM tbl_view +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "')'", + "hint" : "" + } +} + + +-- !query +SELECT * EXCEPT(invalid_column) FROM tbl_view +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`invalid_column`", + "proposal" : "`id`, `name`, `data`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 31, + "fragment" : "* EXCEPT(invalid_column)" + } ] +} + + +-- !query +SELECT * EXCEPT(id, invalid_column) FROM tbl_view +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`invalid_column`", + "proposal" : "`id`, `name`, `data`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 35, + "fragment" : "* EXCEPT(id, invalid_column)" + } ] +} + + +-- !query +SELECT * EXCEPT(id, id) FROM tbl_view +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "messageParameters" : { + "columns" : "id, id" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 23, + "fragment" : "* EXCEPT(id, id)" + } ] +} + + +-- !query +SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "messageParameters" : { + "columns" : "data.s2, data.s2.f2" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "* EXCEPT(data.s2, data.s2.f2)" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql new file mode 100644 index 0000000000000..e07e4f1117c29 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/selectExcept.sql @@ -0,0 +1,51 @@ +CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES + (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))), + (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))), + (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))), + (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))), + (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), + (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), + (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) +AS tbl_view(id, name, data); + +CREATE TABLE ids (id INT) USING CSV; + +-- Happy path +-- EXCEPT basic scenario +SELECT * FROM tbl_view; +SELECT * EXCEPT (id) FROM tbl_view; +SELECT * EXCEPT (name) FROM tbl_view; +-- EXCEPT named structs +SELECT * EXCEPT (data) FROM tbl_view; +SELECT * EXCEPT (data.f1) FROM tbl_view; +SELECT * EXCEPT (data.s2) FROM tbl_view; +SELECT * EXCEPT (data.s2.f2) FROM tbl_view; +-- EXCEPT all columns +SELECT * EXCEPT (id, name, data) FROM tbl_view; +-- EXCEPT special character names +SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`); +-- EXCEPT qualified star +SELECT tbl_view.* EXCEPT (name) FROM tbl_view; +INSERT INTO ids +SELECT * EXCEPT (name, data) FROM tbl_view; +SELECT * FROM ids; +-- EXCEPT qualified columns +SELECT * EXCEPT (ids.id) FROM ids; +-- EXCEPT structs +SELECT data.* EXCEPT (s2) FROM tbl_view; +SELECT data.* EXCEPT (s2.f2) FROM tbl_view; +SELECT data.s2.* EXCEPT (f2) FROM tbl_view; + +-- Errors +-- EXCEPT missing brackets +SELECT * EXCEPT name FROM tbl_view; +-- EXCEPT no columns +SELECT * EXCEPT() name FROM tbl_view; +-- EXCEPT invalid column +SELECT * EXCEPT(invalid_column) FROM tbl_view; +-- EXCEPT find invalid column +SELECT * EXCEPT(id, invalid_column) FROM tbl_view; +-- EXCEPT duplicate column +SELECT * EXCEPT(id, id) FROM tbl_view; +-- EXCEPT overlapping columns +SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view; diff --git a/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out new file mode 100644 index 0000000000000..6f6ba9097342a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/selectExcept.sql.out @@ -0,0 +1,346 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES + (10, "name1", named_struct("f1", 1, "s2", named_struct("f2", 101, "f3", "a"))), + (20, "name2", named_struct("f1", 2, "s2", named_struct("f2", 202, "f3", "b"))), + (30, "name3", named_struct("f1", 3, "s2", named_struct("f2", 303, "f3", "c"))), + (40, "name4", named_struct("f1", 4, "s2", named_struct("f2", 404, "f3", "d"))), + (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), + (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), + (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) +AS tbl_view(id, name, data) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE ids (id INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM tbl_view +-- !query schema +struct>> +-- !query output +10 name1 {"f1":1,"s2":{"f2":101,"f3":"a"}} +20 name2 {"f1":2,"s2":{"f2":202,"f3":"b"}} +30 name3 {"f1":3,"s2":{"f2":303,"f3":"c"}} +40 name4 {"f1":4,"s2":{"f2":404,"f3":"d"}} +50 name5 {"f1":5,"s2":{"f2":505,"f3":"e"}} +60 name6 {"f1":6,"s2":{"f2":606,"f3":"f"}} +70 name7 {"f1":7,"s2":{"f2":707,"f3":"g"}} + + +-- !query +SELECT * EXCEPT (id) FROM tbl_view +-- !query schema +struct>> +-- !query output +name1 {"f1":1,"s2":{"f2":101,"f3":"a"}} +name2 {"f1":2,"s2":{"f2":202,"f3":"b"}} +name3 {"f1":3,"s2":{"f2":303,"f3":"c"}} +name4 {"f1":4,"s2":{"f2":404,"f3":"d"}} +name5 {"f1":5,"s2":{"f2":505,"f3":"e"}} +name6 {"f1":6,"s2":{"f2":606,"f3":"f"}} +name7 {"f1":7,"s2":{"f2":707,"f3":"g"}} + + +-- !query +SELECT * EXCEPT (name) FROM tbl_view +-- !query schema +struct>> +-- !query output +10 {"f1":1,"s2":{"f2":101,"f3":"a"}} +20 {"f1":2,"s2":{"f2":202,"f3":"b"}} +30 {"f1":3,"s2":{"f2":303,"f3":"c"}} +40 {"f1":4,"s2":{"f2":404,"f3":"d"}} +50 {"f1":5,"s2":{"f2":505,"f3":"e"}} +60 {"f1":6,"s2":{"f2":606,"f3":"f"}} +70 {"f1":7,"s2":{"f2":707,"f3":"g"}} + + +-- !query +SELECT * EXCEPT (data) FROM tbl_view +-- !query schema +struct +-- !query output +10 name1 +20 name2 +30 name3 +40 name4 +50 name5 +60 name6 +70 name7 + + +-- !query +SELECT * EXCEPT (data.f1) FROM tbl_view +-- !query schema +struct>> +-- !query output +10 name1 {"s2":{"f2":101,"f3":"a"}} +20 name2 {"s2":{"f2":202,"f3":"b"}} +30 name3 {"s2":{"f2":303,"f3":"c"}} +40 name4 {"s2":{"f2":404,"f3":"d"}} +50 name5 {"s2":{"f2":505,"f3":"e"}} +60 name6 {"s2":{"f2":606,"f3":"f"}} +70 name7 {"s2":{"f2":707,"f3":"g"}} + + +-- !query +SELECT * EXCEPT (data.s2) FROM tbl_view +-- !query schema +struct> +-- !query output +10 name1 {"f1":1} +20 name2 {"f1":2} +30 name3 {"f1":3} +40 name4 {"f1":4} +50 name5 {"f1":5} +60 name6 {"f1":6} +70 name7 {"f1":7} + + +-- !query +SELECT * EXCEPT (data.s2.f2) FROM tbl_view +-- !query schema +struct>> +-- !query output +10 name1 {"f1":1,"s2":{"f3":"a"}} +20 name2 {"f1":2,"s2":{"f3":"b"}} +30 name3 {"f1":3,"s2":{"f3":"c"}} +40 name4 {"f1":4,"s2":{"f3":"d"}} +50 name5 {"f1":5,"s2":{"f3":"e"}} +60 name6 {"f1":6,"s2":{"f3":"f"}} +70 name7 {"f1":7,"s2":{"f3":"g"}} + + +-- !query +SELECT * EXCEPT (id, name, data) FROM tbl_view +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * EXCEPT (`a-b-c`) FROM (SELECT 1 a_b_c, 2 `a-b-c`) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT tbl_view.* EXCEPT (name) FROM tbl_view +-- !query schema +struct>> +-- !query output +10 {"f1":1,"s2":{"f2":101,"f3":"a"}} +20 {"f1":2,"s2":{"f2":202,"f3":"b"}} +30 {"f1":3,"s2":{"f2":303,"f3":"c"}} +40 {"f1":4,"s2":{"f2":404,"f3":"d"}} +50 {"f1":5,"s2":{"f2":505,"f3":"e"}} +60 {"f1":6,"s2":{"f2":606,"f3":"f"}} +70 {"f1":7,"s2":{"f2":707,"f3":"g"}} + + +-- !query +INSERT INTO ids +SELECT * EXCEPT (name, data) FROM tbl_view +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM ids +-- !query schema +struct +-- !query output +10 +20 +30 +40 +50 +60 +70 + + +-- !query +SELECT * EXCEPT (ids.id) FROM ids +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT data.* EXCEPT (s2) FROM tbl_view +-- !query schema +struct +-- !query output +1 +2 +3 +4 +5 +6 +7 + + +-- !query +SELECT data.* EXCEPT (s2.f2) FROM tbl_view +-- !query schema +struct> +-- !query output +1 {"f3":"a"} +2 {"f3":"b"} +3 {"f3":"c"} +4 {"f3":"d"} +5 {"f3":"e"} +6 {"f3":"f"} +7 {"f3":"g"} + + +-- !query +SELECT data.s2.* EXCEPT (f2) FROM tbl_view +-- !query schema +struct +-- !query output +a +b +c +d +e +f +g + + +-- !query +SELECT * EXCEPT name FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'name'", + "hint" : "" + } +} + + +-- !query +SELECT * EXCEPT() name FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "')'", + "hint" : "" + } +} + + +-- !query +SELECT * EXCEPT(invalid_column) FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`invalid_column`", + "proposal" : "`id`, `name`, `data`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 31, + "fragment" : "* EXCEPT(invalid_column)" + } ] +} + + +-- !query +SELECT * EXCEPT(id, invalid_column) FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`invalid_column`", + "proposal" : "`id`, `name`, `data`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 35, + "fragment" : "* EXCEPT(id, invalid_column)" + } ] +} + + +-- !query +SELECT * EXCEPT(id, id) FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "messageParameters" : { + "columns" : "id, id" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 23, + "fragment" : "* EXCEPT(id, id)" + } ] +} + + +-- !query +SELECT * EXCEPT(data.s2, data.s2.f2) FROM tbl_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "messageParameters" : { + "columns" : "data.s2, data.s2.f2" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "* EXCEPT(data.s2, data.s2.f2)" + } ] +}