From 7c2dbe5a3449c7839442d32fb087c0385c266783 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 12 Apr 2021 19:56:26 +0800 Subject: [PATCH 1/2] disallow group by alias --- docs/sql-ref-ansi-compliance.md | 1 + .../sql/catalyst/analysis/Analyzer.scala | 5 +- .../apache/spark/sql/internal/SQLConf.scala | 27 +- .../sql-tests/inputs/ansi/group-analytics.sql | 1 + .../results/ansi/group-analytics.sql.out | 506 ++++++++++++++++++ 5 files changed, 526 insertions(+), 14 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 70a1fa382fee..0c4f5e542881 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -183,6 +183,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map. + - `GROUP BY`: aliases in a select list cannot be used in GROUP BY clauses. Each column referenced in a GROUP BY clause shall unambiguously reference a column of the table resulting from the FROM clause. 
### SQL Keywords diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index d41a638f55db..3cd7d93651c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1852,9 +1852,12 @@ class Analyzer(override val catalogManager: CatalogManager) }} } + // Group by alias is not allowed in ANSI mode. + private def allowGroupByAlias: Boolean = conf.groupByAliases && !conf.ansiEnabled + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case agg @ Aggregate(groups, aggs, child) - if conf.groupByAliases && child.resolved && aggs.forall(_.resolved) && + if allowGroupByAlias && child.resolved && aggs.forall(_.resolved) && groups.exists(!_.resolved) => agg.copy(groupingExpressions = mayResolveAttrByAggregateExprs(groups, aggs, child)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 7138c56560a3..6317ef262696 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -206,6 +206,17 @@ object SQLConf { .intConf .createWithDefault(100) + val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled") + .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + + "For example, Spark will throw an exception at runtime instead of returning null results " + + "when the inputs to a SQL operator/function are invalid." + + "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " + + "Spark's documentation. 
Some ANSI dialect features may be not from the ANSI SQL " + + "standard directly, but their behaviors align with ANSI SQL's style") + .version("3.0.0") + .booleanConf + .createWithDefault(false) + val OPTIMIZER_EXCLUDED_RULES = buildConf("spark.sql.optimizer.excludedRules") .doc("Configures a list of rules to be disabled in the optimizer, in which the rules are " + "specified by their rule names and separated by comma. It is not guaranteed that all the " + @@ -1092,8 +1103,9 @@ object SQLConf { .createWithDefault(true) val GROUP_BY_ALIASES = buildConf("spark.sql.groupByAliases") - .doc("When true, aliases in a select list can be used in group by clauses. When false, " + - "an analysis exception is thrown in the case.") + .doc("This configuration is only effective when ANSI mode is disabled. When it is true and " + + s"${ANSI_ENABLED.key} is false, aliases in a select list can be used in group by clauses. " + + "Otherwise, an analysis exception is thrown in the case.") .version("2.2.0") .booleanConf .createWithDefault(true) @@ -2348,17 +2360,6 @@ object SQLConf { .checkValues(StoreAssignmentPolicy.values.map(_.toString)) .createWithDefault(StoreAssignmentPolicy.ANSI.toString) - val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled") - .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + - "For example, Spark will throw an exception at runtime instead of returning null results " + - "when the inputs to a SQL operator/function are invalid." + - "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " + - "Spark's documentation. 
Some ANSI dialect features may be not from the ANSI SQL " + - "standard directly, but their behaviors align with ANSI SQL's style") - .version("3.0.0") - .booleanConf - .createWithDefault(false) - val SORT_BEFORE_REPARTITION = buildConf("spark.sql.execution.sortBeforeRepartition") .internal() diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql new file mode 100644 index 000000000000..d78689233ddf --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql @@ -0,0 +1 @@ +--IMPORT group-analytics.sql \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out new file mode 100644 index 000000000000..f0daa7be6104 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out @@ -0,0 +1,506 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 37 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) +AS testData(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE +-- !query schema +struct<(a + b):int,b:int,sum((a - b)):bigint> +-- !query output +2 1 0 +2 NULL 0 +3 1 1 +3 2 -1 +3 NULL 0 +4 1 2 +4 2 0 +4 NULL 2 +5 2 1 +5 NULL 1 +NULL 1 3 +NULL 2 0 +NULL NULL 3 + + +-- !query +SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE +-- !query schema +struct +-- !query output +1 1 1 +1 2 2 +1 NULL 3 +2 1 1 +2 2 2 +2 NULL 3 +3 1 1 +3 2 2 +3 NULL 3 +NULL 1 3 +NULL 2 6 +NULL NULL 9 + + +-- !query +SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP +-- !query schema +struct<(a + b):int,b:int,sum((a - b)):bigint> +-- !query output +2 1 0 +2 NULL 0 +3 1 1 +3 2 -1 +3 NULL 0 +4 1 2 +4 
2 0 +4 NULL 2 +5 2 1 +5 NULL 1 +NULL NULL 3 + + +-- !query +SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP +-- !query schema +struct +-- !query output +1 1 1 +1 2 2 +1 NULL 3 +2 1 1 +2 2 2 +2 NULL 3 +3 1 1 +3 2 2 +3 NULL 3 +NULL NULL 9 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES +("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) +AS courseSales(course, year, earnings) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 113000 +Java NULL 50000 +Java 2012 20000 +Java 2013 30000 +dotNET NULL 63000 +dotNET 2012 15000 +dotNET 2013 48000 + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 113000 +Java NULL 50000 +Java 2012 20000 +Java 2012 20000 +Java 2013 30000 +Java 2013 30000 +dotNET NULL 63000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2013 48000 +dotNET 2013 48000 + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Empty set in ROLLUP grouping sets is not supported.(line 1, pos 61) + +== SQL == +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year +-------------------------------------------------------------^^^ + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 113000 +NULL 2012 35000 +NULL 2013 78000 +Java NULL 50000 +Java 2012 20000 +Java 2013 30000 +dotNET 
NULL 63000 +dotNET 2012 15000 +dotNET 2013 48000 + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 113000 +NULL 2012 35000 +NULL 2013 78000 +Java NULL 50000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +dotNET NULL 63000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Empty set in CUBE grouping sets is not supported.(line 1, pos 61) + +== SQL == +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year +-------------------------------------------------------------^^^ + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year) +-- !query schema +struct +-- !query output +Java NULL 50000 +NULL 2012 35000 +NULL 2013 78000 +dotNET NULL 63000 + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ()) +-- !query schema +struct +-- !query output +Java NULL 50000 +NULL 2012 35000 +NULL 2013 78000 +NULL NULL 113000 +dotNET NULL 63000 + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course) +-- !query schema +struct +-- !query output +Java NULL 50000 +dotNET NULL 63000 + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year) +-- !query schema +struct 
+-- !query output +NULL 2012 35000 +NULL 2013 78000 + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Partial CUBE/ROLLUP/GROUPING SETS like `GROUP BY a, b, CUBE(a, b)` is not supported.(line 1, pos 52) + +== SQL == +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year +----------------------------------------------------^^^ + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Mixed CUBE/ROLLUP/GROUPING SETS like `GROUP BY CUBE(a, b), ROLLUP(a, c)` is not supported.(line 1, pos 52) + +== SQL == +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year +----------------------------------------------------^^^ + + +-- !query +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Mixed CUBE/ROLLUP/GROUPING SETS like `GROUP BY CUBE(a, b), ROLLUP(a, c)` is not supported.(line 1, pos 52) + +== SQL == +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year +----------------------------------------------------^^^ + + +-- !query +SELECT course, SUM(earnings) AS sum FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum +-- !query schema +struct +-- !query output +NULL 113000 +Java 20000 +Java 30000 +Java 50000 +dotNET 5000 +dotNET 10000 
+dotNET 48000 +dotNET 63000 + + +-- !query +SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum +-- !query schema +struct +-- !query output +NULL 113000 3 +Java 20000 0 +Java 30000 0 +Java 50000 1 +dotNET 5000 0 +dotNET 10000 0 +dotNET 48000 0 +dotNET 63000 1 + + +-- !query +SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales +GROUP BY CUBE(course, year) +-- !query schema +struct +-- !query output +Java 2012 0 0 0 +Java 2013 0 0 0 +Java NULL 0 1 1 +NULL 2012 1 0 2 +NULL 2013 1 0 2 +NULL NULL 1 1 3 +dotNET 2012 0 0 0 +dotNET 2013 0 0 0 +dotNET NULL 0 1 1 + + +-- !query +SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping() can only be used with GroupingSets/Cube/Rollup + + +-- !query +SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping_id() can only be used with GroupingSets/Cube/Rollup + + +-- !query +SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year +-- !query schema +struct +-- !query output +Java 2012 0 +Java 2013 0 +dotNET 2012 0 +dotNET 2013 0 +Java NULL 1 +dotNET NULL 1 +NULL 2012 2 +NULL 2013 2 +NULL NULL 3 + + +-- !query +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) +HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL +Java NULL +dotNET NULL + + +-- !query +SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping()/grouping_id() can only be used with 
GroupingSets/Cube/Rollup + + +-- !query +SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup + + +-- !query +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0 +-- !query schema +struct +-- !query output +Java NULL +NULL 2012 +NULL 2013 +NULL NULL +dotNET NULL + + +-- !query +SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year) +ORDER BY GROUPING(course), GROUPING(year), course, year +-- !query schema +struct +-- !query output +Java 2012 0 0 +Java 2013 0 0 +dotNET 2012 0 0 +dotNET 2013 0 0 +Java NULL 0 1 +dotNET NULL 0 1 +NULL 2012 1 0 +NULL 2013 1 0 +NULL NULL 1 1 + + +-- !query +SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) +ORDER BY GROUPING(course), GROUPING(year), course, year +-- !query schema +struct +-- !query output +Java 2012 0 +Java 2013 0 +dotNET 2012 0 +dotNET 2013 0 +Java NULL 1 +dotNET NULL 1 +NULL 2012 2 +NULL 2013 2 +NULL NULL 3 + + +-- !query +SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup + + +-- !query +SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup + + +-- !query +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year +-- !query schema +struct +-- !query output +Java 2012 +Java 2013 +dotNET 2012 +dotNET 2013 +Java NULL +dotNET NULL +NULL 2012 +NULL 2013 +NULL NULL + + +-- !query +SELECT a 
+ b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'k1' given input columns: [testdata.a, testdata.b]; line 1 pos 68 + + +-- !query +SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'k' given input columns: [testdata.a, testdata.b]; line 1 pos 63 + + +-- !query +SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'k' given input columns: [testdata.a, testdata.b]; line 1 pos 79 From 62cee4f24ed49d9c8f967f82267dbd3f3180c32c Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 13 Apr 2021 01:31:23 +0800 Subject: [PATCH 2/2] update sql.out --- .../results/ansi/group-analytics.sql.out | 613 +++++++++++++++++- 1 file changed, 588 insertions(+), 25 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out index f0daa7be6104..1db8febb81f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 37 +-- Number of queries: 44 -- !query @@ -248,43 +248,155 @@ NULL 2013 78000 -- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -Partial CUBE/ROLLUP/GROUPING SETS like `GROUP BY a, b, CUBE(a, b)` is not supported.(line 1, pos 52) - -== SQL == -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, CUBE(course, 
year) ORDER BY course, year -----------------------------------------------------^^^ +Java NULL 50000 +Java NULL 50000 +Java 2012 20000 +Java 2012 20000 +Java 2013 30000 +Java 2013 30000 +dotNET NULL 63000 +dotNET NULL 63000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2013 48000 +dotNET 2013 48000 -- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -Mixed CUBE/ROLLUP/GROUPING SETS like `GROUP BY CUBE(a, b), ROLLUP(a, c)` is not supported.(line 1, pos 52) - -== SQL == -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year -----------------------------------------------------^^^ +NULL NULL 113000 +NULL 2012 35000 +NULL 2013 78000 +Java NULL 50000 +Java NULL 50000 +Java NULL 50000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +dotNET NULL 63000 +dotNET NULL 63000 +dotNET NULL 63000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 -- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -Mixed CUBE/ROLLUP/GROUPING SETS like `GROUP BY CUBE(a, b), ROLLUP(a, c)` is not supported.(line 1, pos 52) - -== SQL == -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, 
year), GROUPING SETS(course, year) ORDER BY course, year -----------------------------------------------------^^^ +NULL 2012 35000 +NULL 2012 35000 +NULL 2013 78000 +NULL 2013 78000 +Java NULL 50000 +Java NULL 50000 +Java NULL 50000 +Java NULL 50000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2012 20000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +Java 2013 30000 +dotNET NULL 63000 +dotNET NULL 63000 +dotNET NULL 63000 +dotNET NULL 63000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2012 15000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 +dotNET 2013 48000 -- !query @@ -504,3 +616,454 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException cannot resolve 'k' given input columns: [testdata.a, testdata.b]; line 1 pos 79 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, b, CUBE(a, b) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +2 1 1 +2 1 1 +2 1 1 
+2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, b, ROLLUP(a, b) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +NULL 1 3 +NULL 2 3 +NULL NULL 6 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ()) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 
NULL 2 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ()) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2