-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-32528][SQL][TEST] The analyze method should make sure the plan is analyzed #29349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -418,8 +418,11 @@ package object dsl { | |
| def distribute(exprs: Expression*)(n: Int): LogicalPlan = | ||
| RepartitionByExpression(exprs, logicalPlan, numPartitions = n) | ||
|
|
||
| def analyze: LogicalPlan = | ||
| EliminateSubqueryAliases(analysis.SimpleAnalyzer.execute(logicalPlan)) | ||
| def analyze: LogicalPlan = { | ||
| val analyzed = analysis.SimpleAnalyzer.execute(logicalPlan) | ||
| analysis.SimpleAnalyzer.checkAnalysis(analyzed) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, this change can find many miswritten tests... nice. |
||
| EliminateSubqueryAliases(analyzed) | ||
| } | ||
|
|
||
| def hint(name: String, parameters: Any*): LogicalPlan = | ||
| UnresolvedHint(name, parameters, logicalPlan) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ import org.apache.log4j.Level | |
| import org.scalatest.matchers.must.Matchers | ||
|
|
||
| import org.apache.spark.api.python.PythonEvalType | ||
| import org.apache.spark.sql.AnalysisException | ||
| import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} | ||
| import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} | ||
| import org.apache.spark.sql.catalyst.dsl.expressions._ | ||
|
|
@@ -47,6 +48,13 @@ import org.apache.spark.sql.types._ | |
| class AnalysisSuite extends AnalysisTest with Matchers { | ||
| import org.apache.spark.sql.catalyst.analysis.TestRelations._ | ||
|
|
||
| test("fail for unresolved plan") { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shall we add a JIRA prefix?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is very general and not a regression test, so maybe it's not needed? |
||
| intercept[AnalysisException] { | ||
| // `testRelation` does not have column `b`. | ||
| testRelation.select('b).analyze | ||
| } | ||
| } | ||
|
|
||
| test("union project *") { | ||
| val plan = (1 to 120) | ||
| .map(_ => testRelation) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -119,9 +119,7 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper | |
|
|
||
| // GetStructField with different names are semantically equal; thus, `EqualTo(fieldA1, fieldA2)` | ||
| // will be optimized to `TrueLiteral` by `SimplifyBinaryComparison`. | ||
| val originalQuery = nonNullableRelation | ||
| .where(EqualTo(fieldA1, fieldA2)) | ||
| .analyze | ||
| val originalQuery = nonNullableRelation.where(EqualTo(fieldA1, fieldA2)) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here we construct the resolved plan directly, because we need to rely on case insensitivity and
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I went to look at this test and its original PR. |
||
|
|
||
| val optimized = Optimize.execute(originalQuery) | ||
| val correctAnswer = nonNullableRelation.analyze | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.plans._ | |
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.catalyst.rules._ | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types.{BooleanType, IntegerType} | ||
| import org.apache.spark.sql.types.{BooleanType, IntegerType, TimestampType} | ||
| import org.apache.spark.unsafe.types.CalendarInterval | ||
|
|
||
| class FilterPushdownSuite extends PlanTest { | ||
|
|
@@ -678,14 +678,14 @@ class FilterPushdownSuite extends PlanTest { | |
| val generator = Explode('c_arr) | ||
| val originalQuery = { | ||
| testRelationWithArrayType | ||
| .generate(generator, alias = Some("arr")) | ||
| .generate(generator, alias = Some("arr"), outputNames = Seq("c")) | ||
| .where(('b >= 5) && ('c > 6)) | ||
| } | ||
| val optimized = Optimize.execute(originalQuery.analyze) | ||
| val referenceResult = { | ||
| testRelationWithArrayType | ||
| .where('b >= 5) | ||
| .generate(generator, alias = Some("arr")) | ||
| .generate(generator, alias = Some("arr"), outputNames = Seq("c")) | ||
| .where('c > 6).analyze | ||
| } | ||
|
|
||
|
|
@@ -1149,75 +1149,60 @@ class FilterPushdownSuite extends PlanTest { | |
| comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer) | ||
| } | ||
|
|
||
| test("join condition pushdown: deterministic and non-deterministic") { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was added in #14815. It's not valid anymore, as we don't allow nondeterministic join conditions now. |
||
| val x = testRelation.subquery('x) | ||
| val y = testRelation.subquery('y) | ||
|
|
||
| // Verify that all conditions except the watermark touching condition are pushed down | ||
| // by the optimizer and others are not. | ||
| val originalQuery = x.join(y, condition = Some("x.a".attr === 5 && "y.a".attr === 5 && | ||
| "x.a".attr === Rand(10) && "y.b".attr === 5)) | ||
| val correctAnswer = | ||
| x.where("x.a".attr === 5).join(y.where("y.a".attr === 5 && "y.b".attr === 5), | ||
| condition = Some("x.a".attr === Rand(10))) | ||
|
|
||
| // CheckAnalysis will ensure nondeterministic expressions not appear in join condition. | ||
| // TODO support nondeterministic expressions in join condition. | ||
| comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze, | ||
| checkAnalysis = false) | ||
| } | ||
|
|
||
| test("watermark pushdown: no pushdown on watermark attribute #1") { | ||
| val interval = new CalendarInterval(2, 2, 2000L) | ||
| val relation = LocalRelation(attrA, 'b.timestamp, attrC) | ||
|
|
||
| // Verify that all conditions except the watermark touching condition are pushed down | ||
| // by the optimizer and others are not. | ||
| val originalQuery = EventTimeWatermark('b, interval, testRelation) | ||
| .where('a === 5 && 'b === 10 && 'c === 5) | ||
| val originalQuery = EventTimeWatermark('b, interval, relation) | ||
| .where('a === 5 && 'b === new java.sql.Timestamp(0) && 'c === 5) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The watermark column must be of timestamp type. |
||
| val correctAnswer = EventTimeWatermark( | ||
| 'b, interval, testRelation.where('a === 5 && 'c === 5)) | ||
| .where('b === 10) | ||
| 'b, interval, relation.where('a === 5 && 'c === 5)) | ||
| .where('b === new java.sql.Timestamp(0)) | ||
|
|
||
| comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze, | ||
| checkAnalysis = false) | ||
|
Comment on lines
-1181
to
-1182
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We set
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. It's true that sometimes we need to construct a special unresolved plan to test some special branches of the optimizer rule, but that's not the case here. |
||
| comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) | ||
| } | ||
|
|
||
| test("watermark pushdown: no pushdown for nondeterministic filter") { | ||
| val interval = new CalendarInterval(2, 2, 2000L) | ||
| val relation = LocalRelation(attrA, attrB, 'c.timestamp) | ||
|
|
||
| // Verify that all conditions except the watermark touching condition are pushed down | ||
| // by the optimizer and others are not. | ||
| val originalQuery = EventTimeWatermark('c, interval, testRelation) | ||
| .where('a === 5 && 'b === Rand(10) && 'c === 5) | ||
| val originalQuery = EventTimeWatermark('c, interval, relation) | ||
| .where('a === 5 && 'b === Rand(10) && 'c === new java.sql.Timestamp(0)) | ||
| val correctAnswer = EventTimeWatermark( | ||
| 'c, interval, testRelation.where('a === 5)) | ||
| .where('b === Rand(10) && 'c === 5) | ||
| 'c, interval, relation.where('a === 5)) | ||
| .where('b === Rand(10) && 'c === new java.sql.Timestamp(0)) | ||
|
|
||
| comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze, | ||
| checkAnalysis = false) | ||
| } | ||
|
|
||
| test("watermark pushdown: full pushdown") { | ||
| val interval = new CalendarInterval(2, 2, 2000L) | ||
| val relation = LocalRelation(attrA, attrB, 'c.timestamp) | ||
|
|
||
| // Verify that all conditions except the watermark touching condition are pushed down | ||
| // by the optimizer and others are not. | ||
| val originalQuery = EventTimeWatermark('c, interval, testRelation) | ||
| val originalQuery = EventTimeWatermark('c, interval, relation) | ||
| .where('a === 5 && 'b === 10) | ||
| val correctAnswer = EventTimeWatermark( | ||
| 'c, interval, testRelation.where('a === 5 && 'b === 10)) | ||
| 'c, interval, relation.where('a === 5 && 'b === 10)) | ||
|
|
||
| comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze, | ||
| checkAnalysis = false) | ||
| } | ||
|
|
||
| test("watermark pushdown: no pushdown on watermark attribute #2") { | ||
| val interval = new CalendarInterval(2, 2, 2000L) | ||
| val relation = LocalRelation('a.timestamp, attrB, attrC) | ||
|
|
||
| val originalQuery = EventTimeWatermark('a, interval, testRelation) | ||
| .where('a === 5 && 'b === 10) | ||
| val originalQuery = EventTimeWatermark('a, interval, relation) | ||
| .where('a === new java.sql.Timestamp(0) && 'b === 10) | ||
| val correctAnswer = EventTimeWatermark( | ||
| 'a, interval, testRelation.where('b === 10)).where('a === 5) | ||
| 'a, interval, relation.where('b === 10)).where('a === new java.sql.Timestamp(0)) | ||
|
|
||
| comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze, | ||
| checkAnalysis = false) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -307,9 +307,9 @@ class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { | |
| val originalPlan2 = | ||
| t1.join(t2, Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t2.k-1-5"))) | ||
| .hint("broadcast") | ||
| .join(t4, Inner, Some(nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))) | ||
| .join(t4, Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2"))) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The join condition was written incorrectly. |
||
| .hint("broadcast") | ||
| .join(t3, Inner, Some(nameToAttr("t1.k-1-2") === nameToAttr("t4.k-1-2"))) | ||
| .join(t3, Inner, Some(nameToAttr("t4.v-1-10") === nameToAttr("t3.v-1-100"))) | ||
|
|
||
| assertEqualPlans(originalPlan2, originalPlan2) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -91,9 +91,8 @@ class PullupCorrelatedPredicatesSuite extends PlanTest { | |
| .select(max('d)) | ||
| val scalarSubquery = | ||
| testRelation | ||
| .where(ScalarSubquery(subPlan)) | ||
| .where(ScalarSubquery(subPlan) === 1) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| .select('a).analyze | ||
| assert(scalarSubquery.resolved) | ||
|
|
||
| val optimized = Optimize.execute(scalarSubquery) | ||
| val doubleOptimized = Optimize.execute(optimized) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,8 @@ class SimplifyCastsSuite extends PlanTest { | |
|
|
||
| test("nullable element to non-nullable element array cast") { | ||
| val input = LocalRelation('a.array(ArrayType(IntegerType, true))) | ||
| val plan = input.select('a.cast(ArrayType(IntegerType, false)).as("casted")).analyze | ||
| val attr = input.output.head | ||
| val plan = input.select(attr.cast(ArrayType(IntegerType, false)).as("casted")) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here we are testing something that can't pass analysis, just to make sure the optimizer rule is robust. |
||
| val optimized = Optimize.execute(plan) | ||
| // Though cast from `ArrayType(IntegerType, true)` to `ArrayType(IntegerType, false)` is not | ||
| // allowed, here we just ensure that `SimplifyCasts` rule respect the plan. | ||
|
|
@@ -60,13 +61,13 @@ class SimplifyCastsSuite extends PlanTest { | |
|
|
||
| test("nullable value map to non-nullable value map cast") { | ||
| val input = LocalRelation('m.map(MapType(StringType, StringType, true))) | ||
| val plan = input.select('m.cast(MapType(StringType, StringType, false)) | ||
| .as("casted")).analyze | ||
| val attr = input.output.head | ||
| val plan = input.select(attr.cast(MapType(StringType, StringType, false)) | ||
| .as("casted")) | ||
| val optimized = Optimize.execute(plan) | ||
| // Though cast from `MapType(StringType, StringType, true)` to | ||
| // `MapType(StringType, StringType, false)` is not allowed, here we just ensure that | ||
| // `SimplifyCasts` rule respect the plan. | ||
| comparePlans(optimized, plan, checkAnalysis = false) | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks nice. This seems only used in test.