-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-16947][SQL] Improve type coercion for inline tables. #14539
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3b0a28b
3f3aa93
9a827ce
392eb0a
daa01a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -656,40 +656,36 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { | |
| * Create an inline table (a virtual table in Hive parlance). | ||
| */ | ||
| override def visitInlineTable(ctx: InlineTableContext): LogicalPlan = withOrigin(ctx) { | ||
| // Get the backing expressions. | ||
| val expressions = ctx.expression.asScala.map { eCtx => | ||
| val e = expression(eCtx) | ||
| assert(e.foldable, "All expressions in an inline table must be constants.", eCtx) | ||
| e | ||
| } | ||
|
|
||
| // Validate and evaluate the rows. | ||
| val (structType, structConstructor) = expressions.head.dataType match { | ||
| case st: StructType => | ||
| (st, (e: Expression) => e) | ||
| case dt => | ||
| val st = CreateStruct(Seq(expressions.head)).dataType | ||
| (st, (e: Expression) => CreateStruct(Seq(e))) | ||
| } | ||
| val rows = expressions.map { | ||
| case expression => | ||
| val safe = Cast(structConstructor(expression), structType) | ||
| safe.eval().asInstanceOf[InternalRow] | ||
| // Create expressions. | ||
| val rows = ctx.expression.asScala.map { e => | ||
| expression(e) match { | ||
| case CreateStruct(children) => children | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @hvanhovell what's this about? Why do we need to expand struct?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually I think I understand what's happening here now.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The parser creates rows by issuing |
||
| case child => Seq(child) | ||
| } | ||
| } | ||
|
|
||
| // Construct attributes. | ||
| val baseAttributes = structType.toAttributes.map(_.withNullability(true)) | ||
| val attributes = if (ctx.identifierList != null) { | ||
| val aliases = visitIdentifierList(ctx.identifierList) | ||
| assert(aliases.size == baseAttributes.size, | ||
| "Number of aliases must match the number of fields in an inline table.", ctx) | ||
| baseAttributes.zip(aliases).map(p => p._1.withName(p._2)) | ||
| // Resolve aliases. | ||
| val numExpectedColumns = rows.head.size | ||
| val aliases = if (ctx.identifierList != null) { | ||
| val names = visitIdentifierList(ctx.identifierList) | ||
| assert(names.size == numExpectedColumns, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is an error case users can hit, should we throw
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It uses a parser only version of assert that throws a ParseException: https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala#L81 Come to think of it, we might need to rename it because people expect that assert calls can be elided. That is for a different PR though.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 |
||
| s"Number of aliases '${names.size}' must match the number of fields " + | ||
| s"'$numExpectedColumns' in an inline table", ctx) | ||
| names | ||
| } else { | ||
| baseAttributes | ||
| Seq.tabulate(numExpectedColumns)(i => s"col${i + 1}") | ||
| } | ||
|
|
||
| // Create plan and add an alias if a name has been defined. | ||
| LocalRelation(attributes, rows).optionalMap(ctx.identifier)(aliasPlan) | ||
| // Create the inline table. | ||
| val table = InlineTable(rows.zipWithIndex.map { case (expressions, index) => | ||
| assert(expressions.size == numExpectedColumns, | ||
| s"Number of values '${expressions.size}' in row '${index + 1}' does not match the " + | ||
| s"expected number of values '$numExpectedColumns' in a row", ctx) | ||
| expressions.zip(aliases).map { | ||
| case (expression, name) => Alias(expression, name)() | ||
| } | ||
| }) | ||
| table.optionalMap(ctx.identifier)(aliasPlan) | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.plans.logical | |
|
|
||
| import scala.collection.mutable.ArrayBuffer | ||
|
|
||
| import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation | ||
| import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, UnresolvedAttribute} | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression | ||
| import org.apache.spark.sql.catalyst.plans._ | ||
|
|
@@ -769,3 +769,41 @@ case object OneRowRelation extends LeafNode { | |
| */ | ||
| override lazy val statistics: Statistics = Statistics(sizeInBytes = 1) | ||
| } | ||
|
|
||
| /** | ||
| * An inline table that holds a number of foldable expressions, which can be materialized into | ||
| * rows. This is semantically the same as a Union of one row relations. | ||
| */ | ||
| case class InlineTable(rows: Seq[Seq[NamedExpression]]) extends LeafNode { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. should we assert
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I had these checks. The thing is that none of the LogicalPlans have such logic, it has all been centralized in CheckAnalysis. So I added it there. It might not be a bad plan to move this functionality into the separate plans on the longer run. |
||
| lazy val expressionsResolved: Boolean = rows.forall(_.forall(_.resolved)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. do we really need this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I needed that piece of code in two places (resolve and type coercion), so I made it a val. But I can remove this. |
||
|
|
||
| lazy val validDimensions: Boolean = { | ||
| val size = rows.headOption.map(_.size).getOrElse(0) | ||
| rows.tail.forall(_.size == size) | ||
| } | ||
|
|
||
| override lazy val resolved: Boolean = { | ||
| def allRowsCompatible: Boolean = { | ||
| val expectedDataTypes = rows.headOption.toSeq.flatMap(_.map(_.dataType)) | ||
| rows.tail.forall { row => | ||
| row.map(_.dataType).zip(expectedDataTypes).forall { | ||
| case (dt1, dt2) => dt1 == dt2 | ||
| } | ||
| } | ||
| } | ||
| expressionsResolved && validDimensions && allRowsCompatible | ||
| } | ||
|
|
||
| override def maxRows: Option[Long] = Some(rows.size) | ||
|
|
||
| override def output: Seq[Attribute] = rows.transpose.map { | ||
| case column if column.forall(_.resolved) => | ||
| column.head.toAttribute.withNullability(column.exists(_.nullable)) | ||
| case column => | ||
| UnresolvedAttribute(column.head.name) | ||
| } | ||
|
|
||
| override lazy val statistics: Statistics = { | ||
| Statistics(output.map(_.dataType.defaultSize).sum * rows.size) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update comment.