[SPARK-51834][SQL] Support end-to-end table constraint management #50631
File: ResolveCatalogs.scala
```diff
@@ -22,9 +22,11 @@ import scala.jdk.CollectionConverters._
 import org.apache.spark.SparkException
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.SqlScriptingLocalVariableManager
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, Identifier, LookupCatalog, SupportsNamespaces}
+import org.apache.spark.sql.catalyst.util.SparkCharVarcharUtils.replaceCharVarcharWithString
+import org.apache.spark.sql.connector.catalog._
 import org.apache.spark.sql.errors.DataTypeErrors.toSQLId
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.util.ArrayImplicits._
```
```diff
@@ -77,14 +79,19 @@ class ResolveCatalogs(val catalogManager: CatalogManager)
       assertValidSessionVariableNameParts(nameParts, resolved)
       d.copy(name = resolved)

+    // For CREATE TABLE and REPLACE TABLE statements, resolve the table identifier and include
+    // the table columns as output. This allows expressions (e.g., constraints) referencing these
+    // columns to be resolved correctly.
+    case c @ CreateTable(UnresolvedIdentifier(nameParts, allowTemp), columns, _, _, _) =>
+      val resolvedIdentifier = resolveIdentifier(nameParts, allowTemp, columns)
+      c.copy(name = resolvedIdentifier)
+
+    case r @ ReplaceTable(UnresolvedIdentifier(nameParts, allowTemp), columns, _, _, _) =>
+      val resolvedIdentifier = resolveIdentifier(nameParts, allowTemp, columns)
+      r.copy(name = resolvedIdentifier)
+
     case UnresolvedIdentifier(nameParts, allowTemp) =>
-      if (allowTemp && catalogManager.v1SessionCatalog.isTempView(nameParts)) {
-        val ident = Identifier.of(nameParts.dropRight(1).toArray, nameParts.last)
-        ResolvedIdentifier(FakeSystemCatalog, ident)
-      } else {
-        val CatalogAndIdentifier(catalog, identifier) = nameParts
-        ResolvedIdentifier(catalog, identifier)
-      }
+      resolveIdentifier(nameParts, allowTemp, Nil)

     case CurrentNamespace =>
       ResolvedNamespace(currentCatalog, catalogManager.currentNamespace.toImmutableArraySeq)
```
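To make the effect concrete, here is a minimal sketch of the kind of statement this resolution change enables, assuming a SparkSession in scope as `spark` and a V2 catalog registered as `testcat` (the table, column, and constraint names are illustrative, not from this PR):

```scala
// The CHECK expression references the declared column `price`. It can only be
// resolved because the resolved identifier now carries the declared table
// columns as output attributes.
spark.sql(
  """CREATE TABLE testcat.ns.items (
    |  id BIGINT,
    |  price DOUBLE,
    |  CONSTRAINT positive_price CHECK (price > 0)
    |) USING parquet""".stripMargin)
```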
```diff
@@ -94,6 +101,27 @@ class ResolveCatalogs(val catalogManager: CatalogManager)
       resolveNamespace(catalog, ns, fetchMetadata)
   }

+  private def resolveIdentifier(
+      nameParts: Seq[String],
+      allowTemp: Boolean,
+      columns: Seq[ColumnDefinition]): ResolvedIdentifier = {
+    val columnOutput = columns.map { col =>
+      val dataType = if (conf.preserveCharVarcharTypeInfo) {
+        col.dataType
+      } else {
+        replaceCharVarcharWithString(col.dataType)
+      }
+      AttributeReference(col.name, dataType, col.nullable, col.metadata)()
+    }
+    if (allowTemp && catalogManager.v1SessionCatalog.isTempView(nameParts)) {
+      val ident = Identifier.of(nameParts.dropRight(1).toArray, nameParts.last)
+      ResolvedIdentifier(FakeSystemCatalog, ident, columnOutput)
+    } else {
+      val CatalogAndIdentifier(catalog, identifier) = nameParts
+      ResolvedIdentifier(catalog, identifier, columnOutput)
+    }
+  }
+
   private def resolveNamespace(
       catalog: CatalogPlugin,
       ns: Seq[String],
```

Review comment (on the `columns.map` line): @cloud-fan I made this new change to bypass 38c6ef4#diff-583171e935b2dc349378063a5841c5b98b30a2d57ac3743a9eccfe7bffcb8f2aR286
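A rough illustration of the CHAR/VARCHAR normalization performed above; this is a sketch of my reading of `replaceCharVarcharWithString`, not a test from this PR:

```scala
import org.apache.spark.sql.catalyst.util.SparkCharVarcharUtils.replaceCharVarcharWithString
import org.apache.spark.sql.types.{StringType, VarcharType}

// With spark.sql.preserveCharVarcharTypeInfo disabled (the default), a
// VARCHAR(10) column is exposed to constraint resolution as a plain STRING
// attribute, so expressions over it resolve as they would over STRING.
val resolvedType = replaceCharVarcharWithString(VarcharType(10))
assert(resolvedType == StringType)
```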
File: ResolveTableSpec.scala
```diff
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.analysis

 import org.apache.spark.SparkThrowable
-import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
+import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.Rule
```

Review thread (on the wildcard import):
- "What is the agreement in the community on wildcard imports? Are they permitted after a given number of elements are imported directly?"
- "As per https://github.com/databricks/scala-style-guide?tab=readme-ov-file#imports, …"
```diff
@@ -61,7 +61,7 @@ object ResolveTableSpec extends Rule[LogicalPlan] {
       input: LogicalPlan,
       tableSpec: TableSpecBase,
       withNewSpec: TableSpecBase => LogicalPlan): LogicalPlan = tableSpec match {
-    case u: UnresolvedTableSpec if u.optionExpression.resolved =>
+    case u: UnresolvedTableSpec if u.childrenResolved =>
       val newOptions: Seq[(String, String)] = u.optionExpression.options.map {
         case (key: String, null) =>
           (key, null)
```
```diff
@@ -86,6 +86,18 @@ object ResolveTableSpec extends Rule[LogicalPlan] {
         }
         (key, newValue)
       }
+
+      u.constraints.foreach {
+        case check: CheckConstraint =>
+          if (!check.child.deterministic) {
+            check.child.failAnalysis(
+              errorClass = "NON_DETERMINISTIC_CHECK_CONSTRAINT",
+              messageParameters = Map("checkCondition" -> check.condition)
+            )
+          }
+        case _ =>
+      }
+
       val newTableSpec = TableSpec(
         properties = u.properties,
         provider = u.provider,
```
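Conversely, here is a hypothetical statement this new validation should reject, since `rand()` is non-deterministic (catalog and names are illustrative, as before):

```scala
// Expected to fail analysis with NON_DETERMINISTIC_CHECK_CONSTRAINT: the
// CHECK condition could evaluate differently on every row and every run,
// so it cannot define a stable table invariant.
spark.sql(
  """CREATE TABLE testcat.ns.t (
    |  id BIGINT,
    |  CONSTRAINT c CHECK (rand() > 0.5)
    |) USING parquet""".stripMargin)
```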
```diff
@@ -94,7 +106,8 @@ object ResolveTableSpec extends Rule[LogicalPlan] {
         comment = u.comment,
         collation = u.collation,
         serde = u.serde,
-        external = u.external)
+        external = u.external,
+        constraints = u.constraints.map(_.toV2Constraint))
       withNewSpec(newTableSpec)
     case _ =>
       input
```
File: v2AlterTableCommands.scala
```diff
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.catalyst.plans.logical

-import org.apache.spark.sql.catalyst.analysis.{FieldName, FieldPosition, ResolvedFieldName, UnresolvedException}
+import org.apache.spark.sql.catalyst.analysis.{FieldName, FieldPosition, ResolvedFieldName, ResolvedTable, UnresolvedException}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.catalog.ClusterBySpec
 import org.apache.spark.sql.catalyst.expressions.{Expression, TableConstraint, Unevaluable}
```
```diff
@@ -295,7 +295,16 @@ case class AlterTableCollation(
 case class AddConstraint(
     table: LogicalPlan,
     tableConstraint: TableConstraint) extends AlterTableCommand {
-  override def changes: Seq[TableChange] = Seq.empty
+  override def changes: Seq[TableChange] = {
+    val constraint = tableConstraint.toV2Constraint
+    val validatedTableVersion = table match {
+      case t: ResolvedTable if constraint.enforced() =>
+        t.table.currentVersion()
+      case _ =>
+        null
+    }
+    Seq(TableChange.addConstraint(constraint, validatedTableVersion))
+  }

   protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild)
 }
```

Review comment (on `t.table.currentVersion()`): Created a follow-up, https://issues.apache.org/jira/browse/SPARK-51835, for testing the table version.

Review thread (on the `TableChange.addConstraint` line):
- "CHECK constraints must optionally validate existing data in ALTER."
- "Makes sense. Do you mean …"
- "ENFORCED/NOT ENFORCED impacts subsequent writes. I was referring to ALTER TABLE ... ADD CONSTRAINT, which must scan the existing data."
- "Created a follow-up: https://issues.apache.org/jira/browse/SPARK-51905"
- "Just for my understanding: Anton's comment was about how to validate the existing data in …; the above follow-up JIRA (SPARK-51905) is not about that, is it?"
- "Yeah, I think we need one more JIRA to add the scan capability to ALTER TABLE ... ADD CONSTRAINT."
- "Actually, @gengliangwang already created it: SPARK-51903."
- "Got it. Thank you, @aokolnychyi. Ya, SPARK-51903 is what I expected."
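A sketch of the DDL that exercises this code path, assuming the constraint grammar introduced by this PR series (the `ENFORCED` placement and all names are illustrative):

```scala
// Resolves to an AddConstraint command. Because the constraint is enforced
// and the table is resolved, the table's current version is recorded as the
// validated version in the resulting TableChange.
spark.sql(
  "ALTER TABLE testcat.ns.items ADD CONSTRAINT positive_price CHECK (price > 0) ENFORCED")
```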
```diff
@@ -308,7 +317,8 @@ case class DropConstraint(
     name: String,
     ifExists: Boolean,
     cascade: Boolean) extends AlterTableCommand {
-  override def changes: Seq[TableChange] = Seq.empty
+  override def changes: Seq[TableChange] =
+    Seq(TableChange.dropConstraint(name, ifExists, cascade))

   protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild)
 }
```
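And the corresponding drop path; a minimal sketch in which `IF EXISTS` maps to the `ifExists` flag, and omitting `CASCADE` leaves `cascade = false`:

```scala
// Resolves to a DropConstraint command carrying ifExists = true and
// cascade = false, which become the arguments of TableChange.dropConstraint.
spark.sql("ALTER TABLE testcat.ns.items DROP CONSTRAINT IF EXISTS positive_price")
```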
Review comment: The error code seems consistent with DB2 and what we use for generated columns, +1.