[SPARK-28341][SQL] create a public API for V2SessionCatalog #25104

New file: CatalogExtension.java (+38 lines)

```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalog.v2;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;

/**
 * An API to extend the Spark built-in session catalog. Implementations can get the built-in
 * session catalog from {@link #setDelegateCatalog(TableCatalog)}, implement catalog functions
 * with custom logic, and call the built-in session catalog at the end. For example, they can
 * implement {@code createTable} to do something extra before calling {@code createTable} of
 * the built-in session catalog.
 */
@Experimental
public interface CatalogExtension extends TableCatalog {

  /**
   * This will be called only once by Spark to pass in the Spark built-in session catalog,
   * after {@link #initialize(String, CaseInsensitiveStringMap)} is called.
   */
  void setDelegateCatalog(TableCatalog delegate);
}
```
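
To make the contract concrete, here is a minimal Scala sketch of implementing the interface directly (the class name `ForwardingSessionCatalog` is hypothetical, not part of this PR). Note the lifecycle: Spark calls `initialize` first and `setDelegateCatalog` afterwards, and every `TableCatalog` method must be forwarded by hand; that is exactly the boilerplate the `DelegatingCatalogExtension` below removes.

```scala
import java.util.{Map => JMap}

import org.apache.spark.sql.catalog.v2.{CatalogExtension, Identifier, TableCatalog, TableChange}
import org.apache.spark.sql.catalog.v2.expressions.Transform
import org.apache.spark.sql.sources.v2.Table
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap

// Hypothetical extension that forwards everything to the built-in session catalog.
class ForwardingSessionCatalog extends CatalogExtension {
  private var delegate: TableCatalog = _

  // Called by Spark exactly once, after initialize(...).
  override def setDelegateCatalog(delegate: TableCatalog): Unit = {
    this.delegate = delegate
  }

  // The delegate is not yet available at this point.
  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {}

  override def name(): String = delegate.name()

  override def listTables(namespace: Array[String]): Array[Identifier] =
    delegate.listTables(namespace)

  override def loadTable(ident: Identifier): Table = delegate.loadTable(ident)

  override def createTable(
      ident: Identifier,
      schema: StructType,
      partitions: Array[Transform],
      properties: JMap[String, String]): Table = {
    // Custom logic would go here, before delegating.
    delegate.createTable(ident, schema, partitions, properties)
  }

  override def alterTable(ident: Identifier, changes: TableChange*): Table =
    delegate.alterTable(ident, changes: _*)

  override def dropTable(ident: Identifier): Boolean = delegate.dropTable(ident)

  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit =
    delegate.renameTable(oldIdent, newIdent)

  // tableExists and invalidateTable inherit TableCatalog's default implementations.
}
```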
New file: DelegatingCatalogExtension.java (+101 lines)

```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalog.v2;

import java.util.Map;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.sql.catalog.v2.expressions.Transform;
import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException;
import org.apache.spark.sql.sources.v2.Table;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;

/**
 * A simple implementation of {@link CatalogExtension} that implements all the catalog functions
 * by calling the built-in session catalog directly. This is created for convenience, so that
 * users only need to override the methods where they want to apply custom logic. For example,
 * they can override {@code createTable} to do something extra before calling
 * {@code super.createTable}.
 */
@Experimental
public abstract class DelegatingCatalogExtension implements CatalogExtension {

  private TableCatalog delegate;

  @Override
  public final void setDelegateCatalog(TableCatalog delegate) {
    this.delegate = delegate;
  }

  @Override
  public String name() {
    return delegate.name();
  }

  @Override
  public final void initialize(String name, CaseInsensitiveStringMap options) {}

  @Override
  public Identifier[] listTables(String[] namespace) throws NoSuchNamespaceException {
    return delegate.listTables(namespace);
  }

  @Override
  public Table loadTable(Identifier ident) throws NoSuchTableException {
    return delegate.loadTable(ident);
  }

  @Override
  public void invalidateTable(Identifier ident) {
    delegate.invalidateTable(ident);
  }

  @Override
  public boolean tableExists(Identifier ident) {
    return delegate.tableExists(ident);
  }

  @Override
  public Table createTable(
      Identifier ident,
      StructType schema,
      Transform[] partitions,
      Map<String, String> properties) throws TableAlreadyExistsException, NoSuchNamespaceException {
    return delegate.createTable(ident, schema, partitions, properties);
  }

  @Override
  public Table alterTable(
      Identifier ident,
      TableChange... changes) throws NoSuchTableException {
    return delegate.alterTable(ident, changes);
  }

  @Override
  public boolean dropTable(Identifier ident) {
    return delegate.dropTable(ident);
  }

  @Override
  public void renameTable(
      Identifier oldIdent,
      Identifier newIdent) throws NoSuchTableException, TableAlreadyExistsException {
    delegate.renameTable(oldIdent, newIdent);
  }
}
```
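
Extending this convenience base class, a subclass only overrides what it needs. A minimal sketch (the class name and the audit logic are illustrative assumptions, not part of this PR):

```scala
import java.util.{Map => JMap}

import org.apache.spark.sql.catalog.v2.{DelegatingCatalogExtension, Identifier}
import org.apache.spark.sql.catalog.v2.expressions.Transform
import org.apache.spark.sql.sources.v2.Table
import org.apache.spark.sql.types.StructType

// Hypothetical: logs table creation, then delegates to the built-in session
// catalog via super. All other TableCatalog methods are inherited forwards.
class AuditingSessionCatalog extends DelegatingCatalogExtension {
  override def createTable(
      ident: Identifier,
      schema: StructType,
      partitions: Array[Transform],
      properties: JMap[String, String]): Table = {
    println(s"audit: creating table ${(ident.namespace() :+ ident.name()).mkString(".")}")
    super.createTable(ident, schema, partitions, properties)
  }
}
```

Such a class would then be registered through the v2 session catalog configuration this change plugs into SQLConf; the exact config key is not shown in this diff.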

Changes to Analyzer.scala:

```diff
@@ -17,6 +17,7 @@
 package org.apache.spark.sql.catalyst.analysis
 
+import java.util
 import java.util.Locale
 
 import scala.collection.mutable
@@ -25,7 +26,7 @@ import scala.util.Random
 
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalog.v2._
-import org.apache.spark.sql.catalog.v2.expressions.{FieldReference, IdentityTransform}
+import org.apache.spark.sql.catalog.v2.expressions.{FieldReference, IdentityTransform, Transform}
 import org.apache.spark.sql.catalyst._
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.encoders.OuterScopes
@@ -45,6 +46,7 @@ import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssig
 import org.apache.spark.sql.sources.v2.Table
 import org.apache.spark.sql.sources.v2.internal.V1Table
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
 /**
  * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]].
@@ -60,6 +62,24 @@ object SimpleAnalyzer extends Analyzer(
   },
   new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true))
 
+object FakeV2SessionCatalog extends TableCatalog {
+  private def fail() = throw new UnsupportedOperationException
+  override def listTables(namespace: Array[String]): Array[Identifier] = fail()
+  override def loadTable(ident: Identifier): Table = {
+    throw new NoSuchTableException(ident.toString)
+  }
+  override def createTable(
+      ident: Identifier,
+      schema: StructType,
+      partitions: Array[Transform],
+      properties: util.Map[String, String]): Table = fail()
+  override def alterTable(ident: Identifier, changes: TableChange*): Table = fail()
+  override def dropTable(ident: Identifier): Boolean = fail()
+  override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = fail()
+  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = fail()
+  override def name(): String = fail()
+}
+
 /**
  * Provides a way to keep state during the analysis, this enables us to decouple the concerns
  * of analysis environment from the catalog.
@@ -101,15 +121,21 @@ object AnalysisContext {
  */
 class Analyzer(
     catalog: SessionCatalog,
+    v2SessionCatalog: TableCatalog,
     conf: SQLConf,
     maxIterations: Int)
   extends RuleExecutor[LogicalPlan] with CheckAnalysis with LookupCatalog {
 
+  // Only for tests.
   def this(catalog: SessionCatalog, conf: SQLConf) = {
-    this(catalog, conf, conf.optimizerMaxIterations)
+    this(catalog, FakeV2SessionCatalog, conf, conf.optimizerMaxIterations)
  }
 
+  def this(catalog: SessionCatalog, v2SessionCatalog: TableCatalog, conf: SQLConf) = {
+    this(catalog, v2SessionCatalog, conf, conf.optimizerMaxIterations)
+  }
+
-  override val catalogManager: CatalogManager = new CatalogManager(conf)
+  override val catalogManager: CatalogManager = new CatalogManager(conf, v2SessionCatalog)
 
   def executeAndCheck(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = {
     AnalysisHelper.markInAnalyzer {
```

On the `// Only for tests.` comment and the two-arg constructor:

> **Contributor:** I don't like this too much. Would we need to change a lot if we wanted to remove this constructor? It's bad to inject test-related stuff into production code.
>
> **Contributor (author):** There are more than 10 test suites using this constructor. BTW, this is an existing constructor; this PR just adds a comment.
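
As a concrete illustration of that discussion, existing test suites keep compiling against the two-arg constructor, which now fills the v2 slot with `FakeV2SessionCatalog`, while production callers use the new three-arg constructor. The helper functions below are a sketch, not part of the PR:

```scala
import org.apache.spark.sql.catalog.v2.TableCatalog
import org.apache.spark.sql.catalyst.analysis.Analyzer
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.internal.SQLConf

// Old two-arg shape, used by existing test suites: FakeV2SessionCatalog is
// plugged in and throws if a test unexpectedly exercises a v2 catalog path.
def testAnalyzer(catalog: SessionCatalog, conf: SQLConf): Analyzer =
  new Analyzer(catalog, conf)

// New shape for production callers, passing a real v2 session catalog.
def prodAnalyzer(
    catalog: SessionCatalog,
    v2SessionCatalog: TableCatalog,
    conf: SQLConf): Analyzer =
  new Analyzer(catalog, v2SessionCatalog, conf)
```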
```diff
@@ -954,7 +980,7 @@ class Analyzer(
       case scala.Right(tableOpt) =>
         tableOpt.map { table =>
           AlterTable(
-            sessionCatalog.get.asTableCatalog, // table being resolved means this exists
+            sessionCatalog.asTableCatalog,
             Identifier.of(tableName.init.toArray, tableName.last),
             DataSourceV2Relation.create(table),
             changes
@@ -2837,7 +2863,7 @@ class Analyzer(
       case CatalogObjectIdentifier(Some(v2Catalog), ident) =>
         scala.Left((v2Catalog, ident, loadTable(v2Catalog, ident)))
       case CatalogObjectIdentifier(None, ident) =>
-        catalogManager.v2SessionCatalog.flatMap(loadTable(_, ident)) match {
+        loadTable(catalogManager.v2SessionCatalog, ident) match {
           case Some(_: V1Table) => scala.Right(None)
           case other => scala.Right(other)
         }
```
> **Author:** @brkyvz I agree with you that the logic was a bit confusing, so I refined it a little:
>
> - If `V2_SESSION_CATALOG` is not set, return the default session catalog, `V2SessionCatalog`.
> - If `V2_SESSION_CATALOG` is set, try to instantiate it, and return the default session catalog if we hit problems during instantiation.
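
A hedged sketch of that fallback behavior (the function name and parameters are illustrative; the PR's actual logic lives in CatalogManager and SQLConf, which are not shown in this diff):

```scala
import scala.util.control.NonFatal

import org.apache.spark.sql.catalog.v2.{CatalogExtension, TableCatalog}

// Sketch only: `customClassName` stands for the value of the session catalog
// config if set, and `defaultSessionCatalog` for the built-in V2SessionCatalog.
def resolveV2SessionCatalog(
    customClassName: Option[String],
    defaultSessionCatalog: TableCatalog): TableCatalog = {
  customClassName match {
    case Some(className) =>
      try {
        val catalog = Class.forName(className)
          .getConstructor()
          .newInstance()
          .asInstanceOf[TableCatalog]
        catalog match {
          // A CatalogExtension gets the built-in catalog injected so it can delegate.
          case ext: CatalogExtension => ext.setDelegateCatalog(defaultSessionCatalog)
          case _ =>
        }
        catalog
      } catch {
        case NonFatal(_) =>
          // Problems during instantiation: fall back to the built-in catalog.
          defaultSessionCatalog
      }
    case None => defaultSessionCatalog
  }
}
```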