From dea8a579bbf1a9935d8ebda6d7b94acb9fb04755 Mon Sep 17 00:00:00 2001
From: Herman van Hovell
Date: Wed, 9 Nov 2016 17:47:18 +0100
Subject: [PATCH] Add table information to InsertIntoHadoopFsRelationCommand.

---
 .../apache/spark/sql/execution/datasources/DataSource.scala      | 3 ++-
 .../sql/execution/datasources/DataSourceStrategy.scala           | 5 +++--
 .../datasources/InsertIntoHadoopFsRelationCommand.scala          | 5 +++--
 .../scala/org/apache/spark/sql/hive/parquetSuites.scala          | 6 ++++--
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index 52666119351b..5d663949df6b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -424,7 +424,8 @@ case class DataSource(
         _ => Unit, // No existing table needs to be refreshed.
         options,
         data.logicalPlan,
-        mode)
+        mode,
+        catalogTable)
       sparkSession.sessionState.executePlan(plan).toRdd
       // Replace the schema with that of the DataFrame we just wrote out to avoid re-inferring it.
       copy(userSpecifiedSchema = Some(data.schema.asNullable)).resolveRelation()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
index a548e88cb683..2d43a6ad098e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
@@ -162,7 +162,7 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] {

     case i @ logical.InsertIntoTable(
-           l @ LogicalRelation(t: HadoopFsRelation, _, _), part, query, overwrite, false)
+           l @ LogicalRelation(t: HadoopFsRelation, _, table), part, query, overwrite, false)
         if query.resolved && t.schema.asNullable == query.schema.asNullable =>

       // Sanity checks
@@ -222,7 +222,8 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] {
         refreshPartitionsCallback,
         t.options,
         query,
-        mode)
+        mode,
+        table)

       insertCmd
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
index 9c75e2ae7476..a0a8cb5024c3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
@@ -23,7 +23,7 @@ import org.apache.hadoop.fs.Path

 import org.apache.spark.internal.io.FileCommitProtocol
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.catalog.BucketSpec
+import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -41,7 +41,8 @@ case class InsertIntoHadoopFsRelationCommand(
     refreshFunction: (Seq[TablePartitionSpec]) => Unit,
     options: Map[String, String],
     @transient query: LogicalPlan,
-    mode: SaveMode)
+    mode: SaveMode,
+    catalogTable: Option[CatalogTable])
   extends RunnableCommand {

   override protected def innerChildren: Seq[LogicalPlan] = query :: Nil
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index 9fc62a389db4..3644ff952eb0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -307,7 +307,8 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest {

       val df = sql("INSERT INTO TABLE test_insert_parquet SELECT a FROM jt")
       df.queryExecution.sparkPlan match {
-        case ExecutedCommandExec(_: InsertIntoHadoopFsRelationCommand) => // OK
+        case ExecutedCommandExec(cmd: InsertIntoHadoopFsRelationCommand) =>
+          assert(cmd.catalogTable.map(_.identifier.table) === Some("test_insert_parquet"))
         case o => fail("test_insert_parquet should be converted to a " +
           s"${classOf[HadoopFsRelation ].getCanonicalName} and " +
           s"${classOf[InsertIntoDataSourceCommand].getCanonicalName} should have been SparkPlan. " +
@@ -337,7 +338,8 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest {

       val df = sql("INSERT INTO TABLE test_insert_parquet SELECT a FROM jt_array")
       df.queryExecution.sparkPlan match {
-        case ExecutedCommandExec(_: InsertIntoHadoopFsRelationCommand) => // OK
+        case ExecutedCommandExec(cmd: InsertIntoHadoopFsRelationCommand) =>
+          assert(cmd.catalogTable.map(_.identifier.table) === Some("test_insert_parquet"))
         case o => fail("test_insert_parquet should be converted to a " +
           s"${classOf[HadoopFsRelation ].getCanonicalName} and " +
           s"${classOf[InsertIntoDataSourceCommand].getCanonicalName} should have been SparkPlan." +
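Note on the pattern: the patch threads the destination table's CatalogTable (as an Option, since purely path-based writes have no catalog entry) from DataSource and DataSourceAnalysis into InsertIntoHadoopFsRelationCommand, and the updated ParquetMetastoreSuite assertions then read it back. The following self-contained Scala sketch mirrors that idea with simplified, hypothetical types; SimpleTableIdentifier, SimpleCatalogTable, and SimpleInsertCommand are illustrative only and are not Spark classes.

// Standalone sketch: a command case class carries optional catalog metadata so
// downstream code (tests, EXPLAIN output, listeners) can identify the
// destination table. All types here are hypothetical stand-ins, not Spark's.
final case class SimpleTableIdentifier(database: Option[String], table: String)
final case class SimpleCatalogTable(identifier: SimpleTableIdentifier)

final case class SimpleInsertCommand(
    outputPath: String,
    mode: String,
    catalogTable: Option[SimpleCatalogTable]) {

  // Mirrors the shape of the assertion added in parquetSuites.scala:
  //   cmd.catalogTable.map(_.identifier.table) === Some("test_insert_parquet")
  def destinationTable: Option[String] = catalogTable.map(_.identifier.table)
}

object SimpleInsertCommandExample extends App {
  // A metastore-backed insert knows which table it writes into ...
  val tableWrite = SimpleInsertCommand(
    outputPath = "/tmp/warehouse/test_insert_parquet",
    mode = "Append",
    catalogTable = Some(SimpleCatalogTable(
      SimpleTableIdentifier(Some("default"), "test_insert_parquet"))))

  // ... while a path-only write does not, which is why the field is an Option.
  val pathWrite = tableWrite.copy(catalogTable = None)

  assert(tableWrite.destinationTable.contains("test_insert_parquet"))
  assert(pathWrite.destinationTable.isEmpty)
}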