[SPARK-20680][SQL] Adding HiveVoidType in Spark to be compatible with Hive #28935
Changes from all commits: 17b1853, ba2ef06, 17aace2, a3a1cef, 9b9d021, fa853b1, 3fa76cf
**`AstBuilder.scala`**

```diff
@@ -2184,7 +2184,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    * Create a Spark DataType.
    */
   private def visitSparkDataType(ctx: DataTypeContext): DataType = {
-    HiveStringType.replaceCharType(typedVisit(ctx))
+    HiveVoidType.replaceVoidType(HiveStringType.replaceCharType(typedVisit(ctx)))
   }

   /**
```
```diff
@@ -2212,6 +2212,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
       case ("decimal" | "dec" | "numeric", precision :: scale :: Nil) =>
         DecimalType(precision.getText.toInt, scale.getText.toInt)
       case ("interval", Nil) => CalendarIntervalType
+      case ("void", Nil) => HiveVoidType
       case (dt, params) =>
         val dtStr = if (params.nonEmpty) s"$dt(${params.mkString(",")})" else dt
         throw new ParseException(s"DataType $dtStr is not supported.", ctx)
```

> **Contributor** (on the added `case ("void", Nil)` line): We can just reuse …
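As a usage sketch of what the new parser case is meant to allow (a local `SparkSession` is assumed for illustration; the behavior described follows the PR's intent rather than verified output):

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical local session, for illustration only.
val spark = SparkSession.builder().master("local[*]").appName("void-demo").getOrCreate()

// Previously "void" fell through to the catch-all case and raised
// "DataType void is not supported."; with the new case it parses to
// HiveVoidType, which visitSparkDataType immediately rewrites to NullType.
val df = spark.sql("SELECT CAST(NULL AS VOID) AS v")
df.printSchema() // the column v ends up with NullType
```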
```diff
@@ -2258,9 +2259,9 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
         builder.putString("comment", _)
       }

-      // Add Hive type string to metadata.
+      // Add Hive type 'string' and 'void' to metadata.
       val rawDataType = typedVisit[DataType](ctx.dataType)
-      val cleanedDataType = HiveStringType.replaceCharType(rawDataType)
+      val cleanedDataType = HiveVoidType.replaceVoidType(HiveStringType.replaceCharType(rawDataType))
       if (rawDataType != cleanedDataType) {
         builder.putString(HIVE_TYPE_STRING, rawDataType.catalogString)
       }
```
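The raw/cleaned comparison above records the original Hive type string in the column metadata only when the replacement actually changed the type. A minimal sketch of the same pattern, using a stand-in metadata key since the real `HIVE_TYPE_STRING` constant is internal to Spark:

```scala
import org.apache.spark.sql.types._

// Stand-in for Spark's internal HIVE_TYPE_STRING metadata key (assumption).
val hiveTypeKey = "HIVE_TYPE_STRING"

def fieldWithHiveType(name: String, raw: DataType, cleaned: DataType): StructField = {
  val builder = new MetadataBuilder
  // Only record the original type when cleaning rewrote it, so columns
  // with ordinary types carry no extra metadata.
  if (raw != cleaned) builder.putString(hiveTypeKey, raw.catalogString)
  StructField(name, cleaned, nullable = true, builder.build())
}
```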
**`HiveVoidType.scala`** (new file, +52 lines)

```scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.types

/**
 * A Hive void type for compatibility. This data type should only be used for parsing,
 * and should NOT be used anywhere else. Any instance of it should be replaced by a
 * [[NullType]] before analysis.
 */
class HiveVoidType private() extends DataType {

  override def defaultSize: Int = 1

  override private[spark] def asNullable: HiveVoidType = this

  override def simpleString: String = "void"
}

case object HiveVoidType extends HiveVoidType {

  /** Recursively replaces every [[HiveVoidType]] in `dt` with [[NullType]]. */
  def replaceVoidType(dt: DataType): DataType = dt match {
    case ArrayType(et, nullable) =>
      ArrayType(replaceVoidType(et), nullable)
    case MapType(kt, vt, nullable) =>
      MapType(replaceVoidType(kt), replaceVoidType(vt), nullable)
    case StructType(fields) =>
      StructType(fields.map(f => f.copy(dataType = replaceVoidType(f.dataType))))
    case _: HiveVoidType => NullType
    case _ => dt
  }

  /** Returns true if `dt` is, or nests, a [[HiveVoidType]]. */
  def containsVoidType(dt: DataType): Boolean = dt match {
    case ArrayType(et, _) => containsVoidType(et)
    case MapType(kt, vt, _) => containsVoidType(kt) || containsVoidType(vt)
    case StructType(fields) => fields.exists(f => containsVoidType(f.dataType))
    case _ => dt.isInstanceOf[HiveVoidType]
  }
}
```
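A quick sketch of how the two helpers behave on a nested schema, using the standard Spark type constructors (the result comments follow directly from the match rules above):

```scala
import org.apache.spark.sql.types._

// A schema with void at the top level and nested inside a map value.
val schema = StructType(Seq(
  StructField("a", HiveVoidType),
  StructField("b", ArrayType(MapType(StringType, HiveVoidType)))))

HiveVoidType.containsVoidType(schema) // true: void nests inside the map value

// Same shape, with every HiveVoidType rewritten to NullType.
val cleaned = HiveVoidType.replaceVoidType(schema)
```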
**`ResolveSessionCatalog.scala`**

```diff
@@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.command._
 import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, RefreshTable}
 import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType}
+import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, HiveVoidType, MetadataBuilder, StructField, StructType}

 /**
  * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements
```

```diff
@@ -50,6 +50,7 @@ class ResolveSessionCatalog(
         nameParts @ SessionCatalogAndTable(catalog, tbl), cols) =>
       loadTable(catalog, tbl.asIdentifier).collect {
         case v1Table: V1Table =>
+          cols.foreach(c => failVoidType(c.dataType))
           if (!DDLUtils.isHiveTable(v1Table.v1Table)) {
             cols.foreach(c => failCharType(c.dataType))
           }
```
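The `failVoidType` helper itself is not among the hunks shown here. As a hypothetical sketch of what such a guard could look like, mirroring the existing `failCharType` pattern (the name, signature, and message are assumptions, not the PR's actual code):

```scala
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.types.{DataType, HiveVoidType}

// Hypothetical guard: reject any column whose type still carries the
// parser-only void type, before it reaches a table definition.
private def failVoidType(dt: DataType): Unit = {
  if (HiveVoidType.containsVoidType(dt)) {
    throw new AnalysisException(
      s"Cannot use ${dt.catalogString}: the legacy Hive void type is not supported here.")
  }
}
```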
```diff
@@ -62,6 +63,7 @@ class ResolveSessionCatalog(
       }
       AlterTableAddColumnsCommand(tbl.asTableIdentifier, cols.map(convertToStructField))
     }.getOrElse {
+      cols.foreach(c => failVoidType(c.dataType))
       cols.foreach(c => failCharType(c.dataType))
       val changes = cols.map { col =>
         TableChange.addColumn(
```

```diff
@@ -80,6 +82,7 @@ class ResolveSessionCatalog(
       case Some(_: V1Table) =>
         throw new AnalysisException("REPLACE COLUMNS is only supported with v2 tables.")
       case Some(table) =>
+        cols.foreach(c => failVoidType(c.dataType))
         cols.foreach(c => failCharType(c.dataType))
         // REPLACE COLUMNS deletes all the existing columns and adds new columns specified.
         val deleteChanges = table.schema.fieldNames.map { name =>
```

```diff
@@ -102,6 +105,7 @@ class ResolveSessionCatalog(
         nameParts @ SessionCatalogAndTable(catalog, tbl), _, _, _, _, _) =>
       loadTable(catalog, tbl.asIdentifier).collect {
         case v1Table: V1Table =>
+          a.dataType.foreach(failVoidType)
           if (!DDLUtils.isHiveTable(v1Table.v1Table)) {
             a.dataType.foreach(failCharType)
           }
```

```diff
@@ -131,8 +135,9 @@ class ResolveSessionCatalog(
             s"Available: ${v1Table.schema.fieldNames.mkString(", ")}")
         }
       }
-      // Add Hive type string to metadata.
-      val cleanedDataType = HiveStringType.replaceCharType(dataType)
+      // Add Hive type 'string' and 'void' to metadata.
+      val cleanedDataType =
+        HiveVoidType.replaceVoidType(HiveStringType.replaceCharType(dataType))
       if (dataType != cleanedDataType) {
         builder.putString(HIVE_TYPE_STRING, dataType.catalogString)
       }
```

> **Contributor** (on the hunk above): We can be more aggressive here: forbid the void type in all cases, including Hive tables.
```diff
@@ -143,6 +148,7 @@ class ResolveSessionCatalog(
           builder.build())
       AlterTableChangeColumnCommand(tbl.asTableIdentifier, colName, newColumn)
     }.getOrElse {
+      a.dataType.foreach(failVoidType)
       a.dataType.foreach(failCharType)
       val colName = a.column.toArray
       val typeChange = a.dataType.map { newDataType =>
```

```diff
@@ -270,6 +276,7 @@ class ResolveSessionCatalog(
         SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) =>
       val provider = c.provider.getOrElse(conf.defaultDataSourceName)
       if (!isV2Provider(provider)) {
+        assertNoVoidTypeInSchema(c.tableSchema)
         if (!DDLUtils.isHiveTable(Some(provider))) {
           assertNoCharTypeInSchema(c.tableSchema)
         }
```

```diff
@@ -279,6 +286,7 @@ class ResolveSessionCatalog(
       val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists
       CreateTable(tableDesc, mode, None)
     } else {
+      assertNoVoidTypeInSchema(c.tableSchema)
       assertNoCharTypeInSchema(c.tableSchema)
       CreateV2Table(
         catalog.asTableCatalog,
```

```diff
@@ -323,6 +331,7 @@ class ResolveSessionCatalog(
       if (!isV2Provider(provider)) {
         throw new AnalysisException("REPLACE TABLE is only supported with v2 tables.")
       } else {
+        assertNoVoidTypeInSchema(c.tableSchema)
         assertNoCharTypeInSchema(c.tableSchema)
         ReplaceTable(
           catalog.asTableCatalog,
```

```diff
@@ -719,7 +728,8 @@ class ResolveSessionCatalog(
     val builder = new MetadataBuilder
     col.comment.foreach(builder.putString("comment", _))

-    val cleanedDataType = HiveStringType.replaceCharType(col.dataType)
+    val cleanedDataType =
+      HiveVoidType.replaceVoidType(HiveStringType.replaceCharType(col.dataType))
     if (col.dataType != cleanedDataType) {
       builder.putString(HIVE_TYPE_STRING, col.dataType.catalogString)
     }
```
**Conversation**

> I don't get why we need `HiveVoidType`. What happens if we just parse `void` to `NullType`?

> Because that would indicate VOID is a Hive type, so the handling is more unified. Or we can just use PR #28833.

> For example, the function below will point out that a failure is due to the legacy Hive void type. If we mix VOID and NULL, I am not sure that would be better than keeping them separate.

> VOID and NULL are indeed the same type. We can just check for the null type and fail with the error message: `Cannot create tables with VOID type`.

> The point is consistency: the VOID type in a SQL statement should be the same as the `NullType` specified via the Scala API in `spark.catalog.createTable`.

> @cloud-fan, OK. I will follow your suggestion and fix it in #28833, since this PR is a refactor introducing the new type `HiveVoidType`. Now we don't need it.
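Following the suggestion to reuse `NullType`, a minimal sketch of the alternative design (an assumed shape, not the final code in #28833): the parser maps `void` straight to `NullType`, and DDL paths reject null-typed columns with the dedicated message from the discussion above.

```scala
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.types._

// Sketch of the suggested alternative: no HiveVoidType at all. A schema
// check walks nested types and rejects any column that resolves to NullType.
def assertNoNullTypeInSchema(schema: StructType): Unit = {
  def hasNullType(dt: DataType): Boolean = dt match {
    case ArrayType(et, _) => hasNullType(et)
    case MapType(kt, vt, _) => hasNullType(kt) || hasNullType(vt)
    case StructType(fields) => fields.exists(f => hasNullType(f.dataType))
    case _ => dt == NullType
  }
  if (schema.fields.exists(f => hasNullType(f.dataType))) {
    throw new AnalysisException("Cannot create tables with VOID type")
  }
}
```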