diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index f092360a98ba..82e41433fa50 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -1312,12 +1312,28 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
         // just convert the whole type string to lower case, otherwise the struct field names
         // will no longer be case sensitive. Instead, we rely on our parser to get the proper
         // case before passing it to Hive.
-        typedVisit[DataType](col.dataType).catalogString,
+        getCatalogColumnDataType(col.dataType()),
         nullable = true,
         Option(col.STRING).map(string))
     }
   }
 
+  private def getCatalogColumnDataType(dataTypeContext: DataTypeContext): String = {
+    val defaultType = typedVisit[DataType](dataTypeContext).catalogString
+
+    dataTypeContext match {
+      case p: PrimitiveDataTypeContext =>
+        p.identifier.getText.toLowerCase match {
+          case "varchar" | "char" =>
+            dataTypeContext.getText.toLowerCase
+          case _ =>
+            defaultType
+        }
+      case _ =>
+        defaultType
+    }
+  }
+
   /**
    * Create a [[ScriptInputOutputSchema]].
    */
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 88cf06fce6c6..8fdfd167ef06 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -37,7 +37,8 @@ import org.apache.spark.sql.hive.client.HiveClient
  * A persistent implementation of the system catalog using Hive.
  * All public methods must be synchronized for thread-safety.
  */
-private[spark] class HiveExternalCatalog(client: HiveClient, hadoopConf: Configuration)
+private[spark] class HiveExternalCatalog(private[hive] val client: HiveClient,
+    hadoopConf: Configuration)
   extends ExternalCatalog with Logging {
 
   import CatalogTypes.TablePartitionSpec
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
index 0f37cd7bf365..7ba2bd6ec31b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.orc
 
 import java.io.File
 
+import org.apache.spark.sql.hive.HiveExternalCatalog
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.spark.sql.{QueryTest, Row}
@@ -146,6 +147,81 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA
 
     sql("DROP TABLE IF EXISTS orcNullValues")
   }
+
+  test("Read char/varchar column written by Hive") {
+    val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+    val location = Utils.createTempDir()
+    val uri = location.toURI
+    try {
+      hiveClient.runSqlHive(
+        s"""
+           |CREATE EXTERNAL TABLE hive_orc(
+           |  a STRING,
+           |  b CHAR(10),
+           |  c VARCHAR(10))
+           |STORED AS orc
+           |LOCATION '$uri'""".stripMargin)
+
+      hiveClient.runSqlHive(
+        "INSERT INTO TABLE hive_orc SELECT 'a', 'b', 'c' FROM (SELECT 1) t")
+
+      // We create a different table in Spark using the same schema which points to
+      // the same location.
+      spark.sql(
+        s"""
+           |CREATE EXTERNAL TABLE spark_orc(
+           |  a STRING,
+           |  b CHAR(10),
+           |  c VARCHAR(10))
+           |STORED AS orc
+           |LOCATION '$uri'""".stripMargin)
+      val result = Row("a", "b         ", "c")
+      checkAnswer(spark.table("hive_orc"), result)
+      checkAnswer(spark.table("spark_orc"), result)
+    } finally {
+      hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc")
+      hiveClient.runSqlHive("DROP TABLE IF EXISTS spark_orc")
+      Utils.deleteRecursively(location)
+    }
+  }
+
+  test("Read char/varchar column written by Spark in Hive") {
+    val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+    val location = Utils.createTempDir()
+    val uri = location.toURI
+
+    try {
+      spark.sql(
+        s"""
+           |CREATE EXTERNAL TABLE spark_orc(
+           |  a STRING,
+           |  b CHAR(10),
+           |  c VARCHAR(10))
+           |STORED AS orc
+           |LOCATION '$uri'""".stripMargin)
+
+      spark.sql("INSERT INTO TABLE spark_orc SELECT 'a', 'b', 'c' FROM (SELECT 1) t")
+
+      // We create a different table in Hive using the same schema which points to
+      // the same location.
+      hiveClient.runSqlHive(
+        s"""
+           |CREATE EXTERNAL TABLE hive_orc(
+           |  a STRING,
+           |  b CHAR(10),
+           |  c VARCHAR(10))
+           |STORED AS orc
+           |LOCATION '$uri'""".stripMargin)
+
+      val result = Row("a", "b         ", "c")
+      checkAnswer(spark.table("hive_orc"), result)
+      checkAnswer(spark.table("spark_orc"), result)
+    } finally {
+      hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc")
+      hiveClient.runSqlHive("DROP TABLE IF EXISTS spark_orc")
+      Utils.deleteRecursively(location)
+    }
+  }
 }
 
 class OrcSourceSuite extends OrcSuite {
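Why the parser change matters: Catalyst models char and varchar as StringType, so the old `typedVisit[DataType](col.dataType).catalogString` call recorded a column declared as CHAR(10) as plain `string` in the catalog, dropping the declared length; the new `getCatalogColumnDataType` keeps the original `char(...)`/`varchar(...)` text instead. The following standalone sketch shows the erasure (illustration only, not part of the patch; the object name is invented and it assumes a Spark 2.1-era classpath):

import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Illustration: Catalyst parses char/varchar into StringType, so a parse
// round-trip through catalogString loses the declared length.
object CharVarcharRoundTrip {
  def main(args: Array[String]): Unit = {
    val parsed = CatalystSqlParser.parseDataType("CHAR(10)")
    println(parsed.catalogString)  // prints "string", not "char(10)"
  }
}

The padding in the expected rows follows from the same semantics: Hive pads a CHAR(10) value to its declared length with trailing spaces, so 'b' reads back as "b" followed by nine spaces, while the VARCHAR(10) value 'c' is stored unpadded.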