Skip to content

Commit dd69ac6

Browse files
committed
[SPARK-19611][SQL][FOLLOWUP] set dataSchema correctly in HiveMetastoreCatalog.convertToLogicalRelation
We made a mistake in #16944. In `HiveMetastoreCatalog#inferIfNeeded` we infer the data schema, merge it with the full schema, and return the new full schema. On the caller side we then treat that full schema as the data schema and pass it to `HadoopFsRelation`. This doesn't cause any problems, because both Parquet and ORC can work with a wrong data schema that has extra columns, but it's better to fix this mistake. How was this patch tested? N/A. Author: Wenchen Fan <wenchen@databricks.com>. Closes #19615 from cloud-fan/infer. (cherry picked from commit 4d9ebf3) Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 7f8236c commit dd69ac6

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -164,13 +164,12 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
164164
}
165165
}
166166

167-
val (dataSchema, updatedTable) =
168-
inferIfNeeded(relation, options, fileFormat, Option(fileIndex))
167+
val updatedTable = inferIfNeeded(relation, options, fileFormat, Option(fileIndex))
169168

170169
val fsRelation = HadoopFsRelation(
171170
location = fileIndex,
172171
partitionSchema = partitionSchema,
173-
dataSchema = dataSchema,
172+
dataSchema = updatedTable.dataSchema,
174173
// We don't support hive bucketed tables, only ones we write out.
175174
bucketSpec = None,
176175
fileFormat = fileFormat,
@@ -192,13 +191,13 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
192191
fileFormatClass,
193192
None)
194193
val logicalRelation = cached.getOrElse {
195-
val (dataSchema, updatedTable) = inferIfNeeded(relation, options, fileFormat)
194+
val updatedTable = inferIfNeeded(relation, options, fileFormat)
196195
val created =
197196
LogicalRelation(
198197
DataSource(
199198
sparkSession = sparkSession,
200199
paths = rootPath.toString :: Nil,
201-
userSpecifiedSchema = Option(dataSchema),
200+
userSpecifiedSchema = Option(updatedTable.dataSchema),
202201
// We don't support hive bucketed tables, only ones we write out.
203202
bucketSpec = None,
204203
options = options,
@@ -226,7 +225,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
226225
relation: HiveTableRelation,
227226
options: Map[String, String],
228227
fileFormat: FileFormat,
229-
fileIndexOpt: Option[FileIndex] = None): (StructType, CatalogTable) = {
228+
fileIndexOpt: Option[FileIndex] = None): CatalogTable = {
230229
val inferenceMode = sparkSession.sessionState.conf.caseSensitiveInferenceMode
231230
val shouldInfer = (inferenceMode != NEVER_INFER) && !relation.tableMeta.schemaPreservesCase
232231
val tableName = relation.tableMeta.identifier.unquotedString
@@ -243,21 +242,22 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
243242
sparkSession,
244243
options,
245244
fileIndex.listFiles(Nil, Nil).flatMap(_.files))
246-
.map(mergeWithMetastoreSchema(relation.tableMeta.schema, _))
245+
.map(mergeWithMetastoreSchema(relation.tableMeta.dataSchema, _))
247246

248247
inferredSchema match {
249-
case Some(schema) =>
248+
case Some(dataSchema) =>
249+
val schema = StructType(dataSchema ++ relation.tableMeta.partitionSchema)
250250
if (inferenceMode == INFER_AND_SAVE) {
251251
updateCatalogSchema(relation.tableMeta.identifier, schema)
252252
}
253-
(schema, relation.tableMeta.copy(schema = schema))
253+
relation.tableMeta.copy(schema = schema)
254254
case None =>
255255
logWarning(s"Unable to infer schema for table $tableName from file format " +
256256
s"$fileFormat (inference mode: $inferenceMode). Using metastore schema.")
257-
(relation.tableMeta.schema, relation.tableMeta)
257+
relation.tableMeta
258258
}
259259
} else {
260-
(relation.tableMeta.schema, relation.tableMeta)
260+
relation.tableMeta
261261
}
262262
}
263263

0 commit comments

Comments
 (0)