Skip to content

Commit b60979b

Browse files
committed
Adds a constructor which accepts a Configuration, and fixes default value of assumeBinaryIsString
1 parent 743730f commit b60979b

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,12 @@ private[sql] class SQLConf extends Serializable with CatalystConf {
506506
*/
507507
private[spark] def isParquetINT96AsTimestamp: Boolean = getConf(PARQUET_INT96_AS_TIMESTAMP)
508508

509+
/**
510+
* When set to true, sticks to Parquet format spec when converting Parquet schema to Spark SQL
511+
* schema and vice versa. Otherwise, falls back to compatible mode.
512+
*/
513+
private[spark] def followParquetFormatSpec: Boolean = getConf(PARQUET_FOLLOW_PARQUET_FORMAT_SPEC)
514+
509515
/**
510516
* When set to true, partition pruning for in-memory columnar tables is enabled.
511517
*/

sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package org.apache.spark.sql.parquet
1919

2020
import scala.collection.JavaConversions._
2121

22+
import org.apache.hadoop.conf.Configuration
2223
import org.apache.parquet.schema.OriginalType._
2324
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
2425
import org.apache.parquet.schema.Type.Repetition._
@@ -60,15 +61,29 @@ private[parquet] class CatalystSchemaConverter(
6061
// Only used when constructing converter for converting Spark SQL schema to Parquet schema, in
6162
// which case `assumeInt96IsTimestamp` and `assumeBinaryIsString` are irrelevant.
6263
def this() = this(
63-
assumeBinaryIsString = true,
64-
assumeInt96IsTimestamp = true,
65-
followParquetFormatSpec = false)
64+
assumeBinaryIsString = SQLConf.PARQUET_BINARY_AS_STRING.defaultValue.get,
65+
assumeInt96IsTimestamp = SQLConf.PARQUET_INT96_AS_TIMESTAMP.defaultValue.get,
66+
followParquetFormatSpec = SQLConf.PARQUET_FOLLOW_PARQUET_FORMAT_SPEC.defaultValue.get)
6667

6768
// Builds a converter whose three flags are read from the given SQLConf through its
// accessors: isParquetBinaryAsString, isParquetINT96AsTimestamp and followParquetFormatSpec.
def this(conf: SQLConf) = this(
6869
assumeBinaryIsString = conf.isParquetBinaryAsString,
6970
assumeInt96IsTimestamp = conf.isParquetINT96AsTimestamp,
7071
followParquetFormatSpec = conf.followParquetFormatSpec)
7172

73+
// Builds a converter from a Hadoop Configuration. Each flag is looked up with
// conf.getBoolean under the key of the corresponding SQLConf entry, and that entry's
// default value is passed as the fallback argument.
// NOTE(review): `.defaultValue.get` presumably unwraps an Option and would throw if an
// entry had no default defined -- confirm against the SQLConf entry definitions.
def this(conf: Configuration) = this(
74+
assumeBinaryIsString =
75+
conf.getBoolean(
76+
SQLConf.PARQUET_BINARY_AS_STRING.key,
77+
SQLConf.PARQUET_BINARY_AS_STRING.defaultValue.get),
78+
assumeInt96IsTimestamp =
79+
conf.getBoolean(
80+
SQLConf.PARQUET_INT96_AS_TIMESTAMP.key,
81+
SQLConf.PARQUET_INT96_AS_TIMESTAMP.defaultValue.get),
82+
followParquetFormatSpec =
83+
conf.getBoolean(
84+
SQLConf.PARQUET_FOLLOW_PARQUET_FORMAT_SPEC.key,
85+
SQLConf.PARQUET_FOLLOW_PARQUET_FORMAT_SPEC.defaultValue.get))
86+
7287
/**
7388
* Converts Parquet [[MessageType]] `parquetSchema` to a Spark SQL [[StructType]].
7489
*/

0 commit comments

Comments
 (0)