diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 0b20022b14b8d..b1453c637f79e 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -152,6 +152,8 @@ def json(self, path, schema=None):
You can set the following JSON-specific options to deal with non-standard JSON files:
* ``primitivesAsString`` (default ``false``): infers all primitive values as a string \
type
+ * ``floatAsBigDecimal`` (default ``false``): infers all floating-point values as a decimal \
+ type
* ``allowComments`` (default ``false``): ignores Java/C++ style comments in JSON records
* ``allowUnquotedFieldNames`` (default ``false``): allows unquoted JSON field names
* ``allowSingleQuotes`` (default ``true``): allows single quotes in addition to double \
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 634c1bd4739b1..2e0c6c7df967e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -252,6 +252,8 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
*
* You can set the following JSON-specific options to deal with non-standard JSON files:
*
* `primitivesAsString` (default `false`): infers all primitive values as a string type
+ * `floatAsBigDecimal` (default `false`): infers all floating-point values as a decimal
+ * type
* `allowComments` (default `false`): ignores Java/C++ style comments in JSON records
* `allowUnquotedFieldNames` (default `false`): allows unquoted JSON field names
* `allowSingleQuotes` (default `true`): allows single quotes in addition to double quotes
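
(For reference, a minimal Scala usage sketch of the new option, assuming a `sqlContext` and a JSON `path` are in scope; the option name and its string value mirror the test added at the end of this patch:)

    // Hedged sketch: opt in to decimal inference for floating-point values.
    val df = sqlContext.read
      .option("floatAsBigDecimal", "true")
      .json(path)
    df.printSchema()  // floating-point fields now show up as DecimalType
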
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala
index 44d5e4ff7ec8b..8b773ddfcb656 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/InferSchema.scala
@@ -134,8 +134,12 @@ private[json] object InferSchema {
val v = parser.getDecimalValue
DecimalType(v.precision(), v.scale())
case FLOAT | DOUBLE =>
- // TODO(davies): Should we use decimal if possible?
- DoubleType
+ if (configOptions.floatAsBigDecimal) {
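+ // Preserve the parsed literal exactly by reusing its precision and scale.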
+ val v = parser.getDecimalValue
+ DecimalType(v.precision(), v.scale())
+ } else {
+ DoubleType
+ }
}
case VALUE_TRUE | VALUE_FALSE => BooleanType
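
(Why the test below expects DecimalType(17, -292): precision and scale are taken verbatim from the parsed value. A small sketch, assuming Jackson's getDecimalValue yields the usual java.math.BigDecimal for the literal:)

    // 1.7976931348623157E308 has 17 significant digits and exponent 308.
    val v = new java.math.BigDecimal("1.7976931348623157E308")
    v.precision()  // 17              -> DecimalType precision
    v.scale()      // -292 (16 - 308) -> DecimalType scale
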
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
index fe5b20697e40e..31a95ed461215 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONOptions.scala
@@ -34,6 +34,8 @@ private[sql] class JSONOptions(
parameters.get("samplingRatio").map(_.toDouble).getOrElse(1.0)
val primitivesAsString =
parameters.get("primitivesAsString").map(_.toBoolean).getOrElse(false)
+ val floatAsBigDecimal =
+ parameters.get("floatAsBigDecimal").map(_.toBoolean).getOrElse(false)
val allowComments =
parameters.get("allowComments").map(_.toBoolean).getOrElse(false)
val allowUnquotedFieldNames =
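
(The new flag is parsed like its neighbours via Scala's String.toBoolean; a standalone sketch of that behaviour, with a hypothetical parameters map:)

    // Hypothetical map standing in for the data source options.
    val parameters = Map("floatAsBigDecimal" -> "true")
    val floatAsBigDecimal =
      parameters.get("floatAsBigDecimal").map(_.toBoolean).getOrElse(false)
    // Missing key -> false; anything other than "true"/"false"
    // (case-insensitive) makes String.toBoolean throw IllegalArgumentException.
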
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 00eaeb0d34e87..dd83a0e36f6f7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -771,6 +771,34 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
)
}

+ test("Loading a JSON dataset with floatAsBigDecimal returns schema with float types as BigDecimal") {
+ val jsonDF = sqlContext.read.option("floatAsBigDecimal", "true").json(primitiveFieldAndType)
+
+ val expectedSchema = StructType(
+ StructField("bigInteger", DecimalType(20, 0), true) ::
+ StructField("boolean", BooleanType, true) ::
+ StructField("double", DecimalType(17, -292), true) ::
+ StructField("integer", LongType, true) ::
+ StructField("long", LongType, true) ::
+ StructField("null", StringType, true) ::
+ StructField("string", StringType, true) :: Nil)
+
+ assert(expectedSchema === jsonDF.schema)
+
+ jsonDF.registerTempTable("jsonTable")
+
+ checkAnswer(
+ sql("select * from jsonTable"),
+ Row(BigDecimal("92233720368547758070"),
+ true,
+ BigDecimal("1.7976931348623157E308"),
+ 10,
+ 21474836470L,
+ null,
+ "this is a simple string.")
+ )
+ }
+
test("Loading a JSON dataset from a text file with SQL") {
val dir = Utils.createTempDir()
dir.delete()