12 changes: 6 additions & 6 deletions dev/deps/spark-deps-hadoop-2.2
@@ -72,13 +72,13 @@ hk2-utils-2.4.0-b34.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 janino-2.7.8.jar
 javassist-3.18.1-GA.jar
 javax.annotation-api-1.2.jar
@@ -128,7 +128,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
12 changes: 6 additions & 6 deletions dev/deps/spark-deps-hadoop-2.3
@@ -74,13 +74,13 @@ hk2-utils-2.4.0-b34.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
 javassist-3.18.1-GA.jar
@@ -135,7 +135,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
12 changes: 6 additions & 6 deletions dev/deps/spark-deps-hadoop-2.4
@@ -74,13 +74,13 @@ hk2-utils-2.4.0-b34.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
 javassist-3.18.1-GA.jar
@@ -135,7 +135,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
12 changes: 6 additions & 6 deletions dev/deps/spark-deps-hadoop-2.6
@@ -80,14 +80,14 @@ htrace-core-3.0.4.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
@@ -143,7 +143,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
12 changes: 6 additions & 6 deletions dev/deps/spark-deps-hadoop-2.7
@@ -80,14 +80,14 @@ htrace-core-3.1.0-incubating.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.6.5.jar
+jackson-core-2.6.5.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.6.5.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-paranamer-2.6.5.jar
+jackson-module-scala_2.11-2.6.5.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
@@ -144,7 +144,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.3.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar
8 changes: 1 addition & 7 deletions pom.xml
@@ -160,7 +160,7 @@
     <jline.version>${scala.version}</jline.version>
     <jline.groupid>org.scala-lang</jline.groupid>
     <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
-    <fasterxml.jackson.version>2.7.3</fasterxml.jackson.version>
+    <fasterxml.jackson.version>2.6.5</fasterxml.jackson.version>
     <snappy.version>1.1.2.4</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
     <calcite.version>1.2.0-incubating</calcite.version>
@@ -180,7 +180,6 @@
     <antlr4.version>4.5.3</antlr4.version>
     <jpam.version>1.1</jpam.version>
     <selenium.version>2.52.0</selenium.version>
-    <paranamer.version>2.8</paranamer.version>
 
     <test.java.home>${java.home}</test.java.home>
     <test.exclude.tags></test.exclude.tags>
@@ -1826,11 +1825,6 @@
       <artifactId>antlr4-runtime</artifactId>
       <version>${antlr4.version}</version>
     </dependency>
-    <dependency>
-      <groupId>com.thoughtworks.paranamer</groupId>
-      <artifactId>paranamer</artifactId>
-      <version>${paranamer.version}</version>
-    </dependency>
   </dependencies>
 </dependencyManagement>
 
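A quick way to confirm the downgrade actually takes effect at runtime is to print the resolved jackson-databind version. This is a minimal sketch, assuming only jackson-databind on the classpath; JacksonVersionProbe is a hypothetical helper, not part of this patch:

    import com.fasterxml.jackson.databind.ObjectMapper

    object JacksonVersionProbe {
      def main(args: Array[String]): Unit = {
        // Prints the jackson-databind version resolved at runtime; after this
        // change it should report 2.6.5 rather than 2.7.3.
        println(new ObjectMapper().version())
      }
    }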
3 changes: 0 additions & 3 deletions python/pyspark/sql/readwriter.py
@@ -193,9 +193,6 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
             set, it uses the default value, ``true``.
         :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is
             set, it uses the default value, ``false``.
-        :param allowNonNumericNumbers: allows using non-numeric numbers such as "NaN", "Infinity",
-            "-Infinity", "INF", "-INF", which are converted to floating
-            point numbers, ``true``.
         :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character
             using backslash quoting mechanism. If None is
             set, it uses the default value, ``false``.
@@ -293,8 +293,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * </li>
    * <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
    * (e.g. 00012)</li>
-   * <li>`allowNonNumericNumbers` (default `true`): allows using non-numeric numbers such as "NaN",
-   * "Infinity", "-Infinity", "INF", "-INF", which are converted to floating point numbers.</li>
    * <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
    * character using backslash quoting mechanism</li>
    * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
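The options listed in this scaladoc are supplied through DataFrameReader.option. A minimal usage sketch in the style of the test suite further below, assuming a live `spark` session; the expected values mirror the leading-zeros test visible in the suite's context lines:

    val rdd = spark.sparkContext.parallelize(Seq("""{"age": 0012}"""))
    // With leading zeros allowed, "age" is parsed as a numeric column rather
    // than being routed to _corrupt_record.
    val df = spark.read.option("allowNumericLeadingZeros", "true").json(rdd)
    assert(df.schema.head.name == "age")
    assert(df.first().getLong(0) == 12)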
@@ -129,15 +129,13 @@ object JacksonParser extends Logging {
       case (VALUE_STRING, FloatType) =>
         // Special case handling for NaN and Infinity.
         val value = parser.getText
-        if (value.equals("NaN") ||
-          value.equals("Infinity") ||
-          value.equals("+Infinity") ||
-          value.equals("-Infinity")) {
+        val lowerCaseValue = value.toLowerCase()
+        if (lowerCaseValue.equals("nan") ||
+          lowerCaseValue.equals("infinity") ||
+          lowerCaseValue.equals("-infinity") ||
+          lowerCaseValue.equals("inf") ||
+          lowerCaseValue.equals("-inf")) {
           value.toFloat
-        } else if (value.equals("+INF") || value.equals("INF")) {
-          Float.PositiveInfinity
-        } else if (value.equals("-INF")) {
-          Float.NegativeInfinity
         } else {
           throw new SparkSQLJsonProcessingException(s"Cannot parse $value as FloatType.")
         }
@@ -148,15 +146,13 @@ object JacksonParser extends Logging {
       case (VALUE_STRING, DoubleType) =>
         // Special case handling for NaN and Infinity.
         val value = parser.getText
-        if (value.equals("NaN") ||
-          value.equals("Infinity") ||
-          value.equals("+Infinity") ||
-          value.equals("-Infinity")) {
+        val lowerCaseValue = value.toLowerCase()
+        if (lowerCaseValue.equals("nan") ||
+          lowerCaseValue.equals("infinity") ||
+          lowerCaseValue.equals("-infinity") ||
+          lowerCaseValue.equals("inf") ||
+          lowerCaseValue.equals("-inf")) {
           value.toDouble
-        } else if (value.equals("+INF") || value.equals("INF")) {
-          Double.PositiveInfinity
-        } else if (value.equals("-INF")) {
-          Double.NegativeInfinity
         } else {
           throw new SparkSQLJsonProcessingException(s"Cannot parse $value as DoubleType.")
         }
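Note that these VALUE_STRING branches only apply to quoted values read against an explicit FloatType or DoubleType schema; bare NaN/Infinity tokens are decided by Jackson itself. A hedged usage sketch, assuming a live `spark` session:

    import org.apache.spark.sql.types.{FloatType, StructField, StructType}

    val schema = StructType(StructField("age", FloatType, nullable = true) :: Nil)
    val rdd = spark.sparkContext.parallelize(Seq("""{"age": "NaN"}"""))
    val df = spark.read.schema(schema).json(rdd)
    // "NaN" lower-cases to "nan", so the branch falls through to value.toFloat,
    // which Java's Float.parseFloat turns into Float.NaN.
    assert(df.first().getFloat(0).isNaN)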
@@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources.json
 
 import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
 
 /**
  * Test cases for various [[JSONOptions]].
@@ -94,51 +93,23 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
     assert(df.first().getLong(0) == 18)
   }
 
-  test("allowNonNumericNumbers off") {
-    // non-quoted non-numeric numbers don't work if allowNonNumericNumbers is off.
-    var testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
-      """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": INF}""",
-      """{"age": +INF}""", """{"age": -INF}""")
-    testCases.foreach { str =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)
-
-      assert(df.schema.head.name == "_corrupt_record")
-    }
-
-    // quoted non-numeric numbers should still work even allowNonNumericNumbers is off.
-    testCases = Seq("""{"age": "NaN"}""", """{"age": "Infinity"}""", """{"age": "+Infinity"}""",
-      """{"age": "-Infinity"}""", """{"age": "INF"}""", """{"age": "+INF"}""",
-      """{"age": "-INF"}""")
-    val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isPosInfinity, _.isNegInfinity)
-    val schema = StructType(StructField("age", DoubleType, true) :: Nil)
-
-    testCases.zipWithIndex.foreach { case (str, idx) =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "false").schema(schema).json(rdd)
-
-      assert(df.schema.head.name == "age")
-      assert(tests(idx)(df.first().getDouble(0)))
-    }
+  // The following two tests are not really working - need to look into Jackson's
+  // JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS.
+  ignore("allowNonNumericNumbers off") {
+    val str = """{"age": NaN}"""
+    val rdd = spark.sparkContext.parallelize(Seq(str))
+    val df = spark.read.json(rdd)
+
+    assert(df.schema.head.name == "_corrupt_record")
   }
 
-  test("allowNonNumericNumbers on") {
-    val testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
-      """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": +INF}""",
-      """{"age": -INF}""", """{"age": "NaN"}""", """{"age": "Infinity"}""",
-      """{"age": "-Infinity"}""")
-    val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isNegInfinity, _.isNaN, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isNegInfinity)
-    val schema = StructType(StructField("age", DoubleType, true) :: Nil)
-    testCases.zipWithIndex.foreach { case (str, idx) =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "true").schema(schema).json(rdd)
-
-      assert(df.schema.head.name == "age")
-      assert(tests(idx)(df.first().getDouble(0)))
-    }
+  ignore("allowNonNumericNumbers on") {
+    val str = """{"age": NaN}"""
+    val rdd = spark.sparkContext.parallelize(Seq(str))
+    val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)
+
+    assert(df.schema.head.name == "age")
+    assert(df.first().getDouble(0).isNaN)
+  }
 
   test("allowBackslashEscapingAnyCharacter off") {
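The newly ignored tests point at Jackson's JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS as the suspect. A minimal, Spark-independent sketch for probing that feature directly against the resolved jackson-core; the expected outputs in the comments are assumptions to verify, not established behavior:

    import com.fasterxml.jackson.core.{JsonFactory, JsonParser}

    object NonNumericProbe {
      def main(args: Array[String]): Unit = {
        val factory = new JsonFactory()
        factory.enable(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS)
        val parser = factory.createParser("""{"age": NaN}""")
        // Advance through START_OBJECT and FIELD_NAME to the value token.
        parser.nextToken(); parser.nextToken(); parser.nextToken()
        // With the feature enabled, the bare NaN should surface as a
        // floating-point token whose value is Double.NaN.
        println(parser.getCurrentToken)      // expected: VALUE_NUMBER_FLOAT
        println(parser.getDoubleValue.isNaN) // expected: true
        parser.close()
      }
    }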