23 changes: 21 additions & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -240,15 +240,34 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {

   /**
    * Loads a JSON file (one object per line) and returns the result as a [[DataFrame]].
+   * With the param `isStandard` we can load multi-line JSON objects directly.
    * See the documentation on the overloaded `json()` method with varargs for more details.
    *
    * @since 1.4.0
    */
-  def json(path: String): DataFrame = {
+  def json(path: String, isStandard: Boolean = false): DataFrame = {
     // This method ensures that calls that explicitly need a single argument work, see SPARK-16009
-    json(Seq(path): _*)
+    if (!isStandard) {
+      json(Seq(path): _*)
+    } else {
+      val jsonRDD = sparkSession.sparkContext.wholeTextFiles(path)
+        .map(line => line.toString().replaceAll("\\s+", ""))
+        .map { jsonLine =>
+          val index = jsonLine.indexOf(",")
Member:

Do you mind if I ask what this line means?

Contributor Author:

maybe this code is bad, I just want to get the JSON contents,
such as: ("filename", json_contents)

+          jsonLine.substring(index + 1, jsonLine.length - 1)
+        }
+      sparkSession.read.json(jsonRDD)
+    }
+  }
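
As a side note on the approach discussed in the review thread above: wholeTextFiles returns (filePath, fileContents) pairs, so keeping only the second element of each pair yields the raw JSON text without the indexOf(",")/substring handling. A minimal, hypothetical sketch of that idea (paths and names are placeholders; it assumes the RDD-based json() reader available in Spark 2.x, which parses each RDD element as one JSON document):

import org.apache.spark.sql.SparkSession

object WholeFileJsonSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("whole-file-json")            // placeholder app name
      .master("local[*]")
      .getOrCreate()

    // wholeTextFiles yields RDD[(String, String)] = (filePath, fileContents);
    // map(_._2) keeps only the JSON text of each file.
    val contents = spark.sparkContext
      .wholeTextFiles("/path/to/json/dir")   // placeholder input path
      .map(_._2)

    // Each RDD element (one whole file) is parsed as a single JSON document.
    val df = spark.read.json(contents)
    df.show()

    spark.stop()
  }
}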

+  /**
+   * Kept for compatibility with Spark 1.6, see SPARK-16009.
+   * Because calls such as Option(path).map(spark.read.json) fail to compile against
+   * json(path: String, isStandard: Boolean = false), we provide this single-argument method.
+   */
+  def json(path: String): DataFrame = format("json").load(path)
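
As context for the compatibility overload above, a small illustrative sketch (hypothetical Reader class, not Spark's API) of the eta-expansion problem SPARK-16009 points at: a method whose second parameter has a default value cannot be passed directly where a one-argument function is expected, so Option(path).map(spark.read.json) needs a true single-argument overload.

object EtaExpansionSketch {
  class Reader {
    def json(path: String, isStandard: Boolean = false): String =
      s"read $path (isStandard = $isStandard)"
  }

  def main(args: Array[String]): Unit = {
    val reader = new Reader
    // Option("people.json").map(reader.json)                      // does not compile: no single-argument method to eta-expand
    val parsed = Option("people.json").map(p => reader.json(p))    // an explicit lambda works
    println(parsed)
  }
}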

   /**
    * Loads a JSON file (one object per line) and returns the result as a [[DataFrame]].
    *