diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala index 92b156fb8f238..218d56c0f2034 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala @@ -95,7 +95,7 @@ class XmlOptions( val nullValue = parameters.getOrElse(NULL_VALUE, XmlOptions.DEFAULT_NULL_VALUE) val columnNameOfCorruptRecord = parameters.getOrElse(COLUMN_NAME_OF_CORRUPT_RECORD, defaultColumnNameOfCorruptRecord) - val ignoreSurroundingSpaces = getBool(IGNORE_SURROUNDING_SPACES, false) + val ignoreSurroundingSpaces = getBool(IGNORE_SURROUNDING_SPACES, true) val parseMode = ParseMode.fromString(parameters.getOrElse(MODE, PermissiveMode.name)) val inferSchema = getBool(INFER_SCHEMA, true) val rowValidationXSDPath = parameters.get(ROW_VALIDATION_XSD_PATH).orNull diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala index 78f9d5285c239..6cbbd32800614 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala @@ -732,7 +732,7 @@ class XmlSuite extends QueryTest with SharedSparkSession { .collect() assert(results(0) === Row("alice", "35")) - assert(results(1) === Row("bob", " ")) + assert(results(1) === Row("bob", "")) assert(results(2) === Row("coc", "24")) } @@ -818,7 +818,7 @@ class XmlSuite extends QueryTest with SharedSparkSession { assert(result(0) === Row(Row(null))) assert(result(1) === Row(Row(Row(null, null)))) assert(result(2) === Row(Row(Row("E", null)))) - assert(result(3) === Row(Row(Row("E", " ")))) + assert(result(3) === Row(Row(Row("E", "")))) assert(result(4) === Row(Row(Row("E", "")))) } @@ -1146,8 +1146,8 @@ class XmlSuite extends QueryTest with SharedSparkSession { .option("inferSchema", true) .xml(getTestResourcePath(resDir + "mixed_children.xml")) val mixedRow = mixedDF.head() - assert(mixedRow.getAs[Row](0) === Row(List(" issue ", " text ignored "), " lorem ")) - assert(mixedRow.getString(1) === " ipsum ") + assert(mixedRow.getAs[Row](0) === Row(List("issue", "text ignored"), "lorem")) + assert(mixedRow.getString(1) === "ipsum") } test("test mixed text and complex element children") { @@ -1155,9 +1155,9 @@ class XmlSuite extends QueryTest with SharedSparkSession { .option("rowTag", "root") .option("inferSchema", true) .xml(getTestResourcePath(resDir + "mixed_children_2.xml")) - assert(mixedDF.select("foo.bar").head().getString(0) === " lorem ") + assert(mixedDF.select("foo.bar").head().getString(0) === "lorem") assert(mixedDF.select("foo.baz.bing").head().getLong(0) === 2) - assert(mixedDF.select("missing").head().getString(0) === " ipsum ") + assert(mixedDF.select("missing").head().getString(0) === "ipsum") } test("test XSD validation") { @@ -1721,7 +1721,7 @@ class XmlSuite extends QueryTest with SharedSparkSession { assert(result(1).getAs[String]("_attr") == "attr1" && result(1).getAs[String]("_VALUE") == "value2") // comments aren't included in valueTag - assert(result(2).getAs[String]("_VALUE") == "\n value3\n ") + assert(result(2).getAs[String]("_VALUE") == "value3") } }