diff --git a/README.md b/README.md
index 48688e20..f6ee4749 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ When writing files the API accepts several options:
 * `nullValue`: The value to write `null` value. Default is string `null`. When this is `null`, it does not write attributes and elements for fields.
 * `attributePrefix`: The prefix for attributes so that we can differentiating attributes and elements. This will be the prefix for field names. Default is `@`.
 * `valueTag`: The tag used for the value when there are attributes in the element having no child. Default is `#VALUE`.
-* `codec`: compression codec to use when saving to file. Should be the fully qualified name of a class implementing `org.apache.hadoop.io.compress.CompressionCodec` or one of case-insensitive shorten names (`bzip2`, `gzip`, `lz4`, and `snappy`). Defaults to no compression when a codec is not specified.
+* `compression`: Compression codec to use when saving to file. Should be the fully qualified name of a class implementing `org.apache.hadoop.io.compress.CompressionCodec` or one of the case-insensitive shortened names (`bzip2`, `gzip`, `lz4`, and `snappy`). Defaults to no compression when a codec is not specified.
 
 Currently it supports the shortened name usage. You can use just `xml` instead of `com.databricks.spark.xml` from Spark 1.5.0+
diff --git a/src/main/scala/com/databricks/spark/xml/XmlOptions.scala b/src/main/scala/com/databricks/spark/xml/XmlOptions.scala
index 3b8fe61c..01ccaa7c 100644
--- a/src/main/scala/com/databricks/spark/xml/XmlOptions.scala
+++ b/src/main/scala/com/databricks/spark/xml/XmlOptions.scala
@@ -23,7 +23,7 @@ private[xml] class XmlOptions(
   extends Serializable{
 
   val charset = parameters.getOrElse("charset", XmlOptions.DEFAULT_CHARSET)
-  val codec = parameters.get("codec").orNull
+  val codec = parameters.get("compression").orElse(parameters.get("codec")).orNull
   val rowTag = parameters.getOrElse("rowTag", XmlOptions.DEFAULT_ROW_TAG)
   val rootTag = parameters.getOrElse("rootTag", XmlOptions.DEFAULT_ROOT_TAG)
   val samplingRatio = parameters.get("samplingRatio").map(_.toDouble).getOrElse(1.0)
diff --git a/src/test/scala/com/databricks/spark/xml/XmlSuite.scala b/src/test/scala/com/databricks/spark/xml/XmlSuite.scala
index 64e10a9b..8610fa81 100755
--- a/src/test/scala/com/databricks/spark/xml/XmlSuite.scala
+++ b/src/test/scala/com/databricks/spark/xml/XmlSuite.scala
@@ -353,7 +353,7 @@ class XmlSuite extends FunSuite with BeforeAndAfterAll {
     val cars = sqlContext.xmlFile(carsFile)
     cars.save("com.databricks.spark.xml", SaveMode.Overwrite,
-      Map("path" -> copyFilePath, "codec" -> "gZiP"))
+      Map("path" -> copyFilePath, "compression" -> "gZiP"))
     val carsCopyPartFile = new File(copyFilePath, "part-00000.gz")
     // Check that the part file has a .gz extension
     assert(carsCopyPartFile.exists())
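
For context, a minimal usage sketch (not part of the patch) of the renamed option, written against the same Spark 1.x-era API the test above uses; `sc`, `cars.xml`, and the output paths are hypothetical:

```scala
import org.apache.spark.sql.{SQLContext, SaveMode}
import com.databricks.spark.xml._ // adds xmlFile to SQLContext

// Hypothetical setup: `sc` is an existing SparkContext, cars.xml an input file.
val sqlContext = new SQLContext(sc)
val cars = sqlContext.xmlFile("cars.xml")

// New option name introduced by this change; the shortened codec name
// is matched case-insensitively, as the `gZiP` test above exercises.
cars.save("com.databricks.spark.xml", SaveMode.Overwrite,
  Map("path" -> "cars-compressed.xml", "compression" -> "gzip"))

// The old `codec` key still works, since XmlOptions falls back to it.
cars.save("com.databricks.spark.xml", SaveMode.Overwrite,
  Map("path" -> "cars-legacy.xml", "codec" -> "gzip"))
```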
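
One behavioral detail of the `XmlOptions` change worth noting: when both keys are supplied, `compression` wins, because `orElse` is only consulted when the first lookup is empty. A standalone sketch of that precedence, with hypothetical values:

```scala
// "compression" takes precedence over "codec" when both are present.
val parameters = Map("codec" -> "bzip2", "compression" -> "gzip")
val codec = parameters.get("compression").orElse(parameters.get("codec")).orNull
assert(codec == "gzip")
```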