diff --git a/docs/sql-data-sources-orc.md b/docs/sql-data-sources-orc.md index 561f601aa4e56..abd1901d24e4b 100644 --- a/docs/sql-data-sources-orc.md +++ b/docs/sql-data-sources-orc.md @@ -240,7 +240,7 @@ Data source options of ORC can be set via: compression - snappy + zstd compression codec to use when saving to file. This can be one of the known case-insensitive short names (none, snappy, zlib, lzo, zstd and lz4). This will override orc.compress and spark.sql.orc.compression.codec. write diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 30a37d97042af..dbb25e5adc042 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -36,6 +36,7 @@ license: | - `spark.sql.parquet.int96RebaseModeInRead` instead of `spark.sql.legacy.parquet.int96RebaseModeInRead` - `spark.sql.avro.datetimeRebaseModeInWrite` instead of `spark.sql.legacy.avro.datetimeRebaseModeInWrite` - `spark.sql.avro.datetimeRebaseModeInRead` instead of `spark.sql.legacy.avro.datetimeRebaseModeInRead` +- Since Spark 4.0, the default value of `spark.sql.orc.compression.codec` is changed from `snappy` to `zstd`. To restore the previous behavior, set `spark.sql.orc.compression.codec` to `snappy`. ## Upgrading from Spark SQL 3.4 to 3.5 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d1ac061f02af6..1928e74363cbc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1211,7 +1211,7 @@ object SQLConf { .stringConf .transform(_.toLowerCase(Locale.ROOT)) .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo", "zstd", "lz4")) - .createWithDefault("snappy") + .createWithDefault("zstd") val ORC_IMPLEMENTATION = buildConf("spark.sql.orc.impl") .doc("When native, use the native version of ORC support instead of the ORC library in Hive. " +