From ea49eedeb450751430499af8e6e396b9f52dc3fa Mon Sep 17 00:00:00 2001
From: liwensun
Date: Mon, 3 Jun 2019 14:18:29 -0700
Subject: [PATCH 1/3] change the conf default

---
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index b9e9e59bff54..d6761255cb0f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1718,7 +1718,7 @@ object SQLConf {
         "Data source V1 now silently drops partitionBy columns for non-file-format sources; " +
         "turning the flag on provides a way for these sources to see these partitionBy columns.")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   val NAME_NON_STRUCT_GROUPING_KEY_AS_VALUE =
     buildConf("spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue")

From 2e180933c5463389bddd8ff683bc3d70889739d6 Mon Sep 17 00:00:00 2001
From: liwensun
Date: Thu, 6 Jun 2019 12:51:41 -0700
Subject: [PATCH 2/3] remove legacy config

---
 .../scala/org/apache/spark/sql/internal/SQLConf.scala | 9 ---------
 .../scala/org/apache/spark/sql/DataFrameWriter.scala  | 9 +++------
 2 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index d6761255cb0f..583db58a67a6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1711,15 +1711,6 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
-  val LEGACY_PASS_PARTITION_BY_AS_OPTIONS =
-    buildConf("spark.sql.legacy.sources.write.passPartitionByAsOptions")
-      .internal()
-      .doc("Whether to pass the partitionBy columns as options in DataFrameWriter. " +
-        "Data source V1 now silently drops partitionBy columns for non-file-format sources; " +
-        "turning the flag on provides a way for these sources to see these partitionBy columns.")
-      .booleanConf
-      .createWithDefault(true)
-
   val NAME_NON_STRUCT_GROUPING_KEY_AS_VALUE =
     buildConf("spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue")
       .internal()

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 0c48ec9bb465..e900c185f824 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -316,12 +316,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   }
 
   private def saveToV1Source(): Unit = {
-    if (SparkSession.active.sessionState.conf.getConf(
-        SQLConf.LEGACY_PASS_PARTITION_BY_AS_OPTIONS)) {
-      partitioningColumns.foreach { columns =>
-        extraOptions += (DataSourceUtils.PARTITIONING_COLUMNS_KEY ->
-          DataSourceUtils.encodePartitioningColumns(columns))
-      }
+    partitioningColumns.foreach { columns =>
+      extraOptions += (DataSourceUtils.PARTITIONING_COLUMNS_KEY ->
+        DataSourceUtils.encodePartitioningColumns(columns))
     }
 
     // Code path for data source v1.
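Aside: after patch 2, DataFrameWriter.saveToV1Source unconditionally encodes the partitionBy columns into the write options under DataSourceUtils.PARTITIONING_COLUMNS_KEY. A minimal sketch of how a non-file-format V1 source could read them back; the provider class, its package, and its write logic are hypothetical, only the DataSourceUtils key and codec come from these patches, and DataSourceUtils lives in Spark's internal execution.datasources package, so it carries no API stability guarantee:

package org.example

import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
import org.apache.spark.sql.execution.datasources.DataSourceUtils
import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider}
import org.apache.spark.sql.types.StructType

// Hypothetical V1 source showing where the partitionBy columns now surface.
class PartitionAwareProvider extends CreatableRelationProvider {
  override def createRelation(
      ctx: SQLContext,
      mode: SaveMode,
      parameters: Map[String, String],
      data: DataFrame): BaseRelation = {
    // Decode with the same helper that DataFrameWriter now always uses to encode.
    val partitionColumns: Seq[String] = parameters
      .get(DataSourceUtils.PARTITIONING_COLUMNS_KEY)
      .map(DataSourceUtils.decodePartitioningColumns)
      .getOrElse(Nil)
    // ... write `data` partitioned by `partitionColumns` (elided in this sketch) ...
    new BaseRelation {
      override val sqlContext: SQLContext = ctx
      override val schema: StructType = data.schema
    }
  }
}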
From bbd8d14b939b1d21fe9237393ceacd297c2a0e87 Mon Sep 17 00:00:00 2001
From: liwensun
Date: Thu, 6 Jun 2019 13:25:41 -0700
Subject: [PATCH 3/3] fix test

---
 .../sql/test/DataFrameReaderWriterSuite.scala | 22 ++++++------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 5e6e3b4fc164..e9ab62800f84 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -225,21 +225,13 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
   }
 
   test("pass partitionBy as options") {
-    Seq(true, false).foreach { flag =>
-      withSQLConf(SQLConf.LEGACY_PASS_PARTITION_BY_AS_OPTIONS.key -> s"$flag") {
-        Seq(1).toDF.write
-          .format("org.apache.spark.sql.test")
-          .partitionBy("col1", "col2")
-          .save()
-
-        if (flag) {
-          val partColumns = LastOptions.parameters(DataSourceUtils.PARTITIONING_COLUMNS_KEY)
-          assert(DataSourceUtils.decodePartitioningColumns(partColumns) === Seq("col1", "col2"))
-        } else {
-          assert(!LastOptions.parameters.contains(DataSourceUtils.PARTITIONING_COLUMNS_KEY))
-        }
-      }
-    }
+    Seq(1).toDF.write
+      .format("org.apache.spark.sql.test")
+      .partitionBy("col1", "col2")
+      .save()
+
+    val partColumns = LastOptions.parameters(DataSourceUtils.PARTITIONING_COLUMNS_KEY)
+    assert(DataSourceUtils.decodePartitioningColumns(partColumns) === Seq("col1", "col2"))
   }
 
   test("save mode") {
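Aside: driver-side, the behavior the fixed test pins down can be exercised as below; a minimal sketch assuming the hypothetical org.example.PartitionAwareProvider from the previous aside is on the classpath:

// Assumes an active SparkSession named `spark`.
import spark.implicits._

Seq((1, "a", "b")).toDF("value", "col1", "col2").write
  .format("org.example.PartitionAwareProvider") // hypothetical V1 source
  .partitionBy("col1", "col2")
  .save()
// With the legacy flag removed, the V1 save path always forwards the
// partitionBy columns as an encoded option, so the provider above can decode
// them; sources that never read the option simply keep ignoring it, which is
// why the Seq(true, false) loop in the test could be dropped.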