diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
index 3c995573d53d7..37331362efad2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -746,7 +746,8 @@ object OptimizeOneRowRelationSubquery extends Rule[LogicalPlan] {
     def unapply(plan: LogicalPlan): Option[Seq[NamedExpression]] = {
-      // SPARK-40800: always inline expressions to support a broader range of correlated
-      // subqueries and avoid expensive domain joins.
-      CollapseProject(EliminateSubqueryAliases(plan), alwaysInline = true) match {
+      // SPARK-40800: inline expressions (always by default; see the internal conf read below)
+      // to support a broader range of correlated subqueries and avoid expensive domain joins.
+      val alwaysInline = conf.getConf(SQLConf.ALWAYS_INLINE_ONE_ROW_RELATION_SUBQUERY)
+      CollapseProject(EliminateSubqueryAliases(plan), alwaysInline = alwaysInline) match {
         case Project(projectList, _: OneRowRelation) => Some(stripOuterReferences(projectList))
         case _ => None
       }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 0a60c6b0265af..101c075c90927 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3097,6 +3097,16 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
+  val ALWAYS_INLINE_ONE_ROW_RELATION_SUBQUERY =
+    buildConf("spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline")
+      .internal()
+      .doc("When true, the optimizer will always inline single row subqueries even if it " +
+        "causes extra duplication. It only takes effect when " +
+        s"${OPTIMIZE_ONE_ROW_RELATION_SUBQUERY.key} is set to true.")
+      .version("3.4.0")
+      .booleanConf
+      .createWithDefault(true)
+
   val TOP_K_SORT_FALLBACK_THRESHOLD =
     buildConf("spark.sql.execution.topKSortFallbackThreshold")
       .doc("In SQL queries with a SORT followed by a LIMIT like " +
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 7b67648d4752a..fe65e282c7712 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -2470,25 +2470,29 @@ class SubquerySuite extends QueryTest
   test("SPARK-40800: always inline expressions in OptimizeOneRowRelationSubquery") {
     withTempView("t1") {
       sql("CREATE TEMP VIEW t1 AS SELECT ARRAY('a', 'b') a")
-      // Scalar subquery.
-      checkAnswer(sql(
-        """
-          |SELECT (
-          |  SELECT array_sort(a, (i, j) -> rank[i] - rank[j])[0] AS sorted
-          |  FROM (SELECT MAP('a', 1, 'b', 2) rank)
-          |) FROM t1
-          |""".stripMargin),
-        Row("a"))
-      // Lateral subquery.
-      checkAnswer(
-        sql("""
-          |SELECT sorted[0] FROM t1
-          |JOIN LATERAL (
-          |  SELECT array_sort(a, (i, j) -> rank[i] - rank[j]) AS sorted
-          |  FROM (SELECT MAP('a', 1, 'b', 2) rank)
-          |)
-          |""".stripMargin),
-        Row("a"))
+      Seq(true, false).foreach { enabled =>
+        withSQLConf(SQLConf.ALWAYS_INLINE_ONE_ROW_RELATION_SUBQUERY.key -> enabled.toString) {
+          // Scalar subquery.
+          checkAnswer(sql(
+            """
+              |SELECT (
+              |  SELECT array_sort(a, (i, j) -> rank[i] - rank[j])[0] AS sorted
+              |  FROM (SELECT MAP('a', 1, 'b', 2) rank)
+              |) FROM t1
+              |""".stripMargin),
+            Row("a"))
+          // Lateral subquery.
+          checkAnswer(
+            sql("""
+              |SELECT sorted[0] FROM t1
+              |JOIN LATERAL (
+              |  SELECT array_sort(a, (i, j) -> rank[i] - rank[j]) AS sorted
+              |  FROM (SELECT MAP('a', 1, 'b', 2) rank)
+              |)
+              |""".stripMargin),
+            Row("a"))
+        }
+      }
     }
   }
 