From 55f98f75b2663e875f23fd02bb879246d403720a Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 18 Jul 2025 12:22:40 +0800 Subject: [PATCH 1/5] add default_null_ordering config --- datafusion/common/src/config.rs | 5 ++ .../core/src/execution/session_state.rs | 4 ++ datafusion/sql/src/expr/order_by.rs | 4 +- datafusion/sql/src/planner.rs | 58 +++++++++++++++++++ datafusion/sql/src/statement.rs | 4 +- datafusion/sql/tests/sql_integration.rs | 4 +- 6 files changed, 74 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 65ecd4032729..880d263fdd4b 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -259,6 +259,11 @@ config_namespace! { /// Specifies the recursion depth limit when parsing complex SQL Queries pub recursion_limit: usize, default = 50 + + /// Specifies the default null ordering for query results + /// By default, `asc_reverse` is used to follow Postgres's behavior. + /// postgres rule: https://www.postgresql.org/docs/current/queries-order.html + pub default_null_ordering: String, default = "asc_reverse".to_string() } } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index ad2b69f25abc..ad97dd32c40d 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -597,6 +597,10 @@ impl SessionState { .enable_options_value_normalization, support_varchar_with_length: sql_parser_options.support_varchar_with_length, collect_spans: sql_parser_options.collect_spans, + default_null_ordering: sql_parser_options + .default_null_ordering + .as_str() + .into(), } } diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index b7ed04326f40..e1d4e0ff434e 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -102,9 +102,7 @@ impl SqlToRel<'_, S> { expr_vec.push(Sort::new( expr, asc, - // When asc 
is true, by default nulls last to be consistent with postgres - // postgres rule: https://www.postgresql.org/docs/current/queries-order.html - nulls_first.unwrap_or(!asc), + nulls_first.unwrap_or(self.options.default_null_ordering.eval(asc)), )) } Ok(expr_vec) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index bc7c2b7f4377..d68286f55c57 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -17,6 +17,7 @@ //! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST) use std::collections::HashMap; +use std::str::FromStr; use std::sync::Arc; use std::vec; @@ -54,6 +55,8 @@ pub struct ParserOptions { pub enable_options_value_normalization: bool, /// Whether to collect spans pub collect_spans: bool, + /// Default null ordering for sorting expressions. + pub default_null_ordering: NullOrdering, } impl ParserOptions { @@ -74,6 +77,9 @@ impl ParserOptions { support_varchar_with_length: true, enable_options_value_normalization: false, collect_spans: false, + // By default, `asc_reverse` is used to follow Postgres's behavior. + // postgres rule: https://www.postgresql.org/docs/current/queries-order.html + default_null_ordering: NullOrdering::AscReverse, } } @@ -122,6 +128,12 @@ impl ParserOptions { self.collect_spans = value; self } + + /// Sets the `default_null_ordering` option. + pub fn with_default_null_ordering(mut self, value: NullOrdering) -> Self { + self.default_null_ordering = value; + self + } } impl Default for ParserOptions { @@ -139,10 +151,56 @@ impl From<&SqlParserOptions> for ParserOptions { enable_options_value_normalization: options .enable_options_value_normalization, collect_spans: options.collect_spans, + default_null_ordering: options.default_null_ordering.as_str().into(), + } + } +} + +/// Represents the null ordering for sorting expressions. +#[derive(Debug, Clone, Copy)] +pub enum NullOrdering { + /// Ascending order with nulls appearing last. 
+ AscReverse, + /// Descending order with nulls appearing last. + DescReverse, + /// Nulls appear first. + NullsFirst, + /// Nulls appear last. + NullsLast, +} + +impl NullOrdering { + /// Evaluates the null ordering based on the given ascending flag. + pub fn eval(&self, asc: bool) -> bool { + match self { + Self::AscReverse => !asc, + Self::DescReverse => asc, + Self::NullsFirst => true, + Self::NullsLast => false, } } } +impl FromStr for NullOrdering { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + match s { + "asc_reverse" => Ok(Self::AscReverse), + "desc_reverse" => Ok(Self::DescReverse), + "nulls_first" => Ok(Self::NullsFirst), + "nulls_last" => Ok(Self::NullsLast), + _ => plan_err!("Unknown null ordering: {s}"), + } + } +} + +impl From<&str> for NullOrdering { + fn from(s: &str) -> Self { + Self::from_str(s).unwrap_or(Self::AscReverse) + } +} + /// Ident Normalizer #[derive(Debug)] pub struct IdentNormalizer { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index fbe6d6501c86..df750ab3095d 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1345,7 +1345,9 @@ impl SqlToRel<'_, S> { ) .unwrap(); let asc = order_by_expr.asc.unwrap_or(true); - let nulls_first = order_by_expr.nulls_first.unwrap_or(!asc); + let nulls_first = order_by_expr + .nulls_first + .unwrap_or(self.options.default_null_ordering.eval(asc)); SortExpr::new(ordered_expr, asc, nulls_first) }) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 1df18302687e..e195e59afdaf 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -36,7 +36,7 @@ use datafusion_expr::{ use datafusion_functions::{string, unicode}; use datafusion_sql::{ parser::DFParser, - planner::{ParserOptions, SqlToRel}, + planner::{NullOrdering, ParserOptions, SqlToRel}, }; use crate::common::{CustomExprPlanner, CustomTypePlanner, MockSessionState}; @@ 
-94,6 +94,7 @@ fn parse_decimals() { support_varchar_with_length: false, enable_options_value_normalization: false, collect_spans: false, + default_null_ordering: NullOrdering::AscReverse, }, ); } @@ -150,6 +151,7 @@ fn parse_ident_normalization() { support_varchar_with_length: false, enable_options_value_normalization: false, collect_spans: false, + default_null_ordering: NullOrdering::AscReverse, }, ); if plan.is_ok() { From 2a45294eaad7cf48bfd751b016e877f4dabfe71f Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 18 Jul 2025 12:23:50 +0800 Subject: [PATCH 2/5] add test for different config --- .../test_files/information_schema.slt | 2 + datafusion/sqllogictest/test_files/order.slt | 68 +++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index b0538b5e6502..89769078d533 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -259,6 +259,7 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.collect_spans false +datafusion.sql_parser.default_null_ordering asc_reverse datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.enable_options_value_normalization false @@ -356,6 +357,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. 
When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. +datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: https://www.postgresql.org/docs/current/queries-order.html datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. 
diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 1b979cce4b0e..4c2a29607701 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -94,6 +94,74 @@ NULL three 1 one 2 two +statement ok +set datafusion.sql_parser.default_null_ordering = 'desc_reverse'; + +# test asc with `desc_reverse` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num +---- +NULL three +1 one +2 two + +# test desc with `desc_reverse` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC +---- +2 two +1 one +NULL three + +statement ok +set datafusion.sql_parser.default_null_ordering = 'nulls_first'; + +# test asc with `nulls_first` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num +---- +NULL three +1 one +2 two + +# test desc with `nulls_first` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC +---- +NULL three +2 two +1 one + + +statement ok +set datafusion.sql_parser.default_null_ordering = 'nulls_last'; + +# test asc with `nulls_last` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num +---- +1 one +2 two +NULL three + +# test desc with `nulls_last` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC +---- +2 two +1 one +NULL three + +# reset to default null ordering +statement ok +set datafusion.sql_parser.default_null_ordering = 'asc_reverse'; + # sort statement ok From ec9815d97c19ed05bad02ae24049586f567b7c02 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 18 Jul 2025 12:32:16 +0800 Subject: [PATCH 3/5] Update datafusion/sql/src/planner.rs 
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- datafusion/sql/src/planner.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index d68286f55c57..4df038cc1947 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -171,6 +171,10 @@ pub enum NullOrdering { impl NullOrdering { /// Evaluates the null ordering based on the given ascending flag. + /// + /// # Returns + /// * `true` if nulls should appear first. + /// * `false` if nulls should appear last. pub fn eval(&self, asc: bool) -> bool { match self { Self::AscReverse => !asc, From fd3f4dd69649d2eca119b16f38a9f9e776666c61 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 18 Jul 2025 13:16:12 +0800 Subject: [PATCH 4/5] update doc --- datafusion/common/src/config.rs | 2 +- docs/source/user-guide/configs.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 880d263fdd4b..810cfec21528 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -262,7 +262,7 @@ config_namespace! { /// Specifies the default null ordering for query results /// By default, `asc_reverse` is used to follow Postgres's behavior. - /// postgres rule: https://www.postgresql.org/docs/current/queries-order.html + /// postgres rule: <https://www.postgresql.org/docs/current/queries-order.html> pub default_null_ordering: String, default = "asc_reverse".to_string() } } diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 3f5fc53f1cc6..e3b4aac21eb4 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -129,3 +129,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. 
The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | | datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. | | datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries | +| datafusion.sql_parser.default_null_ordering | asc_reverse | Specifies the default null ordering for query results By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html> | From 64e014349892b99d47779af7d8a9986b90684c5b Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 18 Jul 2025 13:43:28 +0800 Subject: [PATCH 5/5] fix sqllogictest --- datafusion/sqllogictest/test_files/information_schema.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 89769078d533..570a85b1eead 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -357,7 +357,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. 
-datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: https://www.postgresql.org/docs/current/queries-order.html +datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html> datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.