Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,11 @@ config_namespace! {

/// Specifies the recursion depth limit when parsing complex SQL Queries
pub recursion_limit: usize, default = 50

/// Specifies the default null ordering for query results
/// By default, `asc_reverse` is used to follow Postgres's behavior.
/// postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
pub default_null_ordering: String, default = "asc_reverse".to_string()
}
}

Expand Down
4 changes: 4 additions & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,10 @@ impl SessionState {
.enable_options_value_normalization,
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
collect_spans: sql_parser_options.collect_spans,
default_null_ordering: sql_parser_options
.default_null_ordering
.as_str()
.into(),
}
}

Expand Down
4 changes: 1 addition & 3 deletions datafusion/sql/src/expr/order_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
expr_vec.push(Sort::new(
expr,
asc,
// When asc is true, by default nulls last to be consistent with postgres
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
nulls_first.unwrap_or(!asc),
nulls_first.unwrap_or(self.options.default_null_ordering.eval(asc)),
))
}
Ok(expr_vec)
Expand Down
62 changes: 62 additions & 0 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

//! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST)
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use std::vec;

Expand Down Expand Up @@ -54,6 +55,8 @@ pub struct ParserOptions {
pub enable_options_value_normalization: bool,
/// Whether to collect spans
pub collect_spans: bool,
/// Default null ordering for sorting expressions.
pub default_null_ordering: NullOrdering,
}

impl ParserOptions {
Expand All @@ -74,6 +77,9 @@ impl ParserOptions {
support_varchar_with_length: true,
enable_options_value_normalization: false,
collect_spans: false,
// By default, `asc_reverse` is used to follow Postgres's behavior.
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
default_null_ordering: NullOrdering::AscReverse,
}
}

Expand Down Expand Up @@ -122,6 +128,12 @@ impl ParserOptions {
self.collect_spans = value;
self
}

/// Sets the `default_null_ordering` option.
pub fn with_default_null_ordering(mut self, value: NullOrdering) -> Self {
self.default_null_ordering = value;
self
}
}

impl Default for ParserOptions {
Expand All @@ -139,10 +151,60 @@ impl From<&SqlParserOptions> for ParserOptions {
enable_options_value_normalization: options
.enable_options_value_normalization,
collect_spans: options.collect_spans,
default_null_ordering: options.default_null_ordering.as_str().into(),
}
}
}

/// Default placement of NULL values for sort expressions that do not state
/// `NULLS FIRST` / `NULLS LAST` explicitly.
///
/// A variant on its own is not a `nulls_first` flag: two of the variants
/// depend on the sort direction. Use [`NullOrdering::eval`] to resolve a
/// variant together with the direction into a concrete flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NullOrdering {
    /// Nulls appear last for ascending sorts and first for descending sorts.
    /// This follows Postgres's behavior.
    AscReverse,
    /// Nulls appear first for ascending sorts and last for descending sorts.
    DescReverse,
    /// Nulls appear first, regardless of the sort direction.
    NullsFirst,
    /// Nulls appear last, regardless of the sort direction.
    NullsLast,
}

impl NullOrdering {
    /// Resolves this ordering into a concrete `nulls_first` flag for a sort
    /// with the given direction.
    ///
    /// `asc` is `true` for an ascending sort and `false` for a descending one.
    ///
    /// # Returns
    /// * `true` if nulls should appear first.
    /// * `false` if nulls should appear last.
    pub fn eval(&self, asc: bool) -> bool {
        match *self {
            // Fixed placements: the sort direction is irrelevant.
            Self::NullsFirst => true,
            Self::NullsLast => false,
            // Direction-dependent placements.
            Self::DescReverse => asc,
            Self::AscReverse => !asc,
        }
    }
}

impl FromStr for NullOrdering {
    type Err = DataFusionError;

    /// Parses the textual spelling of a null ordering.
    ///
    /// Recognized inputs are `asc_reverse`, `desc_reverse`, `nulls_first`
    /// and `nulls_last`. The comparison is exact: no trimming or case
    /// folding is applied.
    ///
    /// # Errors
    /// Any other input yields the error built by `plan_err!`.
    fn from_str(s: &str) -> Result<Self> {
        let ordering = match s {
            "asc_reverse" => Self::AscReverse,
            "desc_reverse" => Self::DescReverse,
            "nulls_first" => Self::NullsFirst,
            "nulls_last" => Self::NullsLast,
            _ => return plan_err!("Unknown null ordering: {s}"),
        };
        Ok(ordering)
    }
}

impl From<&str> for NullOrdering {
fn from(s: &str) -> Self {
Self::from_str(s).unwrap_or(Self::AscReverse)
}
}

/// Ident Normalizer
#[derive(Debug)]
pub struct IdentNormalizer {
Expand Down
4 changes: 3 additions & 1 deletion datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1345,7 +1345,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
)
.unwrap();
let asc = order_by_expr.asc.unwrap_or(true);
let nulls_first = order_by_expr.nulls_first.unwrap_or(!asc);
let nulls_first = order_by_expr
.nulls_first
.unwrap_or(self.options.default_null_ordering.eval(asc));

SortExpr::new(ordered_expr, asc, nulls_first)
})
Expand Down
4 changes: 3 additions & 1 deletion datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use datafusion_expr::{
use datafusion_functions::{string, unicode};
use datafusion_sql::{
parser::DFParser,
planner::{ParserOptions, SqlToRel},
planner::{NullOrdering, ParserOptions, SqlToRel},
};

use crate::common::{CustomExprPlanner, CustomTypePlanner, MockSessionState};
Expand Down Expand Up @@ -94,6 +94,7 @@ fn parse_decimals() {
support_varchar_with_length: false,
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::AscReverse,
},
);
}
Expand Down Expand Up @@ -150,6 +151,7 @@ fn parse_ident_normalization() {
support_varchar_with_length: false,
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::AscReverse,
},
);
if plan.is_ok() {
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ datafusion.optimizer.repartition_windows true
datafusion.optimizer.skip_failed_rules false
datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.collect_spans false
datafusion.sql_parser.default_null_ordering asc_reverse
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
Expand Down Expand Up @@ -356,6 +357,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data
datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes.
datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
Expand Down
68 changes: 68 additions & 0 deletions datafusion/sqllogictest/test_files/order.slt
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,74 @@ NULL three
1 one
2 two

statement ok
set datafusion.sql_parser.default_null_ordering = 'desc_reverse';

# test asc with `desc_reverse` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
----
NULL three
1 one
2 two

# test desc with `desc_reverse` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
----
2 two
1 one
NULL three

statement ok
set datafusion.sql_parser.default_null_ordering = 'nulls_first';

# test asc with `nulls_first` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
----
NULL three
1 one
2 two

# test desc with `nulls_first` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
----
NULL three
2 two
1 one


statement ok
set datafusion.sql_parser.default_null_ordering = 'nulls_last';

# test asc with `nulls_last` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
----
1 one
2 two
NULL three

# test desc with `nulls_last` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
----
2 two
1 one
NULL three

# reset to default null ordering
statement ok
set datafusion.sql_parser.default_null_ordering = 'asc_reverse';

# sort

statement ok
Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. |
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |
| datafusion.sql_parser.default_null_ordering | asc_reverse | Specifies the default null ordering for query results. Accepted values are `asc_reverse`, `desc_reverse`, `nulls_first` and `nulls_last`. By default, `asc_reverse` is used to follow Postgres's behavior. Postgres rule: <https://www.postgresql.org/docs/current/queries-order.html> |
Loading