From e2673da67495a14eb4b5cccbbe157060a76c9053 Mon Sep 17 00:00:00 2001 From: Dima Date: Thu, 19 Jun 2025 13:09:09 +0100 Subject: [PATCH 01/11] chore(deps): Update sqlparser to 0.56.0 --- Cargo.lock | 4 +- Cargo.toml | 2 +- datafusion/sql/src/expr/mod.rs | 1 + datafusion/sql/src/expr/subquery.rs | 17 +++- datafusion/sql/src/expr/substring.rs | 21 ++-- datafusion/sql/src/planner.rs | 6 ++ datafusion/sql/src/query.rs | 61 +++++++++--- datafusion/sql/src/statement.rs | 144 ++++++++++++++------------- datafusion/sql/src/unparser/ast.rs | 28 ++---- datafusion/sql/src/unparser/expr.rs | 2 +- datafusion/sql/src/unparser/plan.rs | 38 +++++-- 11 files changed, 194 insertions(+), 130 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2e593e62528..ac37b60cf40e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5953,9 +5953,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.55.0" +version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" +checksum = "e68feb51ffa54fc841e086f58da543facfe3d7ae2a60d69b0a8cbbd30d16ae8d" dependencies = [ "log", "recursive", diff --git a/Cargo.toml b/Cargo.toml index f2cd6f72c7e6..75ac1edbc53a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -167,7 +167,7 @@ recursive = "0.1.1" regex = "1.8" rstest = "0.25.0" serde_json = "1" -sqlparser = { version = "0.55.0", default-features = false, features = ["std", "visitor"] } +sqlparser = { version = "0.56.0", default-features = false, features = ["std", "visitor"] } tempfile = "3" tokio = { version = "1.45", features = ["macros", "rt", "sync"] } url = "2.5.4" diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index e92869873731..e6886127c30f 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -446,6 +446,7 @@ impl SqlToRel<'_, S> { substring_from, substring_for, special: _, + shorthand: _, } => self.sql_substring_to_expr( expr, substring_from, diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 602d39233d58..6bc906c2e979 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -49,7 +49,7 @@ impl SqlToRel<'_, S> { pub(super) fn parse_in_subquery( &self, expr: SQLExpr, - subquery: Query, + subquery: SetExpr, negated: bool, input_schema: &DFSchema, planner_context: &mut PlannerContext, @@ -58,7 +58,7 @@ impl SqlToRel<'_, S> { planner_context.set_outer_query_schema(Some(input_schema.clone().into())); let mut spans = Spans::new(); - if let SetExpr::Select(select) = subquery.body.as_ref() { + if let SetExpr::Select(select) = &subquery { for item in &select.projection { if let SelectItem::UnnamedExpr(SQLExpr::Identifier(ident)) = item { if let Some(span) = Span::try_from_sqlparser_span(ident.span) { @@ -68,7 +68,18 @@ impl SqlToRel<'_, S> { } } - let sub_plan = self.query_to_plan(subquery, planner_context)?; + let query = Query { + with: None, + body: Box::new(subquery), + order_by: None, + limit_clause: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + }; + let sub_plan = self.query_to_plan(query, planner_context)?; let outer_ref_columns = sub_plan.all_out_ref_exprs(); planner_context.set_outer_query_schema(old_outer_query_schema); diff --git a/datafusion/sql/src/expr/substring.rs b/datafusion/sql/src/expr/substring.rs index 8f6e77e035c1..41bf3e872eac 100644 --- a/datafusion/sql/src/expr/substring.rs +++ b/datafusion/sql/src/expr/substring.rs @@ -16,10 +16,10 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, plan_err}; +use datafusion_common::{internal_datafusion_err, plan_err}; use datafusion_common::{DFSchema, Result, ScalarValue}; -use datafusion_expr::planner::PlannerResult; -use datafusion_expr::Expr; +use datafusion_expr::{expr::ScalarFunction, planner::PlannerResult, Expr}; + use sqlparser::ast::Expr as SQLExpr; impl SqlToRel<'_, S> { @@ -62,6 +62,7 @@ impl SqlToRel<'_, S> { substring_from: None, substring_for: None, special: false, + shorthand: false, }; return plan_err!("Substring without for/from is not valid {orig_sql:?}"); @@ -77,8 +78,16 @@ impl SqlToRel<'_, S> { } } - not_impl_err!( - "Substring not supported by UserDefinedExtensionPlanners: {substring_args:?}" - ) + let fun = self + .context_provider + .get_function_meta("substr") + .ok_or_else(|| { + internal_datafusion_err!("Unable to find expected 'substr' function") + })?; + + Ok(Expr::ScalarFunction(ScalarFunction::new_udf( + fun, + substring_args, + ))) } } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 03396822eca8..b77831053770 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -739,6 +739,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::AnyType | SQLDataType::Table(_) | SQLDataType::VarBit(_) + | SQLDataType::UTinyInt + | SQLDataType::USmallInt + | SQLDataType::HugeInt + | SQLDataType::UHugeInt + | SQLDataType::UBigInt + | SQLDataType::TimestampNtz | SQLDataType::GeometricType(_) => { not_impl_err!("Unsupported SQL type {sql_type:?}") } diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index 2ea2299c1fcf..c06147e08f87 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -27,8 +27,8 @@ use datafusion_expr::{ CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan, LogicalPlanBuilder, }; use sqlparser::ast::{ - Expr as SQLExpr, Ident, Offset as SQLOffset, OrderBy, OrderByExpr, OrderByKind, - Query, SelectInto, SetExpr, + Expr as SQLExpr, Ident, LimitClause, OrderBy, OrderByExpr, OrderByKind, Query, + SelectInto, SetExpr, }; use sqlparser::tokenizer::Span; @@ -54,8 +54,7 @@ impl SqlToRel<'_, S> { let select_into = select.into.take(); let plan = self.select_to_plan(*select, query.order_by, planner_context)?; - let plan = - self.limit(plan, query.offset, query.limit, planner_context)?; + let plan = self.limit(plan, query.limit_clause, planner_context)?; // Process the `SELECT INTO` after `LIMIT`. self.select_into(plan, select_into) } @@ -77,7 +76,7 @@ impl SqlToRel<'_, S> { None, )?; let plan = self.order_by(plan, order_by_rex)?; - self.limit(plan, query.offset, query.limit, planner_context) + self.limit(plan, query.limit_clause, planner_context) } } } @@ -86,23 +85,53 @@ impl SqlToRel<'_, S> { fn limit( &self, input: LogicalPlan, - skip: Option, - fetch: Option, + limit_clause: Option, planner_context: &mut PlannerContext, ) -> Result { - if skip.is_none() && fetch.is_none() { + let Some(limit_clause) = limit_clause else { return Ok(input); - } + }; - // skip and fetch expressions are not allowed to reference columns from the input plan let empty_schema = DFSchema::empty(); - let skip = skip - .map(|o| self.sql_to_expr(o.value, &empty_schema, planner_context)) - .transpose()?; - let fetch = fetch - .map(|e| self.sql_to_expr(e, &empty_schema, planner_context)) - .transpose()?; + let (skip, fetch, limit_by_exprs) = match limit_clause { + LimitClause::LimitOffset { + limit, + offset, + limit_by, + } => { + let skip = offset + .map(|o| self.sql_to_expr(o.value, &empty_schema, planner_context)) + .transpose()?; + + let fetch = limit + .map(|e| self.sql_to_expr(e, &empty_schema, planner_context)) + .transpose()?; + + let limit_by_exprs = limit_by + .into_iter() + .map(|e| self.sql_to_expr(e, &empty_schema, planner_context)) + .collect::>>()?; + + (skip, fetch, limit_by_exprs) + } + LimitClause::OffsetCommaLimit { offset, limit } => { + let skip = + Some(self.sql_to_expr(offset, &empty_schema, planner_context)?); + let fetch = + Some(self.sql_to_expr(limit, &empty_schema, planner_context)?); + (skip, fetch, vec![]) + } + }; + + if !limit_by_exprs.is_empty() { + return not_impl_err!("LIMIT BY clause is not supported yet"); + } + + if skip.is_none() && fetch.is_none() { + return Ok(input); + } + LogicalPlanBuilder::from(input) .limit_by_expr(skip, fetch)? .build() diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index dafb0346485e..b29cf3769771 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -55,16 +55,16 @@ use datafusion_expr::{ Volatility, WriteOp, }; use sqlparser::ast::{ - self, BeginTransactionKind, NullsDistinctOption, ShowStatementIn, - ShowStatementOptions, SqliteOnConflict, TableObject, UpdateTableFromKind, - ValueWithSpan, + self, BeginTransactionKind, IndexType, NullsDistinctOption, OrderByExpr, Set, + ShowStatementIn, ShowStatementOptions, SqliteOnConflict, TableObject, + UpdateTableFromKind, ValueWithSpan, }; use sqlparser::ast::{ Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert, - ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr, - ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, - TableWithJoins, TransactionMode, UnaryOperator, Value, + ObjectName, ObjectType, Query, SchemaName, SetExpr, ShowCreateObject, + ShowStatementFilter, Statement, TableConstraint, TableFactor, TableWithJoins, + TransactionMode, UnaryOperator, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -233,13 +233,7 @@ impl SqlToRel<'_, S> { } Statement::Query(query) => self.query_to_plan(*query, planner_context), Statement::ShowVariable { variable } => self.show_variable_to_plan(&variable), - Statement::SetVariable { - local, - hivevar, - variables, - value, - } => self.set_variable_to_plan(local, hivevar, &variables, value), - + Statement::Set(statement) => self.set_statement_to_plan(statement), Statement::CreateTable(CreateTable { temporary, external, @@ -290,6 +284,7 @@ impl SqlToRel<'_, S> { catalog, catalog_sync, storage_serialization_policy, + inherits, }) if table_properties.is_empty() && with_options.is_empty() => { if temporary { return not_impl_err!("Temporary tables not supported")?; @@ -428,6 +423,9 @@ impl SqlToRel<'_, S> { if storage_serialization_policy.is_some() { return not_impl_err!("Storage serialization policy not supported")?; } + if inherits.is_some() { + return not_impl_err!("Table inheritance not supported")?; + } // Merge inline constraints and existing constraints let mut all_constraints = constraints; @@ -451,10 +449,10 @@ impl SqlToRel<'_, S> { let plan = if has_columns { if schema.fields().len() != input_schema.fields().len() { return plan_err!( - "Mismatch: {} columns specified, but result has {} columns", - schema.fields().len(), - input_schema.fields().len() - ); + "Mismatch: {} columns specified, but result has {} columns", + schema.fields().len(), + input_schema.fields().len() + ); } let input_fields = input_schema.fields(); let project_exprs = schema @@ -534,6 +532,7 @@ impl SqlToRel<'_, S> { temporary, to, params, + or_alter, } => { if materialized { return not_impl_err!("Materialized views not supported")?; @@ -570,6 +569,7 @@ impl SqlToRel<'_, S> { temporary, to, params, + or_alter, }; let sql = stmt.to_string(); let Statement::CreateView { @@ -617,6 +617,7 @@ impl SqlToRel<'_, S> { Statement::CreateSchema { schema_name, if_not_exists, + .. } => Ok(LogicalPlan::Ddl(DdlStatement::CreateCatalogSchema( CreateCatalogSchema { schema_name: get_schema_name(&schema_name), @@ -1182,6 +1183,11 @@ impl SqlToRel<'_, S> { ast::CreateFunctionBody::AsBeforeOptions(expr) => expr, ast::CreateFunctionBody::AsAfterOptions(expr) => expr, ast::CreateFunctionBody::Return(expr) => expr, + ast::CreateFunctionBody::AsBeginEnd(_) => { + return not_impl_err!( + "Qualified functions (AsBeginEnd) are not supported" + )?; + } }, &DFSchema::empty(), &mut planner_context, @@ -1251,9 +1257,15 @@ impl SqlToRel<'_, S> { .get_table_source(table.clone())? .schema() .to_dfschema_ref()?; - let using: Option = using.as_ref().map(ident_to_string); + let using: Option = + using.as_ref().map(|index_type| match index_type { + IndexType::Custom(ident) => ident_to_string(ident), + _ => index_type.to_string().to_ascii_lowercase(), + }); + let order_by_exprs: Vec = + columns.into_iter().map(|col| col.column).collect(); let columns = self.order_by_to_sort_expr( - columns, + order_by_exprs, &table_schema, planner_context, false, @@ -1739,64 +1751,56 @@ impl SqlToRel<'_, S> { self.statement_to_plan(rewrite.pop_front().unwrap()) } - fn set_variable_to_plan( - &self, - local: bool, - hivevar: bool, - variables: &OneOrManyWithParens, - value: Vec, - ) -> Result { - if local { - return not_impl_err!("LOCAL is not supported"); - } - - if hivevar { - return not_impl_err!("HIVEVAR is not supported"); - } + fn set_statement_to_plan(&self, statement: Set) -> Result { + match statement { + Set::SingleAssignment { + scope, + hivevar, + variable, + values, + } => { + if scope.is_some() { + return not_impl_err!("SET with scope modifiers is not supported"); + } - let variable = match variables { - OneOrManyWithParens::One(v) => object_name_to_string(v), - OneOrManyWithParens::Many(vs) => { - return not_impl_err!( - "SET only supports single variable assignment: {vs:?}" - ); - } - }; - let mut variable_lower = variable.to_lowercase(); + if hivevar { + return not_impl_err!("SET HIVEVAR is not supported"); + } - if variable_lower == "timezone" || variable_lower == "time.zone" { - // We could introduce alias in OptionDefinition if this string matching thing grows - variable_lower = "datafusion.execution.time_zone".to_string(); - } + let variable = object_name_to_string(&variable); + let mut variable_lower = variable.to_lowercase(); - // Parse value string from Expr - let value_string = match &value[0] { - SQLExpr::Identifier(i) => ident_to_string(i), - SQLExpr::Value(v) => match crate::utils::value_to_string(&v.value) { - None => { - return plan_err!("Unsupported Value {}", value[0]); + if variable_lower == "timezone" || variable_lower == "time.zone" { + variable_lower = "datafusion.execution.time_zone".to_string(); } - Some(v) => v, - }, - // For capture signed number e.g. +8, -8 - SQLExpr::UnaryOp { op, expr } => match op { - UnaryOperator::Plus => format!("+{expr}"), - UnaryOperator::Minus => format!("-{expr}"), - _ => { - return plan_err!("Unsupported Value {}", value[0]); + + if values.len() != 1 { + return plan_err!("SET only supports single value assignment"); } - }, - _ => { - return plan_err!("Unsupported Value {}", value[0]); - } - }; - let statement = PlanStatement::SetVariable(SetVariable { - variable: variable_lower, - value: value_string, - }); + let value_string = match &values[0] { + SQLExpr::Identifier(i) => ident_to_string(i), + SQLExpr::Value(v) => match crate::utils::value_to_string(&v.value) { + None => return plan_err!("Unsupported value {:?}", v), + Some(s) => s, + }, + SQLExpr::UnaryOp { op, expr } => match op { + UnaryOperator::Plus => format!("+{expr}"), + UnaryOperator::Minus => format!("-{expr}"), + _ => return plan_err!("Unsupported unary op {:?}", op), + }, + _ => return plan_err!("Unsupported expr {:?}", values[0]), + }; - Ok(LogicalPlan::Statement(statement)) + Ok(LogicalPlan::Statement(PlanStatement::SetVariable( + SetVariable { + variable: variable_lower, + value: value_string, + }, + ))) + } + other => not_impl_err!("SET variant not implemented yet: {other:?}"), + } } fn delete_to_plan( diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index d9ade822aa00..cb94a539c989 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -19,16 +19,16 @@ use core::fmt; use std::ops::ControlFlow; use sqlparser::ast::helpers::attached_token::AttachedToken; -use sqlparser::ast::{self, visit_expressions_mut, OrderByKind, SelectFlavor}; +use sqlparser::ast::{ + self, visit_expressions_mut, LimitClause, OrderByKind, SelectFlavor, +}; #[derive(Clone)] pub struct QueryBuilder { with: Option, body: Option>, order_by_kind: Option, - limit: Option, - limit_by: Vec, - offset: Option, + limit_clause: Option, fetch: Option, locks: Vec, for_clause: Option, @@ -53,16 +53,8 @@ impl QueryBuilder { self.order_by_kind = Some(value); self } - pub fn limit(&mut self, value: Option) -> &mut Self { - self.limit = value; - self - } - pub fn limit_by(&mut self, value: Vec) -> &mut Self { - self.limit_by = value; - self - } - pub fn offset(&mut self, value: Option) -> &mut Self { - self.offset = value; + pub fn limit_clause(&mut self, value: LimitClause) -> &mut Self { + self.limit_clause = Some(value); self } pub fn fetch(&mut self, value: Option) -> &mut Self { @@ -100,9 +92,7 @@ impl QueryBuilder { None => return Err(Into::into(UninitializedFieldError::from("body"))), }, order_by, - limit: self.limit.clone(), - limit_by: self.limit_by.clone(), - offset: self.offset.clone(), + limit_clause: self.limit_clause.clone(), fetch: self.fetch.clone(), locks: self.locks.clone(), for_clause: self.for_clause.clone(), @@ -115,9 +105,7 @@ impl QueryBuilder { with: Default::default(), body: Default::default(), order_by_kind: Default::default(), - limit: Default::default(), - limit_by: Default::default(), - offset: Default::default(), + limit_clause: Default::default(), fetch: Default::default(), locks: Default::default(), for_clause: Default::default(), diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index cce14894acaf..6cba534aa400 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -378,7 +378,7 @@ impl Unparser<'_> { }; Ok(ast::Expr::InSubquery { expr: inexpr, - subquery: sub_query, + subquery: sub_query.body, negated: insubq.negated, }) } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index d9f9767ba9e4..7a364c8c9973 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -49,7 +49,9 @@ use datafusion_expr::{ LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, UserDefinedLogicalNode, }; -use sqlparser::ast::{self, Ident, OrderByKind, SetExpr, TableAliasColumnDef}; +use sqlparser::ast::{ + self, Ident, LimitClause, OrderByKind, SetExpr, TableAliasColumnDef, +}; use std::{sync::Arc, vec}; /// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`] @@ -457,7 +459,12 @@ impl Unparser<'_> { "Limit operator only valid in a statement context." ); }; - query.limit(Some(self.expr_to_sql(fetch)?)); + let limit_clause = LimitClause::LimitOffset { + limit: Some(self.expr_to_sql(fetch)?), + offset: None, + limit_by: vec![], + }; + query.limit_clause(limit_clause); } if let Some(skip) = &limit.skip { @@ -466,10 +473,14 @@ impl Unparser<'_> { "Offset operator only valid in a statement context." ); }; - query.offset(Some(ast::Offset { - rows: ast::OffsetRows::None, - value: self.expr_to_sql(skip)?, - })); + query.limit_clause(LimitClause::LimitOffset { + limit: None, + offset: Some(ast::Offset { + rows: ast::OffsetRows::None, + value: self.expr_to_sql(skip)?, + }), + limit_by: vec![], + }); } self.select_to_sql_recursively( @@ -497,10 +508,15 @@ impl Unparser<'_> { }; if let Some(fetch) = sort.fetch { - query_ref.limit(Some(ast::Expr::value(ast::Value::Number( - fetch.to_string(), - false, - )))); + let limit_clause = LimitClause::LimitOffset { + limit: Some(ast::Expr::value(ast::Value::Number( + fetch.to_string(), + false, + ))), + offset: None, + limit_by: vec![], + }; + query_ref.limit_clause(limit_clause); }; let agg = find_agg_node_within_select(plan, select.already_projected()); @@ -799,7 +815,7 @@ impl Unparser<'_> { let projection = left_projection .into_iter() - .chain(right_projection.into_iter()) + .chain(right_projection) .collect(); select.projection(projection); } From 7bda63c0a702c5ba67f811e86c5c093d85b5195a Mon Sep 17 00:00:00 2001 From: Dima Date: Fri, 20 Jun 2025 10:50:04 +0100 Subject: [PATCH 02/11] chore(deps): minor fixes --- datafusion/sql/src/expr/subquery.rs | 13 +------ datafusion/sql/src/unparser/ast.rs | 26 ++++++++++--- datafusion/sql/src/unparser/plan.rs | 37 ++++++------------- datafusion/sqllogictest/test_files/errors.slt | 2 +- 4 files changed, 35 insertions(+), 43 deletions(-) diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index 6bc906c2e979..f1ba943d66d0 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -16,6 +16,7 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; +use crate::unparser::ast::QueryBuilder; use datafusion_common::{plan_err, DFSchema, Diagnostic, Result, Span, Spans}; use datafusion_expr::expr::{Exists, InSubquery}; use datafusion_expr::{Expr, LogicalPlan, Subquery}; @@ -68,17 +69,7 @@ impl SqlToRel<'_, S> { } } - let query = Query { - with: None, - body: Box::new(subquery), - order_by: None, - limit_clause: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - }; + let query = QueryBuilder::default().body(Box::new(subquery)).build()?; let sub_plan = self.query_to_plan(query, planner_context)?; let outer_ref_columns = sub_plan.all_out_ref_exprs(); planner_context.set_outer_query_schema(old_outer_query_schema); diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index cb94a539c989..358583e5708f 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -28,7 +28,9 @@ pub struct QueryBuilder { with: Option, body: Option>, order_by_kind: Option, - limit_clause: Option, + limit: Option, + limit_by: Vec, + offset: Option, fetch: Option, locks: Vec, for_clause: Option, @@ -53,8 +55,16 @@ impl QueryBuilder { self.order_by_kind = Some(value); self } - pub fn limit_clause(&mut self, value: LimitClause) -> &mut Self { - self.limit_clause = Some(value); + pub fn limit(&mut self, value: Option) -> &mut Self { + self.limit = value; + self + } + pub fn limit_by(&mut self, value: Vec) -> &mut Self { + self.limit_by = value; + self + } + pub fn offset(&mut self, value: Option) -> &mut Self { + self.offset = value; self } pub fn fetch(&mut self, value: Option) -> &mut Self { @@ -92,7 +102,11 @@ impl QueryBuilder { None => return Err(Into::into(UninitializedFieldError::from("body"))), }, order_by, - limit_clause: self.limit_clause.clone(), + limit_clause: Some(LimitClause::LimitOffset { + limit: self.limit.clone(), + offset: self.offset.clone(), + limit_by: self.limit_by.clone(), + }), fetch: self.fetch.clone(), locks: self.locks.clone(), for_clause: self.for_clause.clone(), @@ -105,7 +119,9 @@ impl QueryBuilder { with: Default::default(), body: Default::default(), order_by_kind: Default::default(), - limit_clause: Default::default(), + limit: Default::default(), + limit_by: Default::default(), + offset: Default::default(), fetch: Default::default(), locks: Default::default(), for_clause: Default::default(), diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 7a364c8c9973..21850302458e 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -49,9 +49,7 @@ use datafusion_expr::{ LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, UserDefinedLogicalNode, }; -use sqlparser::ast::{ - self, Ident, LimitClause, OrderByKind, SetExpr, TableAliasColumnDef, -}; +use sqlparser::ast::{self, Ident, OrderByKind, SetExpr, TableAliasColumnDef}; use std::{sync::Arc, vec}; /// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`] @@ -459,12 +457,7 @@ impl Unparser<'_> { "Limit operator only valid in a statement context." ); }; - let limit_clause = LimitClause::LimitOffset { - limit: Some(self.expr_to_sql(fetch)?), - offset: None, - limit_by: vec![], - }; - query.limit_clause(limit_clause); + query.limit(Some(self.expr_to_sql(fetch)?)); } if let Some(skip) = &limit.skip { @@ -473,14 +466,11 @@ impl Unparser<'_> { "Offset operator only valid in a statement context." ); }; - query.limit_clause(LimitClause::LimitOffset { - limit: None, - offset: Some(ast::Offset { - rows: ast::OffsetRows::None, - value: self.expr_to_sql(skip)?, - }), - limit_by: vec![], - }); + + query.offset(Some(ast::Offset { + rows: ast::OffsetRows::None, + value: self.expr_to_sql(skip)?, + })); } self.select_to_sql_recursively( @@ -508,15 +498,10 @@ impl Unparser<'_> { }; if let Some(fetch) = sort.fetch { - let limit_clause = LimitClause::LimitOffset { - limit: Some(ast::Expr::value(ast::Value::Number( - fetch.to_string(), - false, - ))), - offset: None, - limit_by: vec![], - }; - query_ref.limit_clause(limit_clause); + query_ref.limit(Some(ast::Expr::value(ast::Value::Number( + fetch.to_string(), + false, + )))); }; let agg = find_agg_node_within_select(plan, select.already_projected()); diff --git a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index dc7a53adf889..954ed0f849f2 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -148,7 +148,7 @@ SELECT query error DataFusion error: Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type create table foo as values (1), ('foo'); -query error user-defined coercion failed +query error DataFusion error: Error during planning: Substring without for/from is not valid select 1 group by substr(''); # Error in filter should be reported From 9121a58b7604c6bb1b81fcaf4a8e832df6df0879 Mon Sep 17 00:00:00 2001 From: Dima Date: Sat, 28 Jun 2025 19:49:21 +0100 Subject: [PATCH 03/11] chore(deps) refactorings --- datafusion/sql/src/expr/subquery.rs | 8 +++----- datafusion/sql/src/unparser/expr.rs | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/datafusion/sql/src/expr/subquery.rs b/datafusion/sql/src/expr/subquery.rs index f1ba943d66d0..24bb813634cc 100644 --- a/datafusion/sql/src/expr/subquery.rs +++ b/datafusion/sql/src/expr/subquery.rs @@ -16,7 +16,6 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use crate::unparser::ast::QueryBuilder; use datafusion_common::{plan_err, DFSchema, Diagnostic, Result, Span, Spans}; use datafusion_expr::expr::{Exists, InSubquery}; use datafusion_expr::{Expr, LogicalPlan, Subquery}; @@ -50,7 +49,7 @@ impl SqlToRel<'_, S> { pub(super) fn parse_in_subquery( &self, expr: SQLExpr, - subquery: SetExpr, + subquery: Query, negated: bool, input_schema: &DFSchema, planner_context: &mut PlannerContext, @@ -59,7 +58,7 @@ impl SqlToRel<'_, S> { planner_context.set_outer_query_schema(Some(input_schema.clone().into())); let mut spans = Spans::new(); - if let SetExpr::Select(select) = &subquery { + if let SetExpr::Select(select) = &subquery.body.as_ref() { for item in &select.projection { if let SelectItem::UnnamedExpr(SQLExpr::Identifier(ident)) = item { if let Some(span) = Span::try_from_sqlparser_span(ident.span) { @@ -69,8 +68,7 @@ impl SqlToRel<'_, S> { } } - let query = QueryBuilder::default().body(Box::new(subquery)).build()?; - let sub_plan = self.query_to_plan(query, planner_context)?; + let sub_plan = self.query_to_plan(subquery, planner_context)?; let outer_ref_columns = sub_plan.all_out_ref_exprs(); planner_context.set_outer_query_schema(old_outer_query_schema); diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 6cba534aa400..cce14894acaf 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -378,7 +378,7 @@ impl Unparser<'_> { }; Ok(ast::Expr::InSubquery { expr: inexpr, - subquery: sub_query.body, + subquery: sub_query, negated: insubq.negated, }) } From bf7ef2e4e2afc1f8f465eff5da38f909a8588c67 Mon Sep 17 00:00:00 2001 From: Dima Date: Wed, 2 Jul 2025 22:44:42 +0100 Subject: [PATCH 04/11] chore(deps) temporary point to unrealised v0.56.1 --- Cargo.lock | 6 ++---- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ac37b60cf40e..7f6465905c9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5954,8 +5954,7 @@ dependencies = [ [[package]] name = "sqlparser" version = "0.56.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e68feb51ffa54fc841e086f58da543facfe3d7ae2a60d69b0a8cbbd30d16ae8d" +source = "git+https://github.com/Dimchikkk/datafusion-sqlparser-rs.git?branch=v0.56.1#14d02dd0a58a12781209675b98fd05c3f3cb8ad3" dependencies = [ "log", "recursive", @@ -5965,8 +5964,7 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +source = "git+https://github.com/Dimchikkk/datafusion-sqlparser-rs.git?branch=v0.56.1#14d02dd0a58a12781209675b98fd05c3f3cb8ad3" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 75ac1edbc53a..9f0fcd4ced40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -167,7 +167,7 @@ recursive = "0.1.1" regex = "1.8" rstest = "0.25.0" serde_json = "1" -sqlparser = { version = "0.56.0", default-features = false, features = ["std", "visitor"] } +sqlparser = { git = "https://github.com/Dimchikkk/datafusion-sqlparser-rs.git", branch = "v0.56.1", default-features = false, features = ["std", "visitor"] } tempfile = "3" tokio = { version = "1.45", features = ["macros", "rt", "sync"] } url = "2.5.4" From e77da71e05e1438424e76201d121c911e407a0e7 Mon Sep 17 00:00:00 2001 From: Dima Date: Wed, 2 Jul 2025 23:01:09 +0100 Subject: [PATCH 05/11] chore(deps) fix Cargo.toml formatting --- Cargo.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9f0fcd4ced40..60402e967e38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -167,7 +167,10 @@ recursive = "0.1.1" regex = "1.8" rstest = "0.25.0" serde_json = "1" -sqlparser = { git = "https://github.com/Dimchikkk/datafusion-sqlparser-rs.git", branch = "v0.56.1", default-features = false, features = ["std", "visitor"] } +sqlparser = { git = "https://github.com/Dimchikkk/datafusion-sqlparser-rs.git", branch = "v0.56.1", default-features = false, features = [ + "std", + "visitor", +] } tempfile = "3" tokio = { version = "1.45", features = ["macros", "rt", "sync"] } url = "2.5.4" From 21640bdfd289be1b243e6a939ffed1d0132a92b0 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Mon, 18 Aug 2025 13:37:09 +0900 Subject: [PATCH 06/11] Update sqlparser to 0.58.0 --- Cargo.lock | 8 ++- Cargo.toml | 5 +- datafusion/sql/src/expr/mod.rs | 30 +++++----- datafusion/sql/src/planner.rs | 3 + datafusion/sql/src/statement.rs | 93 +++++++++++++++++++---------- datafusion/sql/src/unparser/ast.rs | 6 +- datafusion/sql/src/unparser/expr.rs | 10 +++- 7 files changed, 97 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7f6465905c9e..934f599f49c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5953,8 +5953,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.56.0" -source = "git+https://github.com/Dimchikkk/datafusion-sqlparser-rs.git?branch=v0.56.1#14d02dd0a58a12781209675b98fd05c3f3cb8ad3" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" dependencies = [ "log", "recursive", @@ -5964,7 +5965,8 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.3.0" -source = "git+https://github.com/Dimchikkk/datafusion-sqlparser-rs.git?branch=v0.56.1#14d02dd0a58a12781209675b98fd05c3f3cb8ad3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 60402e967e38..ca5997ab7901 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -167,10 +167,7 @@ recursive = "0.1.1" regex = "1.8" rstest = "0.25.0" serde_json = "1" -sqlparser = { git = "https://github.com/Dimchikkk/datafusion-sqlparser-rs.git", branch = "v0.56.1", default-features = false, features = [ - "std", - "visitor", -] } +sqlparser = { version = "0.58.0", default-features = false, features = ["std", "visitor"] } tempfile = "3" tokio = { version = "1.45", features = ["macros", "rt", "sync"] } url = "2.5.4" diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index e6886127c30f..b7cd5b882167 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -254,6 +254,8 @@ impl SqlToRel<'_, S> { operand, conditions, else_result, + case_token: _, + end_token: _, } => self.sql_case_identifier_to_expr( operand, conditions, @@ -814,7 +816,7 @@ impl SqlToRel<'_, S> { negated: bool, expr: SQLExpr, pattern: SQLExpr, - escape_char: Option, + escape_char: Option, schema: &DFSchema, planner_context: &mut PlannerContext, case_insensitive: bool, @@ -824,13 +826,12 @@ impl SqlToRel<'_, S> { return not_impl_err!("ANY in LIKE expression"); } let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?; - let escape_char = if let Some(char) = escape_char { - if char.len() != 1 { - return plan_err!("Invalid escape character in LIKE expression"); + let escape_char = match escape_char { + Some(Value::SingleQuotedString(char)) if char.len() == 1 => { + Some(char.chars().next().unwrap()) } - Some(char.chars().next().unwrap()) - } else { - None + Some(_) => return plan_err!("Invalid escape character in LIKE expression"), + None => None, }; Ok(Expr::Like(Like::new( negated, @@ -846,7 +847,7 @@ impl SqlToRel<'_, S> { negated: bool, expr: SQLExpr, pattern: SQLExpr, - escape_char: Option, + escape_char: Option, schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { @@ -855,13 +856,14 @@ impl SqlToRel<'_, S> { if pattern_type != DataType::Utf8 && pattern_type != DataType::Null { return plan_err!("Invalid pattern in SIMILAR TO expression"); } - let escape_char = if let Some(char) = escape_char { - if char.len() != 1 { - return plan_err!("Invalid escape character in SIMILAR TO expression"); + let escape_char = match escape_char { + Some(Value::SingleQuotedString(char)) if char.len() == 1 => { + Some(char.chars().next().unwrap()) } - Some(char.chars().next().unwrap()) - } else { - None + Some(_) => { + return plan_err!("Invalid escape character in SIMILAR TO expression") + } + None => None, }; Ok(Expr::SimilarTo(Like::new( negated, diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index b77831053770..b4c04729322d 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -745,6 +745,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::UHugeInt | SQLDataType::UBigInt | SQLDataType::TimestampNtz + | SQLDataType::NamedTable { .. } + | SQLDataType::TsVector + | SQLDataType::TsQuery | SQLDataType::GeometricType(_) => { not_impl_err!("Unsupported SQL type {sql_type:?}") } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index b29cf3769771..6c90b4d19067 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -55,9 +55,9 @@ use datafusion_expr::{ Volatility, WriteOp, }; use sqlparser::ast::{ - self, BeginTransactionKind, IndexType, NullsDistinctOption, OrderByExpr, Set, - ShowStatementIn, ShowStatementOptions, SqliteOnConflict, TableObject, - UpdateTableFromKind, ValueWithSpan, + self, BeginTransactionKind, IndexColumn, IndexType, NullsDistinctOption, OrderByExpr, + OrderByOptions, Set, ShowStatementIn, ShowStatementOptions, SqliteOnConflict, + TableObject, UpdateTableFromKind, ValueWithSpan, }; use sqlparser::ast::{ Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable, @@ -111,7 +111,17 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec constraints.push(TableConstraint::Unique { name: name.clone(), - columns: vec![column.name.clone()], + columns: vec![IndexColumn { + column: OrderByExpr { + expr: SQLExpr::Identifier(column.name.clone()), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + }, + operator_class: None, + }], characteristics: *characteristics, index_name: None, index_type_display: ast::KeyOrIndexDisplay::None, @@ -124,7 +134,17 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec constraints.push(TableConstraint::PrimaryKey { name: name.clone(), - columns: vec![column.name.clone()], + columns: vec![IndexColumn { + column: OrderByExpr { + expr: SQLExpr::Identifier(column.name.clone()), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + }, + operator_class: None, + }], characteristics: *characteristics, index_name: None, index_type: None, @@ -144,11 +164,13 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec { constraints.push(TableConstraint::Check { name: name.clone(), expr: Box::new(expr.clone()), + enforced: None, }) } // Other options are not constraint related. @@ -168,6 +190,7 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec {} } } @@ -248,18 +271,12 @@ impl SqlToRel<'_, S> { name, columns, constraints, - table_properties, - with_options, if_not_exists, or_replace, without_rowid, like, clone, - engine, comment, - auto_increment_offset, - default_charset, - collation, on_commit, on_cluster, primary_key, @@ -267,7 +284,6 @@ impl SqlToRel<'_, S> { partition_by, cluster_by, clustered_by, - options, strict, copy_grants, enable_schema_evolution, @@ -285,7 +301,8 @@ impl SqlToRel<'_, S> { catalog_sync, storage_serialization_policy, inherits, - }) if table_properties.is_empty() && with_options.is_empty() => { + table_options: CreateTableOptions::None, + }) => { if temporary { return not_impl_err!("Temporary tables not supported")?; } @@ -334,21 +351,9 @@ impl SqlToRel<'_, S> { if clone.is_some() { return not_impl_err!("Clone not supported")?; } - if engine.is_some() { - return not_impl_err!("Engine not supported")?; - } if comment.is_some() { return not_impl_err!("Comment not supported")?; } - if auto_increment_offset.is_some() { - return not_impl_err!("Auto increment offset not supported")?; - } - if default_charset.is_some() { - return not_impl_err!("Default charset not supported")?; - } - if collation.is_some() { - return not_impl_err!("Collation not supported")?; - } if on_commit.is_some() { return not_impl_err!("On commit not supported")?; } @@ -370,9 +375,6 @@ impl SqlToRel<'_, S> { if clustered_by.is_some() { return not_impl_err!("Clustered by not supported")?; } - if options.is_some() { - return not_impl_err!("Options not supported")?; - } if strict { return not_impl_err!("Strict not supported")?; } @@ -644,6 +646,7 @@ impl SqlToRel<'_, S> { restrict: _, purge: _, temporary: _, + table: _, } => { // We don't support cascade and purge for now. // nor do we support multiple object names @@ -739,6 +742,8 @@ impl SqlToRel<'_, S> { has_parentheses: _, immediate, into, + output, + default, } => { // `USING` is a MySQL-specific syntax and currently not supported. if !using.is_empty() { @@ -754,6 +759,16 @@ impl SqlToRel<'_, S> { if !into.is_empty() { return not_impl_err!("Execute statement with INTO is not supported"); } + if output { + return not_impl_err!( + "Execute statement with OUTPUT is not supported" + ); + } + if default { + return not_impl_err!( + "Execute statement with DEFAULT is not supported" + ); + } let empty_schema = DFSchema::empty(); let parameters = parameters .into_iter() @@ -1020,8 +1035,8 @@ impl SqlToRel<'_, S> { modifier, transaction, statements, - exception_statements, has_end_keyword, + exception, } => { if let Some(modifier) = modifier { return not_impl_err!( @@ -1033,7 +1048,7 @@ impl SqlToRel<'_, S> { "Transaction with multiple statements not supported" ); } - if exception_statements.is_some() { + if exception.is_some() { return not_impl_err!( "Transaction with exception statements not supported" ); @@ -1188,6 +1203,12 @@ impl SqlToRel<'_, S> { "Qualified functions (AsBeginEnd) are not supported" )?; } + ast::CreateFunctionBody::AsReturnExpr(_) + | ast::CreateFunctionBody::AsReturnSelect(_) => { + return not_impl_err!( + "AS RETURN function syntax are not supported" + )? + } }, &DFSchema::empty(), &mut planner_context, @@ -1547,13 +1568,21 @@ impl SqlToRel<'_, S> { fn get_constraint_column_indices( &self, df_schema: &DFSchemaRef, - columns: &[Ident], + columns: &[IndexColumn], constraint_name: &str, ) -> Result> { let field_names = df_schema.field_names(); columns .iter() - .map(|ident| { + .map(|index_column| { + let expr = &index_column.column.expr; + let ident = if let SQLExpr::Identifier(ident) = expr { + ident + } else { + return Err(plan_datafusion_err!( + "Column name for {constraint_name} must be an identifier: {expr}" + )); + }; let column = self.ident_normalizer.normalize(ident.clone()); field_names .iter() diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 358583e5708f..2cf26009ac0f 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -112,6 +112,7 @@ impl QueryBuilder { for_clause: self.for_clause.clone(), settings: None, format_clause: None, + pipe_operators: vec![], }) } fn create_empty() -> Self { @@ -147,7 +148,7 @@ pub struct SelectBuilder { group_by: Option, cluster_by: Vec, distribute_by: Vec, - sort_by: Vec, + sort_by: Vec, having: Option, named_window: Vec, qualify: Option, @@ -264,7 +265,7 @@ impl SelectBuilder { self.distribute_by = value; self } - pub fn sort_by(&mut self, value: Vec) -> &mut Self { + pub fn sort_by(&mut self, value: Vec) -> &mut Self { self.sort_by = value; self } @@ -319,6 +320,7 @@ impl SelectBuilder { Some(ref value) => value.clone(), None => return Err(Into::into(UninitializedFieldError::from("flavor"))), }, + exclude: None, }) } fn create_empty() -> Self { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index cce14894acaf..2501156bd850 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -182,6 +182,8 @@ impl Unparser<'_> { operand, conditions, else_result, + case_token: AttachedToken::empty(), + end_token: AttachedToken::empty(), }) } Expr::Cast(Cast { expr, data_type }) => { @@ -278,7 +280,7 @@ impl Unparser<'_> { negated: *negated, expr: Box::new(self.expr_to_sql_inner(expr)?), pattern: Box::new(self.expr_to_sql_inner(pattern)?), - escape_char: escape_char.map(|c| c.to_string()), + escape_char: escape_char.map(|c| SingleQuotedString(c.to_string())), any: false, }), Expr::Like(Like { @@ -293,7 +295,8 @@ impl Unparser<'_> { negated: *negated, expr: Box::new(self.expr_to_sql_inner(expr)?), pattern: Box::new(self.expr_to_sql_inner(pattern)?), - escape_char: escape_char.map(|c| c.to_string()), + escape_char: escape_char + .map(|c| SingleQuotedString(c.to_string())), any: false, }) } else { @@ -301,7 +304,8 @@ impl Unparser<'_> { negated: *negated, expr: Box::new(self.expr_to_sql_inner(expr)?), pattern: Box::new(self.expr_to_sql_inner(pattern)?), - escape_char: escape_char.map(|c| c.to_string()), + escape_char: escape_char + .map(|c| SingleQuotedString(c.to_string())), any: false, }) } From 44296bb4ea9304cd49e025c39ba024c303133452 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Mon, 18 Aug 2025 15:00:09 +0900 Subject: [PATCH 07/11] Fix some error message wording --- datafusion/sql/src/statement.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 5ef1bd803ad9..77b9dac4747b 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1200,13 +1200,13 @@ impl SqlToRel<'_, S> { ast::CreateFunctionBody::Return(expr) => expr, ast::CreateFunctionBody::AsBeginEnd(_) => { return not_impl_err!( - "Qualified functions (AsBeginEnd) are not supported" + "BEGIN/END enclosed function body syntax is not supported" )?; } ast::CreateFunctionBody::AsReturnExpr(_) | ast::CreateFunctionBody::AsReturnSelect(_) => { return not_impl_err!( - "AS RETURN function syntax are not supported" + "AS RETURN function syntax is not supported" )? } }, From 12192cf855f65473b59193c6180f537df36cc1a1 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Mon, 18 Aug 2025 15:35:19 +0900 Subject: [PATCH 08/11] Fix failing order.slt test due to output error formatting --- datafusion/sql/src/statement.rs | 4 +++- datafusion/sqllogictest/test_files/order.slt | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 77b9dac4747b..2d9867b09927 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1812,7 +1812,9 @@ impl SqlToRel<'_, S> { let value_string = match &values[0] { SQLExpr::Identifier(i) => ident_to_string(i), SQLExpr::Value(v) => match crate::utils::value_to_string(&v.value) { - None => return plan_err!("Unsupported value {:?}", v), + None => { + return plan_err!("Unsupported value {:?}", v.value); + } Some(s) => s, }, SQLExpr::UnaryOp { op, expr } => match op { diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 14bc5fba3abd..c5674d6998dd 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -179,7 +179,7 @@ NULL three 2 two 1 one -statement error DataFusion error: Error during planning: Unsupported Value NULL +statement error DataFusion error: Error during planning: Unsupported value Null set datafusion.sql_parser.default_null_ordering = null; # reset to default null ordering From c22d64b3f0596a9829eea60d7790729ddeec3819 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Tue, 19 Aug 2025 20:43:24 +0900 Subject: [PATCH 09/11] Fix SQL LIKE/SIMILAR TO error messages to be more informative --- datafusion/sql/src/expr/mod.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index b7cd5b882167..ae4cddc61f54 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -830,7 +830,7 @@ impl SqlToRel<'_, S> { Some(Value::SingleQuotedString(char)) if char.len() == 1 => { Some(char.chars().next().unwrap()) } - Some(_) => return plan_err!("Invalid escape character in LIKE expression"), + Some(value) => return plan_err!("Invalid escape character in LIKE expression. Expected a single character wrapped with single quotes, got {value}"), None => None, }; Ok(Expr::Like(Like::new( @@ -860,9 +860,7 @@ impl SqlToRel<'_, S> { Some(Value::SingleQuotedString(char)) if char.len() == 1 => { Some(char.chars().next().unwrap()) } - Some(_) => { - return plan_err!("Invalid escape character in SIMILAR TO expression") - } + Some(value) => return plan_err!("Invalid escape character in SIMILAR TO expression. Expected a single character wrapped with single quotes, got {value}"), None => None, }; Ok(Expr::SimilarTo(Like::new( From 837a19eceab1094226e4225c4c33d25b5f056718 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 19 Aug 2025 17:36:49 -0400 Subject: [PATCH 10/11] Avoid substr lookup by name --- datafusion/sql/src/expr/substring.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/datafusion/sql/src/expr/substring.rs b/datafusion/sql/src/expr/substring.rs index 41bf3e872eac..0cb47a770f9d 100644 --- a/datafusion/sql/src/expr/substring.rs +++ b/datafusion/sql/src/expr/substring.rs @@ -16,9 +16,9 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{internal_datafusion_err, plan_err}; +use datafusion_common::{not_impl_err, plan_err}; use datafusion_common::{DFSchema, Result, ScalarValue}; -use datafusion_expr::{expr::ScalarFunction, planner::PlannerResult, Expr}; +use datafusion_expr::{planner::PlannerResult, Expr}; use sqlparser::ast::Expr as SQLExpr; @@ -69,6 +69,7 @@ impl SqlToRel<'_, S> { } }; + // Try to plan the substring expression using one of the registered planners for planner in self.context_provider.get_expr_planners() { match planner.plan_substring(substring_args)? { PlannerResult::Planned(expr) => return Ok(expr), @@ -78,16 +79,6 @@ impl SqlToRel<'_, S> { } } - let fun = self - .context_provider - .get_function_meta("substr") - .ok_or_else(|| { - internal_datafusion_err!("Unable to find expected 'substr' function") - })?; - - Ok(Expr::ScalarFunction(ScalarFunction::new_udf( - fun, - substring_args, - ))) + not_impl_err!("Substring could not be planned by registered expr planner. Hint: enable the `unicode_expressions" ) } } From e46e5322b81c33883ed79dfeac89e0a5dd8382dc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 19 Aug 2025 17:43:10 -0400 Subject: [PATCH 11/11] Fix sql tests --- datafusion/sql/tests/cases/plan_to_sql.rs | 2 ++ datafusion/sql/tests/sql_integration.rs | 24 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index fe6a98d5c2d0..cffb062974e5 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -51,6 +51,7 @@ use datafusion_expr::builder::{ project, subquery_alias, table_scan_with_filter_and_fetch, table_scan_with_filters, }; use datafusion_functions::core::planner::CoreFunctionPlanner; +use datafusion_functions::planner::UserDefinedFunctionPlanner; use datafusion_functions_nested::extract::array_element_udf; use datafusion_functions_nested::planner::{FieldAccessPlanner, NestedFunctionPlanner}; use datafusion_sql::unparser::ast::{ @@ -1340,6 +1341,7 @@ where .with_scalar_function(Arc::new(unicode::substr().as_ref().clone())) .with_scalar_function(make_array_udf()) .with_expr_planner(Arc::new(CoreFunctionPlanner::default())) + .with_expr_planner(Arc::new(UserDefinedFunctionPlanner)) .with_expr_planner(Arc::new(NestedFunctionPlanner)) .with_expr_planner(Arc::new(FieldAccessPlanner)), }; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index fcd5115ae8bd..28181771f158 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4564,6 +4564,30 @@ fn test_no_functions_registered() { ); } +#[test] +fn test_no_substring_registered() { + // substring requires an expression planner + let sql = "SELECT SUBSTRING(foo, bar, baz) FROM person"; + let err = logical_plan(sql).expect_err("query should have failed"); + + assert_snapshot!( + err.strip_backtrace(), + @"This feature is not implemented: Substring could not be planned by registered expr planner. Hint: enable the `unicode_expressions" + ); +} + +#[test] +fn test_no_substring_registered_alt_syntax() { + // Alternate syntax for substring + let sql = "SELECT SUBSTRING(foo FROM bar) FROM person"; + let err = logical_plan(sql).expect_err("query should have failed"); + + assert_snapshot!( + err.strip_backtrace(), + @"This feature is not implemented: Substring could not be planned by registered expr planner. Hint: enable the `unicode_expressions" + ); +} + #[test] fn test_custom_type_plan() -> Result<()> { let sql = "SELECT DATETIME '2001-01-01 18:00:00'";