Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ async-trait = { workspace = true }
aws-config = "1.8.7"
aws-credential-types = "1.2.7"
chrono = { workspace = true }
clap = { version = "4.5.47", features = ["derive", "cargo"] }
clap = { version = "4.5.47", features = ["cargo", "derive"] }
datafusion = { workspace = true, features = [
"avro",
"compression",
Expand All @@ -55,6 +55,7 @@ datafusion = { workspace = true, features = [
"sql",
"unicode_expressions",
] }
datafusion-common = { workspace = true }
dirs = "6.0.0"
env_logger = { workspace = true }
futures = { workspace = true }
Expand All @@ -65,7 +66,7 @@ parking_lot = { workspace = true }
parquet = { workspace = true, default-features = false }
regex = { workspace = true }
rustyline = "17.0"
tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] }
tokio = { workspace = true, features = ["macros", "parking_lot", "rt", "rt-multi-thread", "signal", "sync"] }
url = { workspace = true }

[dev-dependencies]
Expand Down
19 changes: 10 additions & 9 deletions datafusion-cli/src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter};

use datafusion::sql::parser::{DFParser, Statement};
use datafusion::sql::sqlparser::dialect::dialect_from_str;
use datafusion_common::config::Dialect;

use rustyline::completion::{Completer, FilenameCompleter, Pair};
use rustyline::error::ReadlineError;
Expand All @@ -34,33 +35,33 @@ use rustyline::{Context, Helper, Result};

pub struct CliHelper {
completer: FilenameCompleter,
dialect: String,
dialect: Dialect,
highlighter: Box<dyn Highlighter>,
}

impl CliHelper {
pub fn new(dialect: &str, color: bool) -> Self {
pub fn new(dialect: &Dialect, color: bool) -> Self {
let highlighter: Box<dyn Highlighter> = if !color {
Box::new(NoSyntaxHighlighter {})
} else {
Box::new(SyntaxHighlighter::new(dialect))
};
Self {
completer: FilenameCompleter::new(),
dialect: dialect.into(),
dialect: *dialect,
highlighter,
}
}

pub fn set_dialect(&mut self, dialect: &str) {
if dialect != self.dialect {
self.dialect = dialect.to_string();
pub fn set_dialect(&mut self, dialect: &Dialect) {
if *dialect != self.dialect {
self.dialect = *dialect;
}
}

fn validate_input(&self, input: &str) -> Result<ValidationResult> {
if let Some(sql) = input.strip_suffix(';') {
let dialect = match dialect_from_str(&self.dialect) {
let dialect = match dialect_from_str(self.dialect) {
Some(dialect) => dialect,
None => {
return Ok(ValidationResult::Invalid(Some(format!(
Expand Down Expand Up @@ -97,7 +98,7 @@ impl CliHelper {

impl Default for CliHelper {
fn default() -> Self {
Self::new("generic", false)
Self::new(&Dialect::Generic, false)
}
}

Expand Down Expand Up @@ -289,7 +290,7 @@ mod tests {
);

// valid in postgresql dialect
validator.set_dialect("postgresql");
validator.set_dialect(&Dialect::PostgreSQL);
let result =
readline_direct(Cursor::new(r"select 1 # 2;".as_bytes()), &validator)?;
assert!(matches!(result, ValidationResult::Valid(None)));
Expand Down
10 changes: 6 additions & 4 deletions datafusion-cli/src/highlighter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use datafusion::sql::sqlparser::{
keywords::Keyword,
tokenizer::{Token, Tokenizer},
};
use datafusion_common::config;
use rustyline::highlight::{CmdKind, Highlighter};

/// The syntax highlighter.
Expand All @@ -36,7 +37,7 @@ pub struct SyntaxHighlighter {
}

impl SyntaxHighlighter {
pub fn new(dialect: &str) -> Self {
pub fn new(dialect: &config::Dialect) -> Self {
let dialect = dialect_from_str(dialect).unwrap_or(Box::new(GenericDialect {}));
Self { dialect }
}
Expand Down Expand Up @@ -93,13 +94,14 @@ impl Color {

#[cfg(test)]
mod tests {
use super::config::Dialect;
use super::SyntaxHighlighter;
use rustyline::highlight::Highlighter;

#[test]
fn highlighter_valid() {
let s = "SElect col_a from tab_1;";
let highlighter = SyntaxHighlighter::new("generic");
let highlighter = SyntaxHighlighter::new(&Dialect::Generic);
let out = highlighter.highlight(s, s.len());
assert_eq!(
"\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1;",
Expand All @@ -110,7 +112,7 @@ mod tests {
#[test]
fn highlighter_valid_with_new_line() {
let s = "SElect col_a from tab_1\n WHERE col_b = 'なにか';";
let highlighter = SyntaxHighlighter::new("generic");
let highlighter = SyntaxHighlighter::new(&Dialect::Generic);
let out = highlighter.highlight(s, s.len());
assert_eq!(
"\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1\n \u{1b}[91mWHERE\u{1b}[0m col_b = \u{1b}[92m'なにか'\u{1b}[0m;",
Expand All @@ -121,7 +123,7 @@ mod tests {
#[test]
fn highlighter_invalid() {
let s = "SElect col_a from tab_1 WHERE col_b = ';";
let highlighter = SyntaxHighlighter::new("generic");
let highlighter = SyntaxHighlighter::new(&Dialect::Generic);
let out = highlighter.highlight(s, s.len());
assert_eq!("SElect col_a from tab_1 WHERE col_b = ';", out);
}
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/examples/remote_catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ async fn main() -> Result<()> {
let state = ctx.state();

// First, parse the SQL (but don't plan it / resolve any table references)
let dialect = state.config().options().sql_parser.dialect.as_str();
let statement = state.sql_to_statement(sql, dialect)?;
let dialect = state.config().options().sql_parser.dialect;
let statement = state.sql_to_statement(sql, &dialect)?;

// Find all `TableReferences` in the parsed queries. These correspond to the
// tables referred to by the query (in this case
Expand Down
90 changes: 89 additions & 1 deletion datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ config_namespace! {

/// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic,
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
pub dialect: String, default = "generic".to_string()
pub dialect: Dialect, default = Dialect::Generic
// no need to lowercase because `sqlparser::dialect_from_str`] is case-insensitive

/// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
Expand Down Expand Up @@ -292,6 +292,94 @@ config_namespace! {
}
}

/// This is the SQL dialect used by DataFusion's parser.
/// This mirrors [sqlparser::dialect::Dialect](https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html)
/// trait in order to offer an easier API and avoid adding the `sqlparser` dependency
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add documentation to this enum that explains:

  1. What it represents
  2. How it is related to https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html ?

Specifically it would help to understand if the intent is to mirror the code in sqlparser (but is replicated to avoid adding a sqlparser dependency)

Copy link
Contributor Author

@dariocurr dariocurr Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your feedback, I added documentation

pub enum Dialect {
#[default]
Generic,
MySQL,
PostgreSQL,
Hive,
SQLite,
Snowflake,
Redshift,
MsSQL,
ClickHouse,
BigQuery,
Ansi,
DuckDB,
Databricks,
}

impl AsRef<str> for Dialect {
fn as_ref(&self) -> &str {
match self {
Self::Generic => "generic",
Self::MySQL => "mysql",
Self::PostgreSQL => "postgresql",
Self::Hive => "hive",
Self::SQLite => "sqlite",
Self::Snowflake => "snowflake",
Self::Redshift => "redshift",
Self::MsSQL => "mssql",
Self::ClickHouse => "clickhouse",
Self::BigQuery => "bigquery",
Self::Ansi => "ansi",
Self::DuckDB => "duckdb",
Self::Databricks => "databricks",
}
}
}

impl FromStr for Dialect {
type Err = DataFusionError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
let value = match s.to_ascii_lowercase().as_str() {
"generic" => Self::Generic,
"mysql" => Self::MySQL,
"postgresql" | "postgres" => Self::PostgreSQL,
"hive" => Self::Hive,
"sqlite" => Self::SQLite,
"snowflake" => Self::Snowflake,
"redshift" => Self::Redshift,
"mssql" => Self::MsSQL,
"clickhouse" => Self::ClickHouse,
"bigquery" => Self::BigQuery,
"ansi" => Self::Ansi,
"duckdb" => Self::DuckDB,
"databricks" => Self::Databricks,
other => {
let error_message = format!(
"Invalid Dialect: {other}. Expected one of: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks"
);
return Err(DataFusionError::Configuration(error_message));
}
};
Ok(value)
}
}

impl ConfigField for Dialect {
fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
v.some(key, self, description)
}

fn set(&mut self, _: &str, value: &str) -> Result<()> {
*self = Self::from_str(value)?;
Ok(())
}
}

impl Display for Dialect {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let str = self.as_ref();
write!(f, "{str}")
}
}

#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum SpillCompression {
Zstd,
Expand Down
7 changes: 5 additions & 2 deletions datafusion/core/benches/sql_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use criterion::Bencher;
use datafusion::datasource::MemTable;
use datafusion::execution::context::SessionContext;
use datafusion::prelude::DataFrame;
use datafusion_common::ScalarValue;
use datafusion_common::{config::Dialect, ScalarValue};
use datafusion_expr::Expr::Literal;
use datafusion_expr::{cast, col, lit, not, try_cast, when};
use datafusion_functions::expr_fn::{
Expand Down Expand Up @@ -288,7 +288,10 @@ fn benchmark_with_param_values_many_columns(
}
// SELECT max(attr0), ..., max(attrN) FROM t1.
let query = format!("SELECT {aggregates} FROM t1");
let statement = ctx.state().sql_to_statement(&query, "Generic").unwrap();
let statement = ctx
.state()
.sql_to_statement(&query, &Dialect::Generic)
.unwrap();
let plan =
rt.block_on(async { ctx.state().statement_to_plan(statement).await.unwrap() });
b.iter(|| {
Expand Down
27 changes: 14 additions & 13 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,14 @@ use crate::datasource::provider_as_source;
use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner};
use crate::execution::SessionStateDefaults;
use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
use arrow::datatypes::DataType;
use datafusion_catalog::information_schema::{
InformationSchemaProvider, INFORMATION_SCHEMA,
};

use arrow::datatypes::DataType;
use datafusion_catalog::MemoryCatalogProviderList;
use datafusion_catalog::{TableFunction, TableFunctionImpl};
use datafusion_common::alias::AliasGenerator;
use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions};
use datafusion_common::config::{ConfigExtension, ConfigOptions, Dialect, TableOptions};
use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan};
use datafusion_common::tree_node::TreeNode;
use datafusion_common::{
Expand Down Expand Up @@ -374,7 +373,7 @@ impl SessionState {
pub fn sql_to_statement(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is technically a breaking API change (now it requires a &Dialect). Maybe can you add a note to the upgrade guide and a doc example of how to use this API to help people upgrade?

This is explained a bit more in https://datafusion.apache.org/contributor-guide/api-health.html#upgrade-guides

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a section in upgrading.md but I’m not quite sure I understand what I need to do. I saw in the datafusion API health guide that "when making breaking public API changes, please add the api-change label to the PR" 

Should I add it something more?

&self,
sql: &str,
dialect: &str,
dialect: &Dialect,
) -> datafusion_common::Result<Statement> {
let dialect = dialect_from_str(dialect).ok_or_else(|| {
plan_datafusion_err!(
Expand Down Expand Up @@ -411,7 +410,7 @@ impl SessionState {
pub fn sql_to_expr(
&self,
sql: &str,
dialect: &str,
dialect: &Dialect,
) -> datafusion_common::Result<SQLExpr> {
self.sql_to_expr_with_alias(sql, dialect).map(|x| x.expr)
}
Expand All @@ -423,7 +422,7 @@ impl SessionState {
pub fn sql_to_expr_with_alias(
&self,
sql: &str,
dialect: &str,
dialect: &Dialect,
) -> datafusion_common::Result<SQLExprWithAlias> {
let dialect = dialect_from_str(dialect).ok_or_else(|| {
plan_datafusion_err!(
Expand Down Expand Up @@ -527,8 +526,8 @@ impl SessionState {
&self,
sql: &str,
) -> datafusion_common::Result<LogicalPlan> {
let dialect = self.config.options().sql_parser.dialect.as_str();
let statement = self.sql_to_statement(sql, dialect)?;
let dialect = self.config.options().sql_parser.dialect;
let statement = self.sql_to_statement(sql, &dialect)?;
let plan = self.statement_to_plan(statement).await?;
Ok(plan)
}
Expand All @@ -542,9 +541,9 @@ impl SessionState {
sql: &str,
df_schema: &DFSchema,
) -> datafusion_common::Result<Expr> {
let dialect = self.config.options().sql_parser.dialect.as_str();
let dialect = self.config.options().sql_parser.dialect;

let sql_expr = self.sql_to_expr_with_alias(sql, dialect)?;
let sql_expr = self.sql_to_expr_with_alias(sql, &dialect)?;

let provider = SessionContextProvider {
state: self,
Expand Down Expand Up @@ -2034,6 +2033,7 @@ mod tests {
use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_catalog::MemoryCatalogProviderList;
use datafusion_common::config::Dialect;
use datafusion_common::DFSchema;
use datafusion_common::Result;
use datafusion_execution::config::SessionConfig;
Expand All @@ -2059,8 +2059,8 @@ mod tests {
let sql = "[1,2,3]";
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
let df_schema = DFSchema::try_from(schema)?;
let dialect = state.config.options().sql_parser.dialect.as_str();
let sql_expr = state.sql_to_expr(sql, dialect)?;
let dialect = state.config.options().sql_parser.dialect;
let sql_expr = state.sql_to_expr(sql, &dialect)?;

let query = SqlToRel::new_with_options(&provider, state.get_parser_options());
query.sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new())
Expand Down Expand Up @@ -2218,7 +2218,8 @@ mod tests {
}

let state = &context_provider.state;
let statement = state.sql_to_statement("select count(*) from t", "mysql")?;
let statement =
state.sql_to_statement("select count(*) from t", &Dialect::MySQL)?;
let plan = SqlToRel::new(&context_provider).statement_to_plan(statement)?;
state.create_physical_plan(&plan).await
}
Expand Down
Loading