diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index e2faa75e7bc4..ae6c1ee56129 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -106,7 +106,7 @@ jobs: RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C codegen-units=256" RUST_BACKTRACE: "1" # avoid rust stack overflows on tpc-ds tests - RUST_MINSTACK: "3000000" + RUST_MIN_STACK: "3000000" - name: Verify Working Directory Clean run: git diff --exit-code @@ -316,7 +316,7 @@ jobs: RUSTFLAGS: "-C debuginfo=line-tables-only" RUST_BACKTRACE: "1" # avoid rust stack overflows on tpc-ds tests - RUST_MINSTACK: "3000000" + RUST_MIN_STACK: "3000000" macos: name: cargo test (mac) runs-on: macos-latest @@ -356,7 +356,7 @@ jobs: RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C codegen-units=256" RUST_BACKTRACE: "1" # avoid rust stack overflows on tpc-ds tests - RUST_MINSTACK: "3000000" + RUST_MIN_STACK: "3000000" test-datafusion-pyarrow: name: cargo test pyarrow (amd64) diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs index 2e729c128e73..f0edc7175948 100644 --- a/datafusion/common/src/column.rs +++ b/datafusion/common/src/column.rs @@ -17,6 +17,7 @@ //! Column +use crate::error::_schema_err; use crate::utils::{parse_identifiers_normalized, quote_identifier}; use crate::{DFSchema, DataFusionError, OwnedTableReference, Result, SchemaError}; use std::collections::HashSet; @@ -211,13 +212,13 @@ impl Column { } } - Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { + _schema_err!(SchemaError::FieldNotFound { field: Box::new(Column::new(self.relation.clone(), self.name)), valid_fields: schemas .iter() .flat_map(|s| s.fields().iter().map(|f| f.qualified_column())) .collect(), - })) + }) } /// Qualify column if not done yet. @@ -299,23 +300,21 @@ impl Column { } // If not due to USING columns then due to ambiguous column name - return Err(DataFusionError::SchemaError( - SchemaError::AmbiguousReference { - field: Column::new_unqualified(self.name), - }, - )); + return _schema_err!(SchemaError::AmbiguousReference { + field: Column::new_unqualified(self.name), + }); } } } - Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { + _schema_err!(SchemaError::FieldNotFound { field: Box::new(self), valid_fields: schemas .iter() .flat_map(|s| s.iter()) .flat_map(|s| s.fields().iter().map(|f| f.qualified_column())) .collect(), - })) + }) } } diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 8772c4c3e1ff..85b97aac037d 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -26,6 +26,7 @@ use std::sync::Arc; use crate::error::{ unqualified_field_not_found, DataFusionError, Result, SchemaError, _plan_err, + _schema_err, }; use crate::{ field_not_found, Column, FunctionalDependencies, OwnedTableReference, TableReference, @@ -141,11 +142,9 @@ impl DFSchema { if let Some(qualifier) = field.qualifier() { qualified_names.insert((qualifier, field.name())); } else if !unqualified_names.insert(field.name()) { - return Err(DataFusionError::SchemaError( - SchemaError::DuplicateUnqualifiedField { - name: field.name().to_string(), - }, - )); + return _schema_err!(SchemaError::DuplicateUnqualifiedField { + name: field.name().to_string(), + }); } } @@ -159,14 +158,12 @@ impl DFSchema { qualified_names.sort(); for (qualifier, name) in &qualified_names { if unqualified_names.contains(name) { - return Err(DataFusionError::SchemaError( - SchemaError::AmbiguousReference { - field: Column { - relation: Some((*qualifier).clone()), - name: name.to_string(), - }, - }, - )); + return _schema_err!(SchemaError::AmbiguousReference { + field: Column { + relation: Some((*qualifier).clone()), + name: name.to_string(), + } + }); } } Ok(Self { @@ -392,14 +389,12 @@ impl DFSchema { if fields_without_qualifier.len() == 1 { Ok(fields_without_qualifier[0]) } else { - Err(DataFusionError::SchemaError( - SchemaError::AmbiguousReference { - field: Column { - relation: None, - name: name.to_string(), - }, + _schema_err!(SchemaError::AmbiguousReference { + field: Column { + relation: None, + name: name.to_string(), }, - )) + }) } } } diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index e58faaa15096..978938809c1b 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -82,7 +82,9 @@ pub enum DataFusionError { Configuration(String), /// This error happens with schema-related errors, such as schema inference not possible /// and non-unique column names. - SchemaError(SchemaError), + /// 2nd argument is for optional backtrace + /// Boxing the optional backtrace to prevent + SchemaError(SchemaError, Box>), /// Error returned during execution of the query. /// Examples include files not found, errors in parsing certain types. Execution(String), @@ -125,34 +127,6 @@ pub enum SchemaError { }, } -/// Create a "field not found" DataFusion::SchemaError -pub fn field_not_found>( - qualifier: Option, - name: &str, - schema: &DFSchema, -) -> DataFusionError { - DataFusionError::SchemaError(SchemaError::FieldNotFound { - field: Box::new(Column::new(qualifier, name)), - valid_fields: schema - .fields() - .iter() - .map(|f| f.qualified_column()) - .collect(), - }) -} - -/// Convenience wrapper over [`field_not_found`] for when there is no qualifier -pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> DataFusionError { - DataFusionError::SchemaError(SchemaError::FieldNotFound { - field: Box::new(Column::new_unqualified(name)), - valid_fields: schema - .fields() - .iter() - .map(|f| f.qualified_column()) - .collect(), - }) -} - impl Display for SchemaError { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { @@ -298,7 +272,7 @@ impl Display for DataFusionError { write!(f, "IO error: {desc}") } DataFusionError::SQL(ref desc, ref backtrace) => { - let backtrace = backtrace.clone().unwrap_or("".to_owned()); + let backtrace: String = backtrace.clone().unwrap_or("".to_owned()); write!(f, "SQL error: {desc:?}{backtrace}") } DataFusionError::Configuration(ref desc) => { @@ -314,8 +288,10 @@ impl Display for DataFusionError { DataFusionError::Plan(ref desc) => { write!(f, "Error during planning: {desc}") } - DataFusionError::SchemaError(ref desc) => { - write!(f, "Schema error: {desc}") + DataFusionError::SchemaError(ref desc, ref backtrace) => { + let backtrace: &str = + &backtrace.as_ref().clone().unwrap_or("".to_owned()); + write!(f, "Schema error: {desc}{backtrace}") } DataFusionError::Execution(ref desc) => { write!(f, "Execution error: {desc}") @@ -356,7 +332,7 @@ impl Error for DataFusionError { DataFusionError::Internal(_) => None, DataFusionError::Configuration(_) => None, DataFusionError::Plan(_) => None, - DataFusionError::SchemaError(e) => Some(e), + DataFusionError::SchemaError(e, _) => Some(e), DataFusionError::Execution(_) => None, DataFusionError::ResourcesExhausted(_) => None, DataFusionError::External(e) => Some(e.as_ref()), @@ -556,12 +532,63 @@ macro_rules! arrow_err { }; } +// Exposes a macro to create `DataFusionError::SchemaError` with optional backtrace +#[macro_export] +macro_rules! schema_datafusion_err { + ($ERR:expr) => { + DataFusionError::SchemaError( + $ERR, + Box::new(Some(DataFusionError::get_back_trace())), + ) + }; +} + +// Exposes a macro to create `Err(DataFusionError::SchemaError)` with optional backtrace +#[macro_export] +macro_rules! schema_err { + ($ERR:expr) => { + Err(DataFusionError::SchemaError( + $ERR, + Box::new(Some(DataFusionError::get_back_trace())), + )) + }; +} + // To avoid compiler error when using macro in the same crate: // macros from the current crate cannot be referred to by absolute paths pub use internal_datafusion_err as _internal_datafusion_err; pub use internal_err as _internal_err; pub use not_impl_err as _not_impl_err; pub use plan_err as _plan_err; +pub use schema_err as _schema_err; + +/// Create a "field not found" DataFusion::SchemaError +pub fn field_not_found>( + qualifier: Option, + name: &str, + schema: &DFSchema, +) -> DataFusionError { + schema_datafusion_err!(SchemaError::FieldNotFound { + field: Box::new(Column::new(qualifier, name)), + valid_fields: schema + .fields() + .iter() + .map(|f| f.qualified_column()) + .collect(), + }) +} + +/// Convenience wrapper over [`field_not_found`] for when there is no qualifier +pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> DataFusionError { + schema_datafusion_err!(SchemaError::FieldNotFound { + field: Box::new(Column::new_unqualified(name)), + valid_fields: schema + .fields() + .iter() + .map(|f| f.qualified_column()) + .collect(), + }) +} #[cfg(test)] mod test { diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 33e198d6d588..f15f1e9ba6fb 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1186,7 +1186,7 @@ impl DataFrame { let field_to_rename = match self.plan.schema().field_from_column(&old_column) { Ok(field) => field, // no-op if field not found - Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { .. })) => { + Err(DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }, _)) => { return Ok(self) } Err(err) => return Err(err), diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index a684f3e97485..847fbbbf61c7 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1845,13 +1845,16 @@ mod tests { .project(vec![col("id"), col("first_name").alias("id")]); match plan { - Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference { - field: - Column { - relation: Some(OwnedTableReference::Bare { table }), - name, - }, - })) => { + Err(DataFusionError::SchemaError( + SchemaError::AmbiguousReference { + field: + Column { + relation: Some(OwnedTableReference::Bare { table }), + name, + }, + }, + _, + )) => { assert_eq!("employee_csv", table); assert_eq!("id", &name); Ok(()) @@ -1872,13 +1875,16 @@ mod tests { .aggregate(vec![col("state")], vec![sum(col("salary")).alias("state")]); match plan { - Err(DataFusionError::SchemaError(SchemaError::AmbiguousReference { - field: - Column { - relation: Some(OwnedTableReference::Bare { table }), - name, - }, - })) => { + Err(DataFusionError::SchemaError( + SchemaError::AmbiguousReference { + field: + Column { + relation: Some(OwnedTableReference::Bare { table }), + name, + }, + }, + _, + )) => { assert_eq!("employee_csv", table); assert_eq!("state", &name); Ok(()) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index c5c30e3a2253..a04df5589b85 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -250,7 +250,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Default expressions are restricted, column references are not allowed let empty_schema = DFSchema::empty(); let error_desc = |e: DataFusionError| match e { - DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }) => { + DataFusionError::SchemaError(SchemaError::FieldNotFound { .. }, _) => { plan_datafusion_err!( "Column reference is not allowed in the DEFAULT expression : {}", e diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index b96553ffbf86..b9fb4c65dc2c 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -31,9 +31,10 @@ use arrow_schema::DataType; use datafusion_common::file_options::StatementOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - not_impl_err, plan_datafusion_err, plan_err, unqualified_field_not_found, Column, - Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, - Result, ScalarValue, SchemaReference, TableReference, ToDFSchema, + not_impl_err, plan_datafusion_err, plan_err, schema_err, unqualified_field_not_found, + Column, Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, + OwnedTableReference, Result, ScalarValue, SchemaError, SchemaReference, + TableReference, ToDFSchema, }; use datafusion_expr::dml::{CopyOptions, CopyTo}; use datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check; @@ -1138,11 +1139,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .index_of_column_by_name(None, &c)? .ok_or_else(|| unqualified_field_not_found(&c, &table_schema))?; if value_indices[column_index].is_some() { - return Err(DataFusionError::SchemaError( - datafusion_common::SchemaError::DuplicateUnqualifiedField { - name: c, - }, - )); + return schema_err!(SchemaError::DuplicateUnqualifiedField { + name: c, + }); } else { value_indices[column_index] = Some(i); } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 48ba50145308..4de08a7124cf 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -756,9 +756,11 @@ fn join_with_ambiguous_column() { #[test] fn where_selection_with_ambiguous_column() { let sql = "SELECT * FROM person a, person b WHERE id = id + 1"; - let err = logical_plan(sql).expect_err("query should have failed"); + let err = logical_plan(sql) + .expect_err("query should have failed") + .strip_backtrace(); assert_eq!( - "SchemaError(AmbiguousReference { field: Column { relation: None, name: \"id\" } })", + "\"Schema error: Ambiguous reference to unqualified field id\"", format!("{err:?}") ); }