diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index af3a774e06bd..013b1d5a2cab 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -20,6 +20,7 @@ use std::backtrace::{Backtrace, BacktraceStatus}; use std::borrow::Cow; +use std::collections::VecDeque; use std::error::Error; use std::fmt::{Display, Formatter}; use std::io; @@ -136,6 +137,13 @@ pub enum DataFusionError { /// human-readable messages, and locations in the source query that relate /// to the error in some way. Diagnostic(Box, Box), + /// A collection of one or more [`DataFusionError`]. Useful in cases where + /// DataFusion can recover from an erroneous state, and produce more errors + /// before terminating. e.g. when planning a SELECT clause, DataFusion can + /// synchronize to the next `SelectItem` if the previous one had errors. The + /// end result is that the user can see errors about all `SelectItem`, + /// instead of just the first one. + Collection(Vec), /// A [`DataFusionError`] which shares an underlying [`DataFusionError`]. /// /// This is useful when the same underlying [`DataFusionError`] is passed @@ -360,6 +368,14 @@ impl Error for DataFusionError { DataFusionError::Context(_, e) => Some(e.as_ref()), DataFusionError::Substrait(_) => None, DataFusionError::Diagnostic(_, e) => Some(e.as_ref()), + // Can't really make a Collection fit into the mold of "an error has + // at most one source", but returning the first one is probably good + // idea. Especially since `DataFusionError::Collection` is mostly + // meant for consumption by the end user, so shouldn't interfere + // with programmatic usage too much. Plus, having 1 or 5 errors + // doesn't really change the fact that the query is invalid and + // can't be executed. + DataFusionError::Collection(errs) => errs.first().map(|e| e as &dyn Error), DataFusionError::Shared(e) => Some(e.as_ref()), } } @@ -463,18 +479,27 @@ impl DataFusionError { DataFusionError::ObjectStore(_) => "Object Store error: ", DataFusionError::IoError(_) => "IO error: ", DataFusionError::SQL(_, _) => "SQL error: ", - DataFusionError::NotImplemented(_) => "This feature is not implemented: ", + DataFusionError::NotImplemented(_) => { + "This feature is not implemented: " + } DataFusionError::Internal(_) => "Internal error: ", DataFusionError::Plan(_) => "Error during planning: ", - DataFusionError::Configuration(_) => "Invalid or Unsupported Configuration: ", + DataFusionError::Configuration(_) => { + "Invalid or Unsupported Configuration: " + } DataFusionError::SchemaError(_, _) => "Schema error: ", DataFusionError::Execution(_) => "Execution error: ", DataFusionError::ExecutionJoin(_) => "ExecutionJoin error: ", - DataFusionError::ResourcesExhausted(_) => "Resources exhausted: ", + DataFusionError::ResourcesExhausted(_) => { + "Resources exhausted: " + } DataFusionError::External(_) => "External error: ", DataFusionError::Context(_, _) => "", DataFusionError::Substrait(_) => "Substrait error: ", DataFusionError::Diagnostic(_, _) => "", + DataFusionError::Collection(errs) => { + errs.first().expect("cannot construct DataFusionError::Collection with 0 errors, but got one such case").error_prefix() + } DataFusionError::Shared(_) => "", } } @@ -517,6 +542,13 @@ impl DataFusionError { } DataFusionError::Substrait(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::Diagnostic(_, ref err) => Cow::Owned(err.to_string()), + // Returning the message of the first error is probably fine enough, + // and makes `DataFusionError::Collection` a transparent wrapped, + // unless the end user explicitly calls `DataFusionError::iter`. + DataFusionError::Collection(ref errs) => errs + .first() + .expect("cannot construct DataFusionError::Collection with 0 errors") + .message(), DataFusionError::Shared(ref desc) => Cow::Owned(desc.to_string()), } } @@ -569,6 +601,63 @@ impl DataFusionError { DiagnosticsIterator { head: self }.next() } + + /// Sometimes DataFusion is able to collect multiple errors in a SQL query + /// before terminating, e.g. across different expressions in a SELECT + /// statements or different sides of a UNION. This method returns an + /// iterator over all the errors in the collection. + /// + /// For this to work, the top-level error must be a + /// `DataFusionError::Collection`, not something that contains it. + pub fn iter(&self) -> impl Iterator { + struct ErrorIterator<'a> { + queue: VecDeque<&'a DataFusionError>, + } + + impl<'a> Iterator for ErrorIterator<'a> { + type Item = &'a DataFusionError; + + fn next(&mut self) -> Option { + loop { + let popped = self.queue.pop_front()?; + match popped { + DataFusionError::Collection(errs) => self.queue.extend(errs), + _ => return Some(popped), + } + } + } + } + + let mut queue = VecDeque::new(); + queue.push_back(self); + ErrorIterator { queue } + } +} + +pub struct DataFusionErrorBuilder(Vec); + +impl DataFusionErrorBuilder { + pub fn new() -> Self { + Self(Vec::new()) + } + + pub fn add_error(&mut self, error: DataFusionError) { + self.0.push(error); + } + + pub fn error_or(self, ok: T) -> Result { + match self.0.len() { + 0 => Ok(ok), + 1 => Err(self.0.into_iter().next().expect("length matched 1")), + _ => Err(DataFusionError::Collection(self.0)), + } + } +} + +impl Default for DataFusionErrorBuilder { + fn default() -> Self { + Self::new() + } } /// Unwrap an `Option` if possible. Otherwise return an `DataFusionError::Internal`. @@ -954,4 +1043,20 @@ mod test { assert_eq!(e.strip_backtrace(), exp.strip_backtrace()); assert_eq!(std::mem::discriminant(e), std::mem::discriminant(&exp),) } + + #[test] + fn test_iter() { + let err = DataFusionError::Collection(vec![ + DataFusionError::Plan("a".to_string()), + DataFusionError::Collection(vec![ + DataFusionError::Plan("b".to_string()), + DataFusionError::Plan("c".to_string()), + ]), + ]); + let errs = err.iter().collect::>(); + assert_eq!(errs.len(), 3); + assert_eq!(errs[0].strip_backtrace(), "Error during planning: a"); + assert_eq!(errs[1].strip_backtrace(), "Error during planning: b"); + assert_eq!(errs[2].strip_backtrace(), "Error during planning: c"); + } } diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 0f9dbec722c2..7ac836ef3aeb 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -261,8 +261,9 @@ fn get_valid_types_with_scalar_udf( TypeSignature::UserDefined => match func.coerce_types(current_types) { Ok(coerced_types) => Ok(vec![coerced_types]), Err(e) => exec_err!( - "Function '{}' user-defined coercion failed with {e:?}", - func.name() + "Function '{}' user-defined coercion failed with {:?}", + func.name(), + e.strip_backtrace() ), }, TypeSignature::OneOf(signatures) => { @@ -304,8 +305,9 @@ fn get_valid_types_with_aggregate_udf( Ok(coerced_types) => vec![coerced_types], Err(e) => { return exec_err!( - "Function '{}' user-defined coercion failed with {e:?}", - func.name() + "Function '{}' user-defined coercion failed with {:?}", + func.name(), + e.strip_backtrace() ) } }, @@ -332,8 +334,9 @@ fn get_valid_types_with_window_udf( Ok(coerced_types) => vec![coerced_types], Err(e) => { return exec_err!( - "Function '{}' user-defined coercion failed with {e:?}", - func.name() + "Function '{}' user-defined coercion failed with {:?}", + func.name(), + e.strip_backtrace() ) } }, diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 50d89dea6763..05782e6ecd75 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -25,6 +25,7 @@ use crate::utils::{ CheckColumnsSatisfyExprsPurpose, }; +use datafusion_common::error::DataFusionErrorBuilder; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_common::{not_impl_err, plan_err, Result}; use datafusion_common::{RecursionUnnestOption, UnnestOptions}; @@ -574,10 +575,15 @@ impl SqlToRel<'_, S> { empty_from: bool, planner_context: &mut PlannerContext, ) -> Result> { - projection - .into_iter() - .map(|expr| self.sql_select_to_rex(expr, plan, empty_from, planner_context)) - .collect::>>() + let mut prepared_select_exprs = vec![]; + let mut error_builder = DataFusionErrorBuilder::new(); + for expr in projection { + match self.sql_select_to_rex(expr, plan, empty_from, planner_context) { + Ok(expr) => prepared_select_exprs.push(expr), + Err(err) => error_builder.add_error(err), + } + } + error_builder.error_or(prepared_select_exprs) } /// Generate a relational expression from a select SQL expression diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs index 3ddbe373ecd3..2579f2397228 100644 --- a/datafusion/sql/src/set_expr.rs +++ b/datafusion/sql/src/set_expr.rs @@ -16,7 +16,9 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, plan_err, Diagnostic, Result, Span}; +use datafusion_common::{ + not_impl_err, plan_err, DataFusionError, Diagnostic, Result, Span, +}; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier, Spanned}; @@ -39,8 +41,19 @@ impl SqlToRel<'_, S> { } => { let left_span = Span::try_from_sqlparser_span(left.span()); let right_span = Span::try_from_sqlparser_span(right.span()); - let left_plan = self.set_expr_to_plan(*left, planner_context)?; - let right_plan = self.set_expr_to_plan(*right, planner_context)?; + let left_plan = self.set_expr_to_plan(*left, planner_context); + let right_plan = self.set_expr_to_plan(*right, planner_context); + let (left_plan, right_plan) = match (left_plan, right_plan) { + (Ok(left_plan), Ok(right_plan)) => (left_plan, right_plan), + (Err(left_err), Err(right_err)) => { + return Err(DataFusionError::Collection(vec![ + left_err, right_err, + ])); + } + (Err(err), _) | (_, Err(err)) => { + return Err(err); + } + }; self.validate_set_expr_num_of_columns( op, left_span, @@ -49,6 +62,7 @@ impl SqlToRel<'_, S> { &right_plan, set_expr_span, )?; + self.set_operation_to_plan(op, left_plan, right_plan, set_quantifier) } SetExpr::Query(q) => self.query_to_plan(*q, planner_context), diff --git a/datafusion/sql/tests/cases/collection.rs b/datafusion/sql/tests/cases/collection.rs new file mode 100644 index 000000000000..59704d6445b3 --- /dev/null +++ b/datafusion/sql/tests/cases/collection.rs @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::{assert_contains, DataFusionError}; +use datafusion_sql::planner::SqlToRel; +use sqlparser::{dialect::GenericDialect, parser::Parser}; + +use crate::{MockContextProvider, MockSessionState}; + +fn do_query(sql: &'static str) -> DataFusionError { + let dialect = GenericDialect {}; + let statement = Parser::new(&dialect) + .try_with_sql(sql) + .expect("unable to create parser") + .parse_statement() + .expect("unable to parse query"); + let state = MockSessionState::default(); + let context = MockContextProvider { state }; + let sql_to_rel = SqlToRel::new(&context); + sql_to_rel + .sql_statement_to_plan(statement) + .expect_err("expected error") +} + +#[test] +fn test_collect_select_items() { + let query = "SELECT first_namex, last_namex FROM person"; + let error = do_query(query); + let errors = error.iter().collect::>(); + assert_eq!(errors.len(), 2); + assert!(errors[0] + .to_string() + .contains("No field named first_namex.")); + assert_contains!(errors[1].to_string(), "No field named last_namex."); +} + +#[test] +fn test_collect_set_exprs() { + let query = "SELECT first_namex FROM person UNION ALL SELECT last_namex FROM person"; + let error = do_query(query); + let errors = error.iter().collect::>(); + assert_eq!(errors.len(), 2); + assert_contains!(errors[0].to_string(), "No field named first_namex."); + assert_contains!(errors[1].to_string(), "No field named last_namex."); +} diff --git a/datafusion/sql/tests/cases/mod.rs b/datafusion/sql/tests/cases/mod.rs index 48574984c0ca..b3eedcdc41e3 100644 --- a/datafusion/sql/tests/cases/mod.rs +++ b/datafusion/sql/tests/cases/mod.rs @@ -15,5 +15,6 @@ // specific language governing permissions and limitations // under the License. +mod collection; mod diagnostic; mod plan_to_sql; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 22af160f723f..c514458d4a27 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4422,10 +4422,17 @@ fn plan_create_index() { } } -fn assert_field_not_found(err: DataFusionError, name: &str) { - let err = match err { - DataFusionError::Diagnostic(_, err) => *err, - err => err, +fn assert_field_not_found(mut err: DataFusionError, name: &str) { + let err = loop { + match err { + DataFusionError::Diagnostic(_, wrapped_err) => { + err = *wrapped_err; + } + DataFusionError::Collection(errs) => { + err = errs.into_iter().next().unwrap(); + } + err => break err, + } }; match err { DataFusionError::SchemaError { .. } => { diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/error.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/error.rs index 5bb40aca2ab8..ae56c0260564 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/error.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/error.rs @@ -30,7 +30,7 @@ pub enum DFSqlLogicTestError { #[error("SqlLogicTest error(from sqllogictest-rs crate): {0}")] SqlLogicTest(#[from] TestError), /// Error from datafusion - #[error("DataFusion error: {0}")] + #[error("DataFusion error: {}", .0.strip_backtrace())] DataFusion(#[from] DataFusionError), /// Error returned when SQL is syntactically incorrect. #[error("SQL Parser error: {0}")] diff --git a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index 7e2af5b9cbc9..5a94ba9c0583 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -148,7 +148,7 @@ SELECT query error DataFusion error: Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type create table foo as values (1), ('foo'); -query error No function matches +query error user-defined coercion failed select 1 group by substr(''); # Error in filter should be reported diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 838387965e6a..b9699dfd5c06 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -267,16 +267,24 @@ statement error Did you mean 'to_timestamp_seconds'? SELECT to_TIMESTAMPS_second(v2) from test; # Aggregate function -query error DataFusion error: Error during planning: Invalid function 'counter' +query error SELECT counter(*) from test; +---- +DataFusion error: Error during planning: Invalid function 'counter'. +Did you mean 'count'? + # Aggregate function statement error Did you mean 'stddev'? SELECT STDEV(v1) from test; # Aggregate function -statement error DataFusion error: Error during planning: Invalid function 'covaria'.\nDid you mean 'covar'? +statement error SELECT COVARIA(1,1); +---- +DataFusion error: Error during planning: Invalid function 'covaria'. +Did you mean 'covar'? + # Window function statement error diff --git a/datafusion/sqllogictest/test_files/identifiers.slt b/datafusion/sqllogictest/test_files/identifiers.slt index f1b2777bece0..755d617e7a2a 100644 --- a/datafusion/sqllogictest/test_files/identifiers.slt +++ b/datafusion/sqllogictest/test_files/identifiers.slt @@ -90,16 +90,16 @@ drop table case_insensitive_test statement ok CREATE TABLE test("Column1" string) AS VALUES ('content1'); -statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\. +statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\. SELECT COLumn1 from test -statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\. +statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\. SELECT Column1 from test -statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\. +statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\. SELECT column1 from test -statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\. +statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\. SELECT "column1" from test statement ok diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 0650f978d3a8..e12bdca37e6f 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1156,7 +1156,7 @@ SELECT * FROM empty_table statement ok CREATE TABLE case_sensitive_table("INT32" int) AS VALUES (1), (2), (3), (4), (5); -statement error DataFusion error: Schema error: No field named int32. Valid fields are case_sensitive_table."INT32". +statement error DataFusion error: Schema error: No field named int32\. Valid fields are case_sensitive_table\."INT32"\. select "int32" from case_sensitive_table query I @@ -1799,7 +1799,7 @@ select a + b from (select 1 as a, 2 as b, 1 as "a + b"); 3 # Can't reference an output column by expression over projection. -query error DataFusion error: Schema error: No field named a. Valid fields are "a \+ Int64\(1\)"\. +query error DataFusion error: Schema error: No field named a\. Valid fields are "a \+ Int64\(1\)"\. select a + 1 from (select a+1 from (select 1 as a)); query I @@ -1834,7 +1834,7 @@ statement ok DROP TABLE test; # Can't reference an unqualified column by a qualified name -query error DataFusion error: Schema error: No field named t1.v1. Column names are case sensitive. You can use double quotes to refer to the "t1.v1" column or set the datafusion.sql_parser.enable_ident_normalization configuration. Valid fields are "t1.v1". +query error DataFusion error: Schema error: No field named t1\.v1\. Column names are case sensitive\. You can use double quotes to refer to the "t1\.v1" column or set the datafusion\.sql_parser\.enable_ident_normalization configuration\. Valid fields are "t1\.v1"\. SELECT t1.v1 FROM (SELECT 1 AS "t1.v1"); # Test issue: https://github.com/apache/datafusion/issues/14124