Skip to content

Commit b337fbc

Browse files
changsun20alamb
andauthored
feat: Attach Diagnostic to more than one column errors in scalar_subquery and in_subquery (#15143)
* feat: diagnostic to more than one column error * test: improve test case * feat: multiple "in" queries diagnostic * refactor: reuse code in subquery.rs * chore: clearer note message * fix: use plain Spans * fix: better error messages * fix: remove the specific subquery type * fix: use iter_union to create full_span * fix: CI fail problems * Update expected error message --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent ac989af commit b337fbc

File tree

11 files changed

+194
-21
lines changed

11 files changed

+194
-21
lines changed

datafusion/expr/src/expr_fn.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ use arrow::compute::kernels::cast_utils::{
3737
parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month,
3838
};
3939
use arrow::datatypes::{DataType, Field};
40-
use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference};
40+
use datafusion_common::{plan_err, Column, Result, ScalarValue, Spans, TableReference};
4141
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
4242
use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
4343
use sqlparser::ast::NullTreatment;
@@ -252,6 +252,7 @@ pub fn exists(subquery: Arc<LogicalPlan>) -> Expr {
252252
subquery: Subquery {
253253
subquery,
254254
outer_ref_columns,
255+
spans: Spans::new(),
255256
},
256257
negated: false,
257258
})
@@ -264,6 +265,7 @@ pub fn not_exists(subquery: Arc<LogicalPlan>) -> Expr {
264265
subquery: Subquery {
265266
subquery,
266267
outer_ref_columns,
268+
spans: Spans::new(),
267269
},
268270
negated: true,
269271
})
@@ -277,6 +279,7 @@ pub fn in_subquery(expr: Expr, subquery: Arc<LogicalPlan>) -> Expr {
277279
Subquery {
278280
subquery,
279281
outer_ref_columns,
282+
spans: Spans::new(),
280283
},
281284
false,
282285
))
@@ -290,6 +293,7 @@ pub fn not_in_subquery(expr: Expr, subquery: Arc<LogicalPlan>) -> Expr {
290293
Subquery {
291294
subquery,
292295
outer_ref_columns,
296+
spans: Spans::new(),
293297
},
294298
true,
295299
))
@@ -301,6 +305,7 @@ pub fn scalar_subquery(subquery: Arc<LogicalPlan>) -> Expr {
301305
Expr::ScalarSubquery(Subquery {
302306
subquery,
303307
outer_ref_columns,
308+
spans: Spans::new(),
304309
})
305310
}
306311

datafusion/expr/src/expr_schema.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use arrow::compute::can_cast_types;
3030
use arrow::datatypes::{DataType, Field};
3131
use datafusion_common::{
3232
not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema,
33-
Result, TableReference,
33+
Result, Spans, TableReference,
3434
};
3535
use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer;
3636
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
@@ -617,6 +617,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result<Subq
617617
Ok(Subquery {
618618
subquery: Arc::new(new_plan),
619619
outer_ref_columns: subquery.outer_ref_columns,
620+
spans: Spans::new(),
620621
})
621622
}
622623

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ use datafusion_common::tree_node::{
5555
use datafusion_common::{
5656
aggregate_functional_dependencies, internal_err, plan_err, Column, Constraints,
5757
DFSchema, DFSchemaRef, DataFusionError, Dependency, FunctionalDependence,
58-
FunctionalDependencies, ParamValues, Result, ScalarValue, TableReference,
58+
FunctionalDependencies, ParamValues, Result, ScalarValue, Spans, TableReference,
5959
UnnestOptions,
6060
};
6161
use indexmap::IndexSet;
@@ -939,14 +939,17 @@ impl LogicalPlan {
939939
}))
940940
}
941941
LogicalPlan::Subquery(Subquery {
942-
outer_ref_columns, ..
942+
outer_ref_columns,
943+
spans,
944+
..
943945
}) => {
944946
self.assert_no_expressions(expr)?;
945947
let input = self.only_input(inputs)?;
946948
let subquery = LogicalPlanBuilder::from(input).build()?;
947949
Ok(LogicalPlan::Subquery(Subquery {
948950
subquery: Arc::new(subquery),
949951
outer_ref_columns: outer_ref_columns.clone(),
952+
spans: spans.clone(),
950953
}))
951954
}
952955
LogicalPlan::SubqueryAlias(SubqueryAlias { alias, .. }) => {
@@ -3615,6 +3618,8 @@ pub struct Subquery {
36153618
pub subquery: Arc<LogicalPlan>,
36163619
/// The outer references used in the subquery
36173620
pub outer_ref_columns: Vec<Expr>,
3621+
/// Span information for subquery projection columns
3622+
pub spans: Spans,
36183623
}
36193624

36203625
impl Normalizeable for Subquery {
@@ -3649,6 +3654,7 @@ impl Subquery {
36493654
Subquery {
36503655
subquery: plan,
36513656
outer_ref_columns: self.outer_ref_columns.clone(),
3657+
spans: Spans::new(),
36523658
}
36533659
}
36543660
}

datafusion/expr/src/logical_plan/tree_node.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,12 @@ impl TreeNode for LogicalPlan {
159159
LogicalPlan::Subquery(Subquery {
160160
subquery,
161161
outer_ref_columns,
162+
spans,
162163
}) => subquery.map_elements(f)?.update_data(|subquery| {
163164
LogicalPlan::Subquery(Subquery {
164165
subquery,
165166
outer_ref_columns,
167+
spans,
166168
})
167169
}),
168170
LogicalPlan::SubqueryAlias(SubqueryAlias {

datafusion/optimizer/src/analyzer/type_coercion.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,12 +311,14 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> {
311311
Expr::ScalarSubquery(Subquery {
312312
subquery,
313313
outer_ref_columns,
314+
spans,
314315
}) => {
315316
let new_plan =
316317
analyze_internal(self.schema, Arc::unwrap_or_clone(subquery))?.data;
317318
Ok(Transformed::yes(Expr::ScalarSubquery(Subquery {
318319
subquery: Arc::new(new_plan),
319320
outer_ref_columns,
321+
spans,
320322
})))
321323
}
322324
Expr::Exists(Exists { subquery, negated }) => {
@@ -329,6 +331,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> {
329331
subquery: Subquery {
330332
subquery: Arc::new(new_plan),
331333
outer_ref_columns: subquery.outer_ref_columns,
334+
spans: subquery.spans,
332335
},
333336
negated,
334337
})))
@@ -352,6 +355,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> {
352355
let new_subquery = Subquery {
353356
subquery: Arc::new(new_plan),
354357
outer_ref_columns: subquery.outer_ref_columns,
358+
spans: subquery.spans,
355359
};
356360
Ok(Transformed::yes(Expr::InSubquery(InSubquery::new(
357361
Box::new(expr.cast_to(&common_type, self.schema)?),
@@ -1049,7 +1053,7 @@ mod test {
10491053
use crate::test::{assert_analyzed_plan_eq, assert_analyzed_plan_with_config_eq};
10501054
use datafusion_common::config::ConfigOptions;
10511055
use datafusion_common::tree_node::{TransformedResult, TreeNode};
1052-
use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue};
1056+
use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue, Spans};
10531057
use datafusion_expr::expr::{self, InSubquery, Like, ScalarFunction};
10541058
use datafusion_expr::logical_plan::{EmptyRelation, Projection, Sort};
10551059
use datafusion_expr::test::function_stub::avg_udaf;
@@ -2089,6 +2093,7 @@ mod test {
20892093
Subquery {
20902094
subquery: empty_int32,
20912095
outer_ref_columns: vec![],
2096+
spans: Spans::new(),
20922097
},
20932098
false,
20942099
));
@@ -2114,6 +2119,7 @@ mod test {
21142119
Subquery {
21152120
subquery: empty_int64,
21162121
outer_ref_columns: vec![],
2122+
spans: Spans::new(),
21172123
},
21182124
false,
21192125
));
@@ -2138,6 +2144,7 @@ mod test {
21382144
Subquery {
21392145
subquery: empty_inside,
21402146
outer_ref_columns: vec![],
2147+
spans: Spans::new(),
21412148
},
21422149
false,
21432150
));

datafusion/optimizer/src/scalar_subquery_to_join.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -731,9 +731,7 @@ mod tests {
731731
.project(vec![col("customer.c_custkey")])?
732732
.build()?;
733733

734-
let expected = "Invalid (non-executable) plan after Analyzer\
735-
\ncaused by\
736-
\nError during planning: Scalar subquery should only return one column";
734+
let expected = "Error during planning: Scalar subquery should only return one column, but found 4: orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice";
737735
assert_analyzer_check_err(vec![], plan, expected);
738736
Ok(())
739737
}
@@ -793,9 +791,7 @@ mod tests {
793791
.project(vec![col("customer.c_custkey")])?
794792
.build()?;
795793

796-
let expected = "Invalid (non-executable) plan after Analyzer\
797-
\ncaused by\
798-
\nError during planning: Scalar subquery should only return one column";
794+
let expected = "Error during planning: Scalar subquery should only return one column, but found 2: orders.o_custkey, orders.o_orderkey";
799795
assert_analyzer_check_err(vec![], plan, expected);
800796
Ok(())
801797
}

datafusion/sql/src/expr/subquery.rs

Lines changed: 89 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,11 @@
1616
// under the License.
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
19-
use datafusion_common::{DFSchema, Result};
20-
use datafusion_expr::expr::Exists;
21-
use datafusion_expr::expr::InSubquery;
22-
use datafusion_expr::{Expr, Subquery};
19+
use datafusion_common::{plan_err, DFSchema, Diagnostic, Result, Span, Spans};
20+
use datafusion_expr::expr::{Exists, InSubquery};
21+
use datafusion_expr::{Expr, LogicalPlan, Subquery};
2322
use sqlparser::ast::Expr as SQLExpr;
24-
use sqlparser::ast::Query;
23+
use sqlparser::ast::{Query, SelectItem, SetExpr};
2524
use std::sync::Arc;
2625

2726
impl<S: ContextProvider> SqlToRel<'_, S> {
@@ -41,6 +40,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
4140
subquery: Subquery {
4241
subquery: Arc::new(sub_plan),
4342
outer_ref_columns,
43+
spans: Spans::new(),
4444
},
4545
negated,
4646
}))
@@ -56,15 +56,37 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
5656
) -> Result<Expr> {
5757
let old_outer_query_schema =
5858
planner_context.set_outer_query_schema(Some(input_schema.clone().into()));
59+
60+
let mut spans = Spans::new();
61+
if let SetExpr::Select(select) = subquery.body.as_ref() {
62+
for item in &select.projection {
63+
if let SelectItem::UnnamedExpr(SQLExpr::Identifier(ident)) = item {
64+
if let Some(span) = Span::try_from_sqlparser_span(ident.span) {
65+
spans.add_span(span);
66+
}
67+
}
68+
}
69+
}
70+
5971
let sub_plan = self.query_to_plan(subquery, planner_context)?;
6072
let outer_ref_columns = sub_plan.all_out_ref_exprs();
6173
planner_context.set_outer_query_schema(old_outer_query_schema);
62-
let expr = Box::new(self.sql_to_expr(expr, input_schema, planner_context)?);
74+
75+
self.validate_single_column(
76+
&sub_plan,
77+
spans.clone(),
78+
"Too many columns! The subquery should only return one column",
79+
"Select only one column in the subquery",
80+
)?;
81+
82+
let expr_obj = self.sql_to_expr(expr, input_schema, planner_context)?;
83+
6384
Ok(Expr::InSubquery(InSubquery::new(
64-
expr,
85+
Box::new(expr_obj),
6586
Subquery {
6687
subquery: Arc::new(sub_plan),
6788
outer_ref_columns,
89+
spans,
6890
},
6991
negated,
7092
)))
@@ -78,12 +100,72 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
78100
) -> Result<Expr> {
79101
let old_outer_query_schema =
80102
planner_context.set_outer_query_schema(Some(input_schema.clone().into()));
103+
let mut spans = Spans::new();
104+
if let SetExpr::Select(select) = subquery.body.as_ref() {
105+
for item in &select.projection {
106+
if let SelectItem::ExprWithAlias { alias, .. } = item {
107+
if let Some(span) = Span::try_from_sqlparser_span(alias.span) {
108+
spans.add_span(span);
109+
}
110+
}
111+
}
112+
}
81113
let sub_plan = self.query_to_plan(subquery, planner_context)?;
82114
let outer_ref_columns = sub_plan.all_out_ref_exprs();
83115
planner_context.set_outer_query_schema(old_outer_query_schema);
116+
117+
self.validate_single_column(
118+
&sub_plan,
119+
spans.clone(),
120+
"Too many columns! The subquery should only return one column",
121+
"Select only one column in the subquery",
122+
)?;
123+
84124
Ok(Expr::ScalarSubquery(Subquery {
85125
subquery: Arc::new(sub_plan),
86126
outer_ref_columns,
127+
spans,
87128
}))
88129
}
130+
131+
fn validate_single_column(
132+
&self,
133+
sub_plan: &LogicalPlan,
134+
spans: Spans,
135+
error_message: &str,
136+
help_message: &str,
137+
) -> Result<()> {
138+
if sub_plan.schema().fields().len() > 1 {
139+
let sub_schema = sub_plan.schema();
140+
let field_names = sub_schema.field_names();
141+
142+
plan_err!("{}: {}", error_message, field_names.join(", ")).map_err(|err| {
143+
let diagnostic = self.build_multi_column_diagnostic(
144+
spans,
145+
error_message,
146+
help_message,
147+
);
148+
err.with_diagnostic(diagnostic)
149+
})
150+
} else {
151+
Ok(())
152+
}
153+
}
154+
155+
fn build_multi_column_diagnostic(
156+
&self,
157+
spans: Spans,
158+
error_message: &str,
159+
help_message: &str,
160+
) -> Diagnostic {
161+
let full_span = Span::union_iter(spans.0.iter().cloned());
162+
let mut diagnostic = Diagnostic::new_error(error_message, full_span);
163+
164+
for (i, span) in spans.iter().skip(1).enumerate() {
165+
diagnostic.add_note(format!("Extra column {}", i + 1), Some(*span));
166+
}
167+
168+
diagnostic.add_help(help_message, None);
169+
diagnostic
170+
}
89171
}

datafusion/sql/src/relation/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
2121

2222
use datafusion_common::tree_node::{Transformed, TreeNode};
2323
use datafusion_common::{
24-
not_impl_err, plan_err, DFSchema, Diagnostic, Result, Span, TableReference,
24+
not_impl_err, plan_err, DFSchema, Diagnostic, Result, Span, Spans, TableReference,
2525
};
2626
use datafusion_expr::builder::subquery_alias;
2727
use datafusion_expr::{expr::Unnest, Expr, LogicalPlan, LogicalPlanBuilder};
@@ -211,13 +211,15 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
211211
LogicalPlan::Subquery(Subquery {
212212
subquery: input,
213213
outer_ref_columns,
214+
spans: Spans::new(),
214215
}),
215216
alias,
216217
)
217218
}
218219
plan => Ok(LogicalPlan::Subquery(Subquery {
219220
subquery: Arc::new(plan),
220221
outer_ref_columns,
222+
spans: Spans::new(),
221223
})),
222224
}
223225
}

0 commit comments

Comments
 (0)