Skip to content

Commit 8169342

Browse files
committed
fix: Ensure only tables or aliases that exist are projected (#52)
fix: More dangling references (#54) UPSTREAM NOTE: This PR was attempted to be upstreamed in apache#13405 - but it was not accepted due to the complexity it brought. Phillip needs to figure out what a good solution that solves our problem and can be upstreamed is.
1 parent 7909909 commit 8169342

File tree

4 files changed

+256
-28
lines changed

4 files changed

+256
-28
lines changed

datafusion/sql/src/unparser/ast.rs

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ impl QueryBuilder {
5555
self.order_by_kind = Some(value);
5656
self
5757
}
58+
pub fn get_order_by(&self) -> Option<OrderByKind> {
59+
self.order_by_kind.clone()
60+
}
5861
pub fn limit(&mut self, value: Option<ast::Expr>) -> &mut Self {
5962
self.limit = value;
6063
self
@@ -166,6 +169,9 @@ impl SelectBuilder {
166169
self.top = value;
167170
self
168171
}
172+
pub fn get_projection(&self) -> Vec<ast::SelectItem> {
173+
self.projection.clone()
174+
}
169175
pub fn projection(&mut self, value: Vec<ast::SelectItem>) -> &mut Self {
170176
self.projection = value;
171177
self
@@ -269,6 +275,9 @@ impl SelectBuilder {
269275
self.sort_by = value;
270276
self
271277
}
278+
pub fn get_sort_by(&self) -> Vec<ast::Expr> {
279+
self.sort_by.clone()
280+
}
272281
pub fn having(&mut self, value: Option<ast::Expr>) -> &mut Self {
273282
self.having = value;
274283
self
@@ -362,7 +371,9 @@ impl TableWithJoinsBuilder {
362371
self.relation = Some(value);
363372
self
364373
}
365-
374+
pub fn get_joins(&self) -> Vec<ast::Join> {
375+
self.joins.clone()
376+
}
366377
pub fn joins(&mut self, value: Vec<ast::Join>) -> &mut Self {
367378
self.joins = value;
368379
self
@@ -417,6 +428,25 @@ impl RelationBuilder {
417428
pub fn has_relation(&self) -> bool {
418429
self.relation.is_some()
419430
}
431+
pub fn get_name(&self) -> Option<String> {
432+
match self.relation {
433+
Some(TableFactorBuilder::Table(ref value)) => {
434+
value.name.as_ref().map(|a| a.to_string())
435+
}
436+
_ => None,
437+
}
438+
}
439+
pub fn get_alias(&self) -> Option<String> {
440+
match self.relation {
441+
Some(TableFactorBuilder::Table(ref value)) => {
442+
value.alias.as_ref().map(|a| a.name.to_string())
443+
}
444+
Some(TableFactorBuilder::Derived(ref value)) => {
445+
value.alias.as_ref().map(|a| a.name.to_string())
446+
}
447+
_ => None,
448+
}
449+
}
420450
pub fn table(&mut self, value: TableRelationBuilder) -> &mut Self {
421451
self.relation = Some(TableFactorBuilder::Table(value));
422452
self

datafusion/sql/src/unparser/plan.rs

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use super::{
2222
},
2323
rewrite::{
2424
inject_column_aliases_into_subquery, normalize_union_schema,
25-
rewrite_plan_for_sort_on_non_projected_fields,
25+
remove_dangling_identifiers, rewrite_plan_for_sort_on_non_projected_fields,
2626
subquery_alias_inner_query_and_columns, TableAliasRewriter,
2727
},
2828
utils::{
@@ -209,10 +209,70 @@ impl Unparser<'_> {
209209
)]);
210210
}
211211

212+
// Construct a list of all the identifiers present in query sources
213+
let mut all_idents = Vec::new();
214+
if let Some(source_alias) = relation_builder.get_alias() {
215+
all_idents.push(source_alias);
216+
} else if let Some(source_name) = relation_builder.get_name() {
217+
all_idents.push(source_name);
218+
}
219+
212220
let mut twj = select_builder.pop_from().unwrap();
221+
twj.get_joins()
222+
.iter()
223+
.for_each(|join| match &join.relation {
224+
ast::TableFactor::Table { alias, name, .. } => {
225+
if let Some(alias) = alias {
226+
all_idents.push(alias.name.to_string());
227+
} else {
228+
all_idents.push(name.to_string());
229+
}
230+
}
231+
ast::TableFactor::Derived { alias, .. } => {
232+
if let Some(alias) = alias {
233+
all_idents.push(alias.name.to_string());
234+
}
235+
}
236+
_ => {}
237+
});
238+
213239
twj.relation(relation_builder);
214240
select_builder.push_from(twj);
215241

242+
// Ensure that the projection contains references to sources that actually exist
243+
let mut projection = select_builder.get_projection();
244+
projection
245+
.iter_mut()
246+
.for_each(|select_item| match select_item {
247+
ast::SelectItem::UnnamedExpr(ast::Expr::CompoundIdentifier(idents)) => {
248+
remove_dangling_identifiers(idents, &all_idents);
249+
}
250+
_ => {}
251+
});
252+
253+
// Check the order by as well
254+
if let Some(query) = query.as_mut() {
255+
if let Some(OrderByKind::Expressions(mut order_by)) = query.get_order_by() {
256+
order_by.iter_mut().for_each(|sort_item| {
257+
if let ast::Expr::CompoundIdentifier(idents) = &mut sort_item.expr {
258+
remove_dangling_identifiers(idents, &all_idents);
259+
}
260+
});
261+
262+
query.order_by(OrderByKind::Expressions(order_by));
263+
}
264+
}
265+
266+
// Order by could be a sort in the select builder
267+
let mut sort = select_builder.get_sort_by();
268+
sort.iter_mut().for_each(|sort_item| {
269+
if let ast::Expr::CompoundIdentifier(idents) = sort_item {
270+
remove_dangling_identifiers(idents, &all_idents);
271+
}
272+
});
273+
274+
select_builder.projection(projection);
275+
216276
Ok(SetExpr::Select(Box::new(select_builder.build()?)))
217277
}
218278

datafusion/sql/src/unparser/rewrite.rs

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use datafusion_common::{
2525
};
2626
use datafusion_expr::expr::{Alias, UNNEST_COLUMN_PREFIX};
2727
use datafusion_expr::{Expr, LogicalPlan, Projection, Sort, SortExpr};
28-
use sqlparser::ast::Ident;
28+
use sqlparser::ast::{display_separated, Ident};
2929

3030
/// Normalize the schema of a union plan to remove qualifiers from the schema fields and sort expressions.
3131
///
@@ -418,3 +418,68 @@ impl TreeNodeRewriter for TableAliasRewriter<'_> {
418418
}
419419
}
420420
}
421+
422+
/// Takes an input list of identifiers and a list of identifiers that are available from relations or joins.
423+
/// Removes any table identifiers that are not present in the list of available identifiers, retains original column names.
424+
pub fn remove_dangling_identifiers(
425+
idents: &mut Vec<Ident>,
426+
available_idents: &Vec<String>,
427+
) -> () {
428+
if idents.len() > 1 {
429+
let ident_source = display_separated(
430+
&idents
431+
.clone()
432+
.into_iter()
433+
.take(idents.len() - 1)
434+
.collect::<Vec<Ident>>(),
435+
".",
436+
)
437+
.to_string();
438+
// If the identifier is not present in the list of all identifiers, it refers to a table that does not exist
439+
if !available_idents.contains(&ident_source) {
440+
let Some(last) = idents.last() else {
441+
unreachable!("CompoundIdentifier must have a last element");
442+
};
443+
// Reset the identifiers to only the last element, which is the column name
444+
*idents = vec![last.clone()];
445+
}
446+
}
447+
}
448+
449+
#[cfg(test)]
450+
mod test {
451+
use super::*;
452+
453+
#[test]
454+
fn test_remove_dangling_identifiers() {
455+
let tests = vec![
456+
(vec![], vec![Ident::new("column1".to_string())]),
457+
(
458+
vec!["table1.table2".to_string()],
459+
vec![
460+
Ident::new("table1".to_string()),
461+
Ident::new("table2".to_string()),
462+
Ident::new("column1".to_string()),
463+
],
464+
),
465+
(
466+
vec!["table1".to_string()],
467+
vec![Ident::new("column1".to_string())],
468+
),
469+
];
470+
471+
for test in tests {
472+
let test_in = test.0;
473+
let test_out = test.1;
474+
475+
let mut idents = vec![
476+
Ident::new("table1".to_string()),
477+
Ident::new("table2".to_string()),
478+
Ident::new("column1".to_string()),
479+
];
480+
481+
remove_dangling_identifiers(&mut idents, &test_in);
482+
assert_eq!(idents, test_out);
483+
}
484+
}
485+
}

0 commit comments

Comments
 (0)