-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: When consuming Substrait, temporarily rename clashing duplicate columns #11329
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,7 +59,7 @@ use datafusion::{ | |
prelude::{Column, SessionContext}, | ||
scalar::ScalarValue, | ||
}; | ||
use std::collections::HashMap; | ||
use std::collections::{HashMap, HashSet}; | ||
use std::str::FromStr; | ||
use std::sync::Arc; | ||
use substrait::proto::exchange_rel::ExchangeKind; | ||
|
@@ -403,22 +403,33 @@ pub async fn from_substrait_rel( | |
let mut input = LogicalPlanBuilder::from( | ||
from_substrait_rel(ctx, input, extensions).await?, | ||
); | ||
let mut names: HashSet<String> = HashSet::new(); | ||
let mut exprs: Vec<Expr> = vec![]; | ||
for e in &p.expressions { | ||
let x = | ||
from_substrait_rex(ctx, e, input.clone().schema(), extensions) | ||
.await?; | ||
// if the expression is WindowFunction, wrap in a Window relation | ||
// before returning and do not add to list of this Projection's expression list | ||
// otherwise, add expression to the Projection's expression list | ||
match &*x { | ||
Expr::WindowFunction(_) => { | ||
input = input.window(vec![x.as_ref().clone()])?; | ||
exprs.push(x.as_ref().clone()); | ||
} | ||
_ => { | ||
exprs.push(x.as_ref().clone()); | ||
} | ||
if let Expr::WindowFunction(_) = x.as_ref() { | ||
// Adding the same expression here and in the project below | ||
// works because the project's builder uses columnize_expr(..) | ||
// to transform it into a column reference | ||
input = input.window(vec![x.as_ref().clone()])? | ||
} | ||
// Ensure the expression has a unique display name, so that project's | ||
// validate_unique_names doesn't fail | ||
let name = x.display_name()?; | ||
let mut new_name = name.clone(); | ||
let mut i = 0; | ||
while names.contains(&new_name) { | ||
new_name = format!("{}__temp__{}", name, i); | ||
i += 1; | ||
} | ||
names.insert(new_name.clone()); | ||
if new_name != name { | ||
exprs.push(x.as_ref().clone().alias(new_name.clone())); | ||
} else { | ||
exprs.push(x.as_ref().clone()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was confused at first why this was When I investigated, it seems that it is due to the fact that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Turns out I can remove them: #11337 |
||
} | ||
} | ||
input.project(exprs)?.build() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could probably save a clone by putting the
names.insert
call below theexprs.push
callsThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made this change in #11337 as I had the code open anyways