Skip to content

Commit 0b717a5

Browse files
phillipleblancsgrebnov
authored andcommitted
Support unparsing UNION for distinct results (#75)
UPSTREAM NOTE: This was submitted upstream: apache#15814 and will be in DF 48
1 parent d2fedaf commit 0b717a5

File tree

3 files changed

+44
-1
lines changed

3 files changed

+44
-1
lines changed

datafusion/sql/src/unparser/ast.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ pub struct QueryBuilder {
3232
fetch: Option<ast::Fetch>,
3333
locks: Vec<ast::LockClause>,
3434
for_clause: Option<ast::ForClause>,
35+
// If true, we need to unparse LogicalPlan::Union as a SQL `UNION` rather than a `UNION ALL`.
36+
distinct_union: bool,
3537
}
3638

3739
#[allow(dead_code)]
@@ -78,6 +80,13 @@ impl QueryBuilder {
7880
self.for_clause = value;
7981
self
8082
}
83+
pub fn distinct_union(&mut self) -> &mut Self {
84+
self.distinct_union = true;
85+
self
86+
}
87+
pub fn is_distinct_union(&self) -> bool {
88+
self.distinct_union
89+
}
8190
pub fn build(&self) -> Result<ast::Query, BuilderError> {
8291
let order_by = self
8392
.order_by_kind
@@ -115,6 +124,7 @@ impl QueryBuilder {
115124
fetch: Default::default(),
116125
locks: Default::default(),
117126
for_clause: Default::default(),
127+
distinct_union: false,
118128
}
119129
}
120130
}

datafusion/sql/src/unparser/plan.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,23 @@ impl Unparser<'_> {
605605
false,
606606
);
607607
}
608+
609+
// If this distinct is the parent of a Union and we're in a query context,
610+
// then we need to unparse as a `UNION` rather than a `UNION ALL`.
611+
if let Distinct::All(input) = distinct {
612+
if matches!(input.as_ref(), LogicalPlan::Union(_)) {
613+
if let Some(query_mut) = query.as_mut() {
614+
query_mut.distinct_union();
615+
return self.select_to_sql_recursively(
616+
input.as_ref(),
617+
query,
618+
select,
619+
relation,
620+
);
621+
}
622+
}
623+
}
624+
608625
let (select_distinct, input) = match distinct {
609626
Distinct::All(input) => (ast::Distinct::Distinct, input.as_ref()),
610627
Distinct::On(on) => {
@@ -889,14 +906,23 @@ impl Unparser<'_> {
889906
return internal_err!("UNION operator requires at least 2 inputs");
890907
}
891908

909+
let set_quantifier =
910+
if query.as_ref().is_some_and(|q| q.is_distinct_union()) {
911+
// Setting the SetQuantifier to None will unparse as a `UNION`
912+
// rather than a `UNION ALL`.
913+
ast::SetQuantifier::None
914+
} else {
915+
ast::SetQuantifier::All
916+
};
917+
892918
// Build the union expression tree bottom-up by reversing the order
893919
// note that we are also swapping left and right inputs because of the rev
894920
let union_expr = input_exprs
895921
.into_iter()
896922
.rev()
897923
.reduce(|a, b| SetExpr::SetOperation {
898924
op: ast::SetOperator::Union,
899-
set_quantifier: ast::SetQuantifier::All,
925+
set_quantifier,
900926
left: Box::new(b),
901927
right: Box::new(a),
902928
})

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,13 @@ fn roundtrip_statement() -> Result<()> {
172172
UNION ALL
173173
SELECT j3_string AS col1, j3_id AS id FROM j3
174174
) AS subquery GROUP BY col1, id ORDER BY col1 ASC, id ASC"#,
175+
r#"SELECT col1, id FROM (
176+
SELECT j1_string AS col1, j1_id AS id FROM j1
177+
UNION
178+
SELECT j2_string AS col1, j2_id AS id FROM j2
179+
UNION
180+
SELECT j3_string AS col1, j3_id AS id FROM j3
181+
) AS subquery ORDER BY col1 ASC, id ASC"#,
175182
"SELECT id, count(*) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
176183
last_name, sum(id) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
177184
first_name from person",

0 commit comments

Comments
 (0)