Skip to content

Commit ffab86a

Browse files
committed
improve dos
1 parent 177ceea commit ffab86a

File tree

2 files changed

+77
-61
lines changed

2 files changed

+77
-61
lines changed

datafusion/common/src/tree_node.rs

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ pub trait TreeNode: Sized {
100100
/// Visit the tree node using the given [`TreeNodeVisitor`], performing a
101101
/// depth-first walk of the node and its children.
102102
///
103+
/// See also:
104+
/// * [`Self::mutate`] to rewrite `TreeNode`s in place
105+
/// * [`Self::rewrite`] to rewrite owned `TreeNode`s
106+
///
103107
/// Consider the following tree structure:
104108
/// ```text
105109
/// ParentNode
@@ -144,6 +148,10 @@ pub trait TreeNode: Sized {
144148
/// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for
145149
/// recursively transforming [`TreeNode`]s.
146150
///
151+
/// See also:
152+
/// * [`Self::mutate`] to rewrite `TreeNode`s in place
153+
/// * [`Self::visit`] for inspecting (without modification) `TreeNode`s
154+
///
147155
/// Consider the following tree structure:
148156
/// ```text
149157
/// ParentNode
@@ -175,7 +183,11 @@ pub trait TreeNode: Sized {
175183
}
176184

177185
/// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for
178-
/// recursively mutating / rewriting [`TreeNode`]s in place
186+
/// recursively mutating / rewriting [`TreeNode`]s in place.
187+
///
188+
/// See also:
189+
/// * [`Self::rewrite`] to rewrite owned `TreeNode`s
190+
/// * [`Self::visit`] for inspecting (without modification) `TreeNode`s
179191
///
180192
/// Consider the following tree structure:
181193
/// ```text
@@ -184,7 +196,7 @@ pub trait TreeNode: Sized {
184196
/// right: ChildNode2
185197
/// ```
186198
///
187-
/// Here, the nodes would be mutated using the following order:
199+
/// Here, the nodes would be mutataed in the following order:
188200
/// ```text
189201
/// TreeNodeMutator::f_down(ParentNode)
190202
/// TreeNodeMutator::f_down(ChildNode1)
@@ -422,13 +434,17 @@ pub trait TreeNode: Sized {
422434

423435
/// Rewrite the node's children in place using `F`.
424436
///
425-
/// Using [`Self::map_children`], the owned API, is more ideomatic and
426-
/// has clearer semantics on error (the node is consumed). However, it requires
427-
/// copying the interior fields of the tree node during rewrite
437+
/// On error, `self` is left partially rewritten.
438+
///
439+
/// # Notes
440+
///
441+
/// Using [`Self::map_children`], the owned API, has clearer semantics on
442+
/// error (the node is consumed). However, it requires copying the interior
443+
/// fields of the tree node during rewrite.
428444
///
429445
/// This API writes the nodes in place, which can be faster as it avoids
430-
/// copying. However, one downside is that the tree node can be left in an
431-
/// partially rewritten state when an error occurs.
446+
/// copying, but leaves the tree node in an partially rewritten state when
447+
/// an error occurs.
432448
fn mutate_children<F: FnMut(&mut Self) -> Result<Transformed<()>>>(
433449
&mut self,
434450
_f: F,
@@ -492,30 +508,35 @@ pub trait TreeNodeRewriter: Sized {
492508
}
493509
}
494510

495-
/// Trait for potentially rewriting tree of [`TreeNode`]s in place
511+
/// Trait for mutating (rewriting in place) [`TreeNode`]s in place
496512
///
497-
/// See [`TreeNodeRewriter`] for rewriting owned tree ndoes
498-
/// See [`TreeNodeVisitor`] for visiting, but not changing, tree nodes
513+
/// # See Also:
514+
/// * [`TreeNodeRewriter`] for rewriting owned `TreeNode`e
515+
/// * [`TreeNodeVisitor`] for visiting, but not changing, `TreeNode`s
499516
pub trait TreeNodeMutator: Sized {
500-
/// The node type to rewrite.
517+
/// The node type to mutating.
501518
type Node: TreeNode;
502519

503-
/// Invoked while traversing down the tree before any children are rewritten.
504-
/// Default implementation returns the node as is and continues recursion.
520+
/// Invoked while traversing down the tree before any children are mutated.
521+
/// Default implementation does nothing to the node and continues recursion.
522+
///
523+
/// # Notes
505524
///
506-
/// Since this mutates the nodes in place, the returned Transformed object
525+
/// As the node maybe mutated in place, the returned [`Transformed`] object
507526
/// returns `()` (no data).
508527
///
509528
/// If the node's children are changed by `f_down`, the *new* children are
510-
/// visited, not the original.
529+
/// visited, not the original children.
511530
fn f_down(&mut self, _node: &mut Self::Node) -> Result<Transformed<()>> {
512531
Ok(Transformed::no(()))
513532
}
514533

515-
/// Invoked while traversing up the tree after all children have been rewritten.
516-
/// Default implementation returns the node as is and continues recursion.
534+
/// Invoked while traversing up the tree after all children have been mutated.
535+
/// Default implementation does nothing to the node and continues recursion.
536+
///
537+
/// # Notes
517538
///
518-
/// Since this mutates the nodes in place, the returned Transformed object
539+
/// As the node maybe mutated in place, the returned [`Transformed`] object
519540
/// returns `()` (no data).
520541
fn f_up(&mut self, _node: &mut Self::Node) -> Result<Transformed<()>> {
521542
Ok(Transformed::no(()))
@@ -603,7 +624,7 @@ impl<T> Transformed<T> {
603624
/// Invokes f(), depending on the value of self.tnr.
604625
///
605626
/// This is used to conditionally apply a function during a f_up tree
606-
/// traversal, if the result of children traversal was `Continue`.
627+
/// traversal, if the result of children traversal was `[`TreeNodeRecursion::Continue`].
607628
///
608629
/// Handling [`TreeNodeRecursion::Continue`] and [`TreeNodeRecursion::Stop`]
609630
/// is straightforward, but [`TreeNodeRecursion::Jump`] can behave differently
@@ -650,11 +671,12 @@ impl<T> Transformed<T> {
650671

651672
impl Transformed<()> {
652673
/// Invoke the given function `f` and combine the transformed state with
653-
/// the current state,
674+
/// the current state:
675+
///
676+
/// * if `f` returns an Err, returns that err
654677
///
655-
/// if f() returns an Err, returns that err
656-
/// If f() returns Ok, returns a true transformed flag if either self or
657-
/// the result of f() was transformed
678+
/// * If `f` returns Ok, sets `self.transformed` to `true` if either self or
679+
/// the result of `f` were transformed.
658680
pub fn and_then<F>(self, f: F) -> Result<Transformed<()>>
659681
where
660682
F: FnOnce() -> Result<Transformed<()>>,

datafusion/expr/src/logical_plan/mutate.rs

Lines changed: 32 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ use datafusion_common::{Column, DFSchema, DFSchemaRef};
2424
use std::sync::{Arc, OnceLock};
2525

2626
impl LogicalPlan {
27-
/// applies the closure `f` to each expression of this node, potentially
28-
/// rewriting it in place
27+
/// applies `f` to each expression of this node, potentially rewriting it in
28+
/// place
2929
///
30-
/// If the closure returns an error, the error is returned and the expressions
31-
/// are left in a partially modified state
30+
/// If `f` returns an error, the error is returned and the expressions are
31+
/// left in a partially modified state
3232
pub fn rewrite_exprs<F>(&mut self, mut f: F) -> Result<Transformed<()>>
3333
where
3434
F: FnMut(&mut Expr) -> Result<Transformed<()>>,
@@ -66,7 +66,6 @@ impl LogicalPlan {
6666
// 1. the first part is `on.len()` equijoin expressions, and the struct of each expr is `left-on = right-on`.
6767
// 2. the second part is non-equijoin(filter).
6868
LogicalPlan::Join(Join { on, filter, .. }) => {
69-
// don't look at the equijoin expressions as a whole
7069
let exprs = on
7170
.iter_mut()
7271
.flat_map(|(e1, e2)| std::iter::once(e1).chain(std::iter::once(e2)));
@@ -88,10 +87,7 @@ impl LogicalPlan {
8887
LogicalPlan::TableScan(TableScan { filters, .. }) => {
8988
rewrite_expr_iter_mut(filters.iter_mut(), f)
9089
}
91-
LogicalPlan::Unnest(Unnest { column, .. }) => {
92-
// it would be really nice to avoid this and instead have unnest have Expr
93-
rewrite_column(column, f)
94-
}
90+
LogicalPlan::Unnest(Unnest { column, .. }) => rewrite_column(column, f),
9591
LogicalPlan::Distinct(Distinct::On(DistinctOn {
9692
on_expr,
9793
select_expr,
@@ -125,12 +121,14 @@ impl LogicalPlan {
125121
}
126122
}
127123

128-
/// applies the closure `f` to each input of this node, in place.
124+
/// applies `f` to each input of this node, rewriting them in place.
129125
///
130-
/// Inputs include both direct children as well as any embedded
131-
/// `LogicalPlan`s embedded in `Expr::Exists`, etc.
126+
/// # Notes
127+
/// Inputs include both direct children as well as any embedded subquery
128+
/// `LogicalPlan`s, for example such as are in [`Expr::Exists`].
132129
///
133-
/// If Err is returned, the inputs may be in a partially modified state
130+
/// If `f` returns an `Err`, that Err is returned, and the inputs are left
131+
/// in a partially modified state
134132
pub fn rewrite_inputs<F>(&mut self, mut f: F) -> Result<Transformed<()>>
135133
where
136134
F: FnMut(&mut LogicalPlan) -> Result<Transformed<()>>,
@@ -202,7 +200,7 @@ impl LogicalPlan {
202200
children_result.and_then(|| self.rewrite_subqueries(&mut f))
203201
}
204202

205-
/// applies the closure `f` to LogicalPlans in any subquery expressions
203+
/// applies `f` to LogicalPlans in any subquery expressions
206204
///
207205
/// If Err is returned, the plan may be left in a partially modified state
208206
fn rewrite_subqueries<F>(&mut self, mut f: F) -> Result<Transformed<()>>
@@ -220,7 +218,7 @@ impl LogicalPlan {
220218
}
221219
}
222220

223-
/// writes each element in the iterator using `f`
221+
/// writes each `&mut Expr` in the iterator using `f`
224222
fn rewrite_expr_iter_mut<'a, F>(
225223
i: impl IntoIterator<Item = &'a mut Expr>,
226224
mut f: F,
@@ -233,17 +231,18 @@ where
233231
}
234232

235233
/// A temporary node that is left in place while rewriting the children of a
236-
/// logical plan to ensure that the logical plan is always in a valid state
234+
/// [`LogicalPlan`]. This is necessary to ensure that the `LogicalPlan` is
235+
/// always in a valid state (from the Rust perspective)
237236
static PLACEHOLDER: OnceLock<Arc<LogicalPlan>> = OnceLock::new();
238237

239-
/// Applies f() to rewrite each child of the logical plan, replacing the existing
240-
/// node with the result, trying to avoid clone'ing.
238+
/// Applies `f` to rewrite the existing node, while avoiding `clone`'ing as much
239+
/// as possiblw.
241240
///
242-
/// TODO we would remove the Arc<LogicalPlan> nonsense entirely from LogicalPlan
243-
/// and have it own its inputs however, for now do a horrible hack and swap out the value
244-
/// of the Arc to avoid cloning the entire plan
241+
/// TODO eventually remove `Arc<LogicalPlan>` from `LogicalPlan` and have it own
242+
/// its inputs, so this code would not be needed. However, for now we try and
243+
/// unwrap the `Arc` which avoids `clone`ing in most cases.
245244
///
246-
/// On error, the node will be partially rewritten (left with a placeholder logical plan)
245+
/// On error, node be left with a placeholder logical plan
247246
fn rewrite_arc<F>(node: &mut Arc<LogicalPlan>, mut f: F) -> Result<Transformed<()>>
248247
where
249248
F: FnMut(&mut LogicalPlan) -> Result<Transformed<()>>,
@@ -263,16 +262,11 @@ where
263262
std::mem::swap(node, &mut new_node);
264263

265264
// try to update existing node, if it isn't shared with others
266-
let mut new_node = match Arc::try_unwrap(new_node) {
267-
Ok(node) => {
268-
//println!("Unwrapped arc yay");
269-
node
270-
}
271-
Err(node) => {
272-
//println!("Failed to unwrap arc boo");
273-
node.as_ref().clone()
274-
}
275-
};
265+
let mut new_node = Arc::try_unwrap(new_node)
266+
// if None is returned, there is another reference to this
267+
// LogicalPlan, so we must clone instead
268+
.unwrap_or_else(|node| node.as_ref().clone());
269+
276270
// apply the actual transform
277271
let result = f(&mut new_node)?;
278272

@@ -283,24 +277,23 @@ where
283277
Ok(result)
284278
}
285279

286-
/// Rewrites a `Column` in place using the provided closure
280+
/// Rewrites a [`Column`] in place using the provided closure
287281
fn rewrite_column<F>(column: &mut Column, mut f: F) -> Result<Transformed<()>>
288282
where
289283
F: FnMut(&mut Expr) -> Result<Transformed<()>>,
290284
{
291-
// Column's isn't an Expr to visit, but the closure is to rewrite Exprs.
292-
// So we need to make a temporary Expr to rewrite and then put it bac,
285+
// Since `Column`'s isn't an `Expr`, but the closure in terms of Exprs,
286+
// we make a temporary Expr to rewrite and then put it back
293287

294288
let mut swap_column = Column::new_unqualified("TEMP_unnest_column");
295289
std::mem::swap(column, &mut swap_column);
296290

297291
let mut expr = Expr::Column(swap_column);
298292
let result = f(&mut expr)?;
299-
// put the column back
293+
// Get the rewritten column
300294
let Expr::Column(mut swap_column) = expr else {
301295
return internal_err!(
302-
"Rewrite of Unnest expr must return Column, returned {:?}",
303-
expr
296+
"Rewrite of Column Expr must return Column, returned {expr:?}"
304297
);
305298
};
306299
// put the rewritten column back
@@ -310,6 +303,7 @@ where
310303

311304
/// Rewrites all expressions for an Extension node "in place"
312305
/// (it currently has to copy values because there are no APIs for in place modification)
306+
/// TODO file ticket for inplace modificiation of Extension nodes
313307
///
314308
/// Should be removed when we have an API for in place modifications of the
315309
/// extension to avoid these copies

0 commit comments

Comments
 (0)