-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Simplify and encapsulate window function state management #6621
Changes from 1 commit
0e8ca7d
6decd54
fb0c17e
7d3e361
4542799
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ | |
//! that can be evaluated at runtime during query execution | ||
|
||
use crate::window::partition_evaluator::PartitionEvaluator; | ||
use crate::window::window_expr::{BuiltinWindowState, NthValueKind, NthValueState}; | ||
use crate::window::window_expr::{NthValueKind, NthValueState}; | ||
use crate::window::{BuiltInWindowFunctionExpr, WindowAggState}; | ||
use crate::PhysicalExpr; | ||
use arrow::array::{Array, ArrayRef}; | ||
|
@@ -152,11 +152,6 @@ pub(crate) struct NthValueEvaluator { | |
} | ||
|
||
impl PartitionEvaluator for NthValueEvaluator { | ||
fn state(&self) -> Result<BuiltinWindowState> { | ||
// If we do not use state we just return Default | ||
Ok(BuiltinWindowState::NthValue(self.state.clone())) | ||
} | ||
|
||
fn update_state( | ||
&mut self, | ||
state: &WindowAggState, | ||
|
@@ -169,9 +164,29 @@ impl PartitionEvaluator for NthValueEvaluator { | |
Ok(()) | ||
} | ||
|
||
fn set_state(&mut self, state: &BuiltinWindowState) -> Result<()> { | ||
if let BuiltinWindowState::NthValue(nth_value_state) = state { | ||
self.state = nth_value_state.clone() | ||
fn memoize(&mut self, state: &mut WindowAggState) -> Result<()> { | ||
let out = &state.out_col; | ||
let size = out.len(); | ||
let (is_prunable, new_prunable) = match self.state.kind { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know that this is not introduced here (it was introduced by me) but while reading the code maybe instead of |
||
NthValueKind::First => { | ||
let n_range = | ||
state.window_frame_range.end - state.window_frame_range.start; | ||
(n_range > 0 && size > 0, true) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in case we use |
||
} | ||
NthValueKind::Last => (true, false), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in case we use |
||
NthValueKind::Nth(n) => { | ||
let n_range = | ||
state.window_frame_range.end - state.window_frame_range.start; | ||
(n_range >= (n as usize) && size >= (n as usize), true) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In case we use |
||
} | ||
}; | ||
if is_prunable { | ||
if self.state.finalized_result.is_none() && new_prunable { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in case we use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree the intent is clearer using the term |
||
let result = ScalarValue::try_from_array(out, size - 1)?; | ||
self.state.finalized_result = Some(result); | ||
} | ||
state.window_frame_range.start = | ||
state.window_frame_range.end.saturating_sub(1); | ||
} | ||
Ok(()) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,6 @@ | |
|
||
//! Partition evaluation module | ||
|
||
use crate::window::window_expr::BuiltinWindowState; | ||
use crate::window::WindowAggState; | ||
use arrow::array::ArrayRef; | ||
use datafusion_common::Result; | ||
|
@@ -100,14 +99,6 @@ pub trait PartitionEvaluator: Debug + Send { | |
false | ||
} | ||
|
||
/// Returns the internal state of the window function | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The point of the PR is to get rid of the functions on this trait that serialized / set state (which has a single use)
||
/// | ||
/// Only used for stateful evaluation | ||
fn state(&self) -> Result<BuiltinWindowState> { | ||
// If we do not use state we just return Default | ||
Ok(BuiltinWindowState::Default) | ||
} | ||
|
||
/// Updates the internal state for window function | ||
/// | ||
/// Only used for stateful evaluation | ||
|
@@ -127,13 +118,14 @@ pub trait PartitionEvaluator: Debug + Send { | |
Ok(()) | ||
} | ||
|
||
/// Sets the internal state for window function | ||
/// | ||
/// Only used for stateful evaluation | ||
fn set_state(&mut self, _state: &BuiltinWindowState) -> Result<()> { | ||
Err(DataFusionError::NotImplemented( | ||
"set_state is not implemented for this window function".to_string(), | ||
)) | ||
/// When the window frame has a fixed beginning (e.g. UNBOUNDED | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While writing this explanation it occurred to me that the term There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will leave it named |
||
/// PRECEDING), for some functions such as FIRST_VALUE, LAST_VALUE and | ||
/// NTH_VALUE we can memoize the result. Once the result is calculated it | ||
/// will always stay the same. Hence, we do not need to keep past data | ||
/// as we process the entire dataset. This feature enables us to | ||
/// prune rows from the table. The default implementation does nothing. | ||
fn memoize(&mut self, _state: &mut WindowAggState) -> Result<()> { | ||
Ok(()) | ||
} | ||
|
||
/// Gets the range where the window function result is calculated. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -327,16 +327,7 @@ pub struct LeadLagState { | |
pub idx: usize, | ||
} | ||
|
||
#[derive(Debug, Clone, Default)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It turns out this structure is not used anywhere else |
||
pub enum BuiltinWindowState { | ||
Rank(RankState), | ||
NumRows(NumRowsState), | ||
NthValue(NthValueState), | ||
LeadLag(LeadLagState), | ||
#[default] | ||
Default, | ||
} | ||
|
||
/// Holds the state of evaluating a window function | ||
#[derive(Debug)] | ||
pub struct WindowAggState { | ||
/// The range that we calculate the window function | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is actually more readable as well as cleaning up the trait