Skip to content

Commit 230f31b

Browse files
authored
Speed up optimize_projection (apache#15787)
* save * fmt
1 parent ab0da55 commit 230f31b

File tree

4 files changed

+102
-7
lines changed

4 files changed

+102
-7
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/optimizer/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,15 @@ regex-syntax = "0.8.0"
5555

5656
[dev-dependencies]
5757
async-trait = { workspace = true }
58+
criterion = { workspace = true }
5859
ctor = { workspace = true }
5960
datafusion-functions-aggregate = { workspace = true }
6061
datafusion-functions-window = { workspace = true }
6162
datafusion-functions-window-common = { workspace = true }
6263
datafusion-sql = { workspace = true }
6364
env_logger = { workspace = true }
6465
insta = { workspace = true }
66+
67+
[[bench]]
68+
name = "projection_unnecessary"
69+
harness = false
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::datatypes::{DataType, Field, Schema};
19+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
20+
use datafusion_common::ToDFSchema;
21+
use datafusion_common::{Column, TableReference};
22+
use datafusion_expr::{logical_plan::LogicalPlan, projection_schema, Expr};
23+
use datafusion_optimizer::optimize_projections::is_projection_unnecessary;
24+
use std::sync::Arc;
25+
26+
fn is_projection_unnecessary_old(
27+
input: &LogicalPlan,
28+
proj_exprs: &[Expr],
29+
) -> datafusion_common::Result<bool> {
30+
// First check if all expressions are trivial (cheaper operation than `projection_schema`)
31+
if !proj_exprs
32+
.iter()
33+
.all(|expr| matches!(expr, Expr::Column(_) | Expr::Literal(_)))
34+
{
35+
return Ok(false);
36+
}
37+
let proj_schema = projection_schema(input, proj_exprs)?;
38+
Ok(&proj_schema == input.schema())
39+
}
40+
41+
fn create_plan_with_many_exprs(num_exprs: usize) -> (LogicalPlan, Vec<Expr>) {
42+
// Create schema with many fields
43+
let fields = (0..num_exprs)
44+
.map(|i| Field::new(format!("col{}", i), DataType::Int32, false))
45+
.collect::<Vec<_>>();
46+
let schema = Schema::new(fields);
47+
48+
// Create table scan
49+
let table_scan = LogicalPlan::EmptyRelation(datafusion_expr::EmptyRelation {
50+
produce_one_row: true,
51+
schema: Arc::new(schema.clone().to_dfschema().unwrap()),
52+
});
53+
54+
// Create projection expressions (just column references)
55+
let exprs = (0..num_exprs)
56+
.map(|i| Expr::Column(Column::new(None::<TableReference>, format!("col{}", i))))
57+
.collect();
58+
59+
(table_scan, exprs)
60+
}
61+
62+
fn benchmark_is_projection_unnecessary(c: &mut Criterion) {
63+
let (plan, exprs) = create_plan_with_many_exprs(1000);
64+
65+
let mut group = c.benchmark_group("projection_unnecessary_comparison");
66+
67+
group.bench_function("is_projection_unnecessary_new", |b| {
68+
b.iter(|| black_box(is_projection_unnecessary(&plan, &exprs).unwrap()))
69+
});
70+
71+
group.bench_function("is_projection_unnecessary_old", |b| {
72+
b.iter(|| black_box(is_projection_unnecessary_old(&plan, &exprs).unwrap()))
73+
});
74+
75+
group.finish();
76+
}
77+
78+
criterion_group!(benches, benchmark_is_projection_unnecessary);
79+
criterion_main!(benches);

datafusion/optimizer/src/optimize_projections/mod.rs

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@ use datafusion_common::{
3131
use datafusion_expr::expr::Alias;
3232
use datafusion_expr::Unnest;
3333
use datafusion_expr::{
34-
logical_plan::LogicalPlan, projection_schema, Aggregate, Distinct, Expr, Projection,
35-
TableScan, Window,
34+
logical_plan::LogicalPlan, Aggregate, Distinct, Expr, Projection, TableScan, Window,
3635
};
3736

3837
use crate::optimize_projections::required_indices::RequiredIndices;
@@ -785,13 +784,24 @@ fn rewrite_projection_given_requirements(
785784
/// Projection is unnecessary, when
786785
/// - input schema of the projection, output schema of the projection are same, and
787786
/// - all projection expressions are either Column or Literal
788-
fn is_projection_unnecessary(input: &LogicalPlan, proj_exprs: &[Expr]) -> Result<bool> {
789-
// First check if all expressions are trivial (cheaper operation than `projection_schema`)
790-
if !proj_exprs.iter().all(is_expr_trivial) {
787+
pub fn is_projection_unnecessary(
788+
input: &LogicalPlan,
789+
proj_exprs: &[Expr],
790+
) -> Result<bool> {
791+
// First check if the number of expressions is equal to the number of fields in the input schema.
792+
if proj_exprs.len() != input.schema().fields().len() {
791793
return Ok(false);
792794
}
793-
let proj_schema = projection_schema(input, proj_exprs)?;
794-
Ok(&proj_schema == input.schema())
795+
Ok(input.schema().iter().zip(proj_exprs.iter()).all(
796+
|((field_relation, field_name), expr)| {
797+
// Check if the expression is a column and if it matches the field name
798+
if let Expr::Column(col) = expr {
799+
col.relation.as_ref() == field_relation && col.name.eq(field_name.name())
800+
} else {
801+
false
802+
}
803+
},
804+
))
795805
}
796806

797807
#[cfg(test)]

0 commit comments

Comments
 (0)