Skip to content

Commit e00d993

Browse files
committed
feat: add avg_fanout summary metric to HashJoinExec
1 parent 421098a commit e00d993

File tree

2 files changed

+16
-1
lines changed

2 files changed

+16
-1
lines changed

datafusion/physical-plan/src/joins/hash_join/stream.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,12 @@ impl HashJoinStream {
537537
.probe_hit_rate
538538
.add_part(distinct_right_indices_count);
539539

540+
self.join_metrics.avg_fanout.add_part(left_indices.len());
541+
542+
self.join_metrics
543+
.avg_fanout
544+
.add_total(distinct_right_indices_count);
545+
540546
// apply join filter if exists
541547
let (left_indices, right_indices) = if let Some(filter) = &self.filter {
542548
apply_join_filter_to_indices(

datafusion/physical-plan/src/joins/utils.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ use std::sync::Arc;
2727
use std::task::{Context, Poll};
2828

2929
use crate::joins::SharedBitmapBuilder;
30-
use crate::metrics::{self, BaselineMetrics, ExecutionPlanMetricsSet, MetricBuilder, MetricType};
30+
use crate::metrics::{
31+
self, BaselineMetrics, ExecutionPlanMetricsSet, MetricBuilder, MetricType,
32+
};
3133
use crate::projection::{ProjectionExec, ProjectionExpr};
3234
use crate::{
3335
ColumnStatistics, ExecutionPlan, ExecutionPlanProperties, Partitioning, Statistics,
@@ -1331,6 +1333,8 @@ pub(crate) struct BuildProbeJoinMetrics {
13311333
pub(crate) output_batches: metrics::Count,
13321334
/// Fraction of probe rows that found more than one match
13331335
pub(crate) probe_hit_rate: metrics::RatioMetrics,
1336+
/// Average number of build matches per matched probe row
1337+
pub(crate) avg_fanout: metrics::RatioMetrics,
13341338
}
13351339

13361340
// This Drop implementation updates the elapsed compute part of the metrics.
@@ -1381,6 +1385,10 @@ impl BuildProbeJoinMetrics {
13811385
.with_type(MetricType::SUMMARY)
13821386
.ratio_metrics("probe_hit_rate", partition);
13831387

1388+
let avg_fanout = MetricBuilder::new(metrics)
1389+
.with_type(MetricType::SUMMARY)
1390+
.ratio_metrics("avg_fanout", partition);
1391+
13841392
Self {
13851393
build_time,
13861394
build_input_batches,
@@ -1392,6 +1400,7 @@ impl BuildProbeJoinMetrics {
13921400
output_batches,
13931401
baseline,
13941402
probe_hit_rate,
1403+
avg_fanout,
13951404
}
13961405
}
13971406
}

0 commit comments

Comments
 (0)