Skip to content

Commit

Permalink
fix multivalue ff index creation regression (#1543)
Browse files Browse the repository at this point in the history
fixes multivalue ff regression by avoiding using `get_val`. Line::train calls repeatedly get_val, but get_val implementation on Column for multivalues is very slow. The fix is to use the iterator instead. Longterm fix should be to remove get_val access in serialization.

Old Code

test fastfield::bench::bench_multi_value_ff_merge_few_segments                                                           ... bench:  46,103,960 ns/iter (+/- 2,066,083)
test fastfield::bench::bench_multi_value_ff_merge_many_segments                                                          ... bench:  83,073,036 ns/iter (+/- 4,373,615)
est fastfield::bench::bench_multi_value_ff_merge_many_segments_log_merge                                                ... bench:  64,178,576 ns/iter (+/- 1,466,700)

Current

running 3 tests
test fastfield::multivalued::bench::bench_multi_value_ff_merge_few_segments                                              ... bench:  57,379,523 ns/iter (+/- 3,220,787)
test fastfield::multivalued::bench::bench_multi_value_ff_merge_many_segments                                             ... bench:  90,831,688 ns/iter (+/- 1,445,486)
test fastfield::multivalued::bench::bench_multi_value_ff_merge_many_segments_log_merge                                   ... bench: 158,313,264 ns/iter (+/- 28,823,250)

With Fix

running 3 tests
test fastfield::multivalued::bench::bench_multi_value_ff_merge_few_segments                                              ... bench:  57,635,671 ns/iter (+/- 2,707,361)
test fastfield::multivalued::bench::bench_multi_value_ff_merge_many_segments                                             ... bench:  91,468,712 ns/iter (+/- 11,393,581)
test fastfield::multivalued::bench::bench_multi_value_ff_merge_many_segments_log_merge                                   ... bench:  73,909,138 ns/iter (+/- 15,846,097)
  • Loading branch information
PSeitz authored Sep 23, 2022
1 parent f9c3947 commit 20c8790
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions fastfield_codecs/src/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,17 @@ impl Line {

// Same as train, but the intercept is only estimated from provided sample positions
pub fn estimate(ys: &dyn Column, sample_positions: &[u64]) -> Self {
Self::train_from(ys, sample_positions.iter().cloned())
Self::train_from(
ys,
sample_positions
.iter()
.cloned()
.map(|pos| (pos, ys.get_val(pos))),
)
}

// Intercept is only computed from provided positions
fn train_from(ys: &dyn Column, positions: impl Iterator<Item = u64>) -> Self {
fn train_from(ys: &dyn Column, positions_and_values: impl Iterator<Item = (u64, u64)>) -> Self {
let num_vals = if let Some(num_vals) = NonZeroU64::new(ys.num_vals() - 1) {
num_vals
} else {
Expand Down Expand Up @@ -114,11 +120,8 @@ impl Line {
intercept: 0,
};
let heuristic_shift = y0.wrapping_sub(MID_POINT);
line.intercept = positions
.map(|pos| {
let y = ys.get_val(pos);
y.wrapping_sub(line.eval(pos))
})
line.intercept = positions_and_values
.map(|(pos, y)| y.wrapping_sub(line.eval(pos)))
.min_by_key(|&val| val.wrapping_sub(heuristic_shift))
.unwrap_or(0u64); //< Never happens.
line
Expand All @@ -135,7 +138,10 @@ impl Line {
/// This function is only invariable by translation if all of the
/// `ys` are packaged into half of the space. (See heuristic below)
pub fn train(ys: &dyn Column) -> Self {
Self::train_from(ys, 0..ys.num_vals())
Self::train_from(
ys,
ys.iter().enumerate().map(|(pos, val)| (pos as u64, val)),
)
}
}

Expand Down

0 comments on commit 20c8790

Please sign in to comment.