From f4de3b9d50f171179e64f059582d6f19c58776c7 Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Mon, 26 Apr 2021 11:24:29 +0200 Subject: [PATCH 01/11] TY-1686: Implemented nDCG@k analytics for all accessible stats. Currently this is not implemented for initial ranking as it is currently not accessible. (Defaults to NaN.) Some parts are currently implemented in a performance wise suboptimal way. --- xayn-ai/src/analytics.rs | 505 +++++++++++++++++++++++++++++++++++- xayn-ai/src/reranker/mod.rs | 7 +- 2 files changed, 507 insertions(+), 5 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index c6d201e43..c710362b1 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -1,20 +1,517 @@ +use std::cmp::{min, Ordering}; + use crate::{ data::{document::DocumentHistory, document_data::DocumentDataWithMab}, error::Error, reranker::systems, + Relevance, }; +/// Which k to use for nDCG@k +const DEFAULT_NDCG_K: usize = 2; #[derive(Clone)] -pub struct Analytics; +pub struct Analytics { + /// The nDCG@k score between the initial ranking and the relevance based ranking + pub ndcg_initial: f32, + /// The nDCG@k score between the LTR ranking and the relevance based ranking + pub ndcg_ltr: f32, + /// The nDCG@k score between the Context ranking and the relevance based ranking + pub ndcg_context: f32, + /// THe nDCG@k score between the final ranking and the relevance based ranking + pub ndcg_final_ranking: f32, +} pub(crate) struct AnalyticsSystem; impl systems::AnalyticsSystem for AnalyticsSystem { fn compute_analytics( &self, - _history: &[DocumentHistory], - _documents: &[DocumentDataWithMab], + history: &[DocumentHistory], + documents: &[DocumentDataWithMab], ) -> Result { - Ok(Analytics) + let mut relevances = Vec::new(); + let mut ltr_scores = Vec::new(); + let mut context_scores = Vec::new(); + let mut final_ranking_score = Vec::new(); + + for document in documents { + // - FIXME this is *slow* we probably want to + // have some 
lookup by DocumentId for + // this. Depending on a lot of factors it + // might make sense to create a hashmap once + // before the loop. (But that might also be slower + // depending of the size of history...). + // - FIXME the dart version doesn't hadnle the + // not found case? Should I panic? + // Currently I give it a relevance of 0, + // which will lead to the entry not having + // an effect on the final score which is nice + // so even if it can't happen it might be good + // to just do so anyway instead of panicing. + let relevance = history + .iter() + .find(|h| &h.id == &document.document_id.id) + .map(|h| match h.relevance { + Relevance::Low => 0., + Relevance::Medium => 1., + Relevance::High => 2., + }) + .unwrap_or(0.0); + relevances.push(relevance); + ltr_scores.push(document.ltr.ltr_score); + context_scores.push(document.context.context_value); + // nDCG expects higher scores to be better but for the ranking + // it's the oposite, the solution carried over from the dart impl + // is to multiply by -1. Another would be to have the max rank (or + // and number greater then it which isn't too big) and then use `max-rank`. + // While negative ranks work mathematically fine I'm not sure about + // rounding problems due to f32. I really can't judge it it's a problem + // or not. 
+ final_ranking_score.push(-(document.mab.rank as f32)); + } + + let pair_buffer = &mut Vec::with_capacity(relevances.len()); + + // FIXME with cloneable/resetable iterators we can eleminate this buffer + let buffer = &mut Vec::with_capacity(relevances.len()); + + let ndcg_ltr = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ltr_scores, + DEFAULT_NDCG_K, + pair_buffer, + buffer, + ); + + let ndcg_context = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + context_scores, + DEFAULT_NDCG_K, + pair_buffer, + buffer, + ); + + let ndcg_final_ranking = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + final_ranking_score, + DEFAULT_NDCG_K, + pair_buffer, + buffer, + ); + + Ok(Analytics { + //FIXME: We currently have no access to the initial score as thiss will require + // some changes to the main applications type state/component system this + // will be done in a followup PR. + ndcg_initial: f32::NAN, + ndcg_ltr, + ndcg_context, + ndcg_final_ranking, + }) + } +} + +fn calcuate_reordered_ndcg_at_k_score( + relevances: impl IntoIterator, + ordering_scores: impl IntoIterator, + k: usize, + pair_buffer: &mut Vec<(f32, f32)>, + buffer: &mut Vec, +) -> f32 { + reorder_relevances_based_on_scores_replacing_buffer(relevances, ordering_scores, pair_buffer); + copy_second_value_replacing_buffer(pair_buffer.drain(..), buffer); + ndcg_at_k(&buffer[..], k) +} + +fn reorder_relevances_based_on_scores_replacing_buffer( + relevances: impl IntoIterator, + ordering_scores: impl IntoIterator, + buffer: &mut Vec<(f32, f32)>, +) { + buffer.truncate(0); + buffer.extend(ordering_scores.into_iter().zip(relevances.into_iter())); + buffer + .sort_by(|(ord_sc_1, _), (ord_sc_2, _)| nan_safe_sort_desc_comparsion(ord_sc_1, ord_sc_2)); +} + +fn copy_second_value_replacing_buffer( + input: impl IntoIterator, + output: &mut Vec, +) { + output.truncate(0); + output.extend(input.into_iter().map(|(_, second)| second)); +} + +/// Calculates the 
nDCG@k, `k` defaults to 2 if `None` is passed in. +/// +/// This taks the first k values for the DCG score and the "best" k values +/// for the IDCG score and then calculates the nDCG score with that. +fn ndcg_at_k(scores: &[f32], k: usize) -> f32 { + // if we have less then k values we just use a smaller k + // it's mathematically equivalent to padding with 0 scores. + let k = min(k, scores.len()); + + let dcg_at_k = dcg(&scores[..k]); + + let other_scores = pick_k_highest_scores(scores, k); + let idcg_at_k = dcg(&other_scores); + + // if there is no ideal score our score pretent the ideal score is 1 + if idcg_at_k == 0.0 { + dcg_at_k + } else { + dcg_at_k / idcg_at_k + } +} + +/// Pick the k-highest values (as if score.sort() and then &score[..k]). +/// +/// If `NaN`'s is treated as the smallest possible value, i.e. +/// preferably not picked at all if possible. +/// +/// # Panics +/// +/// If `k > scores.len()` this will panic. +fn pick_k_highest_scores(scores: &[f32], k: usize) -> Vec { + let mut k_highest = Vec::from(&scores[..k]); + + // TODO: Potentially handle `NaN` better by treating them as + // "lowest possible" scores. + k_highest.sort_by(nan_safe_sort_desc_comparsion); + + for score in &scores[k..] { + let idx = k_highest + .binary_search_by(|other| nan_safe_sort_desc_comparsion(other, score)) + .unwrap_or_else(|not_found_insert_idx| not_found_insert_idx); + + if idx < k { + let _ = k_highest.pop(); + k_highest.insert(idx, *score); + } + } + + k_highest +} + +fn dcg(scores: &[f32]) -> f32 { + //Note: It migth seem to be faster to create two ndarrays and then use + // a / broadcast in the hope this will take advantage of SIMD at + // least on some platforms. But given that `scores` is more or + // less alwasys very small (e.g. k=2) this is unlikely to yield + // any benefits and migth even slow things down due to uneccesary + // allocation. If k is fixed we could use stack allocated buffers + // and a tight loop, which problably would be the fastest. 
+ + // a "simple commulative" sum is ok as we only use small number of scores (default k=2) + let mut sum = 0.; + for (i, score) in scores.iter().copied().enumerate() { + //it's i+2 as our i starts with 0, while the formular starts with 1 and uses i+1 + sum += (2f32.powf(score) - 1.) / (i as f32 + 2.).log2() + } + sum +} + +/// Use for getting a descending ordering of floats. +/// +/// `NaN`'s are treated as the smallest possible value +/// for this sorting they are also treated as equal to each other. +/// this is not standard comform but works for sorting. +fn nan_safe_sort_desc_comparsion(a: &f32, b: &f32) -> Ordering { + // switched a,b to have descending instead of ascending sorting + b.partial_cmp(a) + .unwrap_or_else(|| match (a.is_nan(), b.is_nan()) { + (true, true) => Ordering::Equal, + (true, false) => Ordering::Greater, + (false, true) => Ordering::Less, + (false, false) => unreachable!(), + }) +} + +#[cfg(test)] +mod tests { + + mod create_reordered_ndcg_at_k_score { + use super::super::*; + use float_cmp::approx_eq; + + #[test] + fn without_reordering() { + let buffer = &mut Vec::with_capacity(6); + let pair_buffer = &mut Vec::with_capacity(6); + + let relevances = &[1., 4., 10., 3., 0., 6.]; + let ordering_scores = &[12., 9., 7., 5., 4., 1.]; + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 2, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 4, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 100, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + + let relevances = &[-1., 7., -10., 3., 0., -6.]; + let ordering_scores = 
&[12., 9., 7., 5., 4., 1.]; + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 2, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 4, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); + + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 100, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); + } + + #[test] + fn with_reordering() { + let buffer = &mut Vec::with_capacity(6); + let pair_buffer = &mut Vec::with_capacity(6); + + let relevances = &[4., 10., 6., 0., 3., 1.]; + let ordering_scores = &[9., 7., 1., 4., 5., 12.]; + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 2, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 4, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 100, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + + let relevances = &[3., -10., 0., -1., 7., -6.]; + let ordering_scores = &[5., 7., 4., 12., 9., 1.]; + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 2, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 4, + pair_buffer, + 
buffer, + ); + assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); + + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 100, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); + } + + #[test] + fn buffers_can_be_any_capacity() { + let buffer = &mut Vec::new(); + let pair_buffer = &mut Vec::new(); + + let relevances = &[4., 10., 6., 0., 3., 1.]; + let ordering_scores = &[9., 7., 1., 4., 5., 12.]; + let res = calcuate_reordered_ndcg_at_k_score( + relevances.iter().copied(), + ordering_scores.iter().copied(), + 2, + pair_buffer, + buffer, + ); + assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + } + } + + mod ndcg_at_k { + use super::super::*; + use float_cmp::approx_eq; + #[test] + fn produces_expected_values_for_k_larger_then_input() { + let res = ndcg_at_k(&[1., 4., 10., 3., 0., 6.], 100); + assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + + let res = ndcg_at_k(&[-1., 7., -10., 3., 0., -6.], 100); + assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); + } + + #[test] + fn produces_expected_values_for_k_smaller_then_input() { + let res = ndcg_at_k(&[1., 4., 10., 3., 0., 6.], 2); + assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + let res = ndcg_at_k(&[1., 4., 10., 3., 0., 6.], 4); + assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + + let res = ndcg_at_k(&[-1., 7., -10., 3., 0., -6.], 2); + assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + let res = ndcg_at_k(&[-1., 7., -10., 3., 0., -6.], 4); + assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); + } + } + + mod dcg { + use super::super::*; + use float_cmp::approx_eq; + + #[test] + fn running_it_results_in_expected_results() { + //FIXME we should test for the result to be at most 1 float increament above/below the given value + // not that it's exact the same as "valid" changes in how we can do the calculation can lead 
to + // slightly different result due to rounding + assert!(approx_eq!( + f32, + dcg(&[3f32, 2., 3., 0., 1., 2.]), + 13.848263629272981, + ulps = 2 + )); + assert!(approx_eq!( + f32, + dcg(&[-3.2, -2., -4., 0., -1., -2.]), + -2.293710288714865, + ulps = 2 + )); + } + } + + mod pick_k_highest_scores { + use super::super::*; + + #[test] + fn picks_the_highest_values_and_only_them() { + let cases: &[(&[f32], &[f32])] = &[ + (&[3., 2., 1., 0.], &[3., 2.]), + (&[0., 1., 2., 3.], &[3., 2.]), + (&[-2., -2.], &[-2., -2.]), + (&[-30., 3., 2., 10., -3., 0.], &[10., 3.]), + (&[-3., 0., -1., -2.], &[0., -1.]), + ]; + + for (input, pick) in cases { + let res = pick_k_highest_scores(input, 2); + assert_eq!( + &*res, &**pick, + "res={:?}, expected={:?}, input={:?}", + res, pick, input + ); + } + } + + #[test] + fn nans_are_preferably_not_picked_at_all() { + let res = pick_k_highest_scores(&[3., 2., f32::NAN], 2); + assert_eq!(&*res, &[3., 2.]); + + let res = + pick_k_highest_scores(&[f32::NAN, 3., f32::NAN, f32::NAN, 2., 4., f32::NAN], 2); + assert_eq!(&*res, &[4., 3.]); + + let res = pick_k_highest_scores(&[f32::NAN, 3., 2., f32::NAN], 3); + assert_eq!(&res[..2], &[3., 2.]); + assert!(res[2].is_nan()); + + let res = pick_k_highest_scores(&[f32::NAN], 1); + assert_eq!(res.len(), 1); + assert!(res[0].is_nan()); + } + } + + mod nan_safe_sort_desc_comparsion { + use super::super::*; + + #[test] + fn sorting_sorts_in_the_right_order() { + let data = &mut [f32::NAN, 1., 5., f32::NAN, 4.]; + data.sort_by(nan_safe_sort_desc_comparsion); + + assert_eq!(&data[..3], &[5., 4., 1.]); + assert!(data[3].is_nan()); + assert!(data[4].is_nan()); + + let data = &mut [1., 5., 3., 4.]; + data.sort_by(nan_safe_sort_desc_comparsion); + + assert_eq!(&data[..], &[5., 4., 3., 1.]); + } + + #[test] + fn nans_compare_as_expected() { + assert_eq!( + nan_safe_sort_desc_comparsion(&f32::NAN, &f32::NAN), + Ordering::Equal + ); + assert_eq!( + nan_safe_sort_desc_comparsion(&-12., &f32::NAN), + Ordering::Less + 
); + assert_eq!( + nan_safe_sort_desc_comparsion(&f32::NAN, &-12.), + Ordering::Greater + ); + assert_eq!( + nan_safe_sort_desc_comparsion(&12., &f32::NAN), + Ordering::Less + ); + assert_eq!( + nan_safe_sort_desc_comparsion(&f32::NAN, &12.), + Ordering::Greater + ); + } } } diff --git a/xayn-ai/src/reranker/mod.rs b/xayn-ai/src/reranker/mod.rs index 97f9c34ef..9015d0654 100644 --- a/xayn-ai/src/reranker/mod.rs +++ b/xayn-ai/src/reranker/mod.rs @@ -597,7 +597,12 @@ mod tests { let cs = common_systems_with_fail!(analytics, MockAnalyticsSystem, compute_analytics, |_,_|); let mut reranker = Reranker::new(cs).unwrap(); - reranker.analytics = Some(Analytics); + reranker.analytics = Some(Analytics { + ndcg_initial: 0., + ndcg_ltr: 0., + ndcg_context: 0., + ndcg_final_ranking: 0., + }); let documents = car_interest_example::documents(); let history = history_for_prev_docs( &reranker.data.prev_documents.to_coi_system_data(), From a535aafc891b398002ccc4e77cd25e0fbb785a99 Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Mon, 26 Apr 2021 11:24:50 +0200 Subject: [PATCH 02/11] TY-1686: Improved implementation. --- xayn-ai/src/analytics.rs | 403 +++++++++++++++++---------------------- 1 file changed, 171 insertions(+), 232 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index c710362b1..717a4fcaa 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -1,4 +1,8 @@ -use std::cmp::{min, Ordering}; +use std::{ + cmp::Ordering, + collections::HashMap, + iter::{FromIterator, FusedIterator}, +}; use crate::{ data::{document::DocumentHistory, document_data::DocumentDataWithMab}, @@ -9,6 +13,7 @@ use crate::{ /// Which k to use for nDCG@k const DEFAULT_NDCG_K: usize = 2; +/// Calculated analytics data. 
#[derive(Clone)] pub struct Analytics { /// The nDCG@k score between the initial ranking and the relevance based ranking @@ -29,37 +34,33 @@ impl systems::AnalyticsSystem for AnalyticsSystem { history: &[DocumentHistory], documents: &[DocumentDataWithMab], ) -> Result { - let mut relevances = Vec::new(); - let mut ltr_scores = Vec::new(); - let mut context_scores = Vec::new(); - let mut final_ranking_score = Vec::new(); + // We need to be able to lookup relevances by document id. + // and linear search is most likely a bad idea (FIXME: + // check if `history` len is very small). So we create + // a hashmap for the lookups. + let relevance_lookups: HashMap<_, _> = { + history + .iter() + .map(|h_doc| (&h_doc.id, score_for_relevance(h_doc.relevance))) + .collect() + }; + + let mut paired_ltr_scores = Vec::new(); + let mut paired_context_scores = Vec::new(); + let mut paired_final_ranking_score = Vec::new(); for document in documents { - // - FIXME this is *slow* we probably want to - // have some lookup by DocumentId for - // this. Depending on a lot of factors it - // might make sense to create a hashmap once - // before the loop. (But that might also be slower - // depending of the size of history...). - // - FIXME the dart version doesn't hadnle the - // not found case? Should I panic? - // Currently I give it a relevance of 0, - // which will lead to the entry not having - // an effect on the final score which is nice - // so even if it can't happen it might be good - // to just do so anyway instead of panicing. - let relevance = history - .iter() - .find(|h| &h.id == &document.document_id.id) - .map(|h| match h.relevance { - Relevance::Low => 0., - Relevance::Medium => 1., - Relevance::High => 2., - }) + // We should never need the `_or(0.0)` but if we run into + // it it's best to give it a relevance of 0. As a document + // not in the history is irrelevant for this analytics. 
+ let relevance = relevance_lookups + .get(&document.document_id.id) + .copied() .unwrap_or(0.0); - relevances.push(relevance); - ltr_scores.push(document.ltr.ltr_score); - context_scores.push(document.context.context_value); + + paired_ltr_scores.push((relevance, document.ltr.ltr_score)); + paired_context_scores.push((relevance, document.context.context_value)); + // nDCG expects higher scores to be better but for the ranking // it's the oposite, the solution carried over from the dart impl // is to multiply by -1. Another would be to have the max rank (or @@ -67,37 +68,17 @@ impl systems::AnalyticsSystem for AnalyticsSystem { // While negative ranks work mathematically fine I'm not sure about // rounding problems due to f32. I really can't judge it it's a problem // or not. - final_ranking_score.push(-(document.mab.rank as f32)); + let final_ranking_desc = -(document.mab.rank as f32); + paired_final_ranking_score.push((relevance, final_ranking_desc)); } - let pair_buffer = &mut Vec::with_capacity(relevances.len()); - - // FIXME with cloneable/resetable iterators we can eleminate this buffer - let buffer = &mut Vec::with_capacity(relevances.len()); - - let ndcg_ltr = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ltr_scores, - DEFAULT_NDCG_K, - pair_buffer, - buffer, - ); - - let ndcg_context = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - context_scores, - DEFAULT_NDCG_K, - pair_buffer, - buffer, - ); - - let ndcg_final_ranking = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - final_ranking_score, - DEFAULT_NDCG_K, - pair_buffer, - buffer, - ); + let ndcg_ltr = calcuate_reordered_ndcg_at_k_score(&mut paired_ltr_scores, DEFAULT_NDCG_K); + + let ndcg_context = + calcuate_reordered_ndcg_at_k_score(&mut paired_context_scores, DEFAULT_NDCG_K); + + let ndcg_final_ranking = + calcuate_reordered_ndcg_at_k_score(&mut paired_final_ranking_score, DEFAULT_NDCG_K); Ok(Analytics { //FIXME: We currently have no 
access to the initial score as thiss will require @@ -111,50 +92,49 @@ impl systems::AnalyticsSystem for AnalyticsSystem { } } -fn calcuate_reordered_ndcg_at_k_score( - relevances: impl IntoIterator, - ordering_scores: impl IntoIterator, - k: usize, - pair_buffer: &mut Vec<(f32, f32)>, - buffer: &mut Vec, -) -> f32 { - reorder_relevances_based_on_scores_replacing_buffer(relevances, ordering_scores, pair_buffer); - copy_second_value_replacing_buffer(pair_buffer.drain(..), buffer); - ndcg_at_k(&buffer[..], k) -} - -fn reorder_relevances_based_on_scores_replacing_buffer( - relevances: impl IntoIterator, - ordering_scores: impl IntoIterator, - buffer: &mut Vec<(f32, f32)>, -) { - buffer.truncate(0); - buffer.extend(ordering_scores.into_iter().zip(relevances.into_iter())); - buffer - .sort_by(|(ord_sc_1, _), (ord_sc_2, _)| nan_safe_sort_desc_comparsion(ord_sc_1, ord_sc_2)); +/// Returns a score for the given `Relevance`. +fn score_for_relevance(relevance: Relevance) -> f32 { + match relevance { + Relevance::Low => 0., + Relevance::Medium => 1., + Relevance::High => 2., + } } -fn copy_second_value_replacing_buffer( - input: impl IntoIterator, - output: &mut Vec, -) { - output.truncate(0); - output.extend(input.into_iter().map(|(_, second)| second)); +/// Calculates the nDCG@k for given paired relevances. +/// +/// The input is a tuple of `(relevance, ordering_score)` pair, +/// where the `ordering_score` is used to reorder the relevances +/// based on sorting them in descending order. +/// +/// **Note that the `paired_relevances` are sorted in place.** +/// +/// After the reordering of the pairs the `relevance` values +/// are used to calculate the nDCG@k. +/// +/// ## NaN Handling. +/// +/// NaN values are treated as the lowest possible socres wrt. the sorting. +/// +/// If a `NaN` is in the k-first relevances the resulting nDCG@k score will be `NaN`. 
+fn calcuate_reordered_ndcg_at_k_score(paired_relevances: &mut [(f32, f32)], k: usize) -> f32 { + paired_relevances + .sort_by(|(_, ord_sc_1), (_, ord_sc_2)| nan_safe_sort_desc_comparsion(ord_sc_1, ord_sc_2)); + ndcg_at_k(paired_relevances.iter().map(|(rel, _ord)| *rel), k) } /// Calculates the nDCG@k, `k` defaults to 2 if `None` is passed in. /// /// This taks the first k values for the DCG score and the "best" k values /// for the IDCG score and then calculates the nDCG score with that. -fn ndcg_at_k(scores: &[f32], k: usize) -> f32 { - // if we have less then k values we just use a smaller k - // it's mathematically equivalent to padding with 0 scores. - let k = min(k, scores.len()); - - let dcg_at_k = dcg(&scores[..k]); +fn ndcg_at_k( + relevances: impl Iterator + Clone + ExactSizeIterator + FusedIterator, + k: usize, +) -> f32 { + let dcg_at_k = dcg(relevances.clone().take(k)); - let other_scores = pick_k_highest_scores(scores, k); - let idcg_at_k = dcg(&other_scores); + let ideal_relevances = pick_k_highest_sorted_desc(relevances, k); + let idcg_at_k = dcg(ideal_relevances.into_iter()); // if there is no ideal score our score pretent the ideal score is 1 if idcg_at_k == 0.0 { @@ -164,7 +144,7 @@ fn ndcg_at_k(scores: &[f32], k: usize) -> f32 { } } -/// Pick the k-highest values (as if score.sort() and then &score[..k]). +/// Pick the k-highest values in given iterator (as if a vector is sorted and then &sorted_score[..k]). /// /// If `NaN`'s is treated as the smallest possible value, i.e. /// preferably not picked at all if possible. @@ -172,28 +152,31 @@ fn ndcg_at_k(scores: &[f32], k: usize) -> f32 { /// # Panics /// /// If `k > scores.len()` this will panic. -fn pick_k_highest_scores(scores: &[f32], k: usize) -> Vec { - let mut k_highest = Vec::from(&scores[..k]); +//TODO: SmallVec? Buffer reuse? 
+fn pick_k_highest_sorted_desc( + mut scores: impl Iterator + ExactSizeIterator + FusedIterator, + k: usize, +) -> Vec { + let mut k_highest = Vec::from_iter((&mut scores).take(k)); - // TODO: Potentially handle `NaN` better by treating them as - // "lowest possible" scores. k_highest.sort_by(nan_safe_sort_desc_comparsion); - for score in &scores[k..] { + for score in scores { let idx = k_highest - .binary_search_by(|other| nan_safe_sort_desc_comparsion(other, score)) + .binary_search_by(|other| nan_safe_sort_desc_comparsion(other, &score)) .unwrap_or_else(|not_found_insert_idx| not_found_insert_idx); if idx < k { let _ = k_highest.pop(); - k_highest.insert(idx, *score); + k_highest.insert(idx, score); } } k_highest } -fn dcg(scores: &[f32]) -> f32 { +/// Calculates the DCG of given input sequence. +fn dcg(scores: impl Iterator) -> f32 { //Note: It migth seem to be faster to create two ndarrays and then use // a / broadcast in the hope this will take advantage of SIMD at // least on some platforms. But given that `scores` is more or @@ -201,10 +184,11 @@ fn dcg(scores: &[f32]) -> f32 { // any benefits and migth even slow things down due to uneccesary // allocation. If k is fixed we could use stack allocated buffers // and a tight loop, which problably would be the fastest. + // (But there are libraries to provide vectorized powf, log2 and similar) // a "simple commulative" sum is ok as we only use small number of scores (default k=2) let mut sum = 0.; - for (i, score) in scores.iter().copied().enumerate() { + for (i, score) in scores.enumerate() { //it's i+2 as our i starts with 0, while the formular starts with 1 and uses i+1 sum += (2f32.powf(score) - 1.) 
/ (i as f32 + 2.).log2() } @@ -236,148 +220,99 @@ mod tests { #[test] fn without_reordering() { - let buffer = &mut Vec::with_capacity(6); - let pair_buffer = &mut Vec::with_capacity(6); - - let relevances = &[1., 4., 10., 3., 0., 6.]; - let ordering_scores = &[12., 9., 7., 5., 4., 1.]; - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 2, - pair_buffer, - buffer, - ); + let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 4, - pair_buffer, - buffer, - ); + let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 100, - pair_buffer, - buffer, - ); + let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); - let relevances = &[-1., 7., -10., 3., 0., -6.]; - let ordering_scores = &[12., 9., 7., 5., 4., 1.]; - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 2, - pair_buffer, - buffer, - ); + let relevances = &mut [ + (-1., 12.), + (7., 9.), + (-10., 7.), + (3., 5.), + (0., 4.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 4, - pair_buffer, - buffer, 
- ); + let relevances = &mut [ + (-1., 12.), + (7., 9.), + (-10., 7.), + (3., 5.), + (0., 4.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 100, - pair_buffer, - buffer, - ); + let relevances = &mut [ + (-1., 12.), + (7., 9.), + (-10., 7.), + (3., 5.), + (0., 4.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); } #[test] fn with_reordering() { - let buffer = &mut Vec::with_capacity(6); - let pair_buffer = &mut Vec::with_capacity(6); - - let relevances = &[4., 10., 6., 0., 3., 1.]; - let ordering_scores = &[9., 7., 1., 4., 5., 12.]; - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 2, - pair_buffer, - buffer, - ); + let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 4, - pair_buffer, - buffer, - ); + let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 100, - pair_buffer, - buffer, - ); + let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); - let relevances = &[3., -10., 0., -1., 7., -6.]; - let 
ordering_scores = &[5., 7., 4., 12., 9., 1.]; - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 2, - pair_buffer, - buffer, - ); + let relevances = &mut [ + (3., 5.), + (-10., 7.), + (0., 4.), + (-1., 12.), + (7., 9.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 4, - pair_buffer, - buffer, - ); + let relevances = &mut [ + (3., 5.), + (-10., 7.), + (0., 4.), + (-1., 12.), + (7., 9.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 100, - pair_buffer, - buffer, - ); + let relevances = &mut [ + (3., 5.), + (-10., 7.), + (0., 4.), + (-1., 12.), + (7., 9.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); } - - #[test] - fn buffers_can_be_any_capacity() { - let buffer = &mut Vec::new(); - let pair_buffer = &mut Vec::new(); - - let relevances = &[4., 10., 6., 0., 3., 1.]; - let ordering_scores = &[9., 7., 1., 4., 5., 12.]; - let res = calcuate_reordered_ndcg_at_k_score( - relevances.iter().copied(), - ordering_scores.iter().copied(), - 2, - pair_buffer, - buffer, - ); - assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); - } } mod ndcg_at_k { @@ -385,23 +320,23 @@ mod tests { use float_cmp::approx_eq; #[test] fn produces_expected_values_for_k_larger_then_input() { - let res = ndcg_at_k(&[1., 4., 10., 3., 0., 6.], 100); + let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 100); assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); - let res = ndcg_at_k(&[-1., 7., -10., 
3., 0., -6.], 100); + let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 100); assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); } #[test] fn produces_expected_values_for_k_smaller_then_input() { - let res = ndcg_at_k(&[1., 4., 10., 3., 0., 6.], 2); + let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 2); assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); - let res = ndcg_at_k(&[1., 4., 10., 3., 0., 6.], 4); + let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 4); assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); - let res = ndcg_at_k(&[-1., 7., -10., 3., 0., -6.], 2); + let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 2); assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); - let res = ndcg_at_k(&[-1., 7., -10., 3., 0., -6.], 4); + let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 4); assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); } } @@ -417,13 +352,13 @@ mod tests { // slightly different result due to rounding assert!(approx_eq!( f32, - dcg(&[3f32, 2., 3., 0., 1., 2.]), + dcg([3f32, 2., 3., 0., 1., 2.].iter().copied()), 13.848263629272981, ulps = 2 )); assert!(approx_eq!( f32, - dcg(&[-3.2, -2., -4., 0., -1., -2.]), + dcg([-3.2, -2., -4., 0., -1., -2.].iter().copied()), -2.293710288714865, ulps = 2 )); @@ -444,7 +379,7 @@ mod tests { ]; for (input, pick) in cases { - let res = pick_k_highest_scores(input, 2); + let res = pick_k_highest_sorted_desc(input.iter().copied(), 2); assert_eq!( &*res, &**pick, "res={:?}, expected={:?}, input={:?}", @@ -455,18 +390,22 @@ mod tests { #[test] fn nans_are_preferably_not_picked_at_all() { - let res = pick_k_highest_scores(&[3., 2., f32::NAN], 2); + let res = pick_k_highest_sorted_desc([3., 2., f32::NAN].iter().copied(), 2); assert_eq!(&*res, &[3., 2.]); - let res = - pick_k_highest_scores(&[f32::NAN, 3., f32::NAN, f32::NAN, 2., 4., f32::NAN], 2); + let res = pick_k_highest_sorted_desc( + [f32::NAN, 
3., f32::NAN, f32::NAN, 2., 4., f32::NAN] + .iter() + .copied(), + 2, + ); assert_eq!(&*res, &[4., 3.]); - let res = pick_k_highest_scores(&[f32::NAN, 3., 2., f32::NAN], 3); + let res = pick_k_highest_sorted_desc([f32::NAN, 3., 2., f32::NAN].iter().copied(), 3); assert_eq!(&res[..2], &[3., 2.]); assert!(res[2].is_nan()); - let res = pick_k_highest_scores(&[f32::NAN], 1); + let res = pick_k_highest_sorted_desc([f32::NAN].iter().copied(), 1); assert_eq!(res.len(), 1); assert!(res[0].is_nan()); } From cc8fe17b34b8da96ae31a07fd1276e730af79d2e Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Mon, 26 Apr 2021 12:30:46 +0200 Subject: [PATCH 03/11] TY-1686: Cleaned up comments and code. Tests now use the test_ prefix and are not in sub-modules. --- xayn-ai/src/analytics.rs | 465 ++++++++++++++++++--------------------- 1 file changed, 212 insertions(+), 253 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index 717a4fcaa..aee5b7a22 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -1,8 +1,4 @@ -use std::{ - cmp::Ordering, - collections::HashMap, - iter::{FromIterator, FusedIterator}, -}; +use std::{cmp::Ordering, collections::HashMap, iter::FusedIterator}; use crate::{ data::{document::DocumentHistory, document_data::DocumentDataWithMab}, @@ -35,8 +31,7 @@ impl systems::AnalyticsSystem for AnalyticsSystem { documents: &[DocumentDataWithMab], ) -> Result { // We need to be able to lookup relevances by document id. - // and linear search is most likely a bad idea (FIXME: - // check if `history` len is very small). So we create + // and linear search is most likely a bad idea. So we create // a hashmap for the lookups. let relevance_lookups: HashMap<_, _> = { history @@ -63,11 +58,7 @@ impl systems::AnalyticsSystem for AnalyticsSystem { // nDCG expects higher scores to be better but for the ranking // it's the oposite, the solution carried over from the dart impl - // is to multiply by -1. 
Another would be to have the max rank (or - // and number greater then it which isn't too big) and then use `max-rank`. - // While negative ranks work mathematically fine I'm not sure about - // rounding problems due to f32. I really can't judge it it's a problem - // or not. + // is to multiply by -1. let final_ranking_desc = -(document.mab.rank as f32); paired_final_ranking_score.push((relevance, final_ranking_desc)); } @@ -136,7 +127,7 @@ fn ndcg_at_k( let ideal_relevances = pick_k_highest_sorted_desc(relevances, k); let idcg_at_k = dcg(ideal_relevances.into_iter()); - // if there is no ideal score our score pretent the ideal score is 1 + // if there is no ideal score, pretent the ideal score is 1 if idcg_at_k == 0.0 { dcg_at_k } else { @@ -144,20 +135,17 @@ fn ndcg_at_k( } } -/// Pick the k-highest values in given iterator (as if a vector is sorted and then &sorted_score[..k]). +/// Pick the k-highest values in given iterator. +/// +/// (As if a vector is sorted and then &sorted_score[..k]). /// /// If `NaN`'s is treated as the smallest possible value, i.e. /// preferably not picked at all if possible. -/// -/// # Panics -/// -/// If `k > scores.len()` this will panic. -//TODO: SmallVec? Buffer reuse? fn pick_k_highest_sorted_desc( mut scores: impl Iterator + ExactSizeIterator + FusedIterator, k: usize, ) -> Vec { - let mut k_highest = Vec::from_iter((&mut scores).take(k)); + let mut k_highest: Vec<_> = (&mut scores).take(k).collect(); k_highest.sort_by(nan_safe_sort_desc_comparsion); @@ -177,16 +165,9 @@ fn pick_k_highest_sorted_desc( /// Calculates the DCG of given input sequence. fn dcg(scores: impl Iterator) -> f32 { - //Note: It migth seem to be faster to create two ndarrays and then use - // a / broadcast in the hope this will take advantage of SIMD at - // least on some platforms. But given that `scores` is more or - // less alwasys very small (e.g. 
k=2) this is unlikely to yield - // any benefits and migth even slow things down due to uneccesary - // allocation. If k is fixed we could use stack allocated buffers - // and a tight loop, which problably would be the fastest. - // (But there are libraries to provide vectorized powf, log2 and similar) - - // a "simple commulative" sum is ok as we only use small number of scores (default k=2) + // - As this is only used for analytics and bound by `k`(==2) and `&[Document].len()` (~ 10 to 40) + // no further optimizations make sense. Especially not if they require memory allocations. + // - A "simple commulative" sum is ok as we only use small number of scores (default k=2) let mut sum = 0.; for (i, score) in scores.enumerate() { //it's i+2 as our i starts with 0, while the formular starts with 1 and uses i+1 @@ -195,11 +176,12 @@ fn dcg(scores: impl Iterator) -> f32 { sum } -/// Use for getting a descending ordering of floats. +/// Use for getting a descending sort ordering of floats. /// /// `NaN`'s are treated as the smallest possible value /// for this sorting they are also treated as equal to each other. -/// this is not standard comform but works for sorting. +/// This is not standard comform but works for sorting, +/// at least for our use-case. 
fn nan_safe_sort_desc_comparsion(a: &f32, b: &f32) -> Ordering { // switched a,b to have descending instead of ascending sorting b.partial_cmp(a) @@ -213,244 +195,221 @@ fn nan_safe_sort_desc_comparsion(a: &f32, b: &f32) -> Ordering { #[cfg(test)] mod tests { + use super::*; + use float_cmp::approx_eq; + + #[test] + fn test_create_reordered_ndcg_at_k_score_without_reordering() { + let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); + assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + + let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); + assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + + let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); + assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + + let relevances = &mut [ + (-1., 12.), + (7., 9.), + (-10., 7.), + (3., 5.), + (0., 4.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); + assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + + let relevances = &mut [ + (-1., 12.), + (7., 9.), + (-10., 7.), + (3., 5.), + (0., 4.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); + assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); + + let relevances = &mut [ + (-1., 12.), + (7., 9.), + (-10., 7.), + (3., 5.), + (0., 4.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); + assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); + } - mod create_reordered_ndcg_at_k_score { - use super::super::*; - use float_cmp::approx_eq; - - #[test] - fn without_reordering() { - let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; - let res = 
calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); - - let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); - - let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); - - let relevances = &mut [ - (-1., 12.), - (7., 9.), - (-10., 7.), - (3., 5.), - (0., 4.), - (-6., 1.), - ]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); - - let relevances = &mut [ - (-1., 12.), - (7., 9.), - (-10., 7.), - (3., 5.), - (0., 4.), - (-6., 1.), - ]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); - - let relevances = &mut [ - (-1., 12.), - (7., 9.), - (-10., 7.), - (3., 5.), - (0., 4.), - (-6., 1.), - ]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); - } - - #[test] - fn with_reordering() { - let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); - - let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); - - let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); - - let relevances = &mut [ - (3., 5.), - (-10., 
7.), - (0., 4.), - (-1., 12.), - (7., 9.), - (-6., 1.), - ]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); - - let relevances = &mut [ - (3., 5.), - (-10., 7.), - (0., 4.), - (-1., 12.), - (7., 9.), - (-6., 1.), - ]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); - - let relevances = &mut [ - (3., 5.), - (-10., 7.), - (0., 4.), - (-1., 12.), - (7., 9.), - (-6., 1.), - ]; - let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); - } + #[test] + fn test_create_reordered_ndcg_at_k_score_with_reordering() { + let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); + assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + + let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); + assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + + let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); + assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + + let relevances = &mut [ + (3., 5.), + (-10., 7.), + (0., 4.), + (-1., 12.), + (7., 9.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); + assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + + let relevances = &mut [ + (3., 5.), + (-10., 7.), + (0., 4.), + (-1., 12.), + (7., 9.), + (-6., 1.), + ]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); + assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); + + let relevances = &mut [ + (3., 5.), + (-10., 7.), + (0., 4.), + (-1., 12.), + (7., 9.), + (-6., 1.), + ]; + let res = 
calcuate_reordered_ndcg_at_k_score(relevances, 100); + assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); } - mod ndcg_at_k { - use super::super::*; - use float_cmp::approx_eq; - #[test] - fn produces_expected_values_for_k_larger_then_input() { - let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 100); - assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + #[test] + fn ndcg_at_k_produces_expected_values_for_k_larger_then_input() { + let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 100); + assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); - let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 100); - assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); - } - - #[test] - fn produces_expected_values_for_k_smaller_then_input() { - let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 2); - assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); - let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 4); - assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); - - let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 2); - assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); - let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 4); - assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); - } + let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 100); + assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); } - mod dcg { - use super::super::*; - use float_cmp::approx_eq; - - #[test] - fn running_it_results_in_expected_results() { - //FIXME we should test for the result to be at most 1 float increament above/below the given value - // not that it's exact the same as "valid" changes in how we can do the calculation can lead to - // slightly different result due to rounding - assert!(approx_eq!( - f32, - dcg([3f32, 2., 3., 0., 1., 2.].iter().copied()), - 13.848263629272981, - ulps = 2 - )); - 
assert!(approx_eq!( - f32, - dcg([-3.2, -2., -4., 0., -1., -2.].iter().copied()), - -2.293710288714865, - ulps = 2 - )); - } + #[test] + fn ndcg_at_k_produces_expected_values_for_k_smaller_then_input() { + let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 2); + assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 4); + assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + + let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 2); + assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 4); + assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); } - mod pick_k_highest_scores { - use super::super::*; - - #[test] - fn picks_the_highest_values_and_only_them() { - let cases: &[(&[f32], &[f32])] = &[ - (&[3., 2., 1., 0.], &[3., 2.]), - (&[0., 1., 2., 3.], &[3., 2.]), - (&[-2., -2.], &[-2., -2.]), - (&[-30., 3., 2., 10., -3., 0.], &[10., 3.]), - (&[-3., 0., -1., -2.], &[0., -1.]), - ]; - - for (input, pick) in cases { - let res = pick_k_highest_sorted_desc(input.iter().copied(), 2); - assert_eq!( - &*res, &**pick, - "res={:?}, expected={:?}, input={:?}", - res, pick, input - ); - } - } - - #[test] - fn nans_are_preferably_not_picked_at_all() { - let res = pick_k_highest_sorted_desc([3., 2., f32::NAN].iter().copied(), 2); - assert_eq!(&*res, &[3., 2.]); + #[test] + fn dcg_produces_expected_results() { + assert!(approx_eq!( + f32, + dcg([3f32, 2., 3., 0., 1., 2.].iter().copied()), + 13.848263629272981, + ulps = 2 + )); + assert!(approx_eq!( + f32, + dcg([-3.2, -2., -4., 0., -1., -2.].iter().copied()), + -2.293710288714865, + ulps = 2 + )); + } - let res = pick_k_highest_sorted_desc( - [f32::NAN, 3., f32::NAN, f32::NAN, 2., 4., f32::NAN] - .iter() - .copied(), - 2, + #[test] + fn test_pick_k_highest_picks_the_highest_values_and_only_them() { + let cases: &[(&[f32], &[f32])] = &[ + (&[3., 
2., 1., 0.], &[3., 2.]), + (&[0., 1., 2., 3.], &[3., 2.]), + (&[-2., -2.], &[-2., -2.]), + (&[-30., 3., 2., 10., -3., 0.], &[10., 3.]), + (&[-3., 0., -1., -2.], &[0., -1.]), + ]; + + for (input, pick) in cases { + let res = pick_k_highest_sorted_desc(input.iter().copied(), 2); + assert_eq!( + &*res, &**pick, + "res={:?}, expected={:?}, input={:?}", + res, pick, input ); - assert_eq!(&*res, &[4., 3.]); - - let res = pick_k_highest_sorted_desc([f32::NAN, 3., 2., f32::NAN].iter().copied(), 3); - assert_eq!(&res[..2], &[3., 2.]); - assert!(res[2].is_nan()); - - let res = pick_k_highest_sorted_desc([f32::NAN].iter().copied(), 1); - assert_eq!(res.len(), 1); - assert!(res[0].is_nan()); } } - mod nan_safe_sort_desc_comparsion { - use super::super::*; + #[test] + fn test_pick_k_highest_does_not_pick_nans_if_possible() { + let res = pick_k_highest_sorted_desc([3., 2., f32::NAN].iter().copied(), 2); + assert_eq!(&*res, &[3., 2.]); - #[test] - fn sorting_sorts_in_the_right_order() { - let data = &mut [f32::NAN, 1., 5., f32::NAN, 4.]; - data.sort_by(nan_safe_sort_desc_comparsion); + let res = pick_k_highest_sorted_desc( + [f32::NAN, 3., f32::NAN, f32::NAN, 2., 4., f32::NAN] + .iter() + .copied(), + 2, + ); + assert_eq!(&*res, &[4., 3.]); + + let res = pick_k_highest_sorted_desc([f32::NAN, 3., 2., f32::NAN].iter().copied(), 3); + assert_eq!(&res[..2], &[3., 2.]); + assert!(res[2].is_nan()); + + let res = pick_k_highest_sorted_desc([f32::NAN].iter().copied(), 1); + assert_eq!(res.len(), 1); + assert!(res[0].is_nan()); + } - assert_eq!(&data[..3], &[5., 4., 1.]); - assert!(data[3].is_nan()); - assert!(data[4].is_nan()); + #[test] + fn test_nan_safe_sort_desc_comparsion_sorts_in_the_right_order() { + let data = &mut [f32::NAN, 1., 5., f32::NAN, 4.]; + data.sort_by(nan_safe_sort_desc_comparsion); - let data = &mut [1., 5., 3., 4.]; - data.sort_by(nan_safe_sort_desc_comparsion); + assert_eq!(&data[..3], &[5., 4., 1.]); + assert!(data[3].is_nan()); + assert!(data[4].is_nan()); - 
assert_eq!(&data[..], &[5., 4., 3., 1.]); - } + let data = &mut [1., 5., 3., 4.]; + data.sort_by(nan_safe_sort_desc_comparsion); - #[test] - fn nans_compare_as_expected() { - assert_eq!( - nan_safe_sort_desc_comparsion(&f32::NAN, &f32::NAN), - Ordering::Equal - ); - assert_eq!( - nan_safe_sort_desc_comparsion(&-12., &f32::NAN), - Ordering::Less - ); - assert_eq!( - nan_safe_sort_desc_comparsion(&f32::NAN, &-12.), - Ordering::Greater - ); - assert_eq!( - nan_safe_sort_desc_comparsion(&12., &f32::NAN), - Ordering::Less - ); - assert_eq!( - nan_safe_sort_desc_comparsion(&f32::NAN, &12.), - Ordering::Greater - ); - } + assert_eq!(&data[..], &[5., 4., 3., 1.]); + } + + #[test] + fn test_nan_safe_sort_desc_comparsion_nans_compare_as_expected() { + assert_eq!( + nan_safe_sort_desc_comparsion(&f32::NAN, &f32::NAN), + Ordering::Equal + ); + assert_eq!( + nan_safe_sort_desc_comparsion(&-12., &f32::NAN), + Ordering::Less + ); + assert_eq!( + nan_safe_sort_desc_comparsion(&f32::NAN, &-12.), + Ordering::Greater + ); + assert_eq!( + nan_safe_sort_desc_comparsion(&12., &f32::NAN), + Ordering::Less + ); + assert_eq!( + nan_safe_sort_desc_comparsion(&f32::NAN, &12.), + Ordering::Greater + ); } } From 0f2ced94f17e3c1551fddcb09a880e2b0c078345 Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Mon, 26 Apr 2021 13:05:57 +0200 Subject: [PATCH 04/11] TY-1686: Fix clippy hints. 
--- xayn-ai/src/analytics.rs | 44 ++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index aee5b7a22..d30a09e71 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -202,15 +202,15 @@ mod tests { fn test_create_reordered_ndcg_at_k_score_without_reordering() { let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + assert!(approx_eq!(f32, res, 0.009_846_116, ulps = 2)); let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + assert!(approx_eq!(f32, res, 0.489_142_48, ulps = 2)); let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + assert!(approx_eq!(f32, res, 0.509_867_9, ulps = 2)); let relevances = &mut [ (-1., 12.), @@ -221,7 +221,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + assert!(approx_eq!(f32, res, 0.605_921_45, ulps = 2)); let relevances = &mut [ (-1., 12.), @@ -232,7 +232,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); + assert!(approx_eq!(f32, res, 0.626_086_65, ulps = 2)); let relevances = &mut [ (-1., 12.), @@ -243,22 +243,22 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); + assert!(approx_eq!(f32, res, 0.626_934_23, ulps = 2)); } #[test] fn 
test_create_reordered_ndcg_at_k_score_with_reordering() { let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + assert!(approx_eq!(f32, res, 0.009_846_116, ulps = 2)); let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + assert!(approx_eq!(f32, res, 0.489_142_48, ulps = 2)); let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + assert!(approx_eq!(f32, res, 0.509_867_9, ulps = 2)); let relevances = &mut [ (3., 5.), @@ -269,7 +269,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + assert!(approx_eq!(f32, res, 0.605_921_45, ulps = 2)); let relevances = &mut [ (3., 5.), @@ -280,7 +280,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); + assert!(approx_eq!(f32, res, 0.626_086_65, ulps = 2)); let relevances = &mut [ (3., 5.), @@ -291,29 +291,29 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); + assert!(approx_eq!(f32, res, 0.626_934_23, ulps = 2)); } #[test] fn ndcg_at_k_produces_expected_values_for_k_larger_then_input() { let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 100); - assert!(approx_eq!(f32, res, 0.5098678822644145, ulps = 2)); + assert!(approx_eq!(f32, res, 0.509_867_9, ulps = 2)); let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 100); - 
assert!(approx_eq!(f32, res, 0.6269342228326248, ulps = 2)); + assert!(approx_eq!(f32, res, 0.626_934_23, ulps = 2)); } #[test] fn ndcg_at_k_produces_expected_values_for_k_smaller_then_input() { let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 2); - assert!(approx_eq!(f32, res, 0.009846116527364958, ulps = 2)); + assert!(approx_eq!(f32, res, 0.009_846_116, ulps = 2)); let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 4); - assert!(approx_eq!(f32, res, 0.4891424845441425, ulps = 2)); + assert!(approx_eq!(f32, res, 0.489_142_48, ulps = 2)); let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 2); - assert!(approx_eq!(f32, res, 0.6059214306390379, ulps = 2)); + assert!(approx_eq!(f32, res, 0.605_921_45, ulps = 2)); let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 4); - assert!(approx_eq!(f32, res, 0.6260866644243038, ulps = 2)); + assert!(approx_eq!(f32, res, 0.626_086_65, ulps = 2)); } #[test] @@ -321,13 +321,13 @@ mod tests { assert!(approx_eq!( f32, dcg([3f32, 2., 3., 0., 1., 2.].iter().copied()), - 13.848263629272981, + 13.848_264, ulps = 2 )); assert!(approx_eq!( f32, dcg([-3.2, -2., -4., 0., -1., -2.].iter().copied()), - -2.293710288714865, + -2.293_710_2, ulps = 2 )); } @@ -354,6 +354,8 @@ mod tests { #[test] fn test_pick_k_highest_does_not_pick_nans_if_possible() { + #![allow(clippy::float_cmp)] + let res = pick_k_highest_sorted_desc([3., 2., f32::NAN].iter().copied(), 2); assert_eq!(&*res, &[3., 2.]); @@ -376,6 +378,8 @@ mod tests { #[test] fn test_nan_safe_sort_desc_comparsion_sorts_in_the_right_order() { + #![allow(clippy::float_cmp)] + let data = &mut [f32::NAN, 1., 5., f32::NAN, 4.]; data.sort_by(nan_safe_sort_desc_comparsion); From b66c3478b56cf3b5bac33a3ca35e28897f97aac8 Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Mon, 26 Apr 2021 20:35:22 +0200 Subject: [PATCH 05/11] Added more tests. This includes a test of the `compute_analytics` methods, and a port of the test from the prev. 
dart implementation (as far as applicable). --- xayn-ai/src/analytics.rs | 104 ++++++++++++++++++++++++++++++++++-- xayn-ai/src/reranker/mod.rs | 2 +- 2 files changed, 100 insertions(+), 6 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index d30a09e71..7c65a86de 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -12,12 +12,12 @@ const DEFAULT_NDCG_K: usize = 2; /// Calculated analytics data. #[derive(Clone)] pub struct Analytics { - /// The nDCG@k score between the initial ranking and the relevance based ranking - pub ndcg_initial: f32, /// The nDCG@k score between the LTR ranking and the relevance based ranking pub ndcg_ltr: f32, /// The nDCG@k score between the Context ranking and the relevance based ranking pub ndcg_context: f32, + /// The nDCG@k score between the initial ranking and the relevance based ranking + pub ndcg_initial_ranking: f32, /// THe nDCG@k score between the final ranking and the relevance based ranking pub ndcg_final_ranking: f32, } @@ -75,7 +75,7 @@ impl systems::AnalyticsSystem for AnalyticsSystem { //FIXME: We currently have no access to the initial score as thiss will require // some changes to the main applications type state/component system this // will be done in a followup PR. 
- ndcg_initial: f32::NAN, + ndcg_initial_ranking: f32::NAN, ndcg_ltr, ndcg_context, ndcg_final_ranking, @@ -195,11 +195,73 @@ fn nan_safe_sort_desc_comparsion(a: &f32, b: &f32) -> Ordering { #[cfg(test)] mod tests { + use crate::{reranker::systems::AnalyticsSystem, tests, UserFeedback}; + use super::*; use float_cmp::approx_eq; #[test] - fn test_create_reordered_ndcg_at_k_score_without_reordering() { + fn test_full_analytics_system() { + let history = tests::document_history(vec![ + (2, Relevance::Low, UserFeedback::None), + (3, Relevance::Medium, UserFeedback::None), + (1, Relevance::High, UserFeedback::None), + (0, Relevance::Medium, UserFeedback::None), + (10, Relevance::Low, UserFeedback::None), + ]); + + let mut documents = tests::data_with_mab(tests::from_ids(0..3)); + documents[0].ltr.ltr_score = 3.; + documents[0].context.context_value = 3.5; + documents[0].mab.rank = 1; + + documents[1].ltr.ltr_score = 2.; + documents[1].context.context_value = 7.; + documents[1].mab.rank = 0; + + documents[2].ltr.ltr_score = 7.; + documents[2].context.context_value = 6.; + documents[2].mab.rank = 2; + + let Analytics { + ndcg_initial_ranking: _, + ndcg_ltr, + ndcg_context, + ndcg_final_ranking, + } = AnalyticsSystem + .compute_analytics(&history, &documents) + .unwrap(); + + assert!(approx_eq!(f32, ndcg_ltr, 0.17376534287144002, ulps = 2)); + assert!(approx_eq!(f32, ndcg_context, 0.8262346571285599, ulps = 2)); + //FIXME: Currently not possible as `ndcg_initial_ranking` is not yet computed + // assert!(approx_eq!(f32, ndcg_initial_ranking, 0.7967075809905066, ulps = 2)); + assert!(approx_eq!(f32, ndcg_final_ranking, 1.0, ulps = 2)); + } + + #[test] + fn test_calcuate_reordered_ndcg_at_k_score_tests_from_dart() { + let relevances = &mut [(0., -50.), (0., 0.001), (1., 4.14), (2., 1000.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); + assert_eq!(format!("{:.4}", res), "1.0000"); + + let relevances = &mut [(0., -10.), (0., 1.), (1., 0.), (2., 6.)]; + let 
res = calcuate_reordered_ndcg_at_k_score(relevances, 2); + let res2 = ndcg_at_k([2., 0., 1., 0.].iter().copied(), 2); + assert!(approx_eq!(f32, res, res2, ulps = 2)); + + let relevances = &mut [(0., 1.), (0., -10.), (1., -11.), (2., -11.6)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); + assert!(approx_eq!(f32, res, 0.0, ulps = 2)); + + let relevances = &mut [(0., 1.), (0., -10.), (1., 100.), (2., 99.)]; + let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); + let res2 = ndcg_at_k([1., 2., 1., 0.].iter().copied(), 2); + assert!(approx_eq!(f32, res, res2, ulps = 2)); + } + + #[test] + fn test_calcuate_reordered_ndcg_at_k_score_without_reordering() { let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); assert!(approx_eq!(f32, res, 0.009_846_116, ulps = 2)); @@ -247,7 +309,7 @@ mod tests { } #[test] - fn test_create_reordered_ndcg_at_k_score_with_reordering() { + fn test_calcuate_reordered_ndcg_at_k_score_with_reordering() { let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); assert!(approx_eq!(f32, res, 0.009_846_116, ulps = 2)); @@ -294,6 +356,12 @@ mod tests { assert!(approx_eq!(f32, res, 0.626_934_23, ulps = 2)); } + #[test] + fn test_ndcg_at_k_tests_from_dart() { + let res = ndcg_at_k([0., 0., 1., 2.].iter().copied(), 4); + assert_eq!(format!("{:.4}", res), "0.4935"); + } + #[test] fn ndcg_at_k_produces_expected_values_for_k_larger_then_input() { let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 100); @@ -316,6 +384,32 @@ mod tests { assert!(approx_eq!(f32, res, 0.626_086_65, ulps = 2)); } + #[test] + fn test_dcg_tests_from_dart() { + /* + List relevances1 = [0, 0, 1, 1]; + List relevances2 = [0, 0, 1, 2]; + expect(Metrics.dcgAtK(relevances1, 2), 0); + expect(Metrics.dcgAtK(relevances1, 4).toStringAsFixed(4), 1.34268.toStringAsFixed(4)); + 
expect(Metrics.dcgAtK(relevances2, 4).toStringAsFixed(4), 2.5853523.toStringAsFixed(4)); + */ + // there is no dcg@k function in my code. It's dcg(input_iter.take(k)). + let res = dcg([0., 0., 1., 1.].iter().copied().take(2)); + assert!(approx_eq!(f32, res, 0.0, ulps = 2)); + + // FIXME: It turns out dart uses `log` (natural) but we and wikipedia do use `log2`... + // so this test will fail if the dart test values are used. + // Until this is resolved I will used the values from calculating the result + // "by hand: using log2. + let res = dcg([0., 0., 1., 1.].iter().copied().take(4)); + // assert_eq!(format!("{:.4}", res), "1.3426"); + assert_eq!(format!("{:.4}", res), "0.9307"); + + let res = dcg([0., 0., 1., 2.].iter().copied().take(4)); + // assert_eq!(format!("{:.4}", res), "2.5853"); + assert_eq!(format!("{:.4}", res), "1.7920"); + } + #[test] fn dcg_produces_expected_results() { assert!(approx_eq!( diff --git a/xayn-ai/src/reranker/mod.rs b/xayn-ai/src/reranker/mod.rs index 9015d0654..d70b65e90 100644 --- a/xayn-ai/src/reranker/mod.rs +++ b/xayn-ai/src/reranker/mod.rs @@ -598,7 +598,7 @@ mod tests { common_systems_with_fail!(analytics, MockAnalyticsSystem, compute_analytics, |_,_|); let mut reranker = Reranker::new(cs).unwrap(); reranker.analytics = Some(Analytics { - ndcg_initial: 0., + ndcg_initial_ranking: 0., ndcg_ltr: 0., ndcg_context: 0., ndcg_final_ranking: 0., From 575b35b7996eae465b00e3ec25aa05d228cacb26 Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Mon, 26 Apr 2021 20:49:53 +0200 Subject: [PATCH 06/11] Fixed various documentation issues found in the review. 
--- xayn-ai/src/analytics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index 7c65a86de..701e5f8a9 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -18,7 +18,7 @@ pub struct Analytics { pub ndcg_context: f32, /// The nDCG@k score between the initial ranking and the relevance based ranking pub ndcg_initial_ranking: f32, - /// THe nDCG@k score between the final ranking and the relevance based ranking + /// The nDCG@k score between the final ranking and the relevance based ranking pub ndcg_final_ranking: f32, } @@ -94,7 +94,7 @@ fn score_for_relevance(relevance: Relevance) -> f32 { /// Calculates the nDCG@k for given paired relevances. /// -/// The input is a tuple of `(relevance, ordering_score)` pair, +/// The input is a slice over `(relevance, ordering_score)` pairs, /// where the `ordering_score` is used to reorder the relevances /// based on sorting them in descending order. /// @@ -114,7 +114,7 @@ fn calcuate_reordered_ndcg_at_k_score(paired_relevances: &mut [(f32, f32)], k: u ndcg_at_k(paired_relevances.iter().map(|(rel, _ord)| *rel), k) } -/// Calculates the nDCG@k, `k` defaults to 2 if `None` is passed in. +/// Calculates the nDCG@k. /// /// This taks the first k values for the DCG score and the "best" k values /// for the IDCG score and then calculates the nDCG score with that. 
From f2c9c7b34da16c99730f4902a85a063b80ae5223 Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Mon, 26 Apr 2021 21:40:20 +0200 Subject: [PATCH 07/11] Applied changes discussed in the review - skip if no history available for a document - if we have no relevant historic information at all we do return an error - use fold in dcg - in pick highest skip search if we won't be able to insert it anyway - use Iterator::fuse() instead of FusedIterator --- xayn-ai/src/analytics.rs | 73 +++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index 701e5f8a9..9c1a8f25b 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -1,4 +1,8 @@ -use std::{cmp::Ordering, collections::HashMap, iter::FusedIterator}; +use std::{cmp::Ordering, collections::HashMap}; + +use anyhow::bail; +use displaydoc::Display; +use thiserror::Error; use crate::{ data::{document::DocumentHistory, document_data::DocumentDataWithMab}, @@ -22,6 +26,10 @@ pub struct Analytics { pub ndcg_final_ranking: f32, } +#[derive(Error, Debug, Display)] +/// Can not calculate Analytics as no relevant history is available. +pub(crate) struct NoRelevantHistoricInfo; + pub(crate) struct AnalyticsSystem; impl systems::AnalyticsSystem for AnalyticsSystem { @@ -45,22 +53,20 @@ impl systems::AnalyticsSystem for AnalyticsSystem { let mut paired_final_ranking_score = Vec::new(); for document in documents { - // We should never need the `_or(0.0)` but if we run into - // it it's best to give it a relevance of 0. As a document - // not in the history is irrelevant for this analytics. 
- let relevance = relevance_lookups - .get(&document.document_id.id) - .copied() - .unwrap_or(0.0); - - paired_ltr_scores.push((relevance, document.ltr.ltr_score)); - paired_context_scores.push((relevance, document.context.context_value)); - - // nDCG expects higher scores to be better but for the ranking - // it's the oposite, the solution carried over from the dart impl - // is to multiply by -1. - let final_ranking_desc = -(document.mab.rank as f32); - paired_final_ranking_score.push((relevance, final_ranking_desc)); + if let Some(relevance) = relevance_lookups.get(&document.document_id.id).copied() { + paired_ltr_scores.push((relevance, document.ltr.ltr_score)); + paired_context_scores.push((relevance, document.context.context_value)); + + // nDCG expects higher scores to be better but for the ranking + // it's the oposite, the solution carried over from the dart impl + // is to multiply by -1. + let final_ranking_desc = -(document.mab.rank as f32); + paired_final_ranking_score.push((relevance, final_ranking_desc)); + } + } + + if paired_ltr_scores.is_empty() { + bail!(NoRelevantHistoricInfo); } let ndcg_ltr = calcuate_reordered_ndcg_at_k_score(&mut paired_ltr_scores, DEFAULT_NDCG_K); @@ -118,10 +124,7 @@ fn calcuate_reordered_ndcg_at_k_score(paired_relevances: &mut [(f32, f32)], k: u /// /// This taks the first k values for the DCG score and the "best" k values /// for the IDCG score and then calculates the nDCG score with that. -fn ndcg_at_k( - relevances: impl Iterator + Clone + ExactSizeIterator + FusedIterator, - k: usize, -) -> f32 { +fn ndcg_at_k(relevances: impl Iterator + Clone + ExactSizeIterator, k: usize) -> f32 { let dcg_at_k = dcg(relevances.clone().take(k)); let ideal_relevances = pick_k_highest_sorted_desc(relevances, k); @@ -142,20 +145,24 @@ fn ndcg_at_k( /// If `NaN`'s is treated as the smallest possible value, i.e. /// preferably not picked at all if possible. 
fn pick_k_highest_sorted_desc( - mut scores: impl Iterator + ExactSizeIterator + FusedIterator, + scores: impl Iterator + ExactSizeIterator, k: usize, ) -> Vec { + // Due to specialization this has no overhead if scores is already fused. + let mut scores = scores.fuse(); let mut k_highest: Vec<_> = (&mut scores).take(k).collect(); k_highest.sort_by(nan_safe_sort_desc_comparsion); for score in scores { - let idx = k_highest - .binary_search_by(|other| nan_safe_sort_desc_comparsion(other, &score)) - .unwrap_or_else(|not_found_insert_idx| not_found_insert_idx); - - if idx < k { + //Supposed to act as NaN safe version of: if k_highest[k-1] < score { + if nan_safe_sort_desc_comparsion(&k_highest[k - 1], &score) == Ordering::Greater { let _ = k_highest.pop(); + + let idx = k_highest + .binary_search_by(|other| nan_safe_sort_desc_comparsion(other, &score)) + .unwrap_or_else(|not_found_insert_idx| not_found_insert_idx); + k_highest.insert(idx, score); } } @@ -168,12 +175,10 @@ fn dcg(scores: impl Iterator) -> f32 { // - As this is only used for analytics and bound by `k`(==2) and `&[Document].len()` (~ 10 to 40) // no further optimizations make sense. Especially not if they require memory allocations. // - A "simple commulative" sum is ok as we only use small number of scores (default k=2) - let mut sum = 0.; - for (i, score) in scores.enumerate() { + scores.enumerate().fold(0.0, |sum, (idx, score)| { //it's i+2 as our i starts with 0, while the formular starts with 1 and uses i+1 - sum += (2f32.powf(score) - 1.) / (i as f32 + 2.).log2() - } - sum + sum + (2f32.powf(score) - 1.) / (idx as f32 + 2.).log2() + }) } /// Use for getting a descending sort ordering of floats. 
@@ -232,8 +237,8 @@ mod tests { .compute_analytics(&history, &documents) .unwrap(); - assert!(approx_eq!(f32, ndcg_ltr, 0.17376534287144002, ulps = 2)); - assert!(approx_eq!(f32, ndcg_context, 0.8262346571285599, ulps = 2)); + assert!(approx_eq!(f32, ndcg_ltr, 0.173_765_35, ulps = 2)); + assert!(approx_eq!(f32, ndcg_context, 0.826_234_64, ulps = 2)); //FIXME: Currently not possible as `ndcg_initial_ranking` is not yet computed // assert!(approx_eq!(f32, ndcg_initial_ranking, 0.7967075809905066, ulps = 2)); assert!(approx_eq!(f32, ndcg_final_ranking, 1.0, ulps = 2)); From be645da3e801c905ec2fb0e74bf86c1eb999ab28 Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Mon, 26 Apr 2021 22:03:27 +0200 Subject: [PATCH 08/11] Unified f32_total_cmp and nan_safe_sort_desc_comparsion. The new function is called nan_safe_f32_cmp. --- xayn-ai/src/analytics.rs | 67 +++-------------------------------- xayn-ai/src/mab.rs | 20 ++--------- xayn-ai/src/utils.rs | 76 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 79 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index 9c1a8f25b..3e67ae2d6 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -8,6 +8,7 @@ use crate::{ data::{document::DocumentHistory, document_data::DocumentDataWithMab}, error::Error, reranker::systems, + utils::nan_safe_f32_cmp_desc, Relevance, }; @@ -116,7 +117,7 @@ fn score_for_relevance(relevance: Relevance) -> f32 { /// If a `NaN` is in the k-first relevances the resulting nDCG@k score will be `NaN`. 
fn calcuate_reordered_ndcg_at_k_score(paired_relevances: &mut [(f32, f32)], k: usize) -> f32 { paired_relevances - .sort_by(|(_, ord_sc_1), (_, ord_sc_2)| nan_safe_sort_desc_comparsion(ord_sc_1, ord_sc_2)); + .sort_by(|(_, ord_sc_1), (_, ord_sc_2)| nan_safe_f32_cmp_desc(ord_sc_1, ord_sc_2)); ndcg_at_k(paired_relevances.iter().map(|(rel, _ord)| *rel), k) } @@ -152,15 +153,15 @@ fn pick_k_highest_sorted_desc( let mut scores = scores.fuse(); let mut k_highest: Vec<_> = (&mut scores).take(k).collect(); - k_highest.sort_by(nan_safe_sort_desc_comparsion); + k_highest.sort_by(nan_safe_f32_cmp_desc); for score in scores { //Supposed to act as NaN safe version of: if k_highest[k-1] < score { - if nan_safe_sort_desc_comparsion(&k_highest[k - 1], &score) == Ordering::Greater { + if nan_safe_f32_cmp_desc(&k_highest[k - 1], &score) == Ordering::Greater { let _ = k_highest.pop(); let idx = k_highest - .binary_search_by(|other| nan_safe_sort_desc_comparsion(other, &score)) + .binary_search_by(|other| nan_safe_f32_cmp_desc(other, &score)) .unwrap_or_else(|not_found_insert_idx| not_found_insert_idx); k_highest.insert(idx, score); @@ -181,23 +182,6 @@ fn dcg(scores: impl Iterator) -> f32 { }) } -/// Use for getting a descending sort ordering of floats. -/// -/// `NaN`'s are treated as the smallest possible value -/// for this sorting they are also treated as equal to each other. -/// This is not standard comform but works for sorting, -/// at least for our use-case. 
-fn nan_safe_sort_desc_comparsion(a: &f32, b: &f32) -> Ordering { - // switched a,b to have descending instead of ascending sorting - b.partial_cmp(a) - .unwrap_or_else(|| match (a.is_nan(), b.is_nan()) { - (true, true) => Ordering::Equal, - (true, false) => Ordering::Greater, - (false, true) => Ordering::Less, - (false, false) => unreachable!(), - }) -} - #[cfg(test)] mod tests { use crate::{reranker::systems::AnalyticsSystem, tests, UserFeedback}; @@ -474,45 +458,4 @@ mod tests { assert_eq!(res.len(), 1); assert!(res[0].is_nan()); } - - #[test] - fn test_nan_safe_sort_desc_comparsion_sorts_in_the_right_order() { - #![allow(clippy::float_cmp)] - - let data = &mut [f32::NAN, 1., 5., f32::NAN, 4.]; - data.sort_by(nan_safe_sort_desc_comparsion); - - assert_eq!(&data[..3], &[5., 4., 1.]); - assert!(data[3].is_nan()); - assert!(data[4].is_nan()); - - let data = &mut [1., 5., 3., 4.]; - data.sort_by(nan_safe_sort_desc_comparsion); - - assert_eq!(&data[..], &[5., 4., 3., 1.]); - } - - #[test] - fn test_nan_safe_sort_desc_comparsion_nans_compare_as_expected() { - assert_eq!( - nan_safe_sort_desc_comparsion(&f32::NAN, &f32::NAN), - Ordering::Equal - ); - assert_eq!( - nan_safe_sort_desc_comparsion(&-12., &f32::NAN), - Ordering::Less - ); - assert_eq!( - nan_safe_sort_desc_comparsion(&f32::NAN, &-12.), - Ordering::Greater - ); - assert_eq!( - nan_safe_sort_desc_comparsion(&12., &f32::NAN), - Ordering::Less - ); - assert_eq!( - nan_safe_sort_desc_comparsion(&f32::NAN, &12.), - Ordering::Greater - ); - } } diff --git a/xayn-ai/src/mab.rs b/xayn-ai/src/mab.rs index babdd6f79..da993e7bc 100644 --- a/xayn-ai/src/mab.rs +++ b/xayn-ai/src/mab.rs @@ -6,6 +6,7 @@ use crate::{ UserInterests, }, reranker::systems::MabSystem, + utils::nan_safe_f32_cmp, Error, }; @@ -49,21 +50,6 @@ impl BetaSample for BetaSampler { } } -/// Pretend that comparing two f32 is total. The function will rank `NaN` -/// as the lowest value, similar to what [`f32::max`] does. 
-fn f32_total_cmp(a: &f32, b: &f32) -> Ordering { - a.partial_cmp(&b).unwrap_or_else(|| { - // if `partial_cmp` returns None we have at least one `NaN`, - // we treat it as the lowest value - match (a.is_nan(), b.is_nan()) { - (true, true) => Ordering::Equal, - (true, _) => Ordering::Less, - (_, true) => Ordering::Greater, - _ => unreachable!("partial_cmp returned None but both numbers are not NaN"), - } - }) -} - /// Wrapper to order documents by `context_value`. /// We need to implement `Ord` to use it in the `BinaryHeap`. #[cfg_attr(test, derive(Debug, Clone))] @@ -84,7 +70,7 @@ impl PartialOrd for DocumentByContext { impl Ord for DocumentByContext { fn cmp(&self, other: &Self) -> Ordering { - f32_total_cmp( + nan_safe_f32_cmp( &self.0.context.context_value, &other.0.context.context_value, ) @@ -169,7 +155,7 @@ fn pull_arms( |max, coi_id| -> Result<_, MabError> { let sample = sample_from_coi(coi_id)?; - if let Ordering::Greater = f32_total_cmp(&sample, &max.0) { + if let Ordering::Greater = nan_safe_f32_cmp(&sample, &max.0) { Ok((sample, coi_id)) } else { Ok(max) diff --git a/xayn-ai/src/utils.rs b/xayn-ai/src/utils.rs index b9769c74b..944a5fb62 100644 --- a/xayn-ai/src/utils.rs +++ b/xayn-ai/src/utils.rs @@ -1,3 +1,5 @@ +use std::cmp::Ordering; + #[macro_export] macro_rules! to_vec_of_ref_of { ($data: expr, $type:ty) => { @@ -7,3 +9,77 @@ macro_rules! to_vec_of_ref_of { .collect::>() }; } + +/// Allows comparing and sorting f32 even if `NaN` is involved. +/// +/// Pretend that f32 has a total ordering. +/// +/// `NaN` is treated as the lowest possible value, similar to what [`f32::max`] does. +/// +/// If this is used for sorting this will lead to an ascending order, like +/// for example `[NaN, 0.5, 1.5, 2.0]`. +/// +/// By switching the input parameters around this can be used to create a +/// descending sorted order, like e.g.: `[2.0, 1.5, 0.5, NaN]`. 
+pub(crate) fn nan_safe_f32_cmp(a: &f32, b: &f32) -> Ordering { + a.partial_cmp(&b).unwrap_or_else(|| { + // if `partial_cmp` returns None we have at least one `NaN`, + // we treat it as the lowest value + match (a.is_nan(), b.is_nan()) { + (true, true) => Ordering::Equal, + (true, _) => Ordering::Less, + (_, true) => Ordering::Greater, + _ => unreachable!("partial_cmp returned None but both numbers are not NaN"), + } + }) +} + +/// `nan_safe_f32_cmp_desc(a,b)` is syntax suggar for `nan_safe_f32_cmp(b, a)` +#[inline] +pub(crate) fn nan_safe_f32_cmp_desc(a: &f32, b: &f32) -> Ordering { + nan_safe_f32_cmp(b, a) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_nan_safe_f32_cmp_sorts_in_the_right_order() { + #![allow(clippy::float_cmp)] + + let data = &mut [f32::NAN, 1., 5., f32::NAN, 4.]; + data.sort_by(nan_safe_f32_cmp); + + assert_eq!(&data[2..], &[1., 4., 5.]); + assert!(data[0].is_nan()); + assert!(data[1].is_nan()); + + data.sort_by(nan_safe_f32_cmp_desc); + + assert_eq!(&data[..3], &[5., 4., 1.]); + assert!(data[3].is_nan()); + assert!(data[4].is_nan()); + + let data = &mut [1., 5., 3., 4.]; + + data.sort_by(nan_safe_f32_cmp); + assert_eq!(&data[..], &[1., 3., 4., 5.]); + + data.sort_by(nan_safe_f32_cmp_desc); + assert_eq!(&data[..], &[5., 4., 3., 1.]); + } + + #[test] + fn test_nan_safe_f32_cmp_nans_compare_as_expected() { + assert_eq!(nan_safe_f32_cmp(&f32::NAN, &f32::NAN), Ordering::Equal); + assert_eq!(nan_safe_f32_cmp(&-12., &f32::NAN), Ordering::Greater); + assert_eq!(nan_safe_f32_cmp_desc(&-12., &f32::NAN), Ordering::Less); + assert_eq!(nan_safe_f32_cmp(&f32::NAN, &-12.), Ordering::Less); + assert_eq!(nan_safe_f32_cmp_desc(&f32::NAN, &-12.), Ordering::Greater); + assert_eq!(nan_safe_f32_cmp(&12., &f32::NAN), Ordering::Greater); + assert_eq!(nan_safe_f32_cmp_desc(&12., &f32::NAN), Ordering::Less); + assert_eq!(nan_safe_f32_cmp(&f32::NAN, &12.), Ordering::Less); + assert_eq!(nan_safe_f32_cmp_desc(&f32::NAN, &12.), Ordering::Greater); + 
} +} From ccff90d229b0594313fe40a37bd1501a6b62c7fd Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Tue, 27 Apr 2021 11:16:48 +0200 Subject: [PATCH 09/11] Updated comments wrt. tests from dart and ln vs log2. --- xayn-ai/src/analytics.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index 3e67ae2d6..eb48ef990 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -375,26 +375,17 @@ mod tests { #[test] fn test_dcg_tests_from_dart() { - /* - List relevances1 = [0, 0, 1, 1]; - List relevances2 = [0, 0, 1, 2]; - expect(Metrics.dcgAtK(relevances1, 2), 0); - expect(Metrics.dcgAtK(relevances1, 4).toStringAsFixed(4), 1.34268.toStringAsFixed(4)); - expect(Metrics.dcgAtK(relevances2, 4).toStringAsFixed(4), 2.5853523.toStringAsFixed(4)); - */ // there is no dcg@k function in my code. It's dcg(input_iter.take(k)). let res = dcg([0., 0., 1., 1.].iter().copied().take(2)); assert!(approx_eq!(f32, res, 0.0, ulps = 2)); - // FIXME: It turns out dart uses `log` (natural) but we and wikipedia do use `log2`... - // so this test will fail if the dart test values are used. - // Until this is resolved I will used the values from calculating the result - // "by hand: using log2. let res = dcg([0., 0., 1., 1.].iter().copied().take(4)); + // Dart used ln instead of log2 so the values diverge. // assert_eq!(format!("{:.4}", res), "1.3426"); assert_eq!(format!("{:.4}", res), "0.9307"); let res = dcg([0., 0., 1., 2.].iter().copied().take(4)); + // Dart used ln instead of log2 so the values diverge. // assert_eq!(format!("{:.4}", res), "2.5853"); assert_eq!(format!("{:.4}", res), "1.7920"); } From 92315e074db4de348027c270372bd7d9ac931a7b Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Tue, 27 Apr 2021 11:44:47 +0200 Subject: [PATCH 10/11] Added assert_f32_eq which uses float_cmp::approx_eq.
assert!(approx_eq!(f32, left, right, ulps=n)) has the problem that it doesn't print information about the values when it fails so assert_f32_eq adds a format string which on failure will be used as error message. --- xayn-ai/src/analytics.rs | 71 ++++++++++++++++++---------------------- xayn-ai/src/lib.rs | 4 ++- xayn-ai/src/utils.rs | 19 +++++++++++ 3 files changed, 53 insertions(+), 41 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index eb48ef990..c2570b04d 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -184,10 +184,8 @@ fn dcg(scores: impl Iterator) -> f32 { #[cfg(test)] mod tests { - use crate::{reranker::systems::AnalyticsSystem, tests, UserFeedback}; - use super::*; - use float_cmp::approx_eq; + use crate::{reranker::systems::AnalyticsSystem, tests, UserFeedback}; #[test] fn test_full_analytics_system() { @@ -213,19 +211,19 @@ mod tests { documents[2].mab.rank = 2; let Analytics { - ndcg_initial_ranking: _, ndcg_ltr, ndcg_context, + ndcg_initial_ranking: _, ndcg_final_ranking, } = AnalyticsSystem .compute_analytics(&history, &documents) .unwrap(); - assert!(approx_eq!(f32, ndcg_ltr, 0.173_765_35, ulps = 2)); - assert!(approx_eq!(f32, ndcg_context, 0.826_234_64, ulps = 2)); + assert_f32_eq!(ndcg_ltr, 0.173_765_35); + assert_f32_eq!(ndcg_context, 0.826_234_64); //FIXME: Currently not possible as `ndcg_initial_ranking` is not yet computed // assert!(approx_eq!(f32, ndcg_initial_ranking, 0.7967075809905066, ulps = 2)); - assert!(approx_eq!(f32, ndcg_final_ranking, 1.0, ulps = 2)); + assert_f32_eq!(ndcg_final_ranking, 1.0); } #[test] @@ -237,31 +235,31 @@ mod tests { let relevances = &mut [(0., -10.), (0., 1.), (1., 0.), (2., 6.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); let res2 = ndcg_at_k([2., 0., 1., 0.].iter().copied(), 2); - assert!(approx_eq!(f32, res, res2, ulps = 2)); + assert_f32_eq!(res, res2); let relevances = &mut [(0., 1.), (0., -10.), (1., -11.), (2., -11.6)]; let res = 
calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.0, ulps = 2)); + assert_f32_eq!(res, 0.0); let relevances = &mut [(0., 1.), (0., -10.), (1., 100.), (2., 99.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); let res2 = ndcg_at_k([1., 2., 1., 0.].iter().copied(), 2); - assert!(approx_eq!(f32, res, res2, ulps = 2)); + assert_f32_eq!(res, res2); } #[test] fn test_calcuate_reordered_ndcg_at_k_score_without_reordering() { let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.009_846_116, ulps = 2)); + assert_f32_eq!(res, 0.009_846_116); let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.489_142_48, ulps = 2)); + assert_f32_eq!(res, 0.489_142_48); let relevances = &mut [(1., 12.), (4., 9.), (10., 7.), (3., 5.), (0., 4.), (6., 1.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.509_867_9, ulps = 2)); + assert_f32_eq!(res, 0.509_867_9); let relevances = &mut [ (-1., 12.), @@ -272,7 +270,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.605_921_45, ulps = 2)); + assert_f32_eq!(res, 0.605_921_45); let relevances = &mut [ (-1., 12.), @@ -283,7 +281,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.626_086_65, ulps = 2)); + assert_f32_eq!(res, 0.626_086_65); let relevances = &mut [ (-1., 12.), @@ -294,22 +292,22 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.626_934_23, ulps = 2)); + assert_f32_eq!(res, 0.626_934_23); } #[test] fn test_calcuate_reordered_ndcg_at_k_score_with_reordering() { let relevances = 
&mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.009_846_116, ulps = 2)); + assert_f32_eq!(res, 0.009_846_116); let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.489_142_48, ulps = 2)); + assert_f32_eq!(res, 0.489_142_48); let relevances = &mut [(4., 9.), (10., 7.), (6., 1.), (0., 4.), (3., 5.), (1., 12.)]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.509_867_9, ulps = 2)); + assert_f32_eq!(res, 0.509_867_9); let relevances = &mut [ (3., 5.), @@ -320,7 +318,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 2); - assert!(approx_eq!(f32, res, 0.605_921_45, ulps = 2)); + assert_f32_eq!(res, 0.605_921_45); let relevances = &mut [ (3., 5.), @@ -331,7 +329,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 4); - assert!(approx_eq!(f32, res, 0.626_086_65, ulps = 2)); + assert_f32_eq!(res, 0.626_086_65); let relevances = &mut [ (3., 5.), @@ -342,7 +340,7 @@ mod tests { (-6., 1.), ]; let res = calcuate_reordered_ndcg_at_k_score(relevances, 100); - assert!(approx_eq!(f32, res, 0.626_934_23, ulps = 2)); + assert_f32_eq!(res, 0.626_934_23); } #[test] @@ -354,30 +352,30 @@ mod tests { #[test] fn ndcg_at_k_produces_expected_values_for_k_larger_then_input() { let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 100); - assert!(approx_eq!(f32, res, 0.509_867_9, ulps = 2)); + assert_f32_eq!(res, 0.509_867_9); let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 100); - assert!(approx_eq!(f32, res, 0.626_934_23, ulps = 2)); + assert_f32_eq!(res, 0.626_934_23); } #[test] fn ndcg_at_k_produces_expected_values_for_k_smaller_then_input() { let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 
2); - assert!(approx_eq!(f32, res, 0.009_846_116, ulps = 2)); + assert_f32_eq!(res, 0.009_846_116); let res = ndcg_at_k([1., 4., 10., 3., 0., 6.].iter().copied(), 4); - assert!(approx_eq!(f32, res, 0.489_142_48, ulps = 2)); + assert_f32_eq!(res, 0.489_142_48); let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 2); - assert!(approx_eq!(f32, res, 0.605_921_45, ulps = 2)); + assert_f32_eq!(res, 0.605_921_45); let res = ndcg_at_k([-1., 7., -10., 3., 0., -6.].iter().copied(), 4); - assert!(approx_eq!(f32, res, 0.626_086_65, ulps = 2)); + assert_f32_eq!(res, 0.626_086_65); } #[test] fn test_dcg_tests_from_dart() { // there is no dcg@k function in my code. It's dcg(input_iter.take(k)). let res = dcg([0., 0., 1., 1.].iter().copied().take(2)); - assert!(approx_eq!(f32, res, 0.0, ulps = 2)); + assert_f32_eq!(res, 0.0); let res = dcg([0., 0., 1., 1.].iter().copied().take(4)); // Dart used ln instead of log2 so the values diverge. @@ -392,18 +390,11 @@ mod tests { #[test] fn dcg_produces_expected_results() { - assert!(approx_eq!( - f32, - dcg([3f32, 2., 3., 0., 1., 2.].iter().copied()), - 13.848_264, - ulps = 2 - )); - assert!(approx_eq!( - f32, + assert_f32_eq!(dcg([3f32, 2., 3., 0., 1., 2.].iter().copied()), 13.848_264); + assert_f32_eq!( dcg([-3.2, -2., -4., 0., -1., -2.].iter().copied()), - -2.293_710_2, - ulps = 2 - )); + -2.293_710_2 + ); } #[test] diff --git a/xayn-ai/src/lib.rs b/xayn-ai/src/lib.rs index 8a8fd8e4c..88e8ab8b6 100644 --- a/xayn-ai/src/lib.rs +++ b/xayn-ai/src/lib.rs @@ -1,3 +1,6 @@ +#[macro_use] +mod utils; + mod analytics; mod bert; mod coi; @@ -7,7 +10,6 @@ mod error; mod ltr; mod mab; mod reranker; -mod utils; pub use crate::{ analytics::Analytics, diff --git a/xayn-ai/src/utils.rs b/xayn-ai/src/utils.rs index 944a5fb62..ed3e1d6bd 100644 --- a/xayn-ai/src/utils.rs +++ b/xayn-ai/src/utils.rs @@ -40,6 +40,25 @@ pub(crate) fn nan_safe_f32_cmp_desc(a: &f32, b: &f32) -> Ordering { nan_safe_f32_cmp(b, a) } +#[cfg(test)] +macro_rules! 
assert_f32_eq { + ($left:expr, $right:expr) => { + assert_f32_eq! { $left, $right, ulps = 2 } + }; + ($left:expr, $right:expr, ulps = $ulps:expr) => {{ + let left = $left; + let right = $right; + let ulps = $ulps; + assert!( + ::float_cmp::approx_eq!(f32, $left, $right, ulps = ulps), + "approximated equal assertion failed (ulps={}): {} == {}", + ulps, + left, + right + ); + }}; +} + #[cfg(test)] mod tests { use super::*; From b246d56d11eb29f983c91f65f5f059deb3d6a577 Mon Sep 17 00:00:00 2001 From: Philipp Korber Date: Tue, 27 Apr 2021 14:02:09 +0200 Subject: [PATCH 11/11] Import Relevance from source module instead of crate root. --- xayn-ai/src/analytics.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xayn-ai/src/analytics.rs b/xayn-ai/src/analytics.rs index c2570b04d..d9fafb092 100644 --- a/xayn-ai/src/analytics.rs +++ b/xayn-ai/src/analytics.rs @@ -5,11 +5,13 @@ use displaydoc::Display; use thiserror::Error; use crate::{ - data::{document::DocumentHistory, document_data::DocumentDataWithMab}, + data::{ + document::{DocumentHistory, Relevance}, + document_data::DocumentDataWithMab, + }, error::Error, reranker::systems, utils::nan_safe_f32_cmp_desc, - Relevance, }; /// Which k to use for nDCG@k