From d1c97290b40534f5a44dfe7d45697842e54114d0 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Mon, 5 Aug 2024 19:13:17 +0800 Subject: [PATCH] feat: return BM25 scores for FTS Signed-off-by: BubbleCal --- rust/lance/src/dataset/scanner.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index fb29d76167..91f63d3f27 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -624,6 +624,10 @@ impl Scanner { extra_columns.push(ArrowField::new(DIST_COL, DataType::Float32, true)); }; + if self.full_text_query.is_some() { + extra_columns.push(ArrowField::new(SCORE_COL, DataType::Float32, true)); + } + if self.with_row_id || in_projection { extra_columns.push(ROW_ID_FIELD.clone()); } @@ -666,6 +670,11 @@ impl Scanner { output_expr.push((vector_expr, DIST_COL.to_string())); } + if self.full_text_query.is_some() && output_expr.iter().all(|(_, name)| name != SCORE_COL) { + let score_expr = expressions::col(SCORE_COL, &physical_schema)?; + output_expr.push((score_expr, SCORE_COL.to_string())); + } + if self.with_row_id && output_expr.iter().all(|(_, name)| name != ROW_ID) { let row_id_expr = expressions::col(ROW_ID, &physical_schema)?; output_expr.push((row_id_expr, ROW_ID.to_string()));