Phrase-query handling for the inverted (full-text) index.

Test hunk (builder.rs): adds a case asserting that searching for the phrase
query "lance unknown" with a limit of 10 yields a result whose len() is Some(0).
Search hunk (index.rs): on the phrase-query branch, the token ids are collected
first and an empty result is returned early when their count differs from the
number of query tokens; otherwise the collected ids go to bm25_search as before.

NOTE(review): leading whitespace of the Rust lines below was reconstructed from
rustfmt conventions; apply with whitespace-tolerant options if context differs.

diff --git a/rust/lance-index/src/scalar/inverted/builder.rs b/rust/lance-index/src/scalar/inverted/builder.rs
index c6ceba8c88..459c8e73a2 100644
--- a/rust/lance-index/src/scalar/inverted/builder.rs
+++ b/rust/lance-index/src/scalar/inverted/builder.rs
@@ -586,6 +586,14 @@ mod tests {
             .await
             .unwrap();
         assert_eq!(row_ids.len(), Some(0));
+
+        let row_ids = invert_index
+            .search(&SargableQuery::FullTextSearch(
+                FullTextSearchQuery::new("\"lance unknown\"".to_owned()).limit(Some(10)),
+            ))
+            .await
+            .unwrap();
+        assert_eq!(row_ids.len(), Some(0));
     }
 
     #[tokio::test]
diff --git a/rust/lance-index/src/scalar/inverted/index.rs b/rust/lance-index/src/scalar/inverted/index.rs
index 2cab09e37f..8aa1349ca9 100644
--- a/rust/lance-index/src/scalar/inverted/index.rs
+++ b/rust/lance-index/src/scalar/inverted/index.rs
@@ -108,7 +108,12 @@ impl InvertedIndex {
         let token_ids = if !is_phrase_query(&query.query) {
             token_ids.sorted_unstable().dedup().collect()
         } else {
-            token_ids.collect()
+            let token_ids = token_ids.collect::<Vec<_>>();
+            // for phrase query, all tokens must be present
+            if token_ids.len() != tokens.len() {
+                return Ok(Vec::new());
+            }
+            token_ids
         };
         self.bm25_search(token_ids, query, prefilter).await
     }