Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
resolve phrase hardcore
Browse files Browse the repository at this point in the history
  • Loading branch information
irevoire committed Aug 19, 2022
1 parent 7e20d5c commit 8e4920b
Showing 1 changed file with 15 additions and 26 deletions.
41 changes: 15 additions & 26 deletions milli/src/search/criteria/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use self::words::Words;
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use crate::search::criteria::geo::Geo;
use crate::search::{word_derivations, WordDerivationsCache};
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
use crate::{AscDesc as AscDescName, DocumentId, Error, FieldId, Index, Member, Result};

mod asc_desc;
mod attribute;
Expand Down Expand Up @@ -318,33 +318,22 @@ pub fn resolve_query_tree(
}

/// Collects the ids of the documents matched by `phrase`.
///
/// The phrase is scanned with a sliding window of at most 7 words. Inside each
/// window, every pair of words `(s1, s2)` where `s2` comes after `s1` is looked up
/// through `ctx.word_pair_proximity_docids`, using the number of positions that
/// separate the two words as the proximity, and the bitmaps obtained are combined
/// with `and` (presumably an intersection — confirm against the bitmap library).
///
/// Errors reported by `ctx.word_pair_proximity_docids` are returned to the caller.
///
/// NOTE(review): this span comes from a rendered commit diff that lost its `+`/`-`
/// markers, so two bodies are interleaved: the `candidates` loop looks like the
/// removed side and the `phrase.windows(..)` chain like the added side — confirm
/// against the commit before reusing either.
///
/// NOTE(review): `winsize` is 0 for an empty `phrase`, and `slice::windows` panics
/// when given a size of 0 — confirm callers never pass an empty phrase.
pub fn resolve_phrase(ctx: &dyn Context, phrase: &[String]) -> Result<RoaringBitmap> {
// NOTE(review): `candidates` starts empty and is only ever narrowed with `&=`
// below, so this loop-based version appears unable to return a non-empty
// bitmap — confirm.
let mut candidates = RoaringBitmap::new();
// The window is capped at 7 words, so `dist as u8 + 1` below never exceeds 6
// and the cast cannot truncate.
let winsize = phrase.len().min(7);

for win in phrase.windows(winsize) {
// Get all the documents with the matching distance for each word pair.
// `winsize.pow(2)` over-estimates the number of pairs in a window.
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
for (offset, s1) in win.iter().enumerate() {
for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
Some(m) => bitmaps.push(m),
// If there are no documents for this distance, there will be no
// results for the phrase query.
None => return Ok(RoaringBitmap::new()),
}
}
}

// We sort the bitmaps so that we perform the small intersections first, which is faster.
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
candidates &= bitmaps.and();

// There will be no match for the phrase, so return early.
if candidates.is_empty() {
break;
}
}
Ok(candidates)
// Iterator-based body: lazily yields one lookup result per word pair of every
// window and hands the whole stream to `and()`.
phrase
.windows(winsize)
.flat_map(|win| {
win.iter().enumerate().flat_map(move |(offset, s1)| {
win.iter().skip(offset + 1).enumerate().map(move |(dist, s2)| {
ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)
// If there are no documents for this distance, there will be no
// results for the phrase query: the missing entry is replaced by a
// default bitmap (presumably empty) instead of returning early.
.map(|m| m.unwrap_or_default())
})
})
})
.and()
// Convert the error type produced by `and()` into the crate's `Error`.
.map_err(Error::from)
}

fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
Expand Down

0 comments on commit 8e4920b

Please sign in to comment.