quickwit-oss · fulmicoton · Oct 20, 2022 · Oct 19, 2022 · Oct 20, 2022 · Oct 20, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -27,7 +27,6 @@ Tantivy 0.19
   - [#1582](https://github.com/quickwit-oss/tantivy/pull/1582 (@PSeitz)
   - [#1611](https://github.com/quickwit-oss/tantivy/pull/1611 (@PSeitz)
 
-
 Tantivy 0.18
 ================================
 
@@ -44,6 +43,10 @@ Tantivy 0.18
 - Add terms aggregation (@PSeitz)
 - Add support for zstd compression (@kryesh)
 
+Tantivy 0.18.1
+================================
+- Hotfix: fix a position underflow when indexing a multi-valued text field in which one value yields no token. #1629 (@fmassot, @fulmicoton, @PSeitz)
+
 Tantivy 0.17
 ================================
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.18.0"
+version = "0.18.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -14,7 +14,7 @@ edition = "2021"
 rust-version = "1.62"
 
 [dependencies]
-oneshot = "0.1.3"
+oneshot = "0.1.5"
 base64 = "0.13.0"
 byteorder = "1.4.3"
 crc32fast = "1.3.2"

diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs
@@ -751,4 +751,38 @@ mod tests {
         let phrase_query = PhraseQuery::new(vec![nothello_term, happy_term]);
         assert_eq!(searcher.search(&phrase_query, &Count).unwrap(), 0);
     }
+
+    #[test]
+    fn test_bug_regression_1629_position_when_array_with_a_field_value_that_does_not_contain_any_token(
+    ) {
+        // Regression test for #1629: computing position deltas used to underflow when a
+        // multi-valued text field contained a value producing no token (here the empty string).
+        //
+        // See the commit that introduced this unit test for the details.
+        let mut schema_builder = Schema::builder();
+        let text = schema_builder.add_text_field("text", TEXT);
+        let schema = schema_builder.build();
+        let doc = schema
+            .parse_document(r#"{"text": [ "bbb", "aaa", "", "aaa"]}"#)
+            .unwrap();
+        let index = Index::create_in_ram(schema);
+        let mut index_writer = index.writer_for_tests().unwrap();
+        index_writer.add_document(doc).unwrap();
+        // In debug builds, this commit used to panic on the underflow.
+        index_writer.commit().unwrap();
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+        let seg_reader = searcher.segment_reader(0);
+        let inv_index = seg_reader.inverted_index(text).unwrap();
+        let term = Term::from_field_text(text, "aaa");
+        let mut postings = inv_index
+            .read_postings(&term, IndexRecordOption::WithFreqsAndPositions)
+            .unwrap()
+            .unwrap();
+        assert_eq!(postings.doc(), 0u32);
+        let mut positions = Vec::new();
+        postings.positions(&mut positions);
+        // In release builds, this used to yield [2, 1] (note the decreasing values).
+        assert_eq!(positions, &[2, 5]);
+    }
 }
diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs
@@ -155,7 +155,7 @@ pub(crate) trait PostingsWriter: Send + Sync {
     ) {
         let end_of_path_idx = term_buffer.len_bytes();
         let mut num_tokens = 0;
-        let mut end_position = 0;
+        let mut end_position = indexing_position.end_position;
         token_stream.process(&mut |token: &Token| {
             // We skip all tokens with a len greater than u16.
             if token.text.len() > MAX_TOKEN_LEN {