From a266f41974f7b65d8916d31716e44d21e78ca4a3 Mon Sep 17 00:00:00 2001 From: Sidhant Arora <32369693+Sidhant29@users.noreply.github.com> Date: Fri, 21 Jul 2023 09:44:47 +1000 Subject: [PATCH] feat: tantivy_0.20.1_upgrade (#82) * Added api changes from tantivy-0.20.1 * lint fix * Increase test writer heap to 10_000_000 * Revert test back to original check * Update src/searcher.rs Co-authored-by: Cameron <561860+wallies@users.noreply.github.com> --------- Co-authored-by: Caleb Hattingh Co-authored-by: Cameron <561860+wallies@users.noreply.github.com> --- Cargo.toml | 6 ++--- src/index.rs | 7 +++--- src/lib.rs | 2 +- src/schemabuilder.rs | 58 +++++++++---------------------------------- src/searcher.rs | 5 ++-- tests/tantivy_test.py | 10 ++++---- 6 files changed, 27 insertions(+), 61 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 96a2fada..83ac9245 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.19.2" +version = "0.20.1" readme = "README.md" authors = ["Damir Jelić "] edition = "2018" @@ -15,11 +15,11 @@ pyo3-build-config = "0.18.0" [dependencies] chrono = "0.4.23" -tantivy = "0.19.2" +tantivy = "0.20.1" itertools = "0.10.5" futures = "0.3.26" serde_json = "1.0.91" [dependencies.pyo3] version = "0.18.0" -features = ["extension-module"] \ No newline at end of file +features = ["extension-module"] diff --git a/src/index.rs b/src/index.rs index 00b544c7..3cdadf60 100644 --- a/src/index.rs +++ b/src/index.rs @@ -331,7 +331,7 @@ impl Index { let schema = self.index.schema(); if let Some(default_field_names_vec) = default_field_names { for default_field_name in &default_field_names_vec { - if let Some(field) = schema.get_field(default_field_name) { + if let Ok(field) = schema.get_field(default_field_name) { let field_entry = schema.get_field_entry(field); if !field_entry.is_indexed() { return Err(exceptions::PyValueError::new_err( @@ -385,10 +385,11 @@ impl Index { ]; for (name, lang) in &analyzers { - let an = TextAnalyzer::from(SimpleTokenizer) + let an = TextAnalyzer::builder(SimpleTokenizer::default()) .filter(RemoveLongFilter::limit(40)) .filter(LowerCaser) - .filter(Stemmer::new(*lang)); + .filter(Stemmer::new(*lang)) + .build(); index.tokenizers().register(name, an); } } diff --git a/src/lib.rs b/src/lib.rs index 64c4f0c5..7fe6c2af 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -86,7 +86,7 @@ pub(crate) fn get_field( schema: &tv::schema::Schema, field_name: &str, ) -> PyResult { - let field = schema.get_field(field_name).ok_or_else(|| { + let field = schema.get_field(field_name).map_err(|_err| { exceptions::PyValueError::new_err(format!( "Field `{field_name}` is not defined in the schema." )) diff --git a/src/schemabuilder.rs b/src/schemabuilder.rs index 17493c0b..dc5b5150 100644 --- a/src/schemabuilder.rs +++ b/src/schemabuilder.rs @@ -110,13 +110,13 @@ impl SchemaBuilder { /// /// Returns the associated field handle. /// Raises a ValueError if there was an error with the field creation. - #[pyo3(signature = (name, stored = false, indexed = false, fast = None))] + #[pyo3(signature = (name, stored = false, indexed = false, fast = false))] fn add_integer_field( &mut self, name: &str, stored: bool, indexed: bool, - fast: Option<&str>, + fast: bool, ) -> PyResult { let builder = &mut self.builder; @@ -132,13 +132,13 @@ impl SchemaBuilder { Ok(self.clone()) } - #[pyo3(signature = (name, stored = false, indexed = false, fast = None))] + #[pyo3(signature = (name, stored = false, indexed = false, fast = false))] fn add_float_field( &mut self, name: &str, stored: bool, indexed: bool, - fast: Option<&str>, + fast: bool, ) -> PyResult { let builder = &mut self.builder; @@ -174,13 +174,13 @@ impl SchemaBuilder { /// /// Returns the associated field handle. /// Raises a ValueError if there was an error with the field creation. - #[pyo3(signature = (name, stored = false, indexed = false, fast = None))] + #[pyo3(signature = (name, stored = false, indexed = false, fast = false))] fn add_unsigned_field( &mut self, name: &str, stored: bool, indexed: bool, - fast: Option<&str>, + fast: bool, ) -> PyResult { let builder = &mut self.builder; @@ -216,13 +216,13 @@ impl SchemaBuilder { /// /// Returns the associated field handle. /// Raises a ValueError if there was an error with the field creation. - #[pyo3(signature = (name, stored = false, indexed = false, fast = None))] + #[pyo3(signature = (name, stored = false, indexed = false, fast = false))] fn add_date_field( &mut self, name: &str, stored: bool, indexed: bool, - fast: Option<&str>, + fast: bool, ) -> PyResult { let builder = &mut self.builder; @@ -233,21 +233,8 @@ impl SchemaBuilder { if indexed { opts = opts.set_indexed(); } - let fast = match fast { - Some(f) => { - let f = f.to_lowercase(); - match f.as_ref() { - "single" => Some(schema::Cardinality::SingleValue), - "multi" => Some(schema::Cardinality::MultiValues), - _ => return Err(exceptions::PyValueError::new_err( - "Invalid index option, valid choices are: 'multi' and 'single'" - )), - } - } - None => None, - }; - if let Some(f) = fast { - opts = opts.set_fast(f); + if fast { + opts = opts.set_fast(); } if let Some(builder) = builder.write().unwrap().as_mut() { @@ -368,33 +355,12 @@ impl SchemaBuilder { fn build_numeric_option( stored: bool, indexed: bool, - fast: Option<&str>, + fast: bool, ) -> PyResult { let opts = schema::NumericOptions::default(); - let opts = if stored { opts.set_stored() } else { opts }; let opts = if indexed { opts.set_indexed() } else { opts }; - - let fast = match fast { - Some(f) => { - let f = f.to_lowercase(); - match f.as_ref() { - "single" => Some(schema::Cardinality::SingleValue), - "multi" => Some(schema::Cardinality::MultiValues), - _ => return Err(exceptions::PyValueError::new_err( - "Invalid index option, valid choices are: 'multivalue' and 'singlevalue'" - )), - } - } - None => None, - }; - - let opts = if let Some(f) = fast { - opts.set_fast(f) - } else { - opts - }; - + let opts = if fast { opts.set_fast() } else { opts }; Ok(opts) } diff --git a/src/searcher.rs b/src/searcher.rs index ea625d43..8375c5f0 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -1,6 +1,6 @@ #![allow(clippy::new_ret_no_self)] -use crate::{document::Document, get_field, query::Query, to_pyerr}; +use crate::{document::Document, query::Query, to_pyerr}; use pyo3::{exceptions::PyValueError, prelude::*}; use tantivy as tv; use tantivy::collector::{Count, MultiCollector, TopDocs}; @@ -113,10 +113,9 @@ impl Searcher { let (mut multifruit, hits) = { if let Some(order_by) = order_by_field { - let field = get_field(&self.inner.index().schema(), order_by)?; let collector = TopDocs::with_limit(limit) .and_offset(offset) - .order_by_u64_field(field); + .order_by_u64_field(order_by); let top_docs_handle = multicollector.add_collector(collector); let ret = self.inner.search(query.get(), &multicollector); diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index e833fa14..328c5352 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -26,7 +26,7 @@ def create_index(dir=None): # assume all tests will use the same documents for now # other methods may set up function-local indexes index = Index(schema(), dir) - writer = index.writer() + writer = index.writer(10_000_000, 1) # 2 ways of adding documents # 1 @@ -77,7 +77,7 @@ def create_index(dir=None): def create_index_with_numeric_fields(dir=None): index = Index(schema_numeric_fields(), dir) - writer = index.writer() + writer = index.writer(10_000_000, 1) doc = Document() doc.add_integer("id", 1) @@ -260,13 +260,13 @@ def test_and_query_numeric_fields(self, ram_index_numeric_fields): def test_and_query_parser_default_fields(self, ram_index): query = ram_index.parse_query("winter", default_field_names=["title"]) - assert repr(query) == """Query(TermQuery(Term(type=Str, field=0, "winter")))""" + assert repr(query) == """Query(TermQuery(Term(field=0, type=Str, "winter")))""" def test_and_query_parser_default_fields_undefined(self, ram_index): query = ram_index.parse_query("winter") assert ( repr(query) - == """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, "winter"))), (Should, TermQuery(Term(type=Str, field=1, "winter")))] })""" + == """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, "winter"))), (Should, TermQuery(Term(field=1, type=Str, "winter")))] })""" ) def test_query_errors(self, ram_index): @@ -278,7 +278,7 @@ def test_query_errors(self, ram_index): def test_order_by_search(self): schema = ( SchemaBuilder() - .add_unsigned_field("order", fast="single") + .add_unsigned_field("order", fast=True) .add_text_field("title", stored=True) .build() )