Skip to content

Commit

Permalink
fix name and description bm25 search (#160)
Browse files Browse the repository at this point in the history
* fix name and description bm25 search

* bump version to 1.10.1

* filter out documents in passage tests
  • Loading branch information
kdutia authored Dec 16, 2024
1 parent 92969cc commit 31e1d02
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/cpr_sdk/version.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
_MAJOR = "1"
_MINOR = "10"
-_PATCH = "0"
+_PATCH = "1"
_SUFFIX = ""

VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
Expand Down
8 changes: 4 additions & 4 deletions src/cpr_sdk/yql_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def build_search_term(self) -> str:
return """
(
{"targetHits": 1000} weakAnd(
-family_name contains(@query_string),
-family_description contains(@query_string),
+family_name_index contains(@query_string),
+family_description_index contains(@query_string),
text_block contains(@query_string)
)
)
Expand All @@ -71,8 +71,8 @@ def build_search_term(self) -> str:
(
(
{"targetHits": 1000} weakAnd(
-family_name contains(@query_string),
-family_description contains(@query_string),
+family_name_index contains(@query_string),
+family_description_index contains(@query_string),
text_block contains(@query_string)
)
) or (
Expand Down
46 changes: 39 additions & 7 deletions tests/test_search_adaptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,11 @@ def test_vespa_search_adaptor__continuation_tokens__passages(test_vespa):

# Collect family & hits for comparison later
initial_family_id = initial_response.families[0].id
-initial_passages = [h.text_block_id for h in initial_response.families[0].hits]
+initial_passages = [
+h.text_block_id
+for h in initial_response.families[0].hits
+if isinstance(h, Passage)
+]

this_continuation = initial_response.this_continuation_token
passage_continuation = initial_response.families[0].continuation_token
Expand All @@ -358,7 +362,9 @@ def test_vespa_search_adaptor__continuation_tokens__passages(test_vespa):
assert response.families[0].id == initial_family_id

# But Passages SHOULD have changed
-new_passages = sorted([h.text_block_id for h in response.families[0].hits])
+new_passages = sorted(
+[h.text_block_id for h in response.families[0].hits if isinstance(h, Passage)]
+)
assert sorted(new_passages) != sorted(initial_passages)

# Previous passage continuation gives initial results
Expand All @@ -370,7 +376,9 @@ def test_vespa_search_adaptor__continuation_tokens__passages(test_vespa):
)
response = vespa_search(test_vespa, request)
assert response.families[0].id == initial_family_id
-prev_passages = sorted([h.text_block_id for h in response.families[0].hits])
+prev_passages = sorted(
+[h.text_block_id for h in response.families[0].hits if isinstance(h, Passage)]
+)
assert sorted(prev_passages) != sorted(new_passages)
assert sorted(prev_passages) == sorted(initial_passages)

Expand Down Expand Up @@ -426,10 +434,34 @@ def test_vespa_search_adaptor__continuation_tokens__families_and_passages(

# All of these should have different passages from each other
assert (
-sorted([h.text_block_id for h in response_one.families[0].hits])
-!= sorted([h.text_block_id for h in response_two.families[0].hits])
-!= sorted([h.text_block_id for h in response_three.families[0].hits])
-!= sorted([h.text_block_id for h in response_four.families[0].hits])
+sorted(
+[
+h.text_block_id
+for h in response_one.families[0].hits
+if isinstance(h, Passage)
+]
+)
+!= sorted(
+[
+h.text_block_id
+for h in response_two.families[0].hits
+if isinstance(h, Passage)
+]
+)
+!= sorted(
+[
+h.text_block_id
+for h in response_three.families[0].hits
+if isinstance(h, Passage)
+]
+)
+!= sorted(
+[
+h.text_block_id
+for h in response_four.families[0].hits
+if isinstance(h, Passage)
+]
+)
)


Expand Down

0 comments on commit 31e1d02

Please sign in to comment.