Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debugging and adding a failing test. #108

Merged
merged 4 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/cpr_sdk/models/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ def from_vespa_response(cls, response_hit: dict) -> "Passage":
document_content_type=fields.get("document_content_type"),
document_cdn_object=fields.get("document_cdn_object"),
document_source_url=fields.get("document_source_url"),
corpus_type_name=fields.get("corpus_type_name"),
corpus_import_id=fields.get("corpus_import_id"),
text_block=fields["text_block"],
text_block_id=fields["text_block_id"],
text_block_type=fields["text_block_type"],
Expand Down
6 changes: 6 additions & 0 deletions tests/local_vespa/test_app/schemas/document_passage.sd
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,16 @@ schema document_passage {
import field family_document_ref.family_publication_ts as family_publication_ts {}
import field family_document_ref.family_publication_year as family_publication_year {}
import field family_document_ref.family_geography as family_geography {}
import field family_document_ref.family_geographies as family_geographies {}
import field family_document_ref.family_source as family_source {}
import field family_document_ref.document_import_id as document_import_id {}
import field family_document_ref.document_slug as document_slug {}
import field family_document_ref.document_languages as document_languages {}
import field family_document_ref.document_content_type as document_content_type {}
import field family_document_ref.document_cdn_object as document_cdn_object {}
import field family_document_ref.document_source_url as document_source_url {}
import field family_document_ref.corpus_import_id as corpus_import_id {}
import field family_document_ref.corpus_type_name as corpus_type_name {}
import field search_weights_ref.passage_weight as passage_weight {}

fieldset default {
Expand All @@ -75,13 +78,16 @@ schema document_passage {
summary family_category {}
summary family_publication_ts {}
summary family_geography {}
summary family_geographies {}
summary family_source {}
summary document_import_id {}
summary document_slug {}
summary document_languages {}
summary document_content_type {}
summary document_cdn_object {}
summary document_source_url {}
summary corpus_import_id {}
summary corpus_type_name {}
summary text_block {}
summary text_block_id {}
summary text_block_type {}
Expand Down
2 changes: 1 addition & 1 deletion tests/local_vespa/test_documents/family_document.json
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
"family_slug": "environmental-strategy-for-2014-2023_9f8e",
"document_source_url": "https://wedocs.unep.org/bitstream/handle/20.500.11822/9507/-Environmental_Strategy_for_the_years_2014-2023-2014Moldova_EnvironmentalStrategy_2014-202.pdf?sequence=3&isAllowed=y",
"family_geography": "MDA",
"family_geographies": null,
"family_geographies": ["MDA"],
"family_category": "Executive",
"document_md5_sum": "bea7a05dae73fbbd629e687a71a15b95",
"family_name_index": "Environmental Strategy for 2014-2023",
Expand Down
40 changes: 40 additions & 0 deletions tests/test_search_adaptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
from cpr_sdk.models.search import (
Document,
Filters,
Hit,
MetadataFilter,
Passage,
SearchParameters,
SearchResponse,
sort_fields,
)
from cpr_sdk.search_adaptors import VespaSearchAdapter
from cpr_sdk.utils import dig
from cpr_sdk.vespa import build_vespa_request_body


def vespa_search(
Expand Down Expand Up @@ -455,6 +458,7 @@ def test_vespa_search_adaptor__corpus_type_name(
response = vespa_search(test_vespa, request)
assert response.total_family_hits > 0
for family in response.families:
assert len(family.hits) > 0
for hit in family.hits:
assert hit.corpus_type_name not in [None, []]
assert hit.corpus_type_name in corpus_type_names
Expand Down Expand Up @@ -553,3 +557,39 @@ def test_vespa_search_adaptor__filters(test_vespa, query_string, filters):
attribute_value_from_hit = getattr(hit, filter_name)
assert attribute_value_from_hit not in [None, []]
assert all([val in attribute_value_from_hit for val in filter_values])


@pytest.mark.vespa
@pytest.mark.parametrize("query_string", ["e"])
@pytest.mark.parametrize("exact_match", [True, False])
@pytest.mark.parametrize(
    "metadata_filters", [None, [{"name": "family.sector", "value": "Price"}]]
)
@pytest.mark.parametrize("geographies", [None, {"family_geographies": ["BIH"]}])
def test_vespa_search_response__geographies(
    test_vespa, query_string, exact_match, metadata_filters, geographies
) -> None:
    """
    Check that every hit in a raw Vespa response carries geography fields.

    The request body is built from ``SearchParameters`` and sent through
    ``test_vespa.client.query``; each hit found in the nested response is then
    parsed with ``Hit.from_vespa_response`` and must expose both
    ``family_geography`` and ``family_geographies`` as something other than
    ``None`` or an empty list.
    """
    # Only validate the optional inputs when the parametrisation supplies them.
    filters = None
    if geographies:
        filters = Filters.model_validate(geographies)

    metadata = None
    if metadata_filters:
        metadata = [
            MetadataFilter.model_validate(raw_filter)
            for raw_filter in metadata_filters
        ]

    parameters = SearchParameters(
        query_string=query_string,
        exact_match=exact_match,
        filters=filters,
        metadata=metadata,
    )

    request_body = build_vespa_request_body(parameters)
    vespa_response = test_vespa.client.query(body=request_body)

    root = vespa_response.json["root"]
    response_families = dig(
        root, "children", 0, "children", 0, "children", default=[]
    )

    # NOTE(review): both loops fall back to an empty list, so this test passes
    # without asserting anything when the query returns no families or hits.
    for family in response_families:
        raw_hits = dig(family, "children", 0, "children", default=[])
        for raw_hit in raw_hits:
            parsed_hit = Hit.from_vespa_response(response_hit=raw_hit)
            assert parsed_hit.family_geography not in [None, []]
            assert parsed_hit.family_geographies not in [None, []]
olaughter marked this conversation as resolved.
Show resolved Hide resolved
Loading