Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve association table search #268

Merged
merged 5 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/src/monarch_py/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ def association_table(
help="The association category to get associations for, ex. biolink:GeneToPhenotypicFeatureAssociation",
),
q: str = typer.Option(None, "--query", "-q"),
sort: List[str] = typer.Option(None, "--sort", "-s"),
limit: int = typer.Option(5, "--limit", "-l"),
offset: int = typer.Option(0, "--offset"),
fmt: str = typer.Option(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ def get_associations(
object_closure: str = None,
entity: List[str] = None,
direct: bool = None,
q: str = None,
offset: int = 0,
limit: int = 20,
) -> AssociationResults:
Expand All @@ -237,6 +238,7 @@ def get_associations(
subject_closure: Filter to only associations with the specified term ID as an ancestor of the subject. Defaults to None
object_closure: Filter to only associations with the specified term ID as an ancestor of the object. Defaults to None
entity: Filter to only associations where the specified entities are the subject or the object. Defaults to None.
q: Query string to search within matches. Defaults to None.
offset: Result offset, for pagination. Defaults to 0.
limit: Limit results to specified number. Defaults to 20.

Expand All @@ -254,6 +256,7 @@ def get_associations(
subject_closure=subject_closure,
object_closure=object_closure,
direct=direct,
q=q,
offset=offset,
limit=limit,
)
Expand Down
18 changes: 16 additions & 2 deletions backend/src/monarch_py/implementations/solr/solr_query_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ def build_association_query(
if q:
# We don't yet have tokenization strategies for the association index, initially we'll limit searching to
# the visible fields in an association table plus their ID equivalents and use a wildcard query for substring matching
query.q = f"*{q}*"
query.query_fields = "subject subject_label predicate object object_label"
query.q = q
query.def_type = "edismax"
query.query_fields = association_search_query_fields()
if sort:
query.sort = ", ".join(sort)
if facet_fields:
Expand Down Expand Up @@ -214,3 +215,16 @@ def entity_query_fields():
since the field list and boosts are currently the same
"""
return "id^100 name^10 name_t^5 name_ac symbol^10 symbol_t^5 symbol_ac synonym synonym_t synonym_ac"


def association_search_query_fields() -> str:
    """
    Shared field list for free text search on associations (e.g. for the association table).

    Returns:
        A single space-separated string of Solr field names, suitable for use as
        the query-fields value of an association query. The ``subject_label`` and
        ``object_label`` entries carry a ``^2`` boost; every other field is
        listed without an explicit boost.
    """
    # NOTE: the trailing space after "provided_by" is part of the returned value;
    # the association query test fixtures compare this string exactly, so keep it
    # unless those fixtures are updated at the same time.
    return (
        # subject side: ID, label variants and closure variants
        "subject subject_label^2 subject_label_t subject_closure subject_closure_label subject_closure_label_t"
        # predicate
        " predicate predicate_t"
        # object side: mirrors the subject fields
        " object object_label^2 object_label_t object_closure object_closure_label object_closure_label_t"
        # supporting / provenance fields
        " publications has_evidence primary_knowledge_source aggregator_knowledge_source provided_by "
    )
2 changes: 1 addition & 1 deletion backend/src/monarch_py/service/solr_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def get(self, id):
def query(self, q: SolrQuery) -> SolrQueryResult:
url = f"{self.base_url}/{self.core.value}/select?{q.query_string()}"
response = requests.get(url)

logger.debug(f"SolrService.query: {url}")
data = json.loads(response.text)
if "error" in data:
logger.error("Solr error message: " + data["error"]["msg"])
Expand Down
3 changes: 2 additions & 1 deletion backend/src/monarch_py/solr_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ def association_table(
help="The association category to get associations for, ex. biolink:GeneToPhenotypicFeatureAssociation",
),
q: str = typer.Option(None, "--query", "-q"),
sort: List[str] = typer.Option(None, "--sort", "-s"),
limit: int = typer.Option(5, "--limit", "-l"),
offset: int = typer.Option(0, "--offset"),
fmt: str = typer.Option(
Expand All @@ -354,7 +355,7 @@ def association_table(
output: str = typer.Option(None, "--output", "-O", help="The path to the output file"),
):
solr = get_solr(update=False)
response = solr.get_association_table(entity=entity, category=category, q=q, limit=limit, offset=offset)
response = solr.get_association_table(entity=entity, category=category, sort=sort, q=q, limit=limit, offset=offset)
format_output(fmt, response, output)


Expand Down
4 changes: 2 additions & 2 deletions backend/tests/fixtures/association_query_direct.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
@pytest.fixture
def association_query_direct():
return {
"q": "*test:q*",
"q": "test:q",
"rows": 100,
"start": 100,
"facet": True,
Expand All @@ -19,7 +19,7 @@ def association_query_direct():
"object_closure:TEST\\:0000004",
'subject:"TEST\\:0000005" OR object:"TEST\\:0000005"',
],
"query_fields": "subject subject_label predicate object object_label",
"query_fields": "subject subject_label^2 subject_label_t subject_closure subject_closure_label subject_closure_label_t predicate predicate_t object object_label^2 object_label_t object_closure object_closure_label object_closure_label_t publications has_evidence primary_knowledge_source aggregator_knowledge_source provided_by ",
"def_type": "edismax",
"q_op": "AND",
"mm": "100%",
Expand Down
4 changes: 2 additions & 2 deletions backend/tests/fixtures/association_query_indirect.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
@pytest.fixture
def association_query_indirect():
return {
"q": "*test:q*",
"q": "test:q",
"rows": 100,
"start": 100,
"facet": True,
Expand All @@ -19,7 +19,7 @@ def association_query_indirect():
"object_closure:TEST\\:0000004",
'subject:"TEST\\:0000005" OR subject_closure:"TEST\\:0000005" OR object:"TEST\\:0000005" OR object_closure:"TEST\\:0000005"',
],
"query_fields": "subject subject_label predicate object object_label",
"query_fields": "subject subject_label^2 subject_label_t subject_closure subject_closure_label subject_closure_label_t predicate predicate_t object object_label^2 object_label_t object_closure object_closure_label object_closure_label_t publications has_evidence primary_knowledge_source aggregator_knowledge_source provided_by ",
"def_type": "edismax",
"q_op": "AND",
"mm": "100%",
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/fixtures/association_table_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def association_table_response():
return {
"responseHeader": {
"QTime": 1,
"QTime": 0,
"params": {
"mm": "100%",
"q": "*:*",
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/fixtures/histopheno_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def histopheno_response():
return {
"responseHeader": {
"QTime": 4,
"QTime": 1,
"params": {
"facet.query": [
'object_closure:"HP:0000924"',
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/fixtures/phenotype_explorer_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
def phenotype_explorer_compare():
return {
"subject_termset": {
"MP:0002169": {"id": "MP:0002169", "label": "no abnormal phenotype detected (MPO)"},
"MP:0010771": {"id": "MP:0010771", "label": "integument phenotype (MPO)"},
"MP:0002169": {"id": "MP:0002169", "label": "no abnormal phenotype detected (MPO)"},
},
"object_termset": {"HP:0004325": {"id": "HP:0004325", "label": "Decreased body weight (HPO)"}},
"subject_best_matches": {
Expand Down
24 changes: 24 additions & 0 deletions backend/tests/integration/test_solr_association.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,27 @@ def test_multi_entity_associations():
for c in response[0].associated_categories:
if c.counterpart_category == "biolink:Disease":
assert c.total > 0


@pytest.mark.parametrize(
    "q",
    [
        "eyebrow",
        "thick",
        "Thick",
        "Thick eyebrow",
        "thick eyebrow",
        "Thick eyebrow (HPO)",
    ],
)
def test_association_search_partial_match(q: str):
    """
    Free-text search via ``get_associations`` should match partial / case-varied queries.

    Each ``q`` is a fragment or case variant of "Thick eyebrow (HPO)"
    (presumably the label of HP:0000574 - confirm against the index). For every
    variant, the disease-to-phenotype associations of MONDO:0011518 filtered by
    ``q`` must be non-empty and must include HP:0000574 as an object.
    """
    si = SolrImplementation()
    response = si.get_associations(
        q=q,
        subject="MONDO:0011518",
        category="biolink:DiseaseToPhenotypicFeatureAssociation",
    )
    assert response
    assert response.total > 0
    # Collect the object IDs once; only membership matters, not order.
    returned_objects = {item.object for item in response.items}
    assert "HP:0000574" in returned_objects


@pytest.mark.parametrize(
    "q",
    [
        "eyebrow",
        "thick",
        "Thick",
        "Thick eyebrow",
        "thick eyebrow",
        "Thick eyebrow (HPO)",
    ],
)
def test_association_table_search_partial_match(q: str):
    """
    Free-text search via ``get_association_table`` should match partial / case-varied queries.

    Each ``q`` is a fragment or case variant of "Thick eyebrow (HPO)"
    (presumably the label of HP:0000574 - confirm against the index). For every
    variant, the disease-to-phenotype association table for entity
    MONDO:0011518 filtered by ``q`` must be non-empty and must include
    HP:0000574 as an object.
    """
    si = SolrImplementation()
    response = si.get_association_table(
        entity="MONDO:0011518",
        category="biolink:DiseaseToPhenotypicFeatureAssociation",
        q=q,
    )
    assert response
    assert response.total > 0
    # Collect the object IDs once; only membership matters, not order.
    returned_objects = {item.object for item in response.items}
    assert "HP:0000574" in returned_objects
8 changes: 4 additions & 4 deletions frontend/fixtures/phenotype-explorer-compare.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"subject_termset": {
"MP:0002169": {
"id": "MP:0002169",
"label": "no abnormal phenotype detected (MPO)"
},
"MP:0010771": {
"id": "MP:0010771",
"label": "integument phenotype (MPO)"
},
"MP:0002169": {
"id": "MP:0002169",
"label": "no abnormal phenotype detected (MPO)"
}
},
"object_termset": {
Expand Down