Skip to content

Commit

Permalink
Merge pull request #130 from gyorilab/grounder-input
Browse files Browse the repository at this point in the history
Make instantiation of Grounder more flexible
  • Loading branch information
bgyori authored Feb 6, 2024
2 parents adee2cf + d88a6b1 commit f336ebe
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 7 deletions.
16 changes: 9 additions & 7 deletions gilda/grounder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
import gzip
import logging
import itertools
import collections.abc
from pathlib import Path
from collections import defaultdict, Counter
from textwrap import dedent
from typing import Iterator, List, Mapping, Optional, Set, Tuple, Union
from typing import Iterator, List, Mapping, Optional, Set, Tuple, Union, Iterable
from adeft.disambiguate import load_disambiguator
from adeft.modeling.classify import load_model_info
from adeft import available_shortforms as available_adeft_models
Expand All @@ -32,7 +33,7 @@
logger = logging.getLogger(__name__)


GrounderInput = Union[str, Path, List[Term], Mapping[str, List[Term]]]
GrounderInput = Union[str, Path, Iterable[Term], Mapping[str, List[Term]]]

#: The default namespace priority order
DEFAULT_NAMESPACE_PRIORITY = [
Expand All @@ -53,11 +54,12 @@ class Grounder(object):
- If :class:`str` or :class:`pathlib.Path`, it is interpreted
as a path to a grounding terms gzipped TSV file which is then
loaded.
- If :class:`list`, it is assumed to be a flat list of
:class:`gilda.term.Term` instances.
- If :class:`dict`, it is assumed to be a grounding terms dict with
normalized entity strings as keys and lists of :class:`gilda.term.Term`
instances as values.
- If :class:`list`, :class:`set`, :class:`tuple`, or any other iterable,
it is assumed to be a flat list of
:class:`gilda.term.Term` instances.
namespace_priority :
Specifies a term namespace priority order. For example, if multiple
terms are matched with the same score, will use this list to decide
Expand Down Expand Up @@ -85,13 +87,13 @@ def __init__(
self.entries = SqliteEntries(terms)
else:
self.entries = load_terms_file(terms)
elif isinstance(terms, list):
elif isinstance(terms, dict):
self.entries = terms
elif isinstance(terms, collections.abc.Iterable):
self.entries = defaultdict(list)
for term in terms:
self.entries[term.norm_text].append(term)
self.entries = dict(self.entries)
elif isinstance(terms, dict):
self.entries = terms
else:
raise TypeError('terms is neither a path nor a list of terms,'
'nor a normalized entry name to term dictionary')
Expand Down
40 changes: 40 additions & 0 deletions gilda/tests/test_grounder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from gilda.term import Term
from gilda.grounder import Grounder, filter_for_organism
import pytest
from . import appreq


Expand Down Expand Up @@ -256,3 +257,42 @@ def test_sqlite():
def test_strip_whitespace():
    """Grounding text padded with surrounding whitespace yields matches."""
    padded_text = ' inflammatory response '
    # A non-empty result is the only requirement checked here.
    assert gr.ground(padded_text)


def test_instantiate():
    """Check that Grounder accepts every supported container of terms.

    The same single term is supplied as a list, set, tuple, one-shot
    iterator, and a pre-built dict keyed by normalized text; each form
    must ground "mitochondria" to exactly one match. A non-iterable,
    non-path argument must be rejected with ``TypeError``.
    """
    mito = Term(
        "mitochondria",
        "Mitochondria",
        "GO",
        "GO:0005739",
        "mitochondrion",
        "synonym",
        "mesh",
        None,
        "MESH",
        "D008928",
    )

    # Equivalent inputs, differing only in the container type.
    equivalent_inputs = (
        [mito],                     # list
        {mito},                     # set
        (mito,),                    # tuple
        iter([mito]),               # generic (one-shot) iterable
        {mito.norm_text: [mito]},   # dict of normalized text -> terms
    )
    for terms in equivalent_inputs:
        grounder = Grounder(terms)
        assert len(grounder.ground("mitochondria")) == 1

    # Anything that is not a path, dict, or iterable is an error.
    with pytest.raises(TypeError):
        Grounder(5)

0 comments on commit f336ebe

Please sign in to comment.