Merge pull request #130 from gyorilab/grounder-input

Make instantiation of Grounder more flexible
gyorilab · Feb 6, 2024 · f336ebe · f336ebe
2 parents adee2cf + d88a6b1
commit f336ebe
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 7 deletions.
diff --git a/gilda/grounder.py b/gilda/grounder.py
@@ -4,10 +4,11 @@
 import gzip
 import logging
 import itertools
+import collections.abc
 from pathlib import Path
 from collections import defaultdict, Counter
 from textwrap import dedent
-from typing import Iterator, List, Mapping, Optional, Set, Tuple, Union
+from typing import Iterator, List, Mapping, Optional, Set, Tuple, Union, Iterable
 from adeft.disambiguate import load_disambiguator
 from adeft.modeling.classify import load_model_info
 from adeft import available_shortforms as available_adeft_models
@@ -32,7 +33,7 @@
 logger = logging.getLogger(__name__)
 
 
-GrounderInput = Union[str, Path, List[Term], Mapping[str, List[Term]]]
+GrounderInput = Union[str, Path, Iterable[Term], Mapping[str, List[Term]]]
 
 #: The default namespace priority order
 DEFAULT_NAMESPACE_PRIORITY = [
@@ -53,11 +54,12 @@ class Grounder(object):
         - If :class:`str` or :class:`pathlib.Path`, it is interpreted
           as a path to a grounding terms gzipped TSV file which is then
           loaded.
-        - If :class:`list`, it is assumed to be a flat list of
-          :class:`gilda.term.Term` instances.
         - If :class:`dict`, it is assumed to be a grounding terms dict with
           normalized entity strings as keys and :class:`gilda.term.Term`
           instances as values.
+        - If :class:`list`, :class:`set`, :class:`tuple`, or any other iterable,
+          it is assumed to be a flat collection of
+          :class:`gilda.term.Term` instances.
     namespace_priority :
         Specifies a term namespace priority order. For example, if multiple
         terms are matched with the same score, will use this list to decide
@@ -85,13 +87,13 @@ def __init__(
                 self.entries = SqliteEntries(terms)
             else:
                 self.entries = load_terms_file(terms)
-        elif isinstance(terms, list):
+        elif isinstance(terms, dict):
+            self.entries = terms
+        elif isinstance(terms, collections.abc.Iterable):
             self.entries = defaultdict(list)
             for term in terms:
                 self.entries[term.norm_text].append(term)
             self.entries = dict(self.entries)
-        elif isinstance(terms, dict):
-            self.entries = terms
         else:
             raise TypeError('terms is neither a path nor a list of terms,'
                             'nor a normalized entry name to term dictionary')

diff --git a/gilda/tests/test_grounder.py b/gilda/tests/test_grounder.py
@@ -1,5 +1,6 @@
 from gilda.term import Term
 from gilda.grounder import Grounder, filter_for_organism
+import pytest
 from . import appreq
 
 
@@ -256,3 +257,42 @@ def test_sqlite():
 def test_strip_whitespace():
     matches = gr.ground(' inflammatory response ')
     assert matches
+
+
+def test_instantiate():
+    """Test instantiating the grounder with different data structures."""
+    term = Term(
+        "mitochondria",
+        "Mitochondria",
+        "GO",
+        "GO:0005739",
+        "mitochondrion",
+        "synonym",
+        "mesh",
+        None,
+        "MESH",
+        "D008928",
+    )
+
+    # test instantiating with list
+    gr = Grounder([term])
+    assert len(gr.ground("mitochondria")) == 1
+
+    # test instantiating with set
+    gr = Grounder({term})
+    assert len(gr.ground("mitochondria")) == 1
+
+    # test instantiating with tuple
+    gr = Grounder((term,))
+    assert len(gr.ground("mitochondria")) == 1
+
+    # test instantiating with a one-shot iterator
+    gr = Grounder(iter([term]))
+    assert len(gr.ground("mitochondria")) == 1
+
+    # test instantiating with dict
+    gr = Grounder({term.norm_text: [term]})
+    assert len(gr.ground("mitochondria")) == 1
+
+    with pytest.raises(TypeError):
+        Grounder(5)