Streamline relation string handling

biopragmatics · Nov 19, 2024 · d2746c7 · d2746c7
1 parent 36f8888
commit d2746c7
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 56 deletions.
diff --git a/src/pyobo/api/relations.py b/src/pyobo/api/relations.py
@@ -20,7 +20,8 @@
 )
 from ..getters import get_ontology
 from ..identifier_utils import wrap_norm_prefix
-from ..struct import Reference, RelationHint, TypeDef, get_reference_tuple
+from ..struct import Reference, TypeDef
+from ..struct.struct import ReferenceHint, _ensure_ref
 from ..utils.cache import cached_df
 from ..utils.path import prefix_cache_join
 
@@ -78,36 +79,37 @@ def _df_getter() -> pd.DataFrame:
 @wrap_norm_prefix
 def get_filtered_relations_df(
     prefix: str,
-    relation: RelationHint,
+    relation: ReferenceHint,
     *,
     use_tqdm: bool = False,
     force: bool = False,
     version: str | None = None,
     force_process: bool = False,
 ) -> pd.DataFrame:
     """Get all the given relation."""
-    relation_prefix, relation_identifier = relation = get_reference_tuple(relation)
+    relation_prefix, relation_identifier = relation = _ensure_ref(relation).pair
     if version is None:
         version = get_version(prefix)
+
+    all_relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
+    if all_relations_path.is_file():
+        logger.debug("[%s] loading all relations from %s", prefix, all_relations_path)
+        df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
+        idx = (df[RELATION_PREFIX] == relation_prefix) & (
+            df[RELATION_ID] == relation_identifier
+        )
+        columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
+        return df.loc[idx, columns]
+
     path = prefix_cache_join(
         prefix,
         "relations",
         name=f"{relation_prefix}:{relation_identifier}.tsv",
         version=version,
     )
-    all_relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
 
     @cached_df(path=path, dtype=str, force=force or force_process)
     def _df_getter() -> pd.DataFrame:
-        if os.path.exists(all_relations_path):
-            logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
-            df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
-            idx = (df[RELATION_PREFIX] == relation_prefix) & (
-                df[RELATION_ID] == relation_identifier
-            )
-            columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
-            return df.loc[idx, columns]
-
         logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
         ontology = get_ontology(prefix, force=force, version=version, rewrite=force_process)
         return ontology.get_filtered_relations_df(relation, use_tqdm=use_tqdm)
@@ -136,7 +138,7 @@ def get_id_multirelations_mapping(
 @wrap_norm_prefix
 def get_relation_mapping(
     prefix: str,
-    relation: RelationHint,
+    relation: ReferenceHint,
     target_prefix: str,
     *,
     use_tqdm: bool = False,
@@ -168,7 +170,7 @@ def get_relation_mapping(
 def get_relation(
     prefix: str,
     source_identifier: str,
-    relation: RelationHint,
+    relation: ReferenceHint,
     target_prefix: str,
     *,
     use_tqdm: bool = False,

diff --git a/src/pyobo/struct/__init__.py b/src/pyobo/struct/__init__.py
@@ -13,13 +13,11 @@
     make_ad_hoc_ontology,
 )
 from .typedef import (
-    RelationHint,
     TypeDef,
     derives_from,
     enables,
     from_species,
     gene_product_member_of,
-    get_reference_tuple,
     has_gene_product,
     has_member,
     has_part,
@@ -50,7 +48,6 @@
     "enables",
     "from_species",
     "gene_product_member_of",
-    "get_reference_tuple",
     "has_gene_product",
     "has_member",
     "has_part",

diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py
@@ -14,7 +14,7 @@
 from operator import attrgetter
 from pathlib import Path
 from textwrap import dedent
-from typing import Any, ClassVar, Literal, TextIO, Union
+from typing import Any, ClassVar, Literal, TextIO, TypeAlias
 
 import bioregistry
 import click
@@ -25,13 +25,11 @@
 
 from .reference import Reference, Referenced
 from .typedef import (
-    RelationHint,
     TypeDef,
     comment,
     default_typedefs,
     exact_match,
     from_species,
-    get_reference_tuple,
     has_ontology_root_term,
     has_part,
     is_a,
@@ -50,12 +48,12 @@
     TARGET_ID,
     TARGET_PREFIX,
 )
-from ..identifier_utils import normalize_curie
 from ..utils.io import multidict, write_iterable_tsv
 from ..utils.path import prefix_directory_join
 
 __all__ = [
     "Obo",
+    "ReferenceHint",
     "Synonym",
     "SynonymSpecificities",
     "SynonymSpecificity",
@@ -132,18 +130,26 @@ def to_obo(self) -> str:
 )
 acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))
 
-ReferenceHint = Union[Reference, "Term", tuple[str, str], str]
+ReferenceHint: TypeAlias = Reference | Referenced | tuple[str, str] | str
 
 
-def _ensure_ref(reference: ReferenceHint) -> Reference:
+def _ensure_ref(
+    reference: ReferenceHint,
+    *,
+    ontology_prefix: str | None = None,
+) -> Reference:
     if reference is None:
         raise ValueError("can not append null reference")
-    if isinstance(reference, Term):
+    if isinstance(reference, Referenced):
         return reference.reference
     if isinstance(reference, str):
-        _rv = Reference.from_curie(reference)
+        if ":" not in reference:
+            if not ontology_prefix:
+                raise ValueError
+            return default_reference(ontology_prefix, reference)
+        _rv = Reference.from_curie(reference, strict=True)
         if _rv is None:
-            raise ValueError(f"could not parse CURIE from {reference}")
+            raise RuntimeError  # not possible, need typing for Reference.from_curie
         return _rv
     if isinstance(reference, tuple):
         return Reference(prefix=reference[0], identifier=reference[1])
@@ -1236,14 +1242,14 @@ def iter_relation_rows(
 
     def iterate_filtered_relations(
         self,
-        relation: RelationHint,
+        relation: ReferenceHint,
         *,
         use_tqdm: bool = False,
     ) -> Iterable[tuple[Term, Reference]]:
         """Iterate over tuples of terms and ther targets for the given relation."""
-        _target_prefix, _target_identifier = get_reference_tuple(relation)
-        for term, typedef, reference in self.iterate_relations(use_tqdm=use_tqdm):
-            if typedef.prefix == _target_prefix and typedef.identifier == _target_identifier:
+        _pair = _ensure_ref(relation, ontology_prefix=self.ontology).pair
+        for term, predicate, reference in self.iterate_relations(use_tqdm=use_tqdm):
+            if _pair == predicate.pair:
                 yield term, reference
 
     @property
@@ -1260,7 +1266,7 @@ def get_relations_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
 
     def get_filtered_relations_df(
         self,
-        relation: RelationHint,
+        relation: ReferenceHint,
         *,
         use_tqdm: bool = False,
     ) -> pd.DataFrame:
@@ -1275,7 +1281,7 @@ def get_filtered_relations_df(
 
     def iterate_filtered_relations_filtered_targets(
         self,
-        relation: RelationHint,
+        relation: ReferenceHint,
         target_prefix: str,
         *,
         use_tqdm: bool = False,
@@ -1289,7 +1295,7 @@ def iterate_filtered_relations_filtered_targets(
 
     def get_relation_mapping(
         self,
-        relation: RelationHint,
+        relation: ReferenceHint,
         target_prefix: str,
         *,
         use_tqdm: bool = False,
@@ -1319,7 +1325,7 @@ def get_relation_mapping(
     def get_relation(
         self,
         source_identifier: str,
-        relation: RelationHint,
+        relation: ReferenceHint,
         target_prefix: str,
         *,
         use_tqdm: bool = False,
@@ -1339,7 +1345,7 @@ def get_relation(
 
     def get_relation_multimapping(
         self,
-        relation: RelationHint,
+        relation: ReferenceHint,
         target_prefix: str,
         *,
         use_tqdm: bool = False,

diff --git a/src/pyobo/struct/typedef.py b/src/pyobo/struct/typedef.py
@@ -11,7 +11,6 @@
 from ..resources.ro import load_ro
 
 __all__ = [
-    "RelationHint",
     "TypeDef",
     "alternative_term",
     "default_typedefs",
@@ -21,7 +20,6 @@
     "example_of_usage",
     "from_species",
     "gene_product_member_of",
-    "get_reference_tuple",
     "has_dbxref",
     "has_gene_product",
     "has_homepage",
@@ -139,24 +137,6 @@ def from_curie(cls, curie: str, name: str | None = None) -> TypeDef:
         return cls(reference=reference)
 
 
-RelationHint = Reference | TypeDef | tuple[str, str] | str
-
-
-def get_reference_tuple(relation: RelationHint) -> tuple[str, str]:
-    """Get tuple for typedef/reference."""
-    if isinstance(relation, Reference | TypeDef):
-        return relation.pair
-    elif isinstance(relation, tuple):
-        return relation
-    elif isinstance(relation, str):
-        reference = Reference.from_curie(relation, strict=True)
-        if reference is None:
-            raise ValueError(f"string given is not valid curie: {relation}")
-        return reference.pair
-    else:
-        raise TypeError(f"Relation is invalid type: {relation}")
-
-
 RO_PREFIX = "RO"
 BFO_PREFIX = "BFO"
 IAO_PREFIX = "IAO"