Skip to content

Commit

Permalink
Streamline relation string handling
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Nov 19, 2024
1 parent 36f8888 commit d2746c7
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 56 deletions.
32 changes: 17 additions & 15 deletions src/pyobo/api/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
)
from ..getters import get_ontology
from ..identifier_utils import wrap_norm_prefix
from ..struct import Reference, RelationHint, TypeDef, get_reference_tuple
from ..struct import Reference, TypeDef
from ..struct.struct import ReferenceHint, _ensure_ref
from ..utils.cache import cached_df
from ..utils.path import prefix_cache_join

Expand Down Expand Up @@ -78,36 +79,37 @@ def _df_getter() -> pd.DataFrame:
@wrap_norm_prefix
def get_filtered_relations_df(
prefix: str,
relation: RelationHint,
relation: ReferenceHint,
*,
use_tqdm: bool = False,
force: bool = False,
version: str | None = None,
force_process: bool = False,
) -> pd.DataFrame:
"""Get all relations of the given type."""
relation_prefix, relation_identifier = relation = get_reference_tuple(relation)
relation_prefix, relation_identifier = relation = _ensure_ref(relation).pair
if version is None:
version = get_version(prefix)

all_relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
if all_relations_path.is_file():
logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
idx = (df[RELATION_PREFIX] == relation_prefix) & (
df[RELATION_ID] == relation_identifier
)
columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
return df.loc[idx, columns]

path = prefix_cache_join(
prefix,
"relations",
name=f"{relation_prefix}:{relation_identifier}.tsv",
version=version,
)
all_relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)

@cached_df(path=path, dtype=str, force=force or force_process)
def _df_getter() -> pd.DataFrame:
if os.path.exists(all_relations_path):
logger.debug("[%] loading all relations from %s", prefix, all_relations_path)
df = pd.read_csv(all_relations_path, sep="\t", dtype=str)
idx = (df[RELATION_PREFIX] == relation_prefix) & (
df[RELATION_ID] == relation_identifier
)
columns = [f"{prefix}_id", TARGET_PREFIX, TARGET_ID]
return df.loc[idx, columns]

logger.info("[%s] no cached relations found. getting from OBO loader", prefix)
ontology = get_ontology(prefix, force=force, version=version, rewrite=force_process)
return ontology.get_filtered_relations_df(relation, use_tqdm=use_tqdm)
Expand Down Expand Up @@ -136,7 +138,7 @@ def get_id_multirelations_mapping(
@wrap_norm_prefix
def get_relation_mapping(
prefix: str,
relation: RelationHint,
relation: ReferenceHint,
target_prefix: str,
*,
use_tqdm: bool = False,
Expand Down Expand Up @@ -168,7 +170,7 @@ def get_relation_mapping(
def get_relation(
prefix: str,
source_identifier: str,
relation: RelationHint,
relation: ReferenceHint,
target_prefix: str,
*,
use_tqdm: bool = False,
Expand Down
3 changes: 0 additions & 3 deletions src/pyobo/struct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,11 @@
make_ad_hoc_ontology,
)
from .typedef import (
RelationHint,
TypeDef,
derives_from,
enables,
from_species,
gene_product_member_of,
get_reference_tuple,
has_gene_product,
has_member,
has_part,
Expand Down Expand Up @@ -50,7 +48,6 @@
"enables",
"from_species",
"gene_product_member_of",
"get_reference_tuple",
"has_gene_product",
"has_member",
"has_part",
Expand Down
42 changes: 24 additions & 18 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from operator import attrgetter
from pathlib import Path
from textwrap import dedent
from typing import Any, ClassVar, Literal, TextIO, Union
from typing import Any, ClassVar, Literal, TextIO, TypeAlias

import bioregistry
import click
Expand All @@ -25,13 +25,11 @@

from .reference import Reference, Referenced
from .typedef import (
RelationHint,
TypeDef,
comment,
default_typedefs,
exact_match,
from_species,
get_reference_tuple,
has_ontology_root_term,
has_part,
is_a,
Expand All @@ -50,12 +48,12 @@
TARGET_ID,
TARGET_PREFIX,
)
from ..identifier_utils import normalize_curie
from ..utils.io import multidict, write_iterable_tsv
from ..utils.path import prefix_directory_join

__all__ = [
"Obo",
"ReferenceHint",
"Synonym",
"SynonymSpecificities",
"SynonymSpecificity",
Expand Down Expand Up @@ -132,18 +130,26 @@ def to_obo(self) -> str:
)
acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))

ReferenceHint = Union[Reference, "Term", tuple[str, str], str]
ReferenceHint: TypeAlias = Reference | Referenced | tuple[str, str] | str


def _ensure_ref(reference: ReferenceHint) -> Reference:
def _ensure_ref(
reference: ReferenceHint,
*,
ontology_prefix: str | None = None,
) -> Reference:
if reference is None:
raise ValueError("can not append null reference")
if isinstance(reference, Term):
if isinstance(reference, Referenced):
return reference.reference
if isinstance(reference, str):
_rv = Reference.from_curie(reference)
if ":" not in reference:
if not ontology_prefix:
raise ValueError
return default_reference(ontology_prefix, reference)
_rv = Reference.from_curie(reference, strict=True)
if _rv is None:
raise ValueError(f"could not parse CURIE from {reference}")
raise RuntimeError # not possible, need typing for Reference.from_curie
return _rv
if isinstance(reference, tuple):
return Reference(prefix=reference[0], identifier=reference[1])
Expand Down Expand Up @@ -1236,14 +1242,14 @@ def iter_relation_rows(

def iterate_filtered_relations(
self,
relation: RelationHint,
relation: ReferenceHint,
*,
use_tqdm: bool = False,
) -> Iterable[tuple[Term, Reference]]:
"""Iterate over tuples of terms and their targets for the given relation."""
_target_prefix, _target_identifier = get_reference_tuple(relation)
for term, typedef, reference in self.iterate_relations(use_tqdm=use_tqdm):
if typedef.prefix == _target_prefix and typedef.identifier == _target_identifier:
_pair = _ensure_ref(relation, ontology_prefix=self.ontology).pair
for term, predicate, reference in self.iterate_relations(use_tqdm=use_tqdm):
if _pair == predicate.pair:
yield term, reference

@property
Expand All @@ -1260,7 +1266,7 @@ def get_relations_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:

def get_filtered_relations_df(
self,
relation: RelationHint,
relation: ReferenceHint,
*,
use_tqdm: bool = False,
) -> pd.DataFrame:
Expand All @@ -1275,7 +1281,7 @@ def get_filtered_relations_df(

def iterate_filtered_relations_filtered_targets(
self,
relation: RelationHint,
relation: ReferenceHint,
target_prefix: str,
*,
use_tqdm: bool = False,
Expand All @@ -1289,7 +1295,7 @@ def iterate_filtered_relations_filtered_targets(

def get_relation_mapping(
self,
relation: RelationHint,
relation: ReferenceHint,
target_prefix: str,
*,
use_tqdm: bool = False,
Expand Down Expand Up @@ -1319,7 +1325,7 @@ def get_relation_mapping(
def get_relation(
self,
source_identifier: str,
relation: RelationHint,
relation: ReferenceHint,
target_prefix: str,
*,
use_tqdm: bool = False,
Expand All @@ -1339,7 +1345,7 @@ def get_relation(

def get_relation_multimapping(
self,
relation: RelationHint,
relation: ReferenceHint,
target_prefix: str,
*,
use_tqdm: bool = False,
Expand Down
20 changes: 0 additions & 20 deletions src/pyobo/struct/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from ..resources.ro import load_ro

__all__ = [
"RelationHint",
"TypeDef",
"alternative_term",
"default_typedefs",
Expand All @@ -21,7 +20,6 @@
"example_of_usage",
"from_species",
"gene_product_member_of",
"get_reference_tuple",
"has_dbxref",
"has_gene_product",
"has_homepage",
Expand Down Expand Up @@ -139,24 +137,6 @@ def from_curie(cls, curie: str, name: str | None = None) -> TypeDef:
return cls(reference=reference)


RelationHint = Reference | TypeDef | tuple[str, str] | str


def get_reference_tuple(relation: RelationHint) -> tuple[str, str]:
    """Normalize a relation hint into a ``(prefix, identifier)`` pair.

    :param relation: A ``Reference`` or ``TypeDef`` (its ``pair`` attribute is
        returned), a tuple (returned unchanged), or a CURIE string (parsed
        with ``Reference.from_curie`` in strict mode).
    :returns: The prefix/identifier pair for the relation.
    :raises ValueError: If a string is given and parsing yields ``None``.
    :raises TypeError: If the relation is none of the supported types.
    """
    # Objects that already carry a reference expose the pair directly.
    if isinstance(relation, (Reference, TypeDef)):
        return relation.pair

    # Tuples are passed through without any validation.
    if isinstance(relation, tuple):
        return relation

    if isinstance(relation, str):
        parsed = Reference.from_curie(relation, strict=True)
        if parsed is None:
            raise ValueError(f"string given is not valid curie: {relation}")
        return parsed.pair

    raise TypeError(f"Relation is invalid type: {relation}")


RO_PREFIX = "RO"
BFO_PREFIX = "BFO"
IAO_PREFIX = "IAO"
Expand Down

0 comments on commit d2746c7

Please sign in to comment.