diff --git a/pyproject.toml b/pyproject.toml index 7491fb91..14260f67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ dependencies = [ "class_resolver", "psycopg2-binary", "pydantic>=2.0", - "curies>=0.9.0", + "curies>=0.10.2", # Resource Downloaders "drugbank_downloader", "chembl_downloader", diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index 0c3937e5..16e916cc 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any +from collections.abc import Iterable, Mapping, Sequence +from typing import Any, NamedTuple import bioontologies.relations import bioontologies.upgrade @@ -192,7 +193,9 @@ def reference_escape( """Write a reference with default namespace removed.""" if reference.prefix == "obo" and reference.identifier.startswith(f"{ontology_prefix}#"): return reference.identifier.removeprefix(f"{ontology_prefix}#") - rv = reference.preferred_curie + # get sneaky, to allow a variety of the base class from curies.Reference to + # the extended version in pyobo.Reference + rv = getattr(reference, "preferred_curie", reference.curie) if add_name_comment and reference.name: rv += f" ! 
{reference.name}" return rv @@ -235,3 +238,99 @@ def _parse_identifier( unspecified_matching = Reference( prefix="semapv", identifier="UnspecifiedMatching", name="unspecified matching process" ) + + +class OBOLiteral(NamedTuple): + """A tuple representing a property with a literal value.""" + + value: str + datatype: Reference + + +AxiomsHint = Mapping[ + tuple[Reference, Reference | OBOLiteral], Sequence[tuple[Reference, Reference | OBOLiteral]] +] + + +def _iterate_obo_relations( + relations: Mapping[Reference, Sequence[Reference | OBOLiteral]], + annotations: AxiomsHint, + *, + ontology_prefix: str, +) -> Iterable[str]: + """Iterate over relations/property values for OBO.""" + for predicate, values in relations.items(): + # TODO typedef warning: ``_typedef_warn(prefix=ontology_prefix, predicate=predicate, typedefs=typedefs)`` + pc = reference_escape(predicate, ontology_prefix=ontology_prefix) + start = f"{pc} " + for value in values: + match value: + case OBOLiteral(dd, datatype): + # TODO how to clean/escape value? + end = f'"{dd}" {datatype.preferred_curie}' + name = None + case curies.Reference(): # it's a reference + end = reference_escape(value, ontology_prefix=ontology_prefix) + name = value.name + case _: + raise TypeError(f"got unexpected value: {values}") + end += _get_obo_trailing_modifiers( + predicate, value, annotations, ontology_prefix=ontology_prefix + ) + if predicate.name and name: + end += f" ! 
{predicate.name} {name}" + yield start + end + + +def _get_obo_trailing_modifiers( + p: Reference, o: Reference | OBOLiteral, axioms: AxiomsHint, *, ontology_prefix: str +) -> str: + """Lookup then format a sequence of axioms for OBO trailing modifiers.""" + if annotations := axioms.get((p, o), []): + return _format_obo_trailing_modifiers(annotations, ontology_prefix=ontology_prefix) + return "" + + +def _format_obo_trailing_modifiers( + annotations: Sequence[tuple[Reference, Reference | OBOLiteral]], *, ontology_prefix: str +) -> str: + """Format a sequence of axioms for OBO trailing modifiers. + + :param annotations: A list of annotations + :param ontology_prefix: The ontology prefix + :return: The trailing modifiers string + + See https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.1.4 + trailing modifiers can be both axioms and some other implementation-specific + things, so split up the place where axioms are put in here. + """ + modifiers: list[tuple[str, str]] = [] + for predicate, part in annotations: + ap_str = reference_escape(predicate, ontology_prefix=ontology_prefix) + match part: + case OBOLiteral(dd, _datatype): + ao_str = str(dd) + case _: # it's a reference + ao_str = reference_escape(part, ontology_prefix=ontology_prefix) + modifiers.append((ap_str, ao_str)) + inner = ", ".join(f"{key}={value}" for key, value in sorted(modifiers)) + return " {" + inner + "}" + + +def _chain_tag(tag: str, chain: list[Reference] | None, ontology_prefix: str) -> Iterable[str]: + if chain: + yv = f"{tag}: " + yv += " ".join( + reference_escape(reference, ontology_prefix=ontology_prefix) for reference in chain + ) + if any(reference.name for reference in chain): + _names = " / ".join(link.name or "_" for link in chain) + yv += f" ! 
{_names}" + yield yv + + +def _reference_list_tag( + tag: str, references: list[Reference], ontology_prefix: str +) -> Iterable[str]: + for reference in references: + yield f"{tag}: {reference_escape(reference, ontology_prefix=ontology_prefix, add_name_comment=True)}" diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py index b16bf81e..5aecd122 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -28,8 +28,12 @@ from typing_extensions import Self from .reference import ( + OBOLiteral, Reference, Referenced, + _get_obo_trailing_modifiers, + _iterate_obo_relations, + _reference_list_tag, comma_separate_references, default_reference, reference_escape, @@ -54,7 +58,7 @@ see_also, term_replaced_by, ) -from .utils import obo_escape_slim +from .utils import _boolean_tag, obo_escape_slim from ..api.utils import get_version from ..constants import ( DATE_FORMAT, @@ -128,14 +132,20 @@ def _sort_key(self) -> tuple[str, str, Reference]: return self.name, self.specificity, self.type def to_obo( - self, ontology_prefix: str, synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] + self, + ontology_prefix: str, + synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None, ) -> str: """Write this synonym as an OBO line to appear in a [Term] stanza.""" return f"synonym: {self._fp(ontology_prefix, synonym_typedefs)}" def _fp( - self, ontology_prefix: str, synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] + self, + ontology_prefix: str, + synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None, ) -> str: + if synonym_typedefs is None: + synonym_typedefs = {} _synonym_typedef_warn(ontology_prefix, self.type, synonym_typedefs) # TODO inherit specificity from typedef? 
# TODO validation of specificity against typedef @@ -262,12 +272,9 @@ class Term(Referenced): default_factory=lambda: defaultdict(list) ) - _axioms: dict[tuple[Reference, Reference], list[ObjectProperty]] = field( - default_factory=lambda: defaultdict(list) - ) - _str_axioms: dict[tuple[Reference, Reference], list[LiteralProperty]] = field( - default_factory=lambda: defaultdict(list) - ) + _axioms: dict[ + tuple[Reference, Reference | OBOLiteral], list[tuple[Reference, Reference | OBOLiteral]] + ] = field(default_factory=lambda: defaultdict(list)) #: Annotation properties pointing to objects (i.e., references) annotations_object: dict[Reference, list[Reference]] = field( @@ -275,7 +282,7 @@ class Term(Referenced): ) #: Annotation properties pointing to literals - annotations_literal: dict[Reference, list[tuple[str, Reference]]] = field( + annotations_literal: dict[Reference, list[OBOLiteral]] = field( default_factory=lambda: defaultdict(list) ) @@ -300,6 +307,10 @@ class Term(Referenced): type: Literal["Term", "Instance"] = "Term" + builtin: bool | None = None + is_anonymous: bool | None = None + subsets: list[Reference] = field(default_factory=list) + def __hash__(self) -> int: # have to re-define hash because of the @dataclass return hash((self.__class__, self.prefix, self.identifier)) @@ -475,15 +486,21 @@ def get_mappings( def _get_object_axiom_target( self, p: Reference, o: Reference, ap: Reference ) -> Reference | None: - for axiom_predicate, axiom_target, _ in self._axioms.get((p, o), []): - if axiom_predicate == ap: - return axiom_target + for axiom_predicate, axiom_target in self._axioms.get((p, o), []): + if axiom_predicate != ap: + continue + if isinstance(axiom_target, OBOLiteral): + raise TypeError + return axiom_target return None def _get_str_axiom_target(self, p: Reference, o: Reference, ap: Reference) -> str | None: - for axiom_predicate, axiom_target, _ in self._str_axioms.get((p, o), []): - if axiom_predicate == ap: - return axiom_target + for 
axiom_predicate, axiom_target in self._axioms.get((p, o), []): + if axiom_predicate != ap: + continue + if isinstance(axiom_target, Reference): + raise TypeError + return axiom_target.value return None def _get_mapping_context(self, p: Reference, o: Reference) -> MappingContext: @@ -593,9 +610,9 @@ def _annotate_axiom( self, p: Reference, o: Reference, axiom: ObjectProperty | LiteralProperty ) -> None: if isinstance(axiom, ObjectProperty): - self._axioms[p, o].append(axiom) + self._axioms[p, o].append((axiom.predicate, axiom.object)) elif isinstance(axiom, LiteralProperty): - self._str_axioms[p, o].append(axiom) + self._axioms[p, o].append((axiom.predicate, OBOLiteral(axiom.value, axiom.datatype))) else: raise TypeError @@ -639,7 +656,7 @@ def annotate_literal( """Append an object annotation.""" prop = _ensure_ref(prop) self.annotations_literal[prop].append( - (value, datatype or Reference(prefix="xsd", identifier="string")) + OBOLiteral(value, datatype or Reference(prefix="xsd", identifier="string")) ) return self @@ -694,125 +711,77 @@ def iterate_obo_lines( """Iterate over the lines to write in an OBO file.""" yield f"\n[{self.type}]" yield f"id: {self._reference(self.reference, ontology_prefix)}" - if self.is_obsolete: - yield "is_obsolete: true" + # 2 + yield from _boolean_tag("is_anonymous", self.is_anonymous) + # 3 if self.name: yield f"name: {obo_escape_slim(self.name)}" + # 4 if self.namespace and self.namespace != "?": namespace_normalized = ( self.namespace.replace(" ", "_").replace("-", "_").replace("(", "").replace(")", "") ) yield f"namespace: {namespace_normalized}" - - xrefs = list(self.xrefs) - - if self.definition or self.provenance: - yield f"def: {self._definition_fp()}" - + # 5 for alt in sorted(self.alt_ids): yield f"alt_id: {self._reference(alt, ontology_prefix, add_name_comment=True)}" - - for xref in sorted(xrefs): + # 6 + if self.definition or self.provenance: + yield f"def: {self._definition_fp()}" + # 7 TODO comment + yield from 
_reference_list_tag("subset", self.subsets, ontology_prefix) + # 9 + for synonym in sorted(self.synonyms): + yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs) + # 10 + for xref in sorted(self.xrefs): xref_yv = f"xref: {self._reference(xref, ontology_prefix, add_name_comment=False)}" - xref_yv += self._trailing_modifiers( - has_dbxref.reference, xref, ontology_prefix=ontology_prefix + xref_yv += _get_obo_trailing_modifiers( + has_dbxref.reference, xref, self._axioms, ontology_prefix=ontology_prefix ) if xref.name: xref_yv += f" ! {xref.name}" yield xref_yv - + yield from _boolean_tag("builtin", self.builtin) + # 12 + if emit_annotation_properties: + for line in self._emit_properties(ontology_prefix, typedefs): + yield f"property_value: {line}" + # 13 parent_tag = "is_a" if self.type == "Term" else "instance_of" for parent in sorted(self.parents): yield f"{parent_tag}: {self._reference(parent, ontology_prefix, add_name_comment=True)}" - + # 14 TODO intersection_of + # 15 TODO union_of + # 16 TODO equivalent_to + # 17 TODO disjoint_from + # 18 if emit_object_properties: - yield from self._emit_relations(ontology_prefix, typedefs) - - if emit_annotation_properties: - for line in self._emit_properties(ontology_prefix, typedefs): - yield f"property_value: {line}" - - if synonym_typedefs is None: - synonym_typedefs = {} - for synonym in sorted(self.synonyms): - yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs) - - def _emit_relations( - self, ontology_prefix: str, typedefs: Mapping[ReferenceTuple, TypeDef] - ) -> Iterable[str]: - for typedef, reference in self.iterate_relations(): - _typedef_warn(prefix=ontology_prefix, predicate=typedef, typedefs=typedefs) - predicate_reference = self._reference(typedef, ontology_prefix) - s = f"relationship: {predicate_reference} {self._reference(reference, ontology_prefix)}" - s += self._trailing_modifiers(typedef, reference, ontology_prefix=ontology_prefix) 
- if typedef.name or reference.name: - s += " !" - if typedef.name: - s += f" {typedef.name}" - if reference.name: - s += f" {reference.name}" - yield s - - @classmethod - def _format_trailing_modifiers( - cls, axioms: list[ObjectProperty | LiteralProperty], ontology_prefix: str - ) -> str: - # See https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.1.4 - # trailing modifiers can be both axioms and some other implementation-specific - # things, so split up the place where axioms are put in here - modifiers: list[tuple[str, str]] = [] - - for axiom in axioms: - match axiom: - case ObjectProperty(predicate, target, _): - right = cls._reference(target, ontology_prefix) - case LiteralProperty(predicate, target, _datatype): - right = target - modifiers.append((cls._reference(predicate, ontology_prefix), right)) - - inner = ", ".join(f"{key}={value}" for key, value in sorted(modifiers)) - return "{" + inner + "}" + for part in _iterate_obo_relations( + self.relationships, self._axioms, ontology_prefix=ontology_prefix + ): + yield f"relationship: {part}" + # 19 TODO created_by + # 20 TODO creation_date + # 21 + yield from _boolean_tag("is_obsolete", self.is_obsolete) + # 22 TODO replaced_by, weird since this conflicts with other annotations + # 23 TODO consider def _emit_properties( self, ontology_prefix: str, typedefs: Mapping[ReferenceTuple, TypeDef] ) -> Iterable[str]: - yield from self._emit_object_properties(ontology_prefix, typedefs) - yield from self._emit_literal_properties(ontology_prefix, typedefs) - - def _emit_object_properties( - self, ontology_prefix: str, typedefs: Mapping[ReferenceTuple, TypeDef] - ) -> Iterable[str]: - for predicate, values in sorted(self.annotations_object.items()): - _typedef_warn(prefix=ontology_prefix, predicate=predicate, typedefs=typedefs) - predicate_curie = self._reference(predicate, ontology_prefix) - for value in sorted(values): - yv = f"{predicate_curie} {self._reference(value, ontology_prefix)}" - yv += 
self._trailing_modifiers(predicate, value, ontology_prefix=ontology_prefix) - if predicate.name and value.name: - yv += f" ! {predicate.name} {value.name}" - yield yv - - def _trailing_modifiers( - self, predicate: Reference, value: Reference, *, ontology_prefix: str - ) -> str: - axioms: list[ObjectProperty | LiteralProperty] = [ - *self._axioms.get((predicate, value), []), - *self._str_axioms.get((predicate, value), []), - ] - if axioms: - return f" {self._format_trailing_modifiers(axioms, ontology_prefix)}" - return "" - - def _emit_literal_properties( - self, ontology_prefix: str, typedefs: Mapping[ReferenceTuple, TypeDef] - ) -> Iterable[str]: - for predicate, value_datatype_pairs in sorted(self.annotations_literal.items()): - _typedef_warn(prefix=ontology_prefix, predicate=predicate, typedefs=typedefs) - predicate_curie = self._reference(predicate, ontology_prefix) - for value, datatype in sorted(value_datatype_pairs): - # TODO clean/escape value? - # TODO what about axioms here? - yield f'{predicate_curie} "{value}" {datatype.preferred_curie}' + yield from _iterate_obo_relations( + self.annotations_object, self._axioms, ontology_prefix=ontology_prefix + ) + yield from _iterate_obo_relations( + # the type checker seems to be a bit confused, this is an okay typing since we're + # passing a more explicit version. 
The issue is that list is used for the typing, + # which means it can't narrow properly + self.annotations_literal, # type:ignore + self._axioms, + ontology_prefix=ontology_prefix, + ) @staticmethod def _reference( @@ -822,10 +791,6 @@ def _reference( predicate, ontology_prefix=ontology_prefix, add_name_comment=add_name_comment ) - @staticmethod - def _escape(s) -> str: - return s.replace("\n", "\\n").replace('"', '\\"') - #: A set of warnings, used to make sure we don't show the same one over and over _TYPEDEF_WARNINGS: set[tuple[str, Reference]] = set() diff --git a/src/pyobo/struct/typedef.py b/src/pyobo/struct/typedef.py index 016fa3fd..9d391702 100644 --- a/src/pyobo/struct/typedef.py +++ b/src/pyobo/struct/typedef.py @@ -2,15 +2,31 @@ from __future__ import annotations -from collections.abc import Iterable, Sequence +import datetime +from collections import defaultdict +from collections.abc import Iterable, Mapping, Sequence from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Annotated from curies import ReferenceTuple from typing_extensions import Self -from .reference import Reference, Referenced, default_reference, reference_escape +from .reference import ( + OBOLiteral, + Reference, + Referenced, + _chain_tag, + _iterate_obo_relations, + _reference_list_tag, + default_reference, + reference_escape, +) +from .utils import _boolean_tag from ..resources.ro import load_ro +if TYPE_CHECKING: + from pyobo.struct.struct import Synonym, SynonymTypeDef + __all__ = [ "TypeDef", "alternative_term", @@ -60,10 +76,6 @@ ] -def _bool_to_obo(v: bool) -> str: - return "true" if v else "false" - - @dataclass class TypeDef(Referenced): """A type definition in OBO. @@ -71,69 +83,246 @@ class TypeDef(Referenced): See the subsection of https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.2.2. 
""" - reference: Reference - comment: str | None = None - namespace: str | None = None - definition: str | None = None - is_transitive: bool | None = None - is_symmetric: bool | None = None - domain: Reference | None = None - range: Reference | None = None - parents: list[Reference] = field(default_factory=list) - xrefs: list[Reference] = field(default_factory=list) - inverse: Reference | None = None - created_by: str | None = None - holds_over_chain: list[Reference] | None = None + reference: Annotated[Reference, 1] + is_anonymous: Annotated[bool | None, 2] = None + # 3 - name is covered by reference + namespace: Annotated[str | None, 4] = None + alt_id: Annotated[list[Reference], 5] = field(default_factory=list) + definition: Annotated[str | None, 6] = None + comment: Annotated[str | None, 7] = None + subsets: Annotated[list[Reference], 8] = field(default_factory=list) + synonyms: Annotated[list[Synonym], 9] = field(default_factory=list) + xrefs: Annotated[list[Reference], 10] = field(default_factory=list) + annotations: dict[ + tuple[Reference, Reference | OBOLiteral], list[tuple[Reference, Reference | OBOLiteral]] + ] = field(default_factory=lambda: defaultdict(list)) + properties: Annotated[dict[Reference, list[Reference | OBOLiteral]], 11] = field( + default_factory=lambda: defaultdict(list) + ) + domain: Annotated[Reference | None, 12, "typedef-only"] = None + range: Annotated[Reference | None, 13, "typedef-only"] = None + builtin: Annotated[bool | None, 14] = None + holds_over_chain: Annotated[list[Reference] | None, 15, "typedef-only"] = None + is_anti_symmetric: Annotated[bool | None, 16, "typedef-only"] = None + is_cyclic: Annotated[bool | None, 17, "typedef-only"] = None + is_reflexive: Annotated[bool | None, 18, "typedef-only"] = None + is_symmetric: Annotated[bool | None, 19, "typedef-only"] = None + is_transitive: Annotated[bool | None, 20, "typedef-only"] = None + is_functional: Annotated[bool | None, 21, "typedef-only"] = None + 
is_inverse_functional: Annotated[bool | None, 22, "typedef-only"] = None + parents: Annotated[list[Reference], 23] = field(default_factory=list) + intersection_of: Annotated[list[Reference | tuple[Reference, Reference]], 24] = field( + default_factory=list + ) + union_of: Annotated[list[Reference], 25] = field(default_factory=list) + equivalent_to: Annotated[list[Reference], 26] = field(default_factory=list) + disjoint_from: Annotated[list[Reference], 27] = field(default_factory=list) + # TODO inverse should be inverse_of, cardinality any + inverse: Annotated[Reference | None, 28, "typedef-only"] = None + # TODO check if there are any examples of this being multiple + transitive_over: Annotated[list[Reference], 29, "typedef-only"] = field(default_factory=list) + equivalent_to_chain: Annotated[list[Reference], 30, "typedef-only"] = field( + default_factory=list + ) + #: From the OBO spec: + #: + #: For example: spatially_disconnected_from is disjoint_over part_of, in that two + #: disconnected entities have no parts in common. This can be translated to OWL as: + #: ``disjoint_over(R S), R(A B) ==> (S some A) disjointFrom (S some B)`` + disjoint_over: Annotated[Reference | None, 31] = None + relationships: Annotated[dict[Reference, list[Reference]], 32] = field( + default_factory=lambda: defaultdict(list) + ) + is_obsolete: Annotated[bool | None, 33] = None + created_by: Annotated[str | None, 34] = None + creation_date: Annotated[datetime.datetime | None, 35] = None + replaced_by: Annotated[list[Reference], 36] = field(default_factory=list) + consider: Annotated[list[Reference], 37] = field(default_factory=list) + # TODO expand_assertion_to + # TODO expand_expression_to #: Whether this relationship is a metadata tag. Properties that are marked as metadata tags are #: used to record object metadata. 
Object metadata is additional information about an object #: that is useful to track, but does not impact the definition of the object or how it should #: be treated by a reasoner. Metadata tags might be used to record special term synonyms or #: structured notes about a term, for example. - is_metadata_tag: bool | None = None + is_metadata_tag: Annotated[bool | None, 40, "typedef-only"] = None + is_class_level: Annotated[bool | None, 41] = None def __hash__(self) -> int: # have to re-define hash because of the @dataclass return hash((self.__class__, self.prefix, self.identifier)) - def iterate_obo_lines(self, ontology_prefix: str) -> Iterable[str]: - """Iterate over the lines to write in an OBO file.""" + def iterate_obo_lines( + self, + ontology_prefix: str, + synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None, + ) -> Iterable[str]: + """Iterate over the lines to write in an OBO file. + + :param ontology_prefix: + The prefix of the ontology into which the type definition is being written. + This is used for compressing builtin identifiers + :yield: + The lines to write to an OBO file + + `S.3.5.5 <https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.3.5.5>`_ + of the OBO Flat File Specification v1.4 says tags should appear in the following order: + + 1. id + 2. is_anonymous + 3. name + 4. namespace + 5. alt_id + 6. def + 7. comment + 8. subset + 9. synonym + 10. xref + 11. property_value + 12. domain + 13. range + 14. builtin + 15. holds_over_chain + 16. is_anti_symmetric + 17. is_cyclic + 18. is_reflexive + 19. is_symmetric + 20. is_transitive + 21. is_functional + 22. is_inverse_functional + 23. is_a + 24. intersection_of + 25. union_of + 26. equivalent_to + 27. disjoint_from + 28. inverse_of + 29. transitive_over + 30. equivalent_to_chain + 31. disjoint_over + 32. relationship + 33. is_obsolete + 34. created_by + 35. creation_date + 36. replaced_by + 37. consider + 38. expand_assertion_to + 39. expand_expression_to + 40. is_metadata_tag + 41. 
is_class_level + """ yield "\n[Typedef]" + # 1 yield f"id: {reference_escape(self.reference, ontology_prefix=ontology_prefix)}" + # 2 + yield from _boolean_tag("is_anonymous", self.is_anonymous) + # 3 if self.name: - yield f"name: {self.reference.name}" - if self.definition: - yield f'def: "{self.definition}"' - - if self.is_metadata_tag is not None: - yield f"is_metadata_tag: {_bool_to_obo(self.is_metadata_tag)}" - + yield f"name: {self.name}" + # 4 if self.namespace: yield f"namespace: {self.namespace}" - - if self.created_by: - yield f"created_by: {self.created_by}" - + # 5 + yield from _reference_list_tag("alt_id", self.alt_id, ontology_prefix) + # 6 + if self.definition: + yield f'def: "{self.definition}"' + # 7 if self.comment: yield f"comment: {self.comment}" - - for xref in self.xrefs: - yield f"xref: {xref.preferred_curie}" - - if self.is_transitive is not None: - yield f'is_transitive: {"true" if self.is_transitive else "false"}' - - if self.is_symmetric is not None: - yield f'is_symmetric: {"true" if self.is_symmetric else "false"}' - if self.holds_over_chain: - _chain = " ".join(link.preferred_curie for link in self.holds_over_chain) - _names = " / ".join(link.name or "_" for link in self.holds_over_chain) - yield f"holds_over_chain: {_chain} ! {_names}" - if self.inverse: - yield f"inverse_of: {self.inverse}" + # 8 + yield from _reference_list_tag("subset", self.subsets, ontology_prefix) + # 9 + for synonym in self.synonyms: + yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs) + # 10 + yield from _reference_list_tag("xref", self.xrefs, ontology_prefix) + # 11 + for line in _iterate_obo_relations( + # the type checker seems to be a bit confused, this is an okay typing since we're + # passing a more explicit version. 
The issue is that list is used for the typing, + # which means it can't narrow properly + self.properties, # type:ignore + self.annotations, + ontology_prefix=ontology_prefix, + ): + yield f"property_value: {line}" + # 12 if self.domain: - yield f"domain: {self.domain}" + yield f"domain: {reference_escape(self.domain, ontology_prefix=ontology_prefix, add_name_comment=True)}" + # 13 if self.range: - yield f"range: {self.range}" + yield f"range: {reference_escape(self.range, ontology_prefix=ontology_prefix, add_name_comment=True)}" + # 14 + yield from _boolean_tag("builtin", self.builtin) + # 15 + yield from _chain_tag("holds_over_chain", self.holds_over_chain, ontology_prefix) + # 16 + yield from _boolean_tag("is_anti_symmetric", self.is_anti_symmetric) + # 17 + yield from _boolean_tag("is_cyclic", self.is_cyclic) + # 18 + yield from _boolean_tag("is_reflexive", self.is_reflexive) + # 19 + yield from _boolean_tag("is_symmetric", self.is_symmetric) + # 20 + yield from _boolean_tag("is_transitive", self.is_transitive) + # 21 + yield from _boolean_tag("is_functional", self.is_functional) + # 22 + yield from _boolean_tag("is_inverse_functional", self.is_inverse_functional) + # 23 + yield from _reference_list_tag("is_a", self.parents, ontology_prefix) + # 24 + for p in self.intersection_of: + if isinstance(p, Reference): + yv = reference_escape(p, ontology_prefix=ontology_prefix, add_name_comment=True) + else: # this is a 2-tuple of references + yv = " ".join(reference_escape(x, ontology_prefix=ontology_prefix) for x in p) + if all(x.name for x in p): + yv += " ! 
" + " ".join(x.name for x in p) # type:ignore + yield f"intersection_of: {yv}" + # 25 + yield from _reference_list_tag("union_of", self.union_of, ontology_prefix) + # 26 + yield from _reference_list_tag("equivalent_to", self.equivalent_to, ontology_prefix) + # 27 + yield from _reference_list_tag("disjoint_from", self.disjoint_from, ontology_prefix) + # 28 + if self.inverse: + yield f"inverse_of: {reference_escape(self.inverse, ontology_prefix=ontology_prefix, add_name_comment=True)}" + # 29 + yield from _reference_list_tag("transitive_over", self.transitive_over, ontology_prefix) + # 30 + yield from _chain_tag("equivalent_to_chain", self.equivalent_to_chain, ontology_prefix) + # 31 TODO disjoint_over, see https://github.com/search?q=%22disjoint_over%3A%22+path%3A*.obo&type=code + # 32 + for line in _iterate_obo_relations( + # the type checker seems to be a bit confused, this is an okay typing since we're + # passing a more explicit version. The issue is that list is used for the typing, + # which means it can't narrow properly + self.relationships, # type:ignore + self.annotations, + ontology_prefix=ontology_prefix, + ): + yield f"relationship: {line}" + # 33 + yield from _boolean_tag("is_obsolete", self.is_obsolete) + # 34 + if self.created_by: + yield f"created_by: {self.created_by}" + # 35 + if self.creation_date is not None: + yield f"creation_date: {self.creation_date.isoformat()}" + # 36 + yield from _reference_list_tag("replaced_by", self.replaced_by, ontology_prefix) + # 37 + yield from _reference_list_tag("consider", self.consider, ontology_prefix=ontology_prefix) + # 38 TODO expand_assertion_to + # 39 TODO expand_expression_to + # 40 + yield from _boolean_tag("is_metadata_tag", self.is_metadata_tag) + # 41 + yield from _boolean_tag("is_class_level", self.is_class_level) @classmethod def from_triple(cls, prefix: str, identifier: str, name: str | None = None) -> TypeDef: diff --git a/src/pyobo/struct/utils.py b/src/pyobo/struct/utils.py index 
d30dfb7b..6192710d 100644 --- a/src/pyobo/struct/utils.py +++ b/src/pyobo/struct/utils.py @@ -2,6 +2,8 @@ from __future__ import annotations +from collections.abc import Iterable + __all__ = [ "OBO_ESCAPE", "OBO_ESCAPE_SLIM", @@ -23,3 +25,12 @@ def obo_escape_slim(string: str) -> str: rv = "".join(OBO_ESCAPE_SLIM.get(character, character) for character in string) rv = rv.replace("\n", "\\n") return rv + + +def _bool_to_obo(v: bool) -> str: + return "true" if v else "false" + + +def _boolean_tag(tag: str, bv: bool | None) -> Iterable[str]: + if bv is not None: + yield f"{tag}: {_bool_to_obo(bv)}" diff --git a/tests/test_struct.py b/tests/test_struct.py index ff50fb8e..9d3ce256 100644 --- a/tests/test_struct.py +++ b/tests/test_struct.py @@ -12,8 +12,6 @@ from pyobo.struct.reference import unspecified_matching from pyobo.struct.struct import ( BioregistryError, - LiteralProperty, - ObjectProperty, SynonymTypeDef, Term, TypeDef, @@ -22,7 +20,6 @@ from pyobo.struct.typedef import ( exact_match, has_contributor, - has_dbxref, mapping_has_confidence, mapping_has_justification, see_also, @@ -542,8 +539,8 @@ def test_obsolete(self) -> None: """\ [Term] id: GO:0050069 - is_obsolete: true name: lysine dehydrogenase activity + is_obsolete: true """, term.iterate_obo_lines(ontology_prefix="go", typedefs={}), ) @@ -554,6 +551,7 @@ def test_obsolete(self) -> None: [Term] id: GO:0050069 name: lysine dehydrogenase activity + is_obsolete: false """, term.iterate_obo_lines(ontology_prefix="go", typedefs={}), ) @@ -625,35 +623,6 @@ def test_default_term(self) -> None: term.iterate_obo_lines(ontology_prefix="gard", typedefs={}), ) - def test_format_axioms(self) -> None: - """Test formatting axioms.""" - axioms = [ - ObjectProperty( - mapping_has_justification, - Reference(prefix="semapv", identifier="UnspecifiedMapping"), - None, - ), - ] - self.assertEqual( - "{sssom:mapping_justification=semapv:UnspecifiedMapping}", - Term._format_trailing_modifiers(axioms, "chebi"), - ) - - axioms 
= [ - ObjectProperty( - mapping_has_justification, - Reference(prefix="semapv", identifier="UnspecifiedMapping"), - None, - ), - ObjectProperty( - has_contributor, Reference(prefix="orcid", identifier="0000-0003-4423-4370"), None - ), - ] - self.assertEqual( - "{dcterms:contributor=orcid:0000-0003-4423-4370, sssom:mapping_justification=semapv:UnspecifiedMapping}", - Term._format_trailing_modifiers(axioms, "chebi"), - ) - def test_append_exact_match_axioms(self) -> None: """Test emitting a relationship with axioms.""" target = Reference(prefix="eccode", identifier="1.4.1.15", name="lysine dehydrogenase") @@ -663,22 +632,6 @@ def test_append_exact_match_axioms(self) -> None: mapping_justification=unspecified_matching, confidence=0.99, ) - self.assertEqual( - { - (exact_match.reference, target): [ - ObjectProperty(mapping_has_justification.reference, unspecified_matching, None) - ] - }, - dict(term._axioms), - ) - self.assertEqual( - { - (exact_match.reference, target): [ - LiteralProperty.float(mapping_has_confidence.reference, 0.99) - ] - }, - dict(term._str_axioms), - ) lines = dedent("""\ [Term] id: GO:0050069 @@ -730,14 +683,6 @@ def test_append_xref_with_axioms(self) -> None: target = Reference(prefix="eccode", identifier="1.4.1.15", name="lysine dehydrogenase") term = Term(LYSINE_DEHYDROGENASE_ACT) term.append_xref(target, confidence=0.99) - self.assertEqual( - { - (has_dbxref.reference, target): [ - LiteralProperty.float(mapping_has_confidence.reference, 0.99) - ] - }, - dict(term._str_axioms), - ) lines = dedent("""\ [Term] id: GO:0050069 diff --git a/tests/test_typedef.py b/tests/test_typedef.py new file mode 100644 index 00000000..6f04a3af --- /dev/null +++ b/tests/test_typedef.py @@ -0,0 +1,542 @@ +"""Tests for the OBO data structures.""" + +import unittest +from collections.abc import Iterable +from textwrap import dedent +from typing import cast + +import bioregistry +from curies import vocabulary as v + +from pyobo import Obo, Reference, 
default_reference +from pyobo.struct.reference import OBOLiteral +from pyobo.struct.struct import ( + Synonym, + make_ad_hoc_ontology, +) +from pyobo.struct.typedef import ( + TypeDef, + exact_match, + has_contributor, + has_inchi, + has_role, + part_of, +) + +PREFIX = has_role.prefix +IDENTIFIER = has_role.identifier +REF = Reference(prefix=has_role.prefix, identifier=has_role.identifier) +ONTOLOGY_PREFIX = "go" + + +def _ontology_from_typedef(prefix: str, typedef: TypeDef) -> Obo: + name = cast(str, bioregistry.get_name(prefix)) + return make_ad_hoc_ontology( + _ontology=prefix, + _name=name, + _typedefs=[typedef], + terms=[], + ) + + +class TestTypeDef(unittest.TestCase): + """Test type definitions.""" + + def assert_lines(self, text: str, lines: Iterable[str]) -> None: + """Assert the lines are equal.""" + self.assertEqual(dedent(text).strip(), "\n".join(lines).strip()) + + def assert_obo_stanza( + self, text: str, typedef: TypeDef, *, ontology_prefix: str = ONTOLOGY_PREFIX + ) -> None: + """Assert the typedef text.""" + self.assert_lines( + text, + typedef.iterate_obo_lines(ontology_prefix), + ) + + def test_1_declaration(self) -> None: + """Test the declaration.""" + object_property = TypeDef(reference=REF) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + """, + object_property, + ) + + annotation_property = TypeDef( + reference=Reference(prefix=exact_match.prefix, identifier=exact_match.identifier), + is_metadata_tag=True, + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: skos:exactMatch + is_metadata_tag: true + """, + annotation_property, + ) + + def test_2_is_anonymous(self) -> None: + """Test the ``is_anonymous`` tag.""" + typedef = TypeDef(reference=REF, is_anonymous=True) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + is_anonymous: true + """, + typedef, + ) + + typedef = TypeDef(reference=REF, is_anonymous=False) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + is_anonymous: false + """, + typedef, + 
) + + def test_3_name(self) -> None: + """Test outputting a name.""" + typedef = TypeDef( + reference=Reference( + prefix=has_role.prefix, identifier=has_role.identifier, name=has_role.name + ), + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + name: has role + """, + typedef, + ) + + def test_4_namespace(self) -> None: + """Test the ``namespace`` tag.""" + typedef = TypeDef(reference=REF, namespace="NS") + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + namespace: NS + """, + typedef, + ) + + def test_5_alt_id(self) -> None: + """Test the ``alt_id`` tag.""" + typedef = TypeDef( + reference=REF, + alt_id=[ + Reference(prefix="RO", identifier="1234567"), + Reference(prefix="RO", identifier="1234568", name="test"), + ], + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + alt_id: RO:1234567 + alt_id: RO:1234568 ! test + """, + typedef, + ) + + def test_6_description(self) -> None: + """Test outputting a description.""" + typedef = TypeDef(reference=REF, definition=has_role.definition) + self.assert_obo_stanza( + f"""\ + [Typedef] + id: RO:0000087 + def: "{has_role.definition}" + """, + typedef, + ) + + def test_7_comment(self) -> None: + """Test outputting a comment.""" + comment = "comment text" + typedef = TypeDef(reference=REF, comment=comment) + self.assert_obo_stanza( + f"""\ + [Typedef] + id: RO:0000087 + comment: {comment} + """, + typedef, + ) + + def test_8_subset(self) -> None: + """Test the ``subset`` tag.""" + typedef = TypeDef(reference=REF, subsets=[default_reference("go", "SUBSET_1")]) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + subset: SUBSET_1 + """, + typedef, + ) + + def test_9_synonym(self) -> None: + """Test the ``synonym`` tag.""" + typedef = TypeDef(reference=REF, synonyms=[Synonym("bears role")]) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + synonym: "bears role" EXACT [] + """, + typedef, + ) + + def test_10_xref(self) -> None: + """Test the ``xref`` tag.""" 
+ typedef = TypeDef(reference=REF, xrefs=[default_reference("chebi", "has_role")]) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + xref: obo:chebi#has_role + """, + typedef, + ) + + def test_11_property_value(self) -> None: + """Test the ``property_value`` tag.""" + typedef = TypeDef( + reference=REF, + properties={ + has_contributor.reference: [v.charlie], + has_inchi: [OBOLiteral("abc", Reference(prefix="xsd", identifier="string"))], + }, + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: RO:0000087 + property_value: dcterms:contributor orcid:0000-0003-4423-4370 ! contributor Charles Tapley Hoyt + property_value: debio:0000020 "abc" xsd:string + """, + typedef, + ) + + def test_12_domain(self) -> None: + """Test the ``domain`` tag. + + Here's a real example of this tag being used in BFO: + + .. code-block:: + + [Typedef] + id: BFO:0000066 + name: occurs in + domain: BFO:0000003 ! occurrent + range: BFO:0000004 ! independent continuant + holds_over_chain: BFO:0000050 BFO:0000066 ! part of / occurs in + inverse_of: BFO:0000067 ! contains process + transitive_over: BFO:0000050 ! part of + """ + typedef = TypeDef( + reference=Reference(prefix="BFO", identifier="0000066"), + domain=Reference(prefix="BFO", identifier="0000003", name="occurrent"), + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: BFO:0000066 + domain: BFO:0000003 ! occurrent + """, + typedef, + ) + + def test_13_range(self) -> None: + """Test the ``range`` tag. + + Here's a real example of this tag being used in BFO: + + .. code-block:: + + [Typedef] + id: BFO:0000066 + name: occurs in + domain: BFO:0000003 ! occurrent + range: BFO:0000004 ! independent continuant + holds_over_chain: BFO:0000050 BFO:0000066 ! part of / occurs in + inverse_of: BFO:0000067 ! contains process + transitive_over: BFO:0000050 ! 
part of + """ + typedef = TypeDef( + reference=Reference(prefix="BFO", identifier="0000066"), + range=Reference(prefix="BFO", identifier="0000004", name="independent continuant"), + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: BFO:0000066 + range: BFO:0000004 ! independent continuant + """, + typedef, + ) + + def test_14_builtin(self) -> None: + """Test the ``builtin`` tag.""" + typedef = TypeDef( + reference=Reference(prefix="rdfs", identifier="subClassOf"), + builtin=True, + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: rdfs:subClassOf + builtin: true + """, + typedef, + ) + + def test_15_holds_over_chain(self) -> None: + """Test the ``holds_over_chain`` tag. + + Here's a real example of this tag being used in BFO: + + .. code-block:: + + [Typedef] + id: BFO:0000066 + name: occurs in + domain: BFO:0000003 + range: BFO:0000004 + holds_over_chain: BFO:0000050 BFO:0000066 ! part of / occurs in + inverse_of: BFO:0000067 ! contains process + transitive_over: BFO:0000050 ! part of + """ + typedef = TypeDef( + reference=Reference(prefix="BFO", identifier="0000066"), + holds_over_chain=[ + Reference(prefix="BFO", identifier="0000050", name="part of"), + Reference(prefix="BFO", identifier="0000066", name="occurs in"), + ], + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: BFO:0000066 + holds_over_chain: BFO:0000050 BFO:0000066 ! 
part of / occurs in + """, + typedef, + ) + + def test_16_is_anti_symmetric(self) -> None: + """Test the ``anti_symmetric`` tag.""" + typedef = TypeDef( + reference=Reference(prefix="rdfs", identifier="subClassOf"), is_anti_symmetric=True + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: rdfs:subClassOf + is_anti_symmetric: true + """, + typedef, + ) + + def test_17_is_cyclic(self) -> None: + """Test the ``is_cyclic`` tag.""" + typedef = TypeDef( + reference=default_reference(prefix="chebi", identifier="is_conjugate_acid_of"), + is_cyclic=True, + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: is_conjugate_acid_of + is_cyclic: true + """, + typedef, + ontology_prefix="chebi", + ) + + def test_18_is_reflexive(self) -> None: + """Test the ``is_reflexive`` tag.""" + typedef = TypeDef( + reference=Reference(prefix="rdfs", identifier="subClassOf"), is_reflexive=True + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: rdfs:subClassOf + is_reflexive: true + """, + typedef, + ) + + def test_19_is_symmetric(self) -> None: + """Test the ``is_symmetric`` tag.""" + typedef = TypeDef( + reference=default_reference(prefix="ro", identifier="attached_to"), is_symmetric=True + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: attached_to + is_symmetric: true + """, + typedef, + ontology_prefix="ro", + ) + + def test_20_is_transitive(self) -> None: + """Test the ``is_transitive`` tag.""" + typedef = TypeDef( + reference=Reference(prefix="rdfs", identifier="subClassOf"), is_transitive=True + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: rdfs:subClassOf + is_transitive: true + """, + typedef, + ) + + def test_21_is_functional(self) -> None: + """Test the ``is_functional`` tag.""" + + def test_22_is_inverse_functional(self) -> None: + """Test the ``is_inverse_functional`` tag.""" + + def test_23_is_a(self) -> None: + """Test the ``is_a`` tag.""" + + def test_24_intersection_of(self) -> None: + """Test the ``intersection_-f`` tag.""" + typedef = TypeDef( + 
reference=Reference( + prefix="GO", identifier="0000085", name="G2 phase of mitotic cell cycle" + ), + intersection_of=[ + Reference(prefix="GO", identifier="0051319", name="G2 phase"), + (part_of, Reference(prefix="GO", identifier="0000278", name="mitotic cell cycle")), + ], + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: GO:0000085 + name: G2 phase of mitotic cell cycle + intersection_of: GO:0051319 ! G2 phase + intersection_of: BFO:0000050 GO:0000278 ! part of mitotic cell cycle + """, + typedef, + ) + + def test_25_union_of(self) -> None: + """Test the ``union_of`` tag.""" + + def test_26_equivalent_to(self) -> None: + """Test the ``equivalent_to`` tag.""" + + def test_27_disjoint_from(self) -> None: + """Test the ``disjoint_from`` tag.""" + + def test_28_inverse_of(self) -> None: + """Test the ``inverse_of`` tag. + + Here's a real example of this tag being used in BFO: + + .. code-block:: + + [Typedef] + id: BFO:0000066 + name: occurs in + domain: BFO:0000003 + range: BFO:0000004 + holds_over_chain: BFO:0000050 BFO:0000066 ! part of / occurs in + inverse_of: BFO:0000067 ! contains process + transitive_over: BFO:0000050 ! part of + """ + typedef = TypeDef( + reference=Reference(prefix="BFO", identifier="0000066"), + inverse=Reference(prefix="BFO", identifier="0000067", name="contains process"), + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: BFO:0000066 + inverse_of: BFO:0000067 ! contains process + """, + typedef, + ) + + def test_29_transitive_over(self) -> None: + """Test the ``transitive_over`` tag. + + Here's a real example of this tag being used in BFO: + + .. code-block:: + + [Typedef] + id: BFO:0000066 + name: occurs in + domain: BFO:0000003 ! occurrent + range: BFO:0000004 ! independent continuant + holds_over_chain: BFO:0000050 BFO:0000066 ! part of / occurs in + inverse_of: BFO:0000067 ! contains process + transitive_over: BFO:0000050 ! 
part of + """ + typedef = TypeDef( + reference=Reference(prefix="BFO", identifier="0000066"), + transitive_over=[Reference(prefix="BFO", identifier="0000050", name="part of")], + ) + self.assert_obo_stanza( + """\ + [Typedef] + id: BFO:0000066 + transitive_over: BFO:0000050 ! part of + """, + typedef, + ) + + def test_30_equivalent_to_chain(self) -> None: + """Test the ``equivalent_to_chain`` tag. + + Interestingly, this property doesn't appear to be used anywhere + on GitHub publicly except: + + - https://github.com/geneontology/go-ontology/blob/ce41588cbdc05223f9cfd029985df3cadd1e0399/src/ontology/extensions/gorel.obo#L1277-L1285 + - https://github.com/cmungall/bioperl-owl/blob/0b52048975c078d3bc50f6611235e9f8cb9b9475/ont/interval_relations.obo~#L86-L103 + """ + + def test_31_disjoint_over(self) -> None: + """Test the ``disjoint_over`` tag.""" + + def test_32_relationship(self) -> None: + """Test the ``relationship`` tag.""" + + def test_33_is_obsolete(self) -> None: + """Test the ``is_obsolete`` tag.""" + + def test_34_created_by(self) -> None: + """Test the ``created_by`` tag.""" + + def test_35_creation_date(self) -> None: + """Test the ``creation_date`` tag.""" + + def test_36_replaced_by(self) -> None: + """Test the ``replaced_by`` tag.""" + + def test_37_consider(self) -> None: + """Test the ``consider`` tag.""" + + def test_40_is_metadata_tag(self) -> None: + """Test the ``is_metadata_tag`` tag.""" + + def test_41_is_class_level(self) -> None: + """Test the ``is_class_level`` tag."""