Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify getting a curies.Converter from a prefixmaps.Context #37

Merged
merged 7 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 10 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ This repository and the corresponding library is designed to satisfy the followi
- no single authoritative source of either prefixes or prefix-namespace mappings (clash-resilient)
- preferred semantic namespace is prioritized over web URLs
- authority preferred prefix is prioritized where possible
- each individual prefixmap is case-insenstive bijective
- prefixmap composition and custom ordering of prefixmaps
- each individual prefix map is case-insensitive bijective
- prefix map composition and custom ordering of prefix maps
- lightweight / low footprint
- fast (TODO)
- network-independence / versioned prefix maps
Expand All @@ -22,11 +22,11 @@ This repository and the corresponding library is designed to satisfy the followi
What this is NOT intended for:

- a general source of metadata about either prefixes or namespaces
- a mechansim for resolving identifiers to web URLs for humans to find information
- a mechanism for resolving identifiers to web URLs for humans to find information

## Installation

```
```shell
pip install prefixmaps
```

Expand All @@ -39,8 +39,7 @@ from prefixmaps.io.parser import load_multi_context
from curies import Converter

context = load_multi_context(["obo", "bioregistry.upper", "linked_data", "prefixcc"])
extended_prefix_map = context.as_extended_prefix_map()
converter = Converter.from_extended_prefix_map(extended_prefix_map)
converter: Converter = context.as_converter()

>>> converter.expand("CHEBI:1")
'http://purl.obolibrary.org/obo/CHEBI_1'
Expand All @@ -62,8 +61,7 @@ If we prioritize prefix.cc the OBO prefix is ignored:

```python
context = load_multi_context(["prefixcc", "obo"])
extended_prefix_map = context.as_extended_prefix_map()
converter = Converter.from_extended_prefix_map(extended_prefix_map)
converter: Converter = context.as_converter()

>>> converter.expand("GEO:1")
>>> converter.expand("geo:1")
Expand All @@ -76,8 +74,7 @@ If we push `bioregistry` at the start of the list then GEOGEO can be used as the

```python
context = load_multi_context(["bioregistry", "prefixcc", "obo"])
extended_prefix_map = context.as_extended_prefix_map()
converter = Converter.from_extended_prefix_map(extended_prefix_map)
converter: Converter = context.as_converter()

>>> converter.expand("geo:1")
'http://identifiers.org/geo/1'
Expand All @@ -92,8 +89,7 @@ We get similar results using the upper-normalized variant of `bioregistry`:

```python
context = load_multi_context(["bioregistry.upper", "prefixcc", "obo"])
extended_prefix_map = context.as_extended_prefix_map()
converter = Converter.from_extended_prefix_map(extended_prefix_map)
converter: Converter = context.as_converter()

>>> converter.expand("GEO:1")
'http://identifiers.org/geo/1'
Expand All @@ -106,8 +102,7 @@ Users of OBO ontologies will want to place OBO at the start of the list:

```python
context = load_multi_context(["obo", "bioregistry.upper", "prefixcc"])
extended_prefix_map = context.as_extended_prefix_map()
converter = Converter.from_extended_prefix_map(extended_prefix_map)
converter: Converter = context.as_converter()

>>> converter.expand("geo:1")
>>> converter.expand("GEO:1")
Expand All @@ -123,8 +118,7 @@ You can use the ready-made "merged" prefix set, which prioritizes OBO:

```python
context = load_context("merged")
extended_prefix_map = context.as_extended_prefix_map()
converter = Converter.from_extended_prefix_map(extended_prefix_map)
converter: Converter = context.as_converter()

>>> converter.expand("GEOGEO:1")
>>> converter.expand("GEO:1")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ typing-extensions = "^4.4.0"
requests = {version = "^2.28.1", extras = ["refresh"]}
bioregistry = {version = "^0.8.0", extras = ["refresh"]}
rdflib = {version = "^6.2.0", extras = ["refresh"]}
curies = "^0.5.3"

[tool.poetry.dev-dependencies]
pytest = "^6.2"
linkml = "^1.3.1"
curies = "^0.5.3"
coverage = "^6.4.4"

[tool.poetry.extras]
Expand Down
22 changes: 9 additions & 13 deletions src/prefixmaps/datamodel/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from enum import Enum
from typing import List, Mapping, Optional

from typing_extensions import TypedDict
import curies

__all__ = [
"StatusType",
Expand Down Expand Up @@ -47,15 +47,6 @@ class StatusType(Enum):
"""Both the prefix and the namespace are aliases for existing canonical namespaces."""


class RecordDict(TypedDict):
    """A record that is compatible with :mod:`curies`."""

    # Element type of the list returned by ``as_extended_prefix_map``; that
    # method fills these keys in when it builds each record.

    # Primary prefix of the record (presumably the preferred CURIE prefix;
    # confirm against the curies documentation).
    prefix: str
    # URI prefix (namespace) paired with ``prefix``.
    uri_prefix: str
    # Alternative prefixes for the same record; ``as_extended_prefix_map``
    # passes a sorted list here.
    prefix_synonyms: List[str]
    # Alternative URI prefixes for the same record (presumably populated the
    # same way as ``prefix_synonyms``; verify against the builder).
    uri_prefix_synonyms: List[str]


@dataclass
class PrefixExpansion:
"""
Expand Down Expand Up @@ -271,8 +262,8 @@ def as_inverted_dict(self) -> INVERSE_PREFIX_EXPANSION_DICT:
"""
return {pe.namespace: pe.prefix for pe in self.prefix_expansions if pe.canonical()}

def as_extended_prefix_map(self) -> List[RecordDict]:
"""Return an extended prfix, appropriate for generating a :class:`curies.Converter`.
def as_extended_prefix_map(self) -> List[curies.Record]:
"""Return an extended prefix map, appropriate for generating a :class:`curies.Converter`.

An extended prefix map is a collection of dictionaries, each of which has the following
fields:
Expand Down Expand Up @@ -307,7 +298,7 @@ def as_extended_prefix_map(self) -> List[RecordDict]:
prefix_synonyms[reverse_prefix_map[expansion.namespace]].add(expansion.prefix)

return [
RecordDict(
curies.Record(
prefix=prefix,
prefix_synonyms=sorted(prefix_synonyms[prefix]),
uri_prefix=uri_prefix,
Expand All @@ -316,6 +307,11 @@ def as_extended_prefix_map(self) -> List[RecordDict]:
for prefix, uri_prefix in sorted(prefix_map.items())
]

def as_converter(self) -> curies.Converter:
    """Build a :class:`curies.Converter` for this context.

    The converter is created from the records produced by
    :meth:`as_extended_prefix_map`.

    :return: the converter returned by
        :meth:`curies.Converter.from_extended_prefix_map`.
    """
    return curies.Converter.from_extended_prefix_map(self.as_extended_prefix_map())

def validate(self, canonical_only=True) -> List[str]:
"""
Validates each prefix expansion in the context.
Expand Down
4 changes: 2 additions & 2 deletions tests/test_core/test_curies.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def test_load(self):
self.assertIn(uri_prefix_1, context_namespaces)
self.assertIn(uri_prefix_2, context_namespaces)

extended_prefix_map = context.as_extended_prefix_map()
converter = Converter.from_extended_prefix_map(extended_prefix_map)
converter = context.as_converter()
self.assertIsInstance(converter, Converter)

# prefix map checks
self.assertIn(prefix, converter.prefix_map)
Expand Down