From 78adc4eb378986d779c2e100b686afd17d058b61 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 9 Sep 2023 11:10:38 +0200 Subject: [PATCH 1/4] Add high level converter getter --- README.md | 26 ++++++++++---------------- src/prefixmaps/__init__.py | 3 ++- src/prefixmaps/io/parser.py | 11 ++++++++++- tests/test_core/test_curies.py | 3 +++ 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index b1e84fa..abbf7af 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,10 @@ pip install prefixmaps To use in combination with [curies](https://github.com/cthoyt/curies) library: ```python -from prefixmaps.io.parser import load_multi_context +from prefixmaps import load_converter from curies import Converter -context = load_multi_context(["obo", "bioregistry.upper", "linked_data", "prefixcc"]) -converter: Converter = context.as_converter() +converter: Converter = load_converter(["obo", "bioregistry.upper", "linked_data", "prefixcc"]) >>> converter.expand("CHEBI:1") 'http://purl.obolibrary.org/obo/CHEBI_1' @@ -60,21 +59,19 @@ converter: Converter = context.as_converter() If we prioritize prefix.cc the OBO prefix is ignored: ```python -context = load_multi_context(["prefixcc", "obo"]) -converter: Converter = context.as_converter() +converter = load_converter(["prefixcc", "obo"]) >>> converter.expand("GEO:1") >>> converter.expand("geo:1") 'http://www.opengis.net/ont/geosparql#1' ``` -Even though prefix expansion is case sensitive, we intentionally block conflicts that differ only in case. +Even though prefix expansion is case-sensitive, we intentionally block conflicts that differ only in case. If we push `bioregistry` at the start of the list then GEOGEO can be used as the prefix for the OBO ontology: ```python -context = load_multi_context(["bioregistry", "prefixcc", "obo"]) -converter: Converter = context.as_converter() +converter = load_converter(["bioregistry", "prefixcc", "obo"]) >>> converter.expand("geo:1") 'http://identifiers.org/geo/1' @@ -88,8 +85,7 @@ Note that from the OBO perspective, GEOGEO is non-canonical. We get similar results using the upper-normalized variant of `bioregistry`: ```python -context = load_multi_context(["bioregistry.upper", "prefixcc", "obo"]) -converter: Converter = context.as_converter() +converter = load_converter(["bioregistry.upper", "prefixcc", "obo"]) >>> converter.expand("GEO:1") 'http://identifiers.org/geo/1' @@ -101,8 +97,7 @@ converter: Converter = context.as_converter() Users of OBO ontologies will want to place OBO at the start of the list: ```python -context = load_multi_context(["obo", "bioregistry.upper", "prefixcc"]) -converter: Converter = context.as_converter() +converter = load_converter(["obo", "bioregistry.upper", "prefixcc"]) >>> converter.expand("geo:1") >>> converter.expand("GEO:1") @@ -117,8 +112,7 @@ GEO. This could be added in future with a unique OBO prefix. You can use the ready-made "merged" prefix set, which prioritizes OBO: ```python -context = load_context("merged") -converter: Converter = context.as_converter() +converter = load_converter("merged") >>> converter.expand("GEOGEO:1") >>> converter.expand("GEO:1") @@ -128,13 +122,13 @@ converter: Converter = context.as_converter() ### Network independence and requesting latest versions -By default this will make use of metadata distributed alongside the package. This has certain advantages in terms +By default, this will make use of metadata distributed alongside the package. This has certain advantages in terms of reproducibility, but it means if a new ontology or prefix is added to an upstream source you won't see this. To refresh and use the latest upstream: ```python -ctxt = load_context("obo", refresh=True) +converter = load_converter("obo", refresh=True) ``` This will perform a fetch from http://obofoundry.org/registry/obo_prefixes.ttl diff --git a/src/prefixmaps/__init__.py b/src/prefixmaps/__init__.py index dd36ef2..b1db5c1 100644 --- a/src/prefixmaps/__init__.py +++ b/src/prefixmaps/__init__.py @@ -1,5 +1,5 @@ from .datamodel.context import Context, PrefixExpansion, StatusType -from .io.parser import load_context, load_multi_context +from .io.parser import load_context, load_multi_context, load_converter try: from importlib.metadata import version @@ -7,6 +7,7 @@ from importlib_metadata import version __all__ = [ + "load_converter", "load_context", "load_multi_context", "Context", diff --git a/src/prefixmaps/io/parser.py b/src/prefixmaps/io/parser.py index a6fdd42..8bd9c21 100644 --- a/src/prefixmaps/io/parser.py +++ b/src/prefixmaps/io/parser.py @@ -1,8 +1,9 @@ from csv import DictReader from pathlib import Path -from typing import List, TextIO +from typing import List, TextIO, Union import yaml +from curies import Converter from prefixmaps.data import data_path from prefixmaps.datamodel.context import CONTEXT, Context, PrefixExpansion, StatusType @@ -10,6 +11,7 @@ __all__ = [ "load_multi_context", "load_context", + "load_converter", ] @@ -23,6 +25,13 @@ def context_path(name: CONTEXT) -> Path: return data_path / f"{name}.csv" +def load_converter(names: Union[CONTEXT, List[CONTEXT]], refresh: bool = False) -> Converter: + """Get a converter.""" + if isinstance(names, str): + return load_context(names, refresh=refresh).as_converter() + return load_multi_context(names, refresh=refresh).as_converter() + + def load_multi_context(names: List[CONTEXT], refresh=False) -> Context: """ Merges multiple contexts diff --git a/tests/test_core/test_curies.py b/tests/test_core/test_curies.py index 6debcdd..34aa7da 100644 --- a/tests/test_core/test_curies.py +++ b/tests/test_core/test_curies.py @@ -24,6 +24,9 @@ def test_load(self): converter = context.as_converter() self.assertIsInstance(converter, Converter) + self.assertEqual(converter, prefixmaps.load_converter("bioportal")) + self.assertEqual(converter, prefixmaps.load_converter(["bioportal"])) + # prefix map checks self.assertIn(prefix, converter.prefix_map) self.assertEqual(uri_prefix_1, converter.prefix_map[prefix]) From 51cd0e8af51329cf26f1f00ca84fc3c20e39efb4 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 9 Sep 2023 11:13:52 +0200 Subject: [PATCH 2/4] Update __init__.py --- src/prefixmaps/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prefixmaps/__init__.py b/src/prefixmaps/__init__.py index b1db5c1..2c13a91 100644 --- a/src/prefixmaps/__init__.py +++ b/src/prefixmaps/__init__.py @@ -1,5 +1,5 @@ from .datamodel.context import Context, PrefixExpansion, StatusType -from .io.parser import load_context, load_multi_context, load_converter +from .io.parser import load_context, load_converter, load_multi_context try: from importlib.metadata import version From abc4732bae622e134ecc644870f6b632804600a0 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 9 Sep 2023 17:14:07 +0200 Subject: [PATCH 3/4] Update test_curies.py --- tests/test_core/test_curies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_core/test_curies.py b/tests/test_core/test_curies.py index 34aa7da..a441970 100644 --- a/tests/test_core/test_curies.py +++ b/tests/test_core/test_curies.py @@ -24,8 +24,8 @@ def test_load(self): converter = context.as_converter() self.assertIsInstance(converter, Converter) - self.assertEqual(converter, prefixmaps.load_converter("bioportal")) - self.assertEqual(converter, prefixmaps.load_converter(["bioportal"])) + self.assertEqual(converter.prefix_map, prefixmaps.load_converter("bioportal").prefix_map) + self.assertEqual(converter.prefix_map, prefixmaps.load_converter(["bioportal"].prefix_map)) # prefix map checks self.assertIn(prefix, converter.prefix_map) From 9bcdfc40b4c7b7d4df9d0ad4f95fc4d822b6ac7e Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sat, 9 Sep 2023 17:23:28 +0200 Subject: [PATCH 4/4] Update test_curies.py --- tests/test_core/test_curies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core/test_curies.py b/tests/test_core/test_curies.py index a441970..b41edfd 100644 --- a/tests/test_core/test_curies.py +++ b/tests/test_core/test_curies.py @@ -25,7 +25,7 @@ def test_load(self): self.assertIsInstance(converter, Converter) self.assertEqual(converter.prefix_map, prefixmaps.load_converter("bioportal").prefix_map) - self.assertEqual(converter.prefix_map, prefixmaps.load_converter(["bioportal"].prefix_map)) + self.assertEqual(converter.prefix_map, prefixmaps.load_converter(["bioportal"]).prefix_map) # prefix map checks self.assertIn(prefix, converter.prefix_map)