From 87cf5c3e39ac34c21599be5cc8eacb37ea7aa388 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 15 Mar 2023 00:37:07 +0100 Subject: [PATCH 1/6] Add direct function for getting curies.Converter This PR introduces `curies` as a requirement and simplifies the usage in many examples in the README for generating a `curies.Converter` object from a `prefixmaps.Context` object down to a single line --- .gitignore | 1 + README.md | 26 ++++++++++---------------- src/prefixmaps/datamodel/context.py | 21 +++++++++------------ 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 92a59e8..2f323e3 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ notebooks/api-key.txt .venv .tox/ .coverage +.DS_Store diff --git a/README.md b/README.md index add060e..b1e84fa 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,8 @@ This repository and the corresponding library is designed to satisfy the followi - no single authoritative source of either prefixes or prefix-namespace mappings (clash-resilient) - preferred semantic namespace is prioritized over web URLs - authority preferred prefix is prioritized where possible -- each individual prefixmap is case-insenstive bijective -- prefixmap composition and custom ordering of prefixmaps +- each individual prefix map is case-insensitive bijective +- prefix map composition and custom ordering of prefixmaps - lightweight / low footprint - fast (TODO) - network-independence / versioned prefix maps @@ -22,11 +22,11 @@ This repository and the corresponding library is designed to satisfy the followi What this is NOT intended for: - a general source of metadata about either prefixes or namespaces -- a mechansim for resolving identifiers to web URLs for humans to find information +- a mechanism for resolving identifiers to web URLs for humans to find information ## Installation -``` +```shell pip install prefixmaps ``` @@ -39,8 +39,7 @@ from prefixmaps.io.parser import load_multi_context from 
curies import Converter context = load_multi_context(["obo", "bioregistry.upper", "linked_data", "prefixcc"]) -extended_prefix_map = context.as_extended_prefix_map() -converter = Converter.from_extended_prefix_map(extended_prefix_map) +converter: Converter = context.as_converter() >>> converter.expand("CHEBI:1") 'http://purl.obolibrary.org/obo/CHEBI_1' @@ -62,8 +61,7 @@ If we prioritize prefix.cc the OBO prefix is ignored: ```python context = load_multi_context(["prefixcc", "obo"]) -extended_prefix_map = context.as_extended_prefix_map() -converter = Converter.from_extended_prefix_map(extended_prefix_map) +converter: Converter = context.as_converter() >>> converter.expand("GEO:1") >>> converter.expand("geo:1") @@ -76,8 +74,7 @@ If we push `bioregistry` at the start of the list then GEOGEO can be used as the ```python context = load_multi_context(["bioregistry", "prefixcc", "obo"]) -extended_prefix_map = context.as_extended_prefix_map() -converter = Converter.from_extended_prefix_map(extended_prefix_map) +converter: Converter = context.as_converter() >>> converter.expand("geo:1") 'http://identifiers.org/geo/1' @@ -92,8 +89,7 @@ We get similar results using the upper-normalized variant of `bioregistry`: ```python context = load_multi_context(["bioregistry.upper", "prefixcc", "obo"]) -extended_prefix_map = context.as_extended_prefix_map() -converter = Converter.from_extended_prefix_map(extended_prefix_map) +converter: Converter = context.as_converter() >>> converter.expand("GEO:1") 'http://identifiers.org/geo/1' @@ -106,8 +102,7 @@ Users of OBO ontologies will want to place OBO at the start of the list: ```python context = load_multi_context(["obo", "bioregistry.upper", "prefixcc"]) -extended_prefix_map = context.as_extended_prefix_map() -converter = Converter.from_extended_prefix_map(extended_prefix_map) +converter: Converter = context.as_converter() >>> converter.expand("geo:1") >>> converter.expand("GEO:1") @@ -123,8 +118,7 @@ You can use the ready-made "merged" 
prefix set, which prioritizes OBO: ```python context = load_context("merged") -extended_prefix_map = context.as_extended_prefix_map() -converter = Converter.from_extended_prefix_map(extended_prefix_map) +converter: Converter = context.as_converter() >>> converter.expand("GEOGEO:1") >>> converter.expand("GEO:1") diff --git a/src/prefixmaps/datamodel/context.py b/src/prefixmaps/datamodel/context.py index d94ff6e..8c14563 100644 --- a/src/prefixmaps/datamodel/context.py +++ b/src/prefixmaps/datamodel/context.py @@ -6,6 +6,7 @@ from enum import Enum from typing import List, Mapping, Optional +import curies from typing_extensions import TypedDict __all__ = [ @@ -47,15 +48,6 @@ class StatusType(Enum): """Both the prefix and the namespace are aliases for existing canonical namespaces.""" -class RecordDict(TypedDict): - """A record that is compatible with :mod:`curies`.""" - - prefix: str - uri_prefix: str - prefix_synonyms: List[str] - uri_prefix_synonyms: List[str] - - @dataclass class PrefixExpansion: """ @@ -271,8 +263,8 @@ def as_inverted_dict(self) -> INVERSE_PREFIX_EXPANSION_DICT: """ return {pe.namespace: pe.prefix for pe in self.prefix_expansions if pe.canonical()} - def as_extended_prefix_map(self) -> List[RecordDict]: - """Return an extended prfix, appropriate for generating a :class:`curies.Converter`. + def as_extended_prefix_map(self) -> List[curies.Record]: + """Return an extended prefix map, appropriate for generating a :class:`curies.Converter`. 
An extended prefix map is a collection of dictionaries, each of which has the following fields: @@ -307,7 +299,7 @@ def as_extended_prefix_map(self) -> List[RecordDict]: prefix_synonyms[reverse_prefix_map[expansion.namespace]].add(expansion.prefix) return [ - RecordDict( + curies.Record( prefix=prefix, prefix_synonyms=sorted(prefix_synonyms[prefix]), uri_prefix=uri_prefix, @@ -316,6 +308,11 @@ def as_extended_prefix_map(self) -> List[RecordDict]: for prefix, uri_prefix in sorted(prefix_map.items()) ] + def as_converter(self) -> curies.Converter: + """Get a converter from this prefix map.""" + extended_prefix_map = self.as_extended_prefix_map() + return curies.Converter.from_extended_prefix_map(extended_prefix_map) + def validate(self, canonical_only=True) -> List[str]: """ Validates each prefix expansion in the context. From 98e133b6bf36760d4bb5fd1cbe6b493893a2d6de Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 15 Mar 2023 00:45:50 +0100 Subject: [PATCH 2/6] Add curies as requirement --- poetry.lock | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 5c8f04e..078288c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2532,4 +2532,4 @@ refresh = ["bioregistry", "rdflib", "requests"] [metadata] lock-version = "2.0" python-versions = "^3.7.6" -content-hash = "89d896ddb2ac7a41ccd12c6f412446a3fef4f2110a18ea9b262ef341a9594e34" +content-hash = "91592dbfa29fdf0c81cf77b29479f7f2d8713e08bbe68a3e0d978d4e34805096" diff --git a/pyproject.toml b/pyproject.toml index 0346d5f..09a0747 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ typing-extensions = "^4.4.0" requests = {version = "^2.28.1", extras = ["refresh"]} bioregistry = {version = "^0.6.0", extras = ["refresh"]} rdflib = {version = "^6.2.0", extras = ["refresh"]} +curies = "^0.4.5" [tool.poetry.dev-dependencies] pytest = "^5.2" From a5819398d2be95f7dbc9807cf33492b3e011616d Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt 
Date: Wed, 15 Mar 2023 00:47:31 +0100 Subject: [PATCH 3/6] Update test_curies.py --- tests/test_core/test_curies.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_core/test_curies.py b/tests/test_core/test_curies.py index f2b4928..7e13e3d 100644 --- a/tests/test_core/test_curies.py +++ b/tests/test_core/test_curies.py @@ -21,8 +21,7 @@ def test_load(self): self.assertIn(uri_prefix_1, context_namespaces) self.assertIn(uri_prefix_2, context_namespaces) - extended_prefix_map = context.as_extended_prefix_map() - converter = Converter.from_extended_prefix_map(extended_prefix_map) + converter = context.as_converter() # prefix map checks self.assertIn(prefix, converter.prefix_map) From 7f5b72a4512bae956934a65cfc6c8a649ae7efbb Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 15 Mar 2023 00:47:55 +0100 Subject: [PATCH 4/6] Update context.py --- src/prefixmaps/datamodel/context.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/prefixmaps/datamodel/context.py b/src/prefixmaps/datamodel/context.py index 8c14563..5a27e35 100644 --- a/src/prefixmaps/datamodel/context.py +++ b/src/prefixmaps/datamodel/context.py @@ -7,7 +7,6 @@ from typing import List, Mapping, Optional import curies -from typing_extensions import TypedDict __all__ = [ "StatusType", From 2757ec14d18192624de1b008eb127bdafa515aa3 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 15 Mar 2023 00:49:18 +0100 Subject: [PATCH 5/6] Update test_curies.py --- tests/test_core/test_curies.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_core/test_curies.py b/tests/test_core/test_curies.py index 7e13e3d..6debcdd 100644 --- a/tests/test_core/test_curies.py +++ b/tests/test_core/test_curies.py @@ -22,6 +22,7 @@ def test_load(self): self.assertIn(uri_prefix_2, context_namespaces) converter = context.as_converter() + self.assertIsInstance(converter, Converter) # prefix map checks self.assertIn(prefix, converter.prefix_map) From 
3b31b0c960ea2d672aecbf0935fd55f0d21deab7 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 11 Apr 2023 11:46:22 +0200 Subject: [PATCH 6/6] Update pyproject.toml --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f5f2de1..f42a0cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,12 +36,11 @@ typing-extensions = "^4.4.0" requests = {version = "^2.28.1", extras = ["refresh"]} bioregistry = {version = "^0.8.0", extras = ["refresh"]} rdflib = {version = "^6.2.0", extras = ["refresh"]} -curies = "^0.4.5" +curies = "^0.5.3" [tool.poetry.dev-dependencies] pytest = "^6.2" linkml = "^1.3.1" -curies = "^0.5.3" coverage = "^6.4.4" [tool.poetry.extras]