Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make getting obo context prefix map easier #501

Merged
merged 5 commits into from
Aug 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/bioregistry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
get_n2t_prefix,
get_name,
get_namespace_in_lui,
get_obo_context_prefix_map,
get_obo_download,
get_obo_health_url,
get_obofoundry_prefix,
Expand Down
5 changes: 2 additions & 3 deletions src/bioregistry/app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,12 @@
serialize,
)
from .. import normalize_prefix
from ..export.prefix_maps import collection_to_context_jsonlds
from ..export.rdf_export import (
collection_to_rdf_str,
metaresource_to_rdf_str,
resource_to_rdf_str,
)
from ..schema import sanitize_mapping
from ..schema import Collection, sanitize_mapping
from ..schema_utils import (
read_collections_contributions,
read_contributors,
Expand Down Expand Up @@ -198,7 +197,7 @@ def collection(identifier: str):
return serialize(
data,
serializers=[
("context", "application/ld+json", collection_to_context_jsonlds),
("context", "application/ld+json", Collection.as_context_jsonld_str),
("turtle", "text/plain", partial(collection_to_rdf_str, fmt="turtle")),
("jsonld", "application/ld+json", partial(collection_to_rdf_str, fmt="json-ld")),
],
Expand Down
5 changes: 3 additions & 2 deletions src/bioregistry/collection_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from typing import Optional

from .resource_manager import manager
from .schema import Collection, Context
from .schema_utils import read_collections, read_contexts
from .schema_utils import read_collections

__all__ = [
"get_collection",
Expand All @@ -20,4 +21,4 @@ def get_collection(identifier: str) -> Optional[Collection]:

def get_context(identifier: str) -> Optional[Context]:
    """Get the context for the given identifier.

    :param identifier: The key of the prescriptive context to look up.
    :returns: Whatever ``manager.get_context`` returns for the identifier.
    """
    # A single lookup through the shared manager; the previous body had a
    # second, unreachable ``return`` after the first one.
    return manager.get_context(identifier)
71 changes: 9 additions & 62 deletions src/bioregistry/export/prefix_maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,18 @@
"""Export the Bioregistry as a JSON-LD context."""

import json
from collections import ChainMap
from pathlib import Path
from textwrap import dedent
from typing import Mapping, Optional, Tuple
from typing import Mapping, Optional

import click

import bioregistry
from bioregistry import get_pattern_map, get_prefix_map
from bioregistry.constants import (
CONTEXT_BIOREGISTRY_PATH,
EXPORT_CONTEXTS,
SHACL_TURTLE_PATH,
)
from bioregistry.schema import Collection
from bioregistry.resource_manager import manager


@click.command()
Expand All @@ -26,72 +23,37 @@ def generate_contexts():
_context_prefix_maps()
_collection_prefix_maps()

prefix_map = get_prefix_map()
pattern_map = get_pattern_map()
prefix_map = manager.get_prefix_map()
pattern_map = manager.get_pattern_map()
_write_prefix_map(CONTEXT_BIOREGISTRY_PATH, prefix_map=prefix_map)
_write_shacl(SHACL_TURTLE_PATH, prefix_map=prefix_map, pattern_map=pattern_map)


def _collection_prefix_maps():
    """Write the prefix map and SHACL artifacts for each collection that names a context.

    For every collection held by the manager whose ``context`` attribute is set,
    a ``<context>.context.jsonld`` file is written via ``_write_prefix_map`` and a
    ``<context>.context.ttl`` file via ``_write_shacl`` under ``EXPORT_CONTEXTS``.
    """
    for collection in manager.collections.values():
        name = collection.context
        if name is None:
            # Collections without a context name produce no export artifacts.
            continue
        path_stub = EXPORT_CONTEXTS.joinpath(name)
        prefix_map = collection.as_prefix_map()
        pattern_map = manager.get_pattern_map()
        _write_prefix_map(path_stub.with_suffix(".context.jsonld"), prefix_map=prefix_map)
        _write_shacl(
            path_stub.with_suffix(".context.ttl"), prefix_map=prefix_map, pattern_map=pattern_map
        )


def get_prescriptive_artifacts(
    key: str, include_synonyms: Optional[bool] = None
) -> Tuple[Mapping[str, str], Mapping[str, str]]:
    """Get a prescriptive prefix map and pattern map.

    :param key: The identifier of the prescriptive context.
    :param include_synonyms: Passed through to ``get_prefix_map`` and ``get_pattern_map``.
        If None, the context's own ``include_synonyms`` value is used instead.
    :returns: A pair of (prefix map, pattern map) built from the context's settings.
    :raises KeyError: If no context is registered under ``key``.
    """
    context = bioregistry.get_context(key)
    if context is None:
        # Name the missing key so the failure is diagnosable by the caller.
        raise KeyError(key)

    # ChainMap resolves a key from the first mapping that contains it, so the
    # registry maps (in ``prefix_priority`` order) win over ``prefix_remapping``.
    registry_maps = [
        bioregistry.get_registry_map(metaprefix) for metaprefix in context.prefix_priority or []
    ]
    remapping = dict(ChainMap(*registry_maps, context.prefix_remapping or {}))

    if include_synonyms is None:
        include_synonyms = context.include_synonyms

    prescriptive_prefix_map = get_prefix_map(
        remapping=remapping,
        priority=context.uri_prefix_priority,
        include_synonyms=include_synonyms,
        use_preferred=context.use_preferred,
        blacklist=context.blacklist,
    )
    prescriptive_pattern_map = get_pattern_map(
        remapping=remapping,
        include_synonyms=include_synonyms,
        use_preferred=context.use_preferred,
        blacklist=context.blacklist,
    )
    return prescriptive_prefix_map, prescriptive_pattern_map


def _context_prefix_maps():
for key in bioregistry.read_contexts():
prefix_map, pattern_map = get_prescriptive_artifacts(key)
for key in manager.contexts:
prefix_map, pattern_map = manager.get_context_artifacts(key)
stub = EXPORT_CONTEXTS.joinpath(key)
_write_prefix_map(stub.with_suffix(".context.jsonld"), prefix_map=prefix_map)
_write_shacl(
stub.with_suffix(".context.ttl"), prefix_map=prefix_map, pattern_map=pattern_map
)

if key == "obo": # Special case, maybe put this in data model
prefix_map, pattern_map = get_prescriptive_artifacts(key, include_synonyms=True)
prefix_map, pattern_map = manager.get_context_artifacts(key, include_synonyms=True)
stub_double = EXPORT_CONTEXTS.joinpath(f"{key}_synonyms")
_write_prefix_map(stub_double.with_suffix(".context.jsonld"), prefix_map=prefix_map)
_write_shacl(
Expand Down Expand Up @@ -135,20 +97,5 @@ def _write_prefix_map(path: Path, *, prefix_map: Mapping[str, str]) -> None:
)


def collection_to_context_jsonlds(collection: Collection) -> str:
    """Serialize the JSON-LD context of the given collection to a JSON string."""
    context_document = collection.as_context_jsonld()
    return json.dumps(context_document)


def get_obofoundry_prefix_map(include_synonyms: bool = False) -> Mapping[str, str]:
    """Get the OBO Foundry prefix map.

    :param include_synonyms: Should synonyms of each prefix also be included as additional prefixes, but with
        the same URL prefix?
    :return: A mapping from prefixes to prefix URLs.
    """
    # Forward the flag: it was previously accepted but never passed on, so
    # ``include_synonyms=True`` had no effect on the returned map.
    prefix_map, _pattern_map = get_prescriptive_artifacts(
        "obo", include_synonyms=include_synonyms
    )
    return prefix_map


if __name__ == "__main__":
generate_contexts()
11 changes: 11 additions & 0 deletions src/bioregistry/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"count_mappings",
"get_versions",
"get_parts_collections",
"get_obo_context_prefix_map",
]

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -952,3 +953,13 @@ def get_parts_collections():
resource and not a vocabulary.
"""
return manager.get_parts_collections()


def get_obo_context_prefix_map(include_synonyms: bool = False) -> Mapping[str, str]:
    """Get the prefix map for the ``obo`` prescriptive context.

    :param include_synonyms: Should synonyms of each prefix also be included as additional prefixes, but with
        the same URL prefix?
    :return: A mapping from prefixes to prefix URLs.
    """
    artifacts = manager.get_context_artifacts("obo", include_synonyms=include_synonyms)
    # The artifacts are a (prefix map, pattern map) pair; only the first is wanted here.
    prefix_map = artifacts[0]
    return prefix_map
65 changes: 58 additions & 7 deletions src/bioregistry/resource_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@

import logging
import typing
from collections import Counter, defaultdict
from collections import ChainMap, Counter, defaultdict
from functools import lru_cache
from pathlib import Path
from typing import (
Any,
Callable,
Collection,
Dict,
Iterable,
List,
Expand All @@ -25,9 +24,11 @@

from .constants import BIOREGISTRY_REMOTE_URL, IDENTIFIERS_ORG_URL_PREFIX, LINK_PRIORITY
from .license_standardizer import standardize_license
from .schema import Registry, Resource, sanitize_model
from .schema import Collection, Context, Registry, Resource, sanitize_model
from .schema_utils import (
_registry_from_path,
read_collections,
read_contexts,
read_metaregistry,
read_registry,
write_registry,
Expand Down Expand Up @@ -69,21 +70,29 @@ class Manager:

registry: Dict[str, Resource]
metaregistry: Dict[str, Registry]
contexts: Dict[str, Context]
collections: Dict[str, Collection]

def __init__(
self,
registry: Optional[Mapping[str, Resource]] = None,
metaregistry: Optional[Mapping[str, Registry]] = None,
collections: Optional[Mapping[str, Collection]] = None,
contexts: Optional[Mapping[str, Context]] = None,
):
"""Instantiate a registry manager.

:param registry: A custom registry. If none given, defaults to the Bioregistry.
:param metaregistry: A custom metaregistry. If none, defaults to the Bioregistry's metaregistry.
:param collections: A custom collections dictionary. If none, defaults to the Bioregistry's collections.
:param contexts: A custom contexts dictionary. If none, defaults to the Bioregistry's contexts.
"""
self.registry = dict(read_registry() if registry is None else registry)
self.synonyms = _synonym_to_canonical(self.registry)

self.metaregistry = dict(read_metaregistry() if metaregistry is None else metaregistry)
self.contexts = dict(read_contexts() if contexts is None else contexts)
self.collections = dict(read_collections() if collections is None else collections)

canonical_for = defaultdict(list)
provided_by = defaultdict(list)
Expand Down Expand Up @@ -292,7 +301,7 @@ def get_pattern_map(
include_synonyms: bool = False,
remapping: Optional[Mapping[str, str]] = None,
use_preferred: bool = False,
blacklist: Optional[Collection[str]] = None,
blacklist: Optional[typing.Collection[str]] = None,
) -> Mapping[str, str]:
"""Get a mapping from prefixes to their regular expression patterns.

Expand All @@ -315,7 +324,7 @@ def _iter_pattern_map(
*,
include_synonyms: bool = False,
use_preferred: bool = False,
blacklist: Optional[Collection[str]] = None,
blacklist: Optional[typing.Collection[str]] = None,
) -> Iterable[Tuple[str, str]]:
blacklist = set(blacklist or [])
for prefix, resource in self.registry.items():
Expand All @@ -340,7 +349,7 @@ def get_prefix_map(
include_synonyms: bool = False,
remapping: Optional[Mapping[str, str]] = None,
use_preferred: bool = False,
blacklist: Optional[Collection[str]] = None,
blacklist: Optional[typing.Collection[str]] = None,
) -> Mapping[str, str]:
"""Get a mapping from Bioregistry prefixes to their URI prefixes .

Expand Down Expand Up @@ -368,7 +377,7 @@ def _iter_prefix_map(
priority: Optional[Sequence[str]] = None,
include_synonyms: bool = False,
use_preferred: bool = False,
blacklist: Optional[Collection[str]] = None,
blacklist: Optional[typing.Collection[str]] = None,
) -> Iterable[Tuple[str, str]]:
blacklist = set(blacklist or [])
for prefix, resource in self.registry.items():
Expand Down Expand Up @@ -1057,6 +1066,48 @@ def is_standardizable_curie(self, curie: str) -> Optional[bool]:
return False
return self.is_valid_curie(norm_curie)

def get_context(self, key: str) -> Optional[Context]:
    """Look up a prescriptive context by its identifier.

    :param key: The identifier for the prescriptive context, e.g., `obo`.
    :returns: The matching context object, or None if ``self.contexts`` has no entry for the key.
    """
    known_contexts = self.contexts
    return known_contexts.get(key)

def get_context_artifacts(
    self, key: str, include_synonyms: Optional[bool] = None
) -> Tuple[Mapping[str, str], Mapping[str, str]]:
    """Get a prescriptive prefix map and pattern map.

    :param key: The identifier for the prescriptive context, e.g., `obo`.
    :param include_synonyms: Passed through to :meth:`get_prefix_map` and :meth:`get_pattern_map`.
        If None, the context's own ``include_synonyms`` value is used instead.
    :returns: A pair of (prefix map, pattern map) built from the context's settings.
    :raises KeyError: If no context is registered under ``key``.
    """
    context = self.get_context(key)
    if context is None:
        # Name the missing key so the failure is diagnosable by the caller.
        raise KeyError(key)

    # ChainMap resolves a key from the first mapping that contains it, so the
    # registry maps (in ``prefix_priority`` order) win over ``prefix_remapping``.
    registry_maps = [
        self.get_registry_map(metaprefix) for metaprefix in context.prefix_priority or []
    ]
    remapping = dict(ChainMap(*registry_maps, context.prefix_remapping or {}))

    if include_synonyms is None:
        include_synonyms = context.include_synonyms

    prescriptive_prefix_map = self.get_prefix_map(
        remapping=remapping,
        priority=context.uri_prefix_priority,
        include_synonyms=include_synonyms,
        use_preferred=context.use_preferred,
        blacklist=context.blacklist,
    )
    prescriptive_pattern_map = self.get_pattern_map(
        remapping=remapping,
        include_synonyms=include_synonyms,
        use_preferred=context.use_preferred,
        blacklist=context.blacklist,
    )
    return prescriptive_prefix_map, prescriptive_pattern_map


def prepare_prefix_list(prefix_map: Mapping[str, str]) -> List[Tuple[str, str]]:
"""Prepare a priority prefix list from a prefix map."""
Expand Down
4 changes: 4 additions & 0 deletions src/bioregistry/schema/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -1843,6 +1843,10 @@ def add_triples(self, graph):

return node

def as_context_jsonld_str(self) -> str:
    """Serialize this collection's JSON-LD context to a JSON string."""
    context_document = self.as_context_jsonld()
    return json.dumps(context_document)

def as_context_jsonld(self) -> Mapping[str, Mapping[str, str]]:
"""Get the JSON-LD context from a given collection."""
return {
Expand Down
6 changes: 3 additions & 3 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import bioregistry
from bioregistry import Resource
from bioregistry.constants import BIOREGISTRY_PATH
from bioregistry.export.prefix_maps import get_obofoundry_prefix_map
from bioregistry.export.rdf_export import resource_to_rdf_str
from bioregistry.license_standardizer import REVERSE_LICENSES
from bioregistry.resolve import get_obo_context_prefix_map
from bioregistry.schema.struct import SCHEMA_PATH, get_json_schema
from bioregistry.schema.utils import EMAIL_RE
from bioregistry.schema_utils import is_mismatch
Expand Down Expand Up @@ -575,11 +575,11 @@ def test_default_prefix_map_no_miriam(self):

def test_obo_prefix_map(self):
    """Test the integrity of the OBO prefix map."""
    obofoundry_prefix_map = get_obo_context_prefix_map()
    self.assert_no_idot(obofoundry_prefix_map)
    self.assertIn("FlyBase", set(obofoundry_prefix_map))

    # The synonym-expanded map gets the same identifiers.org check.
    self.assert_no_idot(get_obo_context_prefix_map(include_synonyms=True))

def assert_no_idot(self, prefix_map: Mapping[str, str]) -> None:
"""Assert none of the URI prefixes have identifiers.org in them."""
Expand Down