From f200722bfecb15d108d3f6c752eee5b34aacc058 Mon Sep 17 00:00:00 2001 From: "Jeffrey C. Lerman" Date: Sun, 21 May 2023 02:35:47 -0700 Subject: [PATCH] feat: add `curie` method to `NamespaceManager` (#2365) Added a `curie` method to `NamespaceManager`, which can be used to generate a CURIE from a URI. Other changes: - Fixed `NamespaceManager.expand_curie` to work with CURIEs that have blank prefixes (e.g. `:something`), which are valid according to [CURIE Syntax 1.0](https://www.w3.org/TR/2010/NOTE-curie-20101216/). - Added a test to confirm https://github.com/RDFLib/rdflib/issues/2077. Fixes . --------- Co-authored-by: Iwan Aucamp --- rdflib/namespace/__init__.py | 31 ++++- test/test_namespace/test_namespacemanager.py | 113 +++++++++++++++++++ test/utils/exceptions.py | 44 ++++++-- 3 files changed, 179 insertions(+), 9 deletions(-) diff --git a/rdflib/namespace/__init__.py b/rdflib/namespace/__init__.py index 8455e2b63..3e591fcf7 100644 --- a/rdflib/namespace/__init__.py +++ b/rdflib/namespace/__init__.py @@ -490,6 +490,35 @@ def qname(self, uri: str) -> str: else: return ":".join((prefix, name)) + def curie(self, uri: str, generate: bool = True) -> str: + """ + From a URI, generate a valid CURIE. + + Result is guaranteed to contain a colon separating the prefix from the + name, even if the prefix is an empty string. + + .. warning:: + + When ``generate`` is `True` (which is the default) and there is no + matching namespace for the URI in the namespace manager then a new + namespace will be added with prefix ``ns{index}``. + + Thus, when ``generate`` is `True`, this function is not a pure + function because of this side-effect. + + This default behaviour is chosen so that this function operates + similarly to `NamespaceManager.qname`. + + :param uri: URI to generate CURIE for. + :param generate: Whether to add a prefix for the namespace if one doesn't + already exist. Default: `True`. + :return: CURIE for the URI. + :raises KeyError: If generate is `False` and the namespace doesn't already have + a prefix.
+ """ + prefix, namespace, name = self.compute_qname(uri, generate=generate) + return ":".join((prefix, name)) + def qname_strict(self, uri: str) -> str: prefix, namespace, name = self.compute_qname_strict(uri) if prefix == "": @@ -643,7 +672,7 @@ def expand_curie(self, curie: str) -> URIRef: if not type(curie) is str: raise TypeError(f"Argument must be a string, not {type(curie).__name__}.") parts = curie.split(":", 1) - if len(parts) != 2 or len(parts[0]) < 1: + if len(parts) != 2: raise ValueError( "Malformed curie argument, format should be e.g. “foaf:name”." ) diff --git a/test/test_namespace/test_namespacemanager.py b/test/test_namespace/test_namespacemanager.py index 20cb9594f..a35f3ac63 100644 --- a/test/test_namespace/test_namespacemanager.py +++ b/test/test_namespace/test_namespacemanager.py @@ -5,6 +5,7 @@ import sys from contextlib import ExitStack from pathlib import Path +from test.utils.exceptions import ExceptionChecker from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Set, Tuple, Type, Union import pytest @@ -484,3 +485,115 @@ def check() -> None: check() # Run a second time to check caching check() + + +def make_test_nsm() -> NamespaceManager: + namespaces = [ + ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"), + ("", "http://example.org/"), + ( + # Because of https://github.com/RDFLib/rdflib/issues/2077 this + # will have no effect on the namespace manager.
+ "eg", + "http://example.org/", + ), + ] + graph = Graph(bind_namespaces="none") + for prefix, namespace in namespaces: + graph.bind(prefix, namespace, override=False) + + return graph.namespace_manager + + +@pytest.fixture(scope="session") +def test_nsm_session() -> NamespaceManager: + return make_test_nsm() + + +@pytest.fixture(scope="function") +def test_nsm_function() -> NamespaceManager: + return make_test_nsm() + + +@pytest.mark.parametrize( + ["curie", "expected_result"], + [ + ("rdf:type", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + (":foo", "http://example.org/foo"), + ("too_small", ExceptionChecker(ValueError, "Malformed curie argument")), + ( + "egdo:bar", + ExceptionChecker(ValueError, 'Prefix "egdo" not bound to any namespace'), + ), + pytest.param( + "eg:foo", + "http://example.org/foo", + marks=pytest.mark.xfail( + raises=ValueError, + reason="This is failing because of https://github.com/RDFLib/rdflib/issues/2077", + ), + ), + ], +) +def test_expand_curie( + test_nsm_session: NamespaceManager, + curie: str, + expected_result: Union[ExceptionChecker, str], +) -> None: + nsm = test_nsm_session + with ExitStack() as xstack: + if isinstance(expected_result, ExceptionChecker): + xstack.enter_context(expected_result) + result = nsm.expand_curie(curie) + + if not isinstance(expected_result, ExceptionChecker): + assert URIRef(expected_result) == result + + +@pytest.mark.parametrize( + ["uri", "generate", "expected_result"], + [ + ("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", None, "rdf:type"), + ("http://example.org/foo", None, ":foo"), + ("http://example.com/a#chair", None, "ns1:chair"), + ("http://example.com/a#chair", True, "ns1:chair"), + ( + "http://example.com/a#chair", + False, + ExceptionChecker( + KeyError, "No known prefix for http://example.com/a# and generate=False" + ), + ), + ("http://example.com/b#chair", None, "ns1:chair"), + ("http://example.com/c", None, "ns1:c"), + ("", None, ExceptionChecker(ValueError, "Can't split 
''")), + ( + "http://example.com/", + None, + ExceptionChecker(ValueError, "Can't split 'http://example.com/'"), + ), + ], +) +def test_generate_curie( + test_nsm_function: NamespaceManager, + uri: str, + generate: Optional[bool], + expected_result: Union[ExceptionChecker, str], +) -> None: + """ + .. note:: + + This is using the function scoped nsm fixture because curie has side + effects and will modify the namespace manager. + """ + nsm = test_nsm_function + with ExitStack() as xstack: + if isinstance(expected_result, ExceptionChecker): + xstack.enter_context(expected_result) + if generate is None: + result = nsm.curie(uri) + else: + result = nsm.curie(uri, generate=generate) + + if not isinstance(expected_result, ExceptionChecker): + assert expected_result == result diff --git a/test/utils/exceptions.py b/test/utils/exceptions.py index a814f9b40..94cfd9c29 100644 --- a/test/utils/exceptions.py +++ b/test/utils/exceptions.py @@ -1,15 +1,32 @@ +from __future__ import annotations + import logging import re from dataclasses import dataclass -from typing import Any, Dict, Optional, Pattern, Type, Union +from types import TracebackType +from typing import Any, ContextManager, Dict, Optional, Pattern, Type, Union + +import pytest +from pytest import ExceptionInfo -@dataclass(frozen=True) -class ExceptionChecker: +@dataclass +class ExceptionChecker(ContextManager[ExceptionInfo[Exception]]): type: Type[Exception] pattern: Optional[Union[Pattern[str], str]] = None attributes: Optional[Dict[str, Any]] = None + def __post_init__(self) -> None: + self._catcher = pytest.raises(self.type, match=self.pattern) + self._exception_info: Optional[ExceptionInfo[Exception]] = None + + def _check_attributes(self, exception: Exception) -> None: + if self.attributes is not None: + for key, value in self.attributes.items(): + logging.debug("checking exception attribute %s=%r", key, value) + assert hasattr(exception, key) + assert getattr(exception, key) == value + def check(self, 
exception: Exception) -> None: logging.debug("checking exception %s/%r", type(exception), exception) pattern = self.pattern @@ -19,11 +36,22 @@ def check(self, exception: Exception) -> None: assert isinstance(exception, self.type) if pattern is not None: assert pattern.match(f"{exception}") - if self.attributes is not None: - for key, value in self.attributes.items(): - logging.debug("checking exception attribute %s=%r", key, value) - assert hasattr(exception, key) - assert getattr(exception, key) == value + self._check_attributes(exception) except Exception: logging.error("problem checking exception", exc_info=exception) raise + + def __enter__(self) -> ExceptionInfo[Exception]: + self._exception_info = self._catcher.__enter__() + return self._exception_info + + def __exit__( + self, + __exc_type: Optional[Type[BaseException]], + __exc_value: Optional[BaseException], + __traceback: Optional[TracebackType], + ) -> bool: + result = self._catcher.__exit__(__exc_type, __exc_value, __traceback) + if self._exception_info is not None: + self._check_attributes(self._exception_info.value) + return result