diff --git a/sdmx/tests/test_urn.py b/sdmx/tests/test_urn.py
index 5f76afeb..ea61c873 100644
--- a/sdmx/tests/test_urn.py
+++ b/sdmx/tests/test_urn.py
@@ -3,10 +3,36 @@
 import pytest
 
 from sdmx.model import v21 as m
-from sdmx.urn import make, match
+from sdmx.urn import expand, make, match, normalize, shorten
 
 
-def test_make():
+@pytest.mark.parametrize(
+    "value, expected",
+    (
+        # MaintainableArtefact
+        (
+            "Codelist=BAZ:FOO(1.2.3)",
+            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
+        ),
+        # Item in a MaintainableArtefact
+        (
+            "Code=BAZ:FOO(1.2.3).BAR",
+            "urn:sdmx:org.sdmx.infomodel.codelist.Code=BAZ:FOO(1.2.3).BAR",
+        ),
+        # Expand an already-complete URN: pass-through
+        (
+            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
+            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
+        ),
+        # Not a URN: pass-through
+        ("foo", "foo"),
+    ),
+)
+def test_expand(value, expected) -> None:
+    assert expected == expand(value)
+
+
+def test_make() -> None:
     """:func:`.make` can look up and use information about the parent ItemScheme."""
     c = m.Code(id="BAR")
 
@@ -42,7 +68,7 @@ def test_make():
     assert "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)" == make(cl)
 
 
-def test_match():
+def test_match() -> None:
     # Value containing a "." in the ID
     urn = (
         "urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=LSD:"
@@ -54,3 +80,42 @@ def test_match():
     urn = "urn:sdmx:org.sdmx.infomodel.codelist=BBK:CLA_BBK_COLLECTION(1.0)"
     with pytest.raises(ValueError, match=re.escape(f"not a valid SDMX URN: {urn}")):
         match(urn)
+
+
+@pytest.mark.parametrize(
+    "value, expected",
+    (
+        # Other URN: pass-through
+        (
+            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
+            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
+        ),
+        # Not a URN: pass-through
+        ("foo", "foo"),
+    ),
+)
+def test_normalize(value, expected) -> None:
+    assert expected == normalize(value)
+
+
+@pytest.mark.parametrize(
+    "value, expected",
+    (
+        # MaintainableArtefact
+        (
+            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
+            "Codelist=BAZ:FOO(1.2.3)",
+        ),
+        # Item in a MaintainableArtefact
+        (
+            "urn:sdmx:org.sdmx.infomodel.codelist.Code=BAZ:FOO(1.2.3).BAR",
+            "Code=BAZ:FOO(1.2.3).BAR",
+        ),
+        # Shorten an already-partial URN: pass-through
+        ("Codelist=BAZ:FOO(1.2.3)", "Codelist=BAZ:FOO(1.2.3)"),
+        # Not a URN: pass-through
+        ("foo", "foo"),
+    ),
+)
+def test_shorten(value, expected) -> None:
+    assert expected == shorten(value)
diff --git a/sdmx/urn.py b/sdmx/urn.py
index 8cc94129..e0c5215d 100644
--- a/sdmx/urn.py
+++ b/sdmx/urn.py
@@ -1,10 +1,10 @@
 import re
-from typing import Dict
+from typing import Dict, Optional
 
 from sdmx.model import PACKAGE, MaintainableArtefact
 
 #: Regular expression for URNs.
-URN = re.compile(
+_PATTERN = URN = re.compile(
     r"urn:sdmx:org\.sdmx\.infomodel"
     r"\.(?P<package>[^\.]*)"
     r"\.(?P<class>[^=]*)=((?P<agency>[^:]*):)?"
@@ -12,13 +12,87 @@
     r"(\.(?P<item_id>.*))?"
 )
 
-_BASE = (
-    "urn:sdmx:org.sdmx.infomodel.{package}.{obj.__class__.__name__}="
-    "{ma.maintainer.id}:{ma.id}({ma.version}){extra_id}"
-)
+
+class URN2:
+    """Representation of an SDMX URN, parsed from a string or built from parts.
+
+    If `value` is :obj:`None`, nothing is parsed and attributes come from `kwargs`
+    only; otherwise `value` must match the URN pattern or :class:`ValueError` is
+    raised.
+    """
+
+    package: str
+    klass: str
+    agency: str
+    id: str
+    version: str
+    item_id: Optional[str]
+
+    def __init__(self, value, **kwargs) -> None:
+        if kwargs:
+            self.__dict__.update(kwargs)
+
+        if value is None:
+            return
+
+        try:
+            match = _PATTERN.match(value)
+        except TypeError:
+            match = None
+
+        # Explicit check rather than `assert`, which is stripped under `python -O`
+        if match is None:
+            raise ValueError(f"not a valid SDMX URN: {value}")
+
+        g = self.groupdict = match.groupdict()
+
+        # "package" is the placeholder inserted by expand(); look up the real name
+        self.package = (
+            PACKAGE[g["class"]] if g["package"] == "package" else g["package"]
+        )
+        self.klass = g["class"]
+        self.agency = g["agency"]
+        self.id = g["id"]
+        self.version = g["version"]
+        self.item_id = g["item_id"]
+
+    def __str__(self) -> str:
+        return (
+            f"urn:sdmx:org.sdmx.infomodel.{self.package}.{self.klass}={self.agency}:"
+            f"{self.id}({self.version})"
+            + (("." + self.item_id) if self.item_id else "")
+        )
+
+
+def expand(value: str) -> str:
+    """Return the full URN for `value`.
+
+    Parameters
+    ----------
+    value : str
+        Either the final part of a valid SDMX URN, for example
+        `Codelist=BAZ:FOO(1.2.3)`, or a full URN.
+
+    Returns
+    -------
+    str
+        The full SDMX URN. If `value` is not a partial or full URN, it is returned
+        unmodified.
+    """
+    for candidate in (value, f"urn:sdmx:org.sdmx.infomodel.package.{value}"):
+        try:
+            return str(URN2(candidate))
+        except ValueError:
+            continue
+
+    return value
 
 
-def make(obj, maintainable_parent=None, strict=False):
+def make(
+    obj,
+    maintainable_parent: Optional["MaintainableArtefact"] = None,
+    strict: bool = False,
+) -> str:
     """Create an SDMX URN for `obj`.
 
     If `obj` is not :class:`.MaintainableArtefact`, then `maintainable_parent`
@@ -26,31 +100,67 @@ def make(obj, maintainable_parent=None, strict=False):
     """
     if not isinstance(obj, MaintainableArtefact):
         ma = maintainable_parent or obj.get_scheme()
-        extra_id = f".{obj.id}"
+        item_id = obj.id
     else:
-        ma = obj
-        extra_id = ""
+        ma, item_id = obj, None
 
     if not isinstance(ma, MaintainableArtefact):
         raise ValueError(
-            f"Neither {repr(obj)} nor {repr(maintainable_parent)} are maintainable"
+            f"Neither {obj!r} nor {maintainable_parent!r} are maintainable"
         )
     elif ma.maintainer is None:
-        raise ValueError(f"Cannot construct URN for {repr(ma)} without maintainer")
+        raise ValueError(f"Cannot construct URN for {ma!r} without maintainer")
     elif strict and ma.version is None:
-        raise ValueError(f"Cannot construct URN for {repr(ma)} without version")
+        raise ValueError(f"Cannot construct URN for {ma!r} without version")
 
-    return _BASE.format(
-        package=PACKAGE[obj.__class__.__name__], obj=obj, ma=ma, extra_id=extra_id
+    return str(
+        URN2(
+            None,
+            package=PACKAGE[obj.__class__.__name__],
+            klass=obj.__class__.__name__,
+            agency=ma.maintainer.id,
+            id=ma.id,
+            version=ma.version,
+            item_id=item_id,
+        )
     )
 
 
 def match(value: str) -> Dict[str, str]:
-    """Match :data:`URN` in `value`, returning a :class:`dict` with the match groups."""
+    """Match :data:`URN` in `value`, returning a :class:`dict` with the match groups.
+
+    Raises
+    ------
+    ValueError
+        If `value` is not a well-formed SDMX URN.
+    """
+    return URN2(value).groupdict
+
+
+def normalize(value: str) -> str:
+    """Normalize URNs.
+
+    Handle "…DataFlow=…" (SDMX 3.0) vs. "…DataFlowDefinition=…" (SDMX 2.1) in URNs;
+    prefer the former.
+    """
+    return value.replace("Definition=", "=")
+
+
+def shorten(value: str) -> str:
+    """Return a partial URN based on `value`.
+
+    Parameters
+    ----------
+    value : str
+        A full SDMX URN. If the value is not a URN, it is returned unmodified.
+
+    Returns
+    -------
+    str
+        `value`, but without the leading text
+        :py:`"urn:sdmx:org.sdmx.infomodel.{package}."`
+    """
     try:
-        match = URN.match(value)
-        assert match is not None
-    except (AssertionError, TypeError):
-        raise ValueError(f"not a valid SDMX URN: {value}")
-    else:
-        return match.groupdict()
+        return str(URN2(value)).split(".", maxsplit=4)[-1]
+    except ValueError:
+        return value