Skip to content

Commit

Permalink
Add .urn.{expand,normalize,shorten}, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
khaeru committed Aug 14, 2024
1 parent 20cd9c2 commit cfe94b5
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 25 deletions.
71 changes: 68 additions & 3 deletions sdmx/tests/test_urn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,36 @@
import pytest

from sdmx.model import v21 as m
from sdmx.urn import make, match
from sdmx.urn import expand, make, match, normalize, shorten


def test_make():
@pytest.mark.parametrize(
    ("value", "expected"),
    [
        # Partial URN of a MaintainableArtefact
        (
            "Codelist=BAZ:FOO(1.2.3)",
            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
        ),
        # Partial URN of an Item in a MaintainableArtefact
        (
            "Code=BAZ:FOO(1.2.3).BAR",
            "urn:sdmx:org.sdmx.infomodel.codelist.Code=BAZ:FOO(1.2.3).BAR",
        ),
        # Already a complete URN: returned as-is
        (
            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
        ),
        # Not a URN at all: returned as-is
        ("foo", "foo"),
    ],
)
def test_expand(value, expected) -> None:
    """:func:`.expand` completes partial URNs and passes other values through."""
    result = expand(value)
    assert expected == result


def test_make() -> None:
""":func:`.make` can look up and use information about the parent ItemScheme."""
c = m.Code(id="BAR")

Expand Down Expand Up @@ -42,7 +68,7 @@ def test_make():
assert "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)" == make(cl)


def test_match():
def test_match() -> None:
# Value containing a "." in the ID
urn = (
"urn:sdmx:org.sdmx.infomodel.datastructure.Dataflow=LSD:"
Expand All @@ -54,3 +80,42 @@ def test_match():
urn = "urn:sdmx:org.sdmx.infomodel.codelist=BBK:CLA_BBK_COLLECTION(1.0)"
with pytest.raises(ValueError, match=re.escape(f"not a valid SDMX URN: {urn}")):
match(urn)


@pytest.mark.parametrize(
    ("value", "expected"),
    [
        # A URN without "Definition=": returned as-is
        (
            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
        ),
        # Not a URN at all: returned as-is
        ("foo", "foo"),
    ],
)
def test_normalize(value, expected) -> None:
    """:func:`.normalize` leaves values without "Definition=" untouched."""
    result = normalize(value)
    assert expected == result


@pytest.mark.parametrize(
    ("value", "expected"),
    [
        # Full URN of a MaintainableArtefact
        (
            "urn:sdmx:org.sdmx.infomodel.codelist.Codelist=BAZ:FOO(1.2.3)",
            "Codelist=BAZ:FOO(1.2.3)",
        ),
        # Full URN of an Item in a MaintainableArtefact
        (
            "urn:sdmx:org.sdmx.infomodel.codelist.Code=BAZ:FOO(1.2.3).BAR",
            "Code=BAZ:FOO(1.2.3).BAR",
        ),
        # Already a partial URN: returned as-is
        ("Codelist=BAZ:FOO(1.2.3)", "Codelist=BAZ:FOO(1.2.3)"),
        # Not a URN at all: returned as-is
        ("foo", "foo"),
    ],
)
def test_shorten(value, expected) -> None:
    """:func:`.shorten` strips the URN prefix and passes other values through."""
    result = shorten(value)
    assert expected == result
148 changes: 126 additions & 22 deletions sdmx/urn.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,160 @@
import re
from typing import Dict
from typing import Dict, Optional

from sdmx.model import PACKAGE, MaintainableArtefact

#: Regular expression for URNs.
URN = re.compile(
# One compiled pattern object, bound to both names `_PATTERN` and `URN`.
_PATTERN = URN = re.compile(
    # Fixed prefix
    r"urn:sdmx:org\.sdmx\.infomodel"
    # "package": the text up to the next "."
    r"\.(?P<package>[^\.]*)"
    # "class": the text up to "="; then an optional "agency" terminated by ":"
    r"\.(?P<class>[^=]*)=((?P<agency>[^:]*):)?"
    # "id": the text up to "("; then an optional "version" (digits and dots) enclosed
    # in parentheses
    r"(?P<id>[^\(]*)(\((?P<version>[\d\.]*)\))?"
    # Optional "item_id": everything that follows a further "."
    r"(\.(?P<item_id>.*))?"
)

# str.format()-style template for the text of a full URN. Its replacement fields
# read from the names `package`, `obj`, `ma`, and `extra_id`.
_BASE = (
    "urn:sdmx:org.sdmx.infomodel.{package}.{obj.__class__.__name__}="
    "{ma.maintainer.id}:{ma.id}({ma.version}){extra_id}"
)

class URN2:
    """Components of an SDMX URN.

    An instance is populated in one of two ways: by parsing a string `value`, or, when
    `value` is :obj:`None`, directly from keyword arguments.

    Parameters
    ----------
    value : str or None
        A full SDMX URN to parse. If the package part of the URN is the literal
        placeholder ``package``, the actual package name is looked up in
        :data:`PACKAGE` using the class name.
    **kwargs
        Stored as-is as instance attributes, *before* `value` is handled; attributes
        parsed from `value` therefore take precedence.

    Raises
    ------
    ValueError
        If `value` is not :obj:`None` and is not a string matching the URN pattern, or
        if it uses the placeholder package with a class name absent from
        :data:`PACKAGE`.

    Notes
    -----
    The attribute :attr:`groupdict`, holding the raw regular expression match groups,
    is only set when a `value` was parsed.
    """

    # Package name, the part following "org.sdmx.infomodel."
    package: str
    # Class name; "klass" because "class" is a reserved word
    klass: str
    # ID of the maintainer agency
    agency: str
    # ID of the maintainable artefact
    id: str
    # Version of the maintainable artefact
    version: str
    # ID of an item within the maintainable artefact, if any
    item_id: Optional[str]

    def __init__(self, value, **kwargs) -> None:
        if kwargs:
            self.__dict__.update(kwargs)

        if value is None:
            return

        # Validate with an explicit check rather than `assert`, which is removed when
        # Python runs with -O
        try:
            match = _PATTERN.match(value)
        except TypeError:
            # `value` is not a string
            match = None
        if match is None:
            raise ValueError(f"not a valid SDMX URN: {value}")

        g = self.groupdict = match.groupdict()

        if g["package"] == "package":
            # Placeholder package: infer the real one from the class name. An unknown
            # class name means `value` is not a usable URN.
            try:
                self.package = PACKAGE[g["class"]]
            except KeyError:
                raise ValueError(f"not a valid SDMX URN: {value}") from None
        else:
            self.package = g["package"]

        self.klass = g["class"]
        self.agency = g["agency"]
        self.id = g["id"]
        self.version = g["version"]
        self.item_id = g["item_id"]

    def __str__(self) -> str:
        """Return the full URN text; the item ID is appended only if it is set."""
        suffix = f".{self.item_id}" if self.item_id else ""
        return (
            f"urn:sdmx:org.sdmx.infomodel.{self.package}.{self.klass}={self.agency}:"
            f"{self.id}({self.version}){suffix}"
        )


def expand(value: str) -> str:
    """Return the full URN for `value`.

    `value` is first tried as-is, then with the prefix
    ``urn:sdmx:org.sdmx.infomodel.package.`` prepended; the first form that
    :class:`URN2` accepts is converted back to a string and returned.

    Parameters
    ----------
    value : str
        Either the final part of a valid SDMX URN, for example
        `Codelist=BAZ:FOO(1.2.3)`, or a full URN.

    Returns
    -------
    str
        The full SDMX URN. If :class:`URN2` rejects both forms with
        :class:`ValueError`, `value` is returned unmodified.
    """
    with_prefix = f"urn:sdmx:org.sdmx.infomodel.package.{value}"

    for text in (value, with_prefix):
        try:
            return str(URN2(text))
        except ValueError:
            # Not parseable in this form; try the next one, if any
            pass

    return value


def make(
    obj,
    maintainable_parent: Optional["MaintainableArtefact"] = None,
    strict: bool = False,
) -> str:
    """Create an SDMX URN for `obj`.

    Parameters
    ----------
    obj :
        The object to identify. If it is not a :class:`.MaintainableArtefact`, its
        :attr:`id` becomes the item ID part of the URN.
    maintainable_parent : MaintainableArtefact, optional
        Maintainable artefact that contains `obj`. Only used if `obj` is not itself
        maintainable; if not given, the result of :meth:`obj.get_scheme` is used.
    strict : bool, optional
        If :obj:`True`, a missing version is an error.

    Returns
    -------
    str
        The full URN.

    Raises
    ------
    ValueError
        If neither `obj` nor its parent is maintainable; if the maintainable artefact
        has no maintainer; or if `strict` is given and it has no version.
    """
    if isinstance(obj, MaintainableArtefact):
        ma, item_id = obj, None
    else:
        # Compare with None explicitly. A truthiness test would discard a parent that
        # was supplied but evaluates as false, for instance a container-like object
        # with no members.
        if maintainable_parent is not None:
            ma = maintainable_parent
        else:
            ma = obj.get_scheme()
        item_id = obj.id

    if not isinstance(ma, MaintainableArtefact):
        raise ValueError(
            f"Neither {obj!r} nor {maintainable_parent!r} are maintainable"
        )
    elif ma.maintainer is None:
        raise ValueError(f"Cannot construct URN for {ma!r} without maintainer")
    elif strict and ma.version is None:
        raise ValueError(f"Cannot construct URN for {ma!r} without version")

    klass = obj.__class__.__name__

    return str(
        URN2(
            None,
            package=PACKAGE[klass],
            klass=klass,
            agency=ma.maintainer.id,
            id=ma.id,
            version=ma.version,
            item_id=item_id,
        )
    )


def match(value: str) -> Dict[str, str]:
    """Match :data:`URN` in `value`, returning a :class:`dict` with the match groups.

    The keys are the named groups of the pattern: "package", "class", "agency", "id",
    "version", and "item_id".

    Raises
    ------
    ValueError
        If `value` is not a well-formed SDMX URN.
    """
    parsed = URN2(value)
    return parsed.groupdict


def normalize(value: str) -> str:
    """Normalize URNs by dropping a "Definition" suffix from the class name.

    Some URNs spell a class name with the suffix, for example "…DataflowDefinition=…"
    (SDMX 2.1), where others omit it, "…Dataflow=…" (SDMX 3.0). The shorter form is
    preferred: every occurrence of "Definition=" in `value` is replaced with "=".
    Values without that text are returned unchanged.
    """
    suffixed, bare = "Definition=", "="
    return value.replace(suffixed, bare)


def shorten(value: str) -> str:
    """Return a partial URN based on `value`.

    Parameters
    ----------
    value : str
        A full SDMX URN. If the value is not a URN, it is returned unmodified.

    Returns
    -------
    str
        `value`, but without the leading text
        :py:`"urn:sdmx:org.sdmx.infomodel.{package}."`
    """
    try:
        full = str(URN2(value))
    except ValueError:
        # Not a full URN: pass through
        return value

    # The first four "."-separated fields are "urn:sdmx:org", "sdmx", "infomodel", and
    # the package name; everything after them is the partial URN
    *_, partial = full.split(".", maxsplit=4)
    return partial

0 comments on commit cfe94b5

Please sign in to comment.