Skip to content

Commit

Permalink
Merge pull request #207 from khaeru/issue/205
Browse files Browse the repository at this point in the history
Handle reference to non-existent concept identity in SDMX-ML 2.1
  • Loading branch information
khaeru authored Nov 12, 2024
2 parents 240dd89 + 87893de commit 4116706
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 16 deletions.
4 changes: 4 additions & 0 deletions doc/sources.rst
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,10 @@ Website `(en) <https://www.insee.fr/en/information/2868055>`__,
`(fr) <https://www.insee.fr/fr/information/2862759>`__

- French name: Institut national de la statistique et des études économiques.
- Known issue(s) with this data source:

- :issue:`205`: as of 2024-11-12 some structures, for instance ``urn:sdmx:…DataStructure=FR1:CNA-2014-PIB(1.0)``, include :attr:`~.Component.concept_identity` references that do not exist, for instance ``urn:sdmx:…Concept=FR1:CONCEPTS_INSEE(1.0).TIME_PERIOD`` and ``urn:sdmx:…Concept=FR1:CONCEPTS_INSEE(1.0).OBS_VALUE``.
From :ref:`v2.20.0 <2.20.0>`, :mod:`.reader.xml.v21` discards such invalid references, leaving :py:`.concept_identity = None`.

.. autoclass:: sdmx.source.insee.Source()
:members:
Expand Down
14 changes: 11 additions & 3 deletions doc/whatsnew.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,16 @@
What's new?
***********

.. Next release
.. ============
.. _2.20.0:

Next release
============

- Improve tolerance of invalid references in SDMX-ML (:pull:`207`; thanks :gh-user:`nicolas-graves` for :issue:`205`).
Where a file gives a reference for a :attr:`.Component.concept_identity` (such as for a :class:`.Dimension` or :class:`.PrimaryMeasure`) that is invalid—that is, the specified :class:`.Concept` does not exist in the referenced :class:`.ConceptScheme`—log on level :data:`logging.ERROR`, discard the reference, and attach to the component an :class:`.Annotation` with ID ``sdmx.reader.xml.v21-parse-error`` whose text is the logged message.
Previously such invalid references caused a :class:`KeyError`.
Prompted by an example in :ref:`INSEE <INSEE>`.
- Update the base URL of the :ref:`WB <WB>` source to use HTTPS instead of plain HTTP (:pull:`207`).

v2.19.1 (2024-10-23)
====================
Expand Down Expand Up @@ -36,7 +44,7 @@ v2.17.0 (2024-09-03)
- :class:`.XHTMLAttributeValue` contents are stored as :mod:`lxml.etree` nodes.
- MetadataStructureDefinition is included when writing :class:`.StructureMessage`.

- Update base url for :ref:`WB_WDI` source to use HTTPS instead of plain HTTP (:issue:`191`, :pull:`192`).
- Update the base URL of the :ref:`WB_WDI <WB_WDI>` source to use HTTPS instead of plain HTTP (:issue:`191`, :pull:`192`).
- Improvements to :mod:`.reader.xml` and :mod:`.reader.xml.v21` (:pull:`192`).

- Correctly associate :class:`.Item` in :class:`.ItemScheme` with its parent, even if the parent is defined after the child (“forward reference”).
Expand Down
5 changes: 3 additions & 2 deletions sdmx/model/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2601,9 +2601,10 @@ class BaseContentConstraint:
},
"conceptscheme": {"Concept", "ConceptScheme"},
"datastructure": {
"DataflowDefinition", # SDMX 2.1
"Dataflow", # SDMX 3.0
"DataStructureDefinition",
"DataflowDefinition", # SDMX 2.1
"DataStructure", # SDMX 3.0
"DataStructureDefinition", # SDMX 2.1
"StructureUsage",
},
"mapping": {"CodelistMap", "StructureSet"},
Expand Down
14 changes: 13 additions & 1 deletion sdmx/reader/xml/v21.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,12 +606,24 @@ def _component_end(reader: Reader, elem): # noqa: C901

args = dict(
id=elem.attrib.get("id", common.MissingID),
concept_identity=reader.pop_resolved_ref("ConceptIdentity"),
local_representation=reader.pop_single(common.Representation),
)
if position := elem.attrib.get("position"):
args["order"] = int(position)

# Resolve a ConceptIdentity reference
ci_ref = reader.pop_single("ConceptIdentity")
try:
args["concept_identity"] = reader.resolve(ci_ref)
except KeyError:
message = (
f"Could not resolve {cls.__name__}.concept_identity reference to {ci_ref!s}"
)
log.error(message)
args.setdefault("annotations", []).append(
common.Annotation(id=f"{__name__}-parse-error", text=message)
)

# DataAttributeOnly
if us := elem.attrib.get("assignmentStatus"):
args["usage_status"] = model.UsageStatus[us.lower()]
Expand Down
2 changes: 1 addition & 1 deletion sdmx/sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@
{
"id": "WB",
"name": "World Bank World Integrated Trade Solution",
"url": "http://wits.worldbank.org/API/V1/SDMX/V21/rest",
"url": "https://wits.worldbank.org/API/V1/SDMX/V21/rest",
"supports": {
"actualconstraint": false,
"agencyscheme": false,
Expand Down
2 changes: 2 additions & 0 deletions sdmx/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ def __init__(self, base_path):
("IMF_STA", "DSD_GFS.xml"), # khaeru/sdmx#164
("INSEE", "CNA-2010-CONSO-SI-A17-structure.xml"),
("INSEE", "dataflow.xml"),
("INSEE", "gh-205.xml"),
("INSEE", "IPI-2010-A21-structure.xml"),
("ISTAT", "22_289-structure.xml"),
("ISTAT", "47_850-structure.xml"),
Expand All @@ -309,6 +310,7 @@ def __init__(self, base_path):
("SPC", "metadatastructure-0.xml"),
("TEST", "gh-142.xml"),
("TEST", "gh-149.xml"),
("WB", "gh-78.xml"),
]
)

Expand Down
43 changes: 43 additions & 0 deletions sdmx/tests/reader/test_reader_xml_v21.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,24 @@ def test_read_ss_xml(specimen):
assert len(TIME_FORMAT.related_to.dimensions) == 5


def test_gh_078(specimen):
    """Regression test for https://github.com/khaeru/sdmx/issues/78.

    Reading this specimen requires :mod:`.reader.xml` to handle the
    :xml:`<mes:Department>` and :xml:`<mes:Role>` elements.
    """
    # Parsing the stored specimen must succeed
    with specimen("WB/gh-78.xml") as stream:
        message = sdmx.read_sdmx(stream)

    # The first contact of the header's sender carries the expected English text
    contact = message.header.sender.contact[0]
    expected = {
        "org_unit": "DECDG",
        "responsibility": "Support",
    }
    for name, value in expected.items():
        assert value == getattr(contact, name).localizations["en"]


def test_gh_104(caplog, specimen):
"""Test of https://github.com/khaeru/sdmx/issues/104.
Expand Down Expand Up @@ -236,6 +254,31 @@ def test_gh_199():
sdmx.read_sdmx(f2, structure=dsd2)


def test_gh_205(caplog, specimen) -> None:
    """Regression test for https://github.com/khaeru/sdmx/issues/205."""
    with specimen("INSEE/gh-205.xml") as stream:
        message = sdmx.read_sdmx(stream)

    # One log message is expected per concept identity reference that could not
    # be resolved
    template = "Could not resolve {cls}.concept_identity reference to ConceptScheme=FR1:CONCEPTS_INSEE(1.0) → Concept={id}"
    expected_time = template.format(cls="TimeDimension", id="TIME_PERIOD")
    expected_obs = template.format(cls="PrimaryMeasure", id="OBS_VALUE")
    assert expected_time in caplog.messages
    assert expected_obs in caplog.messages

    # Retrieve the data structure definition from the parsed message
    dsd = message.structure["CNA-2014-PIB"]

    # The first annotation on each affected component repeats the logged message
    checks = [
        (dsd.dimensions.get("TIME_PERIOD"), expected_time),
        (dsd.measures.get("OBS_VALUE"), expected_obs),
    ]
    for component, text in checks:
        annotation = component.annotations[0]
        assert "sdmx.reader.xml.v21-parse-error" == annotation.id
        assert text == str(annotation.text)


# Each entry is a tuple with 2 elements:
# 1. an instance of lxml.etree.Element to be parsed.
# 2. Either:
Expand Down
9 changes: 0 additions & 9 deletions sdmx/tests/test_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,15 +672,6 @@ class TestWB(DataSourceTest):
"structureset": NotImplementedError, # 501
}

@pytest.mark.network
def test_gh_78(self, client):
"""Test of https://github.com/khaeru/sdmx/78.
This response required adding support for ``<mes:Department>`` and
``<mes:Role>`` to :mod:`.reader.xml`.
"""
client.data("DF_WITS_Tariff_TRAINS", key=".840.000.020110.reported")


class TestWB_WDI(DataSourceTest):
source_id = "WB_WDI"
Expand Down

0 comments on commit 4116706

Please sign in to comment.