Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: strict mode for parsing #55

Merged
merged 2 commits into from
Jan 22, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -45,6 +45,8 @@ Parsing::
doc = Document.parse(samplexml)
print(doc.trade.agreement.seller.name)

``Document.parse()`` takes a boolean parameter ``strict`` which defaults to ``True``. This means that the parser will raise an error if it encounters any unknown element. If you set it to ``False``, the parser will not raise an error and will parse whatever it can.

Generating::

import os
@@ -130,6 +132,7 @@ Generating::
with open("output.pdf", "wb") as f:
f.write(new_pdf_bytes)

``Document.serialize()`` will validate the generated XML against the specified schema and raise an error if it is not valid. If you want to avoid validation, you can set the ``schema`` parameter to ``None``.

Development
-----------
12 changes: 6 additions & 6 deletions drafthorse/models/container.py
Original file line number Diff line number Diff line change
@@ -19,9 +19,9 @@ def get_tag(self):
def empty_element(self):
return self.child_type()

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
childel = self.empty_element()
childel.from_etree(root)
childel.from_etree(root, strict)
self.add(childel)


@@ -46,7 +46,7 @@ def append_to(self, node):
self.set_element(el, child)
el.append_to(node)

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add(root.text)


@@ -60,7 +60,7 @@ def set_element(self, el, child):
el._amount = child[0]
el._currency = child[1]

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add((root.text, root.attrib.get("currencyID")))


@@ -74,7 +74,7 @@ def set_element(self, el, child):
el._text = child[1]
el._scheme_id = child[0]

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add((root.attrib["schemeID"], root.text))


@@ -87,5 +87,5 @@ def empty_element(self):
def set_element(self, el, child):
el._text = child

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add(root.text)
41 changes: 22 additions & 19 deletions drafthorse/models/elements.py
Original file line number Diff line number Diff line change
@@ -88,7 +88,7 @@ def __setattr__(self, key, value):
)
return super().__setattr__(key, value)

def from_etree(self, root):
def from_etree(self, root, strict=True):
if (
hasattr(self, "Meta")
and hasattr(self.Meta, "namespace")
@@ -109,19 +109,19 @@ def from_etree(self, root):
if child.tag in field_index:
name, _childel = field_index[child.tag]
if isinstance(getattr(self, name), Container):
getattr(self, name).add_from_etree(child)
getattr(self, name).add_from_etree(child, strict)
else:
getattr(self, name).from_etree(child)
else:
getattr(self, name).from_etree(child, strict)
elif strict:
raise TypeError("Unknown element {}".format(child.tag))
return self

@classmethod
def parse(cls, xmlinput):
def parse(cls, xmlinput, strict=True):
from lxml import etree

root = etree.fromstring(xmlinput)
return cls().from_etree(root)
return cls().from_etree(root, strict)


class StringElement(Element):
@@ -149,7 +149,7 @@ def to_etree(self):
node.text = self._text
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
self._set_on_input = True
return self
@@ -168,7 +168,7 @@ def to_etree(self):
def __str__(self):
return self._value

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._value = Decimal(root.text)
self._set_on_input = True
return self
@@ -189,7 +189,7 @@ def to_etree(self):
def __str__(self):
return "{} {}".format(self._amount, self._unit_code)

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._amount = Decimal(root.text)
self._unit_code = root.attrib["unitCode"]
self._set_on_input = True
@@ -211,7 +211,7 @@ def to_etree(self):
del node.attrib["currencyID"]
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._amount = Decimal(root.text)
self._currency = root.attrib.get("currencyID") or None
self._set_on_input = True
@@ -235,7 +235,7 @@ def to_etree(self):
node.attrib["listVersionID"] = self._list_version_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = Decimal(root.text)
self._list_id = root.attrib["listID"]
self._list_version_id = root.attrib["listVersionID"]
@@ -260,7 +260,7 @@ def to_etree(self):
node.text = self._text
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._mime_code = root.attrib["mimeCode"]
self._filename = root.attrib["filename"]
self._text = root.text
@@ -283,7 +283,7 @@ def to_etree(self):
node.attrib["schemeAgencyID"] = self._scheme_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
self._scheme_id = root.attrib["schemeAgencyID"]
self._set_on_input = True
@@ -306,7 +306,7 @@ def to_etree(self):
node.attrib["schemeID"] = self._scheme_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
try:
self._scheme_id = root.attrib["schemeID"]
@@ -346,11 +346,14 @@ def to_etree(self):
t.append(node)
return t

def from_etree(self, root):
def from_etree(self, root, strict=True):
if len(root) != 1:
raise TypeError("Date containers should have one child")
if root[0].tag != "{%s}%s" % (self._date_time_namespace, "DateTimeString"):
raise TypeError("Tag %s not recognized" % root[0].tag)
if strict:
raise TypeError("Tag %s not recognized" % root[0].tag)
else:
return self
self._format = root[0].attrib["format"]
if self._format == "102":
self._value = datetime.strptime(root[0].text, "%Y%m%d").date()
@@ -362,7 +365,7 @@ def from_etree(self, root):
self._value = w.monday()
else:
self._value = datetime.strptime(root[0].text + "1", "%G%V%u").date()
else:
elif strict:
raise TypeError(
"Date format %s cannot be parsed" % root[0].attrib["format"]
)
@@ -384,7 +387,7 @@ def to_etree(self):
t.text = self._value.strftime("%Y-%m-%dT%H:%M:%S")
return t

def from_etree(self, root):
def from_etree(self, root, strict=True):
try:
self._value = datetime.strptime(root.text, "%Y-%m-%dT%H:%M:%S").date()
except Exception:
@@ -416,7 +419,7 @@ def to_etree(self):
def __str__(self):
return "{}".format(self._value)

def from_etree(self, root):
def from_etree(self, root, strict=True):
if len(root) != 1:
raise TypeError("Indicator containers should have one child")
if root[0].tag != "{%s}%s" % (NS_UDT, "Indicator"):