Skip to content

Commit

Permalink
feat: strict mode for parsing (#55)
Browse files Browse the repository at this point in the history
* feat: strict mode for parsing

* docs: lax and strict parsing and serialization
  • Loading branch information
barredterra authored Jan 22, 2025
1 parent a6b98f6 commit 94eab55
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 25 deletions.
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Parsing::
doc = Document.parse(samplexml)
print(doc.trade.agreement.seller.name)

``Document.parse()`` takes a boolean parameter ``strict`` which defaults to ``True``. This means that the parser will raise an error if it encounters any unknown element. If you set it to ``False``, the parser will not raise an error and will parse whatever it can.

Generating::

import os
Expand Down Expand Up @@ -136,6 +138,7 @@ Generating::
with open("output.pdf", "wb") as f:
f.write(new_pdf_bytes)

``Document.serialize()`` will validate the generated XML against the specified schema and raise an error if it is not valid. If you want to avoid validation, you can set the ``schema`` parameter to ``None``.

Development
-----------
Expand Down
12 changes: 6 additions & 6 deletions drafthorse/models/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ def get_tag(self):
def empty_element(self):
return self.child_type()

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
childel = self.empty_element()
childel.from_etree(root)
childel.from_etree(root, strict)
self.add(childel)


Expand All @@ -46,7 +46,7 @@ def append_to(self, node):
self.set_element(el, child)
el.append_to(node)

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add(root.text)


Expand All @@ -60,7 +60,7 @@ def set_element(self, el, child):
el._amount = child[0]
el._currency = child[1]

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add((root.text, root.attrib.get("currencyID")))


Expand All @@ -74,7 +74,7 @@ def set_element(self, el, child):
el._text = child[1]
el._scheme_id = child[0]

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add((root.attrib["schemeID"], root.text))


Expand All @@ -87,5 +87,5 @@ def empty_element(self):
def set_element(self, el, child):
el._text = child

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add(root.text)
41 changes: 22 additions & 19 deletions drafthorse/models/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __setattr__(self, key, value):
)
return super().__setattr__(key, value)

def from_etree(self, root):
def from_etree(self, root, strict=True):
if (
hasattr(self, "Meta")
and hasattr(self.Meta, "namespace")
Expand All @@ -109,19 +109,19 @@ def from_etree(self, root):
if child.tag in field_index:
name, _childel = field_index[child.tag]
if isinstance(getattr(self, name), Container):
getattr(self, name).add_from_etree(child)
getattr(self, name).add_from_etree(child, strict)
else:
getattr(self, name).from_etree(child)
else:
getattr(self, name).from_etree(child, strict)
elif strict:
raise TypeError("Unknown element {}".format(child.tag))
return self

@classmethod
def parse(cls, xmlinput):
def parse(cls, xmlinput, strict=True):
from lxml import etree

root = etree.fromstring(xmlinput)
return cls().from_etree(root)
return cls().from_etree(root, strict)


class StringElement(Element):
Expand Down Expand Up @@ -149,7 +149,7 @@ def to_etree(self):
node.text = self._text
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
self._set_on_input = True
return self
Expand All @@ -168,7 +168,7 @@ def to_etree(self):
def __str__(self):
return self._value

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._value = Decimal(root.text)
self._set_on_input = True
return self
Expand All @@ -189,7 +189,7 @@ def to_etree(self):
def __str__(self):
return "{} {}".format(self._amount, self._unit_code)

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._amount = Decimal(root.text)
self._unit_code = root.attrib["unitCode"]
self._set_on_input = True
Expand All @@ -211,7 +211,7 @@ def to_etree(self):
del node.attrib["currencyID"]
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._amount = Decimal(root.text)
self._currency = root.attrib.get("currencyID") or None
self._set_on_input = True
Expand All @@ -235,7 +235,7 @@ def to_etree(self):
node.attrib["listVersionID"] = self._list_version_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = Decimal(root.text)
self._list_id = root.attrib["listID"]
self._list_version_id = root.attrib["listVersionID"]
Expand All @@ -260,7 +260,7 @@ def to_etree(self):
node.text = self._text
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._mime_code = root.attrib["mimeCode"]
self._filename = root.attrib["filename"]
self._text = root.text
Expand All @@ -283,7 +283,7 @@ def to_etree(self):
node.attrib["schemeAgencyID"] = self._scheme_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
self._scheme_id = root.attrib["schemeAgencyID"]
self._set_on_input = True
Expand All @@ -306,7 +306,7 @@ def to_etree(self):
node.attrib["schemeID"] = self._scheme_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
try:
self._scheme_id = root.attrib["schemeID"]
Expand Down Expand Up @@ -346,11 +346,14 @@ def to_etree(self):
t.append(node)
return t

def from_etree(self, root):
def from_etree(self, root, strict=True):
if len(root) != 1:
raise TypeError("Date containers should have one child")
if root[0].tag != "{%s}%s" % (self._date_time_namespace, "DateTimeString"):
raise TypeError("Tag %s not recognized" % root[0].tag)
if strict:
raise TypeError("Tag %s not recognized" % root[0].tag)
else:
return self
self._format = root[0].attrib["format"]
if self._format == "102":
self._value = datetime.strptime(root[0].text, "%Y%m%d").date()
Expand All @@ -362,7 +365,7 @@ def from_etree(self, root):
self._value = w.monday()
else:
self._value = datetime.strptime(root[0].text + "1", "%G%V%u").date()
else:
elif strict:
raise TypeError(
"Date format %s cannot be parsed" % root[0].attrib["format"]
)
Expand All @@ -384,7 +387,7 @@ def to_etree(self):
t.text = self._value.strftime("%Y-%m-%dT%H:%M:%S")
return t

def from_etree(self, root):
def from_etree(self, root, strict=True):
try:
self._value = datetime.strptime(root.text, "%Y-%m-%dT%H:%M:%S").date()
except Exception:
Expand Down Expand Up @@ -416,7 +419,7 @@ def to_etree(self):
def __str__(self):
return "{}".format(self._value)

def from_etree(self, root):
def from_etree(self, root, strict=True):
if len(root) != 1:
raise TypeError("Indicator containers should have one child")
if root[0].tag != "{%s}%s" % (NS_UDT, "Indicator"):
Expand Down

0 comments on commit 94eab55

Please sign in to comment.