Skip to content

Commit

Permalink
test custom xml parser for fromxml()
Browse files Browse the repository at this point in the history
  • Loading branch information
juarezr committed Oct 5, 2020
1 parent 07420ef commit d45e250
Showing 1 changed file with 121 additions and 1 deletion.
122 changes: 121 additions & 1 deletion petl/test/io/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from petl.test.helpers import ieq
from petl.util import nrows, look
from petl.io.xml import fromxml
from petl.compat import urlopen
from petl.compat import urlopen, izip_longest
from nose.tools import eq_


def test_fromxml():
Expand Down Expand Up @@ -206,3 +207,122 @@ def test_fromxml_url():
assert nrows(actual) > 0
expect = fromxml('.pydevproject', 'pydev_property', {'name': ( '.', 'name'), 'prop': '.'})
ieq(expect, actual)


def _write_temp_file(data):
    """Write *data* to a new named temporary file and return the file's path.

    The file is opened in text mode with ``delete=False``, so it still exists
    after this function returns; removing it is left to the caller.

    :param data: text to write into the file
    :returns: the path (``name``) of the temporary file that was written
    """
    with NamedTemporaryFile(delete=False, mode='wt') as f:
        f.write(data)
        # Leaving the ``with`` block closes the file, so no explicit close()
        # is needed before the path is handed back.
        return f.name


def _write_test_file(data, pre='', pos=''):
    """Save *data* wrapped in a ``<table>`` element to a temporary file.

    :param data: markup placed first inside the ``<table>`` element
    :param pre: text emitted before the opening ``<table>`` tag
    :param pos: text appended after *data*, still inside the ``<table>``
    :returns: whatever ``_write_temp_file`` returns for the assembled text
    """
    pieces = (pre, '<table>', data, pos, '</table>')
    return _write_temp_file(''.join(pieces))


def _compare(expected, actual):
    """Compare two tables with ``_eq_rows`` and dump both if that fails.

    When ``_eq_rows`` raises, the results of ``look(expected)`` and
    ``look(actual)`` are printed to stderr and the same exception is
    re-raised, so the calling test still fails.

    :param expected: the table holding the expected rows
    :param actual: the table produced by the code under test
    :raises Exception: whatever ``_eq_rows`` raised, unchanged
    """
    try:
        _eq_rows(expected, actual)
    except Exception:
        print('Expected:\n', look(expected), file=sys.stderr)
        print(' Actual:\n', look(actual), file=sys.stderr)
        # A bare ``raise`` re-raises the active exception with its original
        # traceback; ``raise ex`` would restart the traceback here on Python 2.
        raise


def _eq_rows(expect, actual, cast=None):
    """Assert that two tables hold equal values for each row and column.

    Rows are paired in order. Inside a row, values are paired in order and a
    value missing on the shorter side is compared as ``None``. An expected
    value that is a ``list`` is compared element by element against the
    actual value; an expected value that is a ``dict`` is not compared.

    :param expect: iterable of expected rows
    :param actual: iterable of actual rows
    :param cast: optional callable applied to every actual row before it is
        compared
    :raises AssertionError: if the number of rows differs, or if ``_eq2``
        rejects a pair of values
    """
    # A private sentinel tells "no row on this side" apart from any real row,
    # so a row-count mismatch is reported as a test failure instead of
    # surfacing as a TypeError when the fill value gets iterated.
    missing = object()
    for row_e, row_a in izip_longest(expect, actual, fillvalue=missing):
        if row_a is missing:
            raise AssertionError(
                'missing actual row for expected row: %r' % (row_e,))
        if row_e is missing:
            raise AssertionError('unexpected actual row: %r' % (row_a,))
        if cast:
            row_a = cast(row_a)
        for ve, va in izip_longest(row_e, row_a, fillvalue=None):
            if isinstance(ve, list):
                if va is None:
                    # No actual value to pair the list elements with: compare
                    # the list itself so the mismatch is reported by _eq2.
                    _eq2(ve, va, row_e, row_a)
                else:
                    for je, ja in izip_longest(ve, va, fillvalue=None):
                        _eq2(je, ja, row_e, row_a)
            elif not isinstance(ve, dict):
                _eq2(ve, va, row_e, row_a)


def _eq2(ve, va, re, ra):
    """Assert that one expected value equals one actual value.

    On failure the two rows and the two values are printed before the
    ``AssertionError`` raised by ``eq_`` is re-raised.

    :param ve: expected value
    :param va: actual value
    :param re: expected row, used only in the failure output
    :param ra: actual row, used only in the failure output
    :raises AssertionError: if ``eq_(ve, va)`` fails
    """
    try:
        eq_(ve, va)
    except AssertionError:
        print('\nrow: ', re, ' != ', ra)
        print('val: ', ve, ' != ', va)
        # Bare ``raise`` keeps the traceback pointing at the failed eq_ call.
        raise


def test_fromxml_entity():
    """Check ``fromxml`` against documents that declare an external entity.

    Three documents are written to temporary files: a plain table, the same
    table plus a row built from an external entity that points at an existing
    file, and the same table plus a row whose entity points at a missing file.
    Each is read with the default parser and with explicit lxml parsers:

    * default parser or ``resolve_entities=False``: the entity row is
      expected to come back as ``(None, None)``;
    * ``resolve_entities=True`` with an existing target: the entity row is
      expected to hold the target file's cells;
    * ``resolve_entities=True`` with a missing target: reading is expected to
      raise ``etree.XMLSyntaxError``.
    """
    import os

    from lxml import etree

    _DATA1 = """
<tr><td>foo</td><td>bar</td></tr>
<tr><td>a</td><td>1</td></tr>
<tr><td>b</td><td>2</td></tr>
<tr><td>c</td><td>3</td></tr>
"""

    _DATA2 = '<td>X</td><td>9</td>'

    _DOCTYPE = """<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE foo [
<!ELEMENT table ANY >
<!ENTITY inserted SYSTEM "file://%s" >]>
"""

    _INSERTED = '<tr>&inserted;</tr>'

    _TABLE1 = (('foo', 'bar'),
               ('a', '1'),
               ('b', '2'),
               ('c', '3'))

    _EXPECT_IT = (('X', '9'),)

    _EXPECT_NO = ((None, None),)

    parser_off = etree.XMLParser(resolve_entities=False)
    parser_onn = etree.XMLParser(resolve_entities=True)

    # Every file below is created with delete=False, so track the paths and
    # remove them once the test is over, whether it passed or failed.
    temp_paths = []
    try:
        data_file_tmp = _write_temp_file(_DATA2)
        temp_paths.append(data_file_tmp)
        doc_type_temp = _DOCTYPE % data_file_tmp
        doc_type_miss = _DOCTYPE % '/tmp/doesnotexist'

        temp_file1 = _write_test_file(_DATA1)
        temp_paths.append(temp_file1)
        temp_file2 = _write_test_file(_DATA1, pre=doc_type_temp,
                                      pos=_INSERTED)
        temp_paths.append(temp_file2)
        temp_file3 = _write_test_file(_DATA1, pre=doc_type_miss,
                                      pos=_INSERTED)
        temp_paths.append(temp_file3)

        # Plain document: no entity involved.
        actual11 = fromxml(temp_file1, 'tr', 'td')
        _compare(_TABLE1, actual11)

        actual12 = fromxml(temp_file1, 'tr', 'td', parser=parser_off)
        _compare(_TABLE1, actual12)

        # Entity pointing at an existing file.
        actual21 = fromxml(temp_file2, 'tr', 'td')
        _compare(_TABLE1 + _EXPECT_NO, actual21)

        actual22 = fromxml(temp_file2, 'tr', 'td', parser=parser_off)
        _compare(_TABLE1 + _EXPECT_NO, actual22)

        actual23 = fromxml(temp_file2, 'tr', 'td', parser=parser_onn)
        _compare(_TABLE1 + _EXPECT_IT, actual23)

        # Entity pointing at a missing file.
        actual31 = fromxml(temp_file3, 'tr', 'td')
        _compare(_TABLE1 + _EXPECT_NO, actual31)

        actual32 = fromxml(temp_file3, 'tr', 'td', parser=parser_off)
        _compare(_TABLE1 + _EXPECT_NO, actual32)

        try:
            actual33 = fromxml(temp_file3, 'tr', 'td', parser=parser_onn)
            # Iterate the table so the document is actually read.
            for _ in actual33:
                pass
        except etree.XMLSyntaxError:
            pass
        else:
            # The previous ``assert True`` could never fail, so a parser that
            # silently accepted the unresolvable entity went unnoticed.
            raise AssertionError('Error testing XML')
    finally:
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # real outcome of the test.
                pass

0 comments on commit d45e250

Please sign in to comment.