Skip to content

Commit 777d478

Browse files
gh-91810: Expand ElementTree.write() tests to use non-ASCII data (GH-91989)
(cherry picked from commit f60b4c3)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent a36d97e commit 777d478

File tree

1 file changed: 80 additions and 17 deletions.

Lib/test/test_xml_etree.py

+80 -17
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,9 @@ def newtest(*args, **kwargs):
130130
return newtest
131131
return decorator
132132

133+
def convlinesep(data):
134+
return data.replace(b'\n', os.linesep.encode())
135+
133136

134137
class ModuleTest(unittest.TestCase):
135138
def test_sanity(self):
@@ -3713,48 +3716,108 @@ def test_encoding(self):
37133716

37143717
def test_write_to_filename(self):
37153718
self.addCleanup(os_helper.unlink, TESTFN)
3716-
tree = ET.ElementTree(ET.XML('''<site />'''))
3719+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
37173720
tree.write(TESTFN)
37183721
with open(TESTFN, 'rb') as f:
3719-
self.assertEqual(f.read(), b'''<site />''')
3722+
self.assertEqual(f.read(), b'''<site>&#248;</site>''')
3723+
3724+
def test_write_to_filename_with_encoding(self):
3725+
self.addCleanup(os_helper.unlink, TESTFN)
3726+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
3727+
tree.write(TESTFN, encoding='utf-8')
3728+
with open(TESTFN, 'rb') as f:
3729+
self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
3730+
3731+
tree.write(TESTFN, encoding='ISO-8859-1')
3732+
with open(TESTFN, 'rb') as f:
3733+
self.assertEqual(f.read(), convlinesep(
3734+
b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
3735+
b'''<site>\xf8</site>'''))
3736+
3737+
def test_write_to_filename_as_unicode(self):
3738+
self.addCleanup(os_helper.unlink, TESTFN)
3739+
with open(TESTFN, 'w') as f:
3740+
encoding = f.encoding
3741+
os_helper.unlink(TESTFN)
3742+
3743+
try:
3744+
'\xf8'.encode(encoding)
3745+
except UnicodeEncodeError:
3746+
self.skipTest(f'default file encoding {encoding} not supported')
3747+
3748+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
3749+
tree.write(TESTFN, encoding='unicode')
3750+
with open(TESTFN, 'rb') as f:
3751+
data = f.read()
3752+
expected = "<site>\xf8</site>".encode(encoding, 'xmlcharrefreplace')
3753+
self.assertEqual(data, expected)
37203754

37213755
def test_write_to_text_file(self):
37223756
self.addCleanup(os_helper.unlink, TESTFN)
3723-
tree = ET.ElementTree(ET.XML('''<site />'''))
3757+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
37243758
with open(TESTFN, 'w', encoding='utf-8') as f:
37253759
tree.write(f, encoding='unicode')
37263760
self.assertFalse(f.closed)
37273761
with open(TESTFN, 'rb') as f:
3728-
self.assertEqual(f.read(), b'''<site />''')
3762+
self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
3763+
3764+
with open(TESTFN, 'w', encoding='ascii', errors='xmlcharrefreplace') as f:
3765+
tree.write(f, encoding='unicode')
3766+
self.assertFalse(f.closed)
3767+
with open(TESTFN, 'rb') as f:
3768+
self.assertEqual(f.read(), b'''<site>&#248;</site>''')
3769+
3770+
with open(TESTFN, 'w', encoding='ISO-8859-1') as f:
3771+
tree.write(f, encoding='unicode')
3772+
self.assertFalse(f.closed)
3773+
with open(TESTFN, 'rb') as f:
3774+
self.assertEqual(f.read(), b'''<site>\xf8</site>''')
37293775

37303776
def test_write_to_binary_file(self):
37313777
self.addCleanup(os_helper.unlink, TESTFN)
3732-
tree = ET.ElementTree(ET.XML('''<site />'''))
3778+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
37333779
with open(TESTFN, 'wb') as f:
37343780
tree.write(f)
37353781
self.assertFalse(f.closed)
37363782
with open(TESTFN, 'rb') as f:
3737-
self.assertEqual(f.read(), b'''<site />''')
3783+
self.assertEqual(f.read(), b'''<site>&#248;</site>''')
3784+
3785+
def test_write_to_binary_file_with_encoding(self):
3786+
self.addCleanup(os_helper.unlink, TESTFN)
3787+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
3788+
with open(TESTFN, 'wb') as f:
3789+
tree.write(f, encoding='utf-8')
3790+
self.assertFalse(f.closed)
3791+
with open(TESTFN, 'rb') as f:
3792+
self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')
3793+
3794+
with open(TESTFN, 'wb') as f:
3795+
tree.write(f, encoding='ISO-8859-1')
3796+
self.assertFalse(f.closed)
3797+
with open(TESTFN, 'rb') as f:
3798+
self.assertEqual(f.read(),
3799+
b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
3800+
b'''<site>\xf8</site>''')
37383801

37393802
def test_write_to_binary_file_with_bom(self):
37403803
self.addCleanup(os_helper.unlink, TESTFN)
3741-
tree = ET.ElementTree(ET.XML('''<site />'''))
3804+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
37423805
# test BOM writing to buffered file
37433806
with open(TESTFN, 'wb') as f:
37443807
tree.write(f, encoding='utf-16')
37453808
self.assertFalse(f.closed)
37463809
with open(TESTFN, 'rb') as f:
37473810
self.assertEqual(f.read(),
37483811
'''<?xml version='1.0' encoding='utf-16'?>\n'''
3749-
'''<site />'''.encode("utf-16"))
3812+
'''<site>\xf8</site>'''.encode("utf-16"))
37503813
# test BOM writing to non-buffered file
37513814
with open(TESTFN, 'wb', buffering=0) as f:
37523815
tree.write(f, encoding='utf-16')
37533816
self.assertFalse(f.closed)
37543817
with open(TESTFN, 'rb') as f:
37553818
self.assertEqual(f.read(),
37563819
'''<?xml version='1.0' encoding='utf-16'?>\n'''
3757-
'''<site />'''.encode("utf-16"))
3820+
'''<site>\xf8</site>'''.encode("utf-16"))
37583821

37593822
def test_read_from_stringio(self):
37603823
tree = ET.ElementTree()
@@ -3763,10 +3826,10 @@ def test_read_from_stringio(self):
37633826
self.assertEqual(tree.getroot().tag, 'site')
37643827

37653828
def test_write_to_stringio(self):
3766-
tree = ET.ElementTree(ET.XML('''<site />'''))
3829+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
37673830
stream = io.StringIO()
37683831
tree.write(stream, encoding='unicode')
3769-
self.assertEqual(stream.getvalue(), '''<site />''')
3832+
self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''')
37703833

37713834
def test_read_from_bytesio(self):
37723835
tree = ET.ElementTree()
@@ -3775,10 +3838,10 @@ def test_read_from_bytesio(self):
37753838
self.assertEqual(tree.getroot().tag, 'site')
37763839

37773840
def test_write_to_bytesio(self):
3778-
tree = ET.ElementTree(ET.XML('''<site />'''))
3841+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
37793842
raw = io.BytesIO()
37803843
tree.write(raw)
3781-
self.assertEqual(raw.getvalue(), b'''<site />''')
3844+
self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''')
37823845

37833846
class dummy:
37843847
pass
@@ -3792,12 +3855,12 @@ def test_read_from_user_text_reader(self):
37923855
self.assertEqual(tree.getroot().tag, 'site')
37933856

37943857
def test_write_to_user_text_writer(self):
3795-
tree = ET.ElementTree(ET.XML('''<site />'''))
3858+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
37963859
stream = io.StringIO()
37973860
writer = self.dummy()
37983861
writer.write = stream.write
37993862
tree.write(writer, encoding='unicode')
3800-
self.assertEqual(stream.getvalue(), '''<site />''')
3863+
self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''')
38013864

38023865
def test_read_from_user_binary_reader(self):
38033866
raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
@@ -3809,12 +3872,12 @@ def test_read_from_user_binary_reader(self):
38093872
tree = ET.ElementTree()
38103873

38113874
def test_write_to_user_binary_writer(self):
3812-
tree = ET.ElementTree(ET.XML('''<site />'''))
3875+
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
38133876
raw = io.BytesIO()
38143877
writer = self.dummy()
38153878
writer.write = raw.write
38163879
tree.write(writer)
3817-
self.assertEqual(raw.getvalue(), b'''<site />''')
3880+
self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''')
38183881

38193882
def test_write_to_user_binary_writer_with_bom(self):
38203883
tree = ET.ElementTree(ET.XML('''<site />'''))

0 commit comments

Comments
 (0)