[3.9] gh-91810: ElementTree: Use text file's encoding by default in XML declaration (GH-91903) (GH-92665)

ElementTree method write() and function tostring() now use the text file's
encoding ("UTF-8" if not available) instead of locale encoding in XML
declaration when encoding="unicode" is specified.
(cherry picked from commit 707839b0fe)


Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>

Automerge-Triggered-By: GH:serhiy-storchaka
This commit is contained in:
Miss Islington (bot) 2022-05-11 10:40:05 -07:00 committed by GitHub
parent 3f2113dd08
commit bfc88d3418
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 30 deletions

View file

@ -10,7 +10,6 @@
import html
import io
import itertools
import locale
import operator
import os
import pickle
@ -960,15 +959,13 @@ def test_tostring_xml_declaration(self):
def test_tostring_xml_declaration_unicode_encoding(self):
elem = ET.XML('<body><tag/></body>')
preferredencoding = locale.getpreferredencoding()
self.assertEqual(
f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>",
ET.tostring(elem, encoding='unicode', xml_declaration=True)
ET.tostring(elem, encoding='unicode', xml_declaration=True),
"<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
)
def test_tostring_xml_declaration_cases(self):
elem = ET.XML('<body><tag>ø</tag></body>')
preferredencoding = locale.getpreferredencoding()
TESTCASES = [
# (expected_retval, encoding, xml_declaration)
# ... xml_declaration = None
@ -995,7 +992,7 @@ def test_tostring_xml_declaration_cases(self):
b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
(b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
(f"<?xml version='1.0' encoding='{preferredencoding}'?>\n"
("<?xml version='1.0' encoding='utf-8'?>\n"
"<body><tag>ø</tag></body>", 'unicode', True),
]
@ -1033,11 +1030,10 @@ def test_tostringlist_xml_declaration(self):
b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
)
preferredencoding = locale.getpreferredencoding()
stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
self.assertEqual(
''.join(stringlist),
f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>"
"<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
)
self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
@ -3681,17 +3677,16 @@ def test_write_to_filename_as_unicode(self):
encoding = f.encoding
support.unlink(TESTFN)
try:
'\xf8'.encode(encoding)
except UnicodeEncodeError:
self.skipTest(f'default file encoding {encoding} not supported')
tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
tree.write(TESTFN, encoding='unicode')
with open(TESTFN, 'rb') as f:
data = f.read()
expected = "<site>\xf8</site>".encode(encoding, 'xmlcharrefreplace')
self.assertEqual(data, expected)
if encoding.lower() in ('utf-8', 'ascii'):
self.assertEqual(data, expected)
else:
self.assertIn(b"<?xml version='1.0' encoding=", data)
self.assertIn(expected, data)
def test_write_to_text_file(self):
self.addCleanup(support.unlink, TESTFN)
@ -3706,13 +3701,17 @@ def test_write_to_text_file(self):
tree.write(f, encoding='unicode')
self.assertFalse(f.closed)
with open(TESTFN, 'rb') as f:
self.assertEqual(f.read(), b'''<site>&#248;</site>''')
self.assertEqual(f.read(), convlinesep(
b'''<?xml version='1.0' encoding='ascii'?>\n'''
b'''<site>&#248;</site>'''))
with open(TESTFN, 'w', encoding='ISO-8859-1') as f:
tree.write(f, encoding='unicode')
self.assertFalse(f.closed)
with open(TESTFN, 'rb') as f:
self.assertEqual(f.read(), b'''<site>\xf8</site>''')
self.assertEqual(f.read(), convlinesep(
b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
b'''<site>\xf8</site>'''))
def test_write_to_binary_file(self):
self.addCleanup(support.unlink, TESTFN)