[3.13] gh-149489: Fix ElementTree serialization to HTML (GH-149490) (GH-150596) (GH-150609)

* The content of elements "xmp", "iframe", "noembed", "noframes",
  and "plaintext" is no longer escaped.
* The "plaintext" element no longer has a closing tag.
(cherry picked from commit c42e6d3f1a)


(cherry picked from commit bcd29e466f)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2026-05-30 13:16:32 +02:00 committed by GitHub
parent 3a15d1602a
commit b46df7fe42
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 32 additions and 4 deletions

View file

@ -1248,7 +1248,12 @@ def check(p, expected, namespaces=None):
{'': 'http://www.w3.org/2001/XMLSchema',
'ns': 'http://www.w3.org/2001/XMLSchema'})
def test_processinginstruction(self):
def test_comment_serialization(self):
    # Markup-significant characters inside a comment must be written
    # verbatim: comments are not escaped.
    node = ET.Comment('<spam> & ham')
    serialized = ET.tostring(node)
    self.assertEqual(serialized, b'<!--<spam> & ham-->')
def test_processinginstruction_serialization(self):
# Test ProcessingInstruction directly
self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
@ -1257,13 +1262,22 @@ def test_processinginstruction(self):
b'<?test instruction?>')
# Issue #2746
# processing instructions are not escaped
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
b'<?test <testing&>?>')
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
b"<?xml version='1.0' encoding='latin-1'?>\n"
b"<?test <testing&>\xe3?>")
@support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
def test_html_cdata_elems_serialization(self, tag):
    # Content of raw text elements is not escaped in HTML output.
    # The tag is title-cased so the check does not depend on the
    # element name being lowercase.
    mixed_case = tag.title()
    node = ET.Element(mixed_case)
    node.text = '<spam>&ham'
    expected = ('<%s><spam>&ham</%s>' % (mixed_case, mixed_case)).encode()
    self.assertEqual(ET.tostring(node, method='html'), expected)
def test_html_empty_elems_serialization(self):
# issue 15970
# from http://www.w3.org/TR/html401/index/elements.html
@ -1278,6 +1292,14 @@ def test_html_empty_elems_serialization(self):
method='html')
self.assertEqual(serialized, expected)
def test_html_plaintext_serialization(self):
    # In HTML output the content of <plaintext> is written unescaped
    # and the element gets no end tag.
    node = ET.Element('PlainText')
    node.text = '<spam>&ham'
    output = ET.tostring(node, method='html')
    self.assertEqual(output, b'<PlainText><spam>&ham')
def test_dump_attribute_order(self):
# See BPO 34160
e = ET.Element('cirriculum', status='public', company='example')

View file

@ -913,9 +913,12 @@ def _serialize_xml(write, elem, qnames, namespaces,
if elem.tail:
write(_escape_cdata(elem.tail))
# Lowercase names of the HTML elements whose text _serialize_html() writes
# as-is instead of passing it through _escape_cdata().  Lookups use the
# lowercased tag, so matching is case-insensitive.
_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
                           "noframes", "plaintext"}
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
"img", "input", "isindex", "link", "meta", "param", "source",
"track", "wbr"}
"track", "wbr", "plaintext"}
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
tag = elem.tag
@ -956,7 +959,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
write(">")
ltag = tag.lower()
if text:
if ltag == "script" or ltag == "style":
if ltag in _CDATA_CONTENT_ELEMENTS:
write(text)
else:
write(_escape_cdata(text))

View file

@ -0,0 +1,3 @@
Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of
elements "xmp", "iframe", "noembed", "noframes", and "plaintext" is no longer
escaped. The "plaintext" element no longer has a closing tag.