mirror of
https://github.com/python/cpython.git
synced 2026-06-08 02:41:11 +00:00
gh-149489: Fix ElementTree serialization to HTML (GH-149490)
* The content of comments, processing instructions and elements "xmp", "iframe", "noembed", "noframes", and "plaintext" is no longer escaped. * The "plaintext" element no longer has a closing tag. * Add support for empty attributes (with value None).
This commit is contained in:
parent
f87d9605d3
commit
bcd29e466f
3 changed files with 58 additions and 11 deletions
|
|
@ -1287,7 +1287,15 @@ def check(p, expected, namespaces=None):
|
|||
{'': 'http://www.w3.org/2001/XMLSchema',
|
||||
'ns': 'http://www.w3.org/2001/XMLSchema'})
|
||||
|
||||
def test_processinginstruction(self):
|
||||
def test_comment_serialization(self):
|
||||
comm = ET.Comment('<spam> & ham')
|
||||
# comments are not escaped
|
||||
self.assertEqual(ET.tostring(comm), b'<!--<spam> & ham-->')
|
||||
self.assertEqual(ET.tostring(comm, method='html'), b'<!--<spam> & ham-->')
|
||||
# no comments in text serialization
|
||||
self.assertEqual(ET.tostring(comm, method='text'), b'')
|
||||
|
||||
def test_processinginstruction_serialization(self):
|
||||
# Test ProcessingInstruction directly
|
||||
|
||||
self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
|
||||
|
|
@ -1296,12 +1304,32 @@ def test_processinginstruction(self):
|
|||
b'<?test instruction?>')
|
||||
|
||||
# Issue #2746
|
||||
|
||||
# processing instructions are not escaped
|
||||
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
|
||||
b'<?test <testing&>?>')
|
||||
self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
|
||||
b"<?xml version='1.0' encoding='latin-1'?>\n"
|
||||
b"<?test <testing&>\xe3?>")
|
||||
pi = ET.PI('test', 'ham & eggs < spam')
|
||||
self.assertEqual(ET.tostring(pi), b'<?test ham & eggs < spam?>')
|
||||
self.assertEqual(ET.tostring(pi, method='html'), b'<?test ham & eggs < spam?>')
|
||||
# no processing instructions in text serialization
|
||||
self.assertEqual(ET.tostring(pi, method='text'), b'')
|
||||
|
||||
def test_empty_attribute_serialization(self):
|
||||
# empty attrs only work in html
|
||||
elem = ET.Element('tag', attrib={'attr': None})
|
||||
self.assertRaises(TypeError, ET.tostring, elem)
|
||||
self.assertEqual(ET.tostring(elem, method='html'), b'<tag attr></tag>')
|
||||
|
||||
@support.subTests('tag', ("script", "style", "xmp", "iframe", "noembed", "noframes"))
|
||||
def test_html_cdata_elems_serialization(self, tag):
|
||||
# content of raw text elements is not escaped in html
|
||||
tag = tag.title()
|
||||
elem = ET.Element(tag)
|
||||
elem.text = '<spam>&ham'
|
||||
self.assertEqual(ET.tostring(elem, method='html'),
|
||||
('<%s><spam>&ham</%s>' % (tag, tag)).encode())
|
||||
|
||||
def test_html_empty_elems_serialization(self):
|
||||
# issue 15970
|
||||
|
|
@ -1317,6 +1345,14 @@ def test_html_empty_elems_serialization(self):
|
|||
method='html')
|
||||
self.assertEqual(serialized, expected)
|
||||
|
||||
def test_html_plaintext_serialization(self):
|
||||
# content of plaintext is not escaped in html
|
||||
# no end tag for plaintext
|
||||
elem = ET.Element('PlainText')
|
||||
elem.text = '<spam>&ham'
|
||||
self.assertEqual(ET.tostring(elem, method='html'),
|
||||
b'<PlainText><spam>&ham')
|
||||
|
||||
def test_dump_attribute_order(self):
|
||||
# See BPO 34160
|
||||
e = ET.Element('cirriculum', status='public', company='example')
|
||||
|
|
|
|||
|
|
@ -917,17 +917,20 @@ def _serialize_xml(write, elem, qnames, namespaces,
|
|||
if elem.tail:
|
||||
write(_escape_cdata(elem.tail))
|
||||
|
||||
_CDATA_CONTENT_ELEMENTS = {"script", "style", "xmp", "iframe", "noembed",
|
||||
"noframes", "plaintext"}
|
||||
|
||||
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
|
||||
"img", "input", "isindex", "link", "meta", "param", "source",
|
||||
"track", "wbr"}
|
||||
"track", "wbr", "plaintext"}
|
||||
|
||||
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
|
||||
tag = elem.tag
|
||||
text = elem.text
|
||||
if tag is Comment:
|
||||
write("<!--%s-->" % _escape_cdata(text))
|
||||
write("<!--%s-->" % text)
|
||||
elif tag is ProcessingInstruction:
|
||||
write("<?%s?>" % _escape_cdata(text))
|
||||
write("<?%s?>" % text)
|
||||
else:
|
||||
tag = qnames[tag]
|
||||
if tag is None:
|
||||
|
|
@ -951,16 +954,19 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
|
|||
for k, v in items:
|
||||
if isinstance(k, QName):
|
||||
k = k.text
|
||||
if isinstance(v, QName):
|
||||
v = qnames[v.text]
|
||||
k = qnames[k]
|
||||
if v is None:
|
||||
write(" %s" % k) # empty attr
|
||||
else:
|
||||
v = _escape_attrib_html(v)
|
||||
# FIXME: handle boolean attributes
|
||||
write(" %s=\"%s\"" % (qnames[k], v))
|
||||
if isinstance(v, QName):
|
||||
v = qnames[v.text]
|
||||
else:
|
||||
v = _escape_attrib_html(v)
|
||||
write(" %s=\"%s\"" % (k, v))
|
||||
write(">")
|
||||
ltag = tag.lower()
|
||||
if text:
|
||||
if ltag == "script" or ltag == "style":
|
||||
if ltag in _CDATA_CONTENT_ELEMENTS:
|
||||
write(text)
|
||||
else:
|
||||
write(_escape_cdata(text))
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
Fix :mod:`~xml.etree.ElementTree` serialization to HTML. The content of
|
||||
comments, processing instructions and elements "xmp", "iframe", "noembed",
|
||||
"noframes", and "plaintext" is no longer escaped. The "plaintext" element no
|
||||
longer has a closing tag. Add support for empty attributes (with value
|
||||
``None``).
|
||||
Loading…
Add table
Add a link
Reference in a new issue