mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
[3.13] gh-135661: Fix parsing unterminated bogus comments in HTMLParser (GH-137873) (GH-137875)
Bogus comments that start with "<![CDATA[" should not include the starting "!"
in their value.
(cherry picked from commit 7636a66635)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
133f7bd22b
commit
f2b7954ce0
2 changed files with 9 additions and 15 deletions
|
|
@@ -271,11 +271,8 @@ def goahead(self, end):
|
||||||
j -= len(suffix)
|
j -= len(suffix)
|
||||||
break
|
break
|
||||||
self.handle_comment(rawdata[i+4:j])
|
self.handle_comment(rawdata[i+4:j])
|
||||||
elif startswith("<![CDATA[", i):
|
elif startswith("<![CDATA[", i) and self._support_cdata:
|
||||||
if self._support_cdata:
|
self.unknown_decl(rawdata[i+3:])
|
||||||
self.unknown_decl(rawdata[i+3:])
|
|
||||||
else:
|
|
||||||
self.handle_comment(rawdata[i+1:])
|
|
||||||
elif rawdata[i:i+9].lower() == '<!doctype':
|
elif rawdata[i:i+9].lower() == '<!doctype':
|
||||||
self.handle_decl(rawdata[i+2:])
|
self.handle_decl(rawdata[i+2:])
|
||||||
elif startswith("<!", i):
|
elif startswith("<!", i):
|
||||||
|
|
@@ -350,15 +347,12 @@ def parse_html_declaration(self, i):
|
||||||
if rawdata[i:i+4] == '<!--':
|
if rawdata[i:i+4] == '<!--':
|
||||||
# this case is actually already handled in goahead()
|
# this case is actually already handled in goahead()
|
||||||
return self.parse_comment(i)
|
return self.parse_comment(i)
|
||||||
elif rawdata[i:i+9] == '<![CDATA[':
|
elif rawdata[i:i+9] == '<![CDATA[' and self._support_cdata:
|
||||||
if self._support_cdata:
|
j = rawdata.find(']]>', i+9)
|
||||||
j = rawdata.find(']]>', i+9)
|
if j < 0:
|
||||||
if j < 0:
|
return -1
|
||||||
return -1
|
self.unknown_decl(rawdata[i+3: j])
|
||||||
self.unknown_decl(rawdata[i+3: j])
|
return j + 3
|
||||||
return j + 3
|
|
||||||
else:
|
|
||||||
return self.parse_bogus_comment(i)
|
|
||||||
elif rawdata[i:i+9].lower() == '<!doctype':
|
elif rawdata[i:i+9].lower() == '<!doctype':
|
||||||
# find the closing >
|
# find the closing >
|
||||||
gtpos = rawdata.find('>', i+9)
|
gtpos = rawdata.find('>', i+9)
|
||||||
|
|
|
||||||
|
|
@@ -791,7 +791,7 @@ def test_eof_in_cdata(self, content):
|
||||||
self._run_check('<![CDATA[' + content,
|
self._run_check('<![CDATA[' + content,
|
||||||
[('unknown decl', 'CDATA[' + content)])
|
[('unknown decl', 'CDATA[' + content)])
|
||||||
self._run_check('<![CDATA[' + content,
|
self._run_check('<![CDATA[' + content,
|
||||||
[('comment', '![CDATA[' + content)],
|
[('comment', '[CDATA[' + content)],
|
||||||
collector=EventCollector(autocdata=True))
|
collector=EventCollector(autocdata=True))
|
||||||
self._run_check('<svg><text y="100"><![CDATA[' + content,
|
self._run_check('<svg><text y="100"><![CDATA[' + content,
|
||||||
[('starttag', 'svg', []),
|
[('starttag', 'svg', []),
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue