[3.13] gh-135661: Fix parsing unterminated bogus comments in HTMLParser (GH-137873) (GH-137875)

Bogus comments that start with "<![CDATA[" should not include the starting "!"
in their value.
(cherry picked from commit 7636a66635)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2025-08-17 12:59:24 +02:00 committed by GitHub
parent 133f7bd22b
commit f2b7954ce0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 9 additions and 15 deletions

View file

@ -271,11 +271,8 @@ def goahead(self, end):
j -= len(suffix) j -= len(suffix)
break break
self.handle_comment(rawdata[i+4:j]) self.handle_comment(rawdata[i+4:j])
elif startswith("<![CDATA[", i): elif startswith("<![CDATA[", i) and self._support_cdata:
if self._support_cdata: self.unknown_decl(rawdata[i+3:])
self.unknown_decl(rawdata[i+3:])
else:
self.handle_comment(rawdata[i+1:])
elif rawdata[i:i+9].lower() == '<!doctype': elif rawdata[i:i+9].lower() == '<!doctype':
self.handle_decl(rawdata[i+2:]) self.handle_decl(rawdata[i+2:])
elif startswith("<!", i): elif startswith("<!", i):
@ -350,15 +347,12 @@ def parse_html_declaration(self, i):
if rawdata[i:i+4] == '<!--': if rawdata[i:i+4] == '<!--':
# this case is actually already handled in goahead() # this case is actually already handled in goahead()
return self.parse_comment(i) return self.parse_comment(i)
elif rawdata[i:i+9] == '<![CDATA[': elif rawdata[i:i+9] == '<![CDATA[' and self._support_cdata:
if self._support_cdata: j = rawdata.find(']]>', i+9)
j = rawdata.find(']]>', i+9) if j < 0:
if j < 0: return -1
return -1 self.unknown_decl(rawdata[i+3: j])
self.unknown_decl(rawdata[i+3: j]) return j + 3
return j + 3
else:
return self.parse_bogus_comment(i)
elif rawdata[i:i+9].lower() == '<!doctype': elif rawdata[i:i+9].lower() == '<!doctype':
# find the closing > # find the closing >
gtpos = rawdata.find('>', i+9) gtpos = rawdata.find('>', i+9)

View file

@ -791,7 +791,7 @@ def test_eof_in_cdata(self, content):
self._run_check('<![CDATA[' + content, self._run_check('<![CDATA[' + content,
[('unknown decl', 'CDATA[' + content)]) [('unknown decl', 'CDATA[' + content)])
self._run_check('<![CDATA[' + content, self._run_check('<![CDATA[' + content,
[('comment', '![CDATA[' + content)], [('comment', '[CDATA[' + content)],
collector=EventCollector(autocdata=True)) collector=EventCollector(autocdata=True))
self._run_check('<svg><text y="100"><![CDATA[' + content, self._run_check('<svg><text y="100"><![CDATA[' + content,
[('starttag', 'svg', []), [('starttag', 'svg', []),