gh-102555: Fix comment parsing in HTMLParser according to the HTML5 standard (GH-135664)

* "--!>" now ends the comment.
* "-- >" no longer ends the comment.
* Support abnormally ended empty comments "<!-->" and "<!--->".

---------

Co-authored-by: Kerim Kabirov <the.privat33r+gh@pm.me>
Co-authored-by: Ezio Melotti <ezio.melotti@gmail.com>
This commit is contained in:
Serhiy Storchaka 2025-07-04 10:00:23 +03:00 committed by GitHub
parent b582d751b4
commit 8ac7613dc8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 50 additions and 3 deletions

View file

@@ -367,17 +367,45 @@ def test_comments(self):
html = ("<!-- I'm a valid comment -->"
'<!--me too!-->'
'<!------>'
'<!----->'
'<!---->'
# abrupt-closing-of-empty-comment
'<!--->'
'<!-->'
'<!----I have many hyphens---->'
'<!-- I have a > in the middle -->'
'<!-- and I have -- in the middle! -->')
'<!-- and I have -- in the middle! -->'
'<!--incorrectly-closed-comment--!>'
'<!----!>'
'<!----!-->'
'<!---- >-->'
'<!---!>-->'
'<!--!>-->'
# nested-comment
'<!-- <!-- nested --> -->'
'<!--<!-->'
'<!--<!--!>'
)
expected = [('comment', " I'm a valid comment "),
('comment', 'me too!'),
('comment', '--'),
('comment', '-'),
('comment', ''),
('comment', ''),
('comment', ''),
('comment', '--I have many hyphens--'),
('comment', ' I have a > in the middle '),
('comment', ' and I have -- in the middle! ')]
('comment', ' and I have -- in the middle! '),
('comment', 'incorrectly-closed-comment'),
('comment', ''),
('comment', '--!'),
('comment', '-- >'),
('comment', '-!>'),
('comment', '!>'),
('comment', ' <!-- nested '), ('data', ' -->'),
('comment', '<!'),
('comment', '<!'),
]
self._run_check(html, expected)
def test_condcoms(self):