mirror of
https://github.com/python/cpython.git
synced 2025-10-24 18:33:49 +00:00
#13358: HTMLParser now calls handle_data only once for each CDATA.
This commit is contained in:
parent
8008f2aba0
commit
15cb489234
3 changed files with 26 additions and 3 deletions
|
@@ -14,7 +14,6 @@
|
||||||
# Regular expressions used for parsing
|
# Regular expressions used for parsing
|
||||||
|
|
||||||
interesting_normal = re.compile('[&<]')
|
interesting_normal = re.compile('[&<]')
|
||||||
interesting_cdata = re.compile(r'<(/|\Z)')
|
|
||||||
incomplete = re.compile('&[a-zA-Z#]')
|
incomplete = re.compile('&[a-zA-Z#]')
|
||||||
|
|
||||||
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
||||||
|
@@ -149,8 +148,8 @@ def get_starttag_text(self):
|
||||||
return self.__starttag_text
|
return self.__starttag_text
|
||||||
|
|
||||||
def set_cdata_mode(self, elem):
|
def set_cdata_mode(self, elem):
|
||||||
self.interesting = interesting_cdata
|
|
||||||
self.cdata_elem = elem.lower()
|
self.cdata_elem = elem.lower()
|
||||||
|
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||||
|
|
||||||
def clear_cdata_mode(self):
|
def clear_cdata_mode(self):
|
||||||
self.interesting = interesting_normal
|
self.interesting = interesting_normal
|
||||||
|
@@ -168,6 +167,8 @@ def goahead(self, end):
|
||||||
if match:
|
if match:
|
||||||
j = match.start()
|
j = match.start()
|
||||||
else:
|
else:
|
||||||
|
if self.cdata_elem:
|
||||||
|
break
|
||||||
j = n
|
j = n
|
||||||
if i < j: self.handle_data(rawdata[i:j])
|
if i < j: self.handle_data(rawdata[i:j])
|
||||||
i = self.updatepos(i, j)
|
i = self.updatepos(i, j)
|
||||||
|
@@ -250,7 +251,7 @@ def goahead(self, end):
|
||||||
else:
|
else:
|
||||||
assert 0, "interesting.search() lied"
|
assert 0, "interesting.search() lied"
|
||||||
# end while
|
# end while
|
||||||
if end and i < n:
|
if end and i < n and not self.cdata_elem:
|
||||||
self.handle_data(rawdata[i:n])
|
self.handle_data(rawdata[i:n])
|
||||||
i = self.updatepos(i, n)
|
i = self.updatepos(i, n)
|
||||||
self.rawdata = rawdata[i:]
|
self.rawdata = rawdata[i:]
|
||||||
|
|
|
@@ -301,7 +301,27 @@ def test_cdata_content(self):
|
||||||
("data", content),
|
("data", content),
|
||||||
("endtag", element_lower)])
|
("endtag", element_lower)])
|
||||||
|
|
||||||
|
def test_cdata_with_closing_tags(self):
|
||||||
|
# see issue #13358
|
||||||
|
# make sure that HTMLParser calls handle_data only once for each CDATA.
|
||||||
|
# The normal event collector normalizes the events in get_events,
|
||||||
|
# so we override it to return the original list of events.
|
||||||
|
class Collector(EventCollector):
|
||||||
|
def get_events(self):
|
||||||
|
return self.events
|
||||||
|
|
||||||
|
content = """<!-- not a comment --> &not-an-entity-ref;
|
||||||
|
<a href="" /> </p><p> <span></span></style>
|
||||||
|
'</script' + '>'"""
|
||||||
|
for element in [' script', 'script ', ' script ',
|
||||||
|
'\nscript', 'script\n', '\nscript\n']:
|
||||||
|
element_lower = element.lower().strip()
|
||||||
|
s = '<script>{content}</{element}>'.format(element=element,
|
||||||
|
content=content)
|
||||||
|
self._run_check(s, [("starttag", element_lower, []),
|
||||||
|
("data", content),
|
||||||
|
("endtag", element_lower)],
|
||||||
|
collector=Collector())
|
||||||
|
|
||||||
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
|
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
|
||||||
|
|
||||||
|
|
|
@@ -76,6 +76,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #13358: HTMLParser now calls handle_data only once for each CDATA.
|
||||||
|
|
||||||
- Issue #4147: minidom's toprettyxml no longer adds whitespace around a text
|
- Issue #4147: minidom's toprettyxml no longer adds whitespace around a text
|
||||||
node when it is the only child of an element. Initial patch by Dan
|
node when it is the only child of an element. Initial patch by Dan
|
||||||
Kenigsberg.
|
Kenigsberg.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue