mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	#13358: HTMLParser now calls handle_data only once for each CDATA.
This commit is contained in:
		
							parent
							
								
									8008f2aba0
								
							
						
					
					
						commit
						15cb489234
					
				
					 3 changed files with 26 additions and 3 deletions
				
			
		|  | @ -14,7 +14,6 @@ | |||
| # Regular expressions used for parsing | ||||
| 
 | ||||
| interesting_normal = re.compile('[&<]') | ||||
| interesting_cdata = re.compile(r'<(/|\Z)') | ||||
| incomplete = re.compile('&[a-zA-Z#]') | ||||
| 
 | ||||
| entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') | ||||
|  | @ -149,8 +148,8 @@ def get_starttag_text(self): | |||
|         return self.__starttag_text | ||||
| 
 | ||||
|     def set_cdata_mode(self, elem): | ||||
|         self.interesting = interesting_cdata | ||||
|         self.cdata_elem = elem.lower() | ||||
|         self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I) | ||||
| 
 | ||||
|     def clear_cdata_mode(self): | ||||
|         self.interesting = interesting_normal | ||||
|  | @ -168,6 +167,8 @@ def goahead(self, end): | |||
|             if match: | ||||
|                 j = match.start() | ||||
|             else: | ||||
|                 if self.cdata_elem: | ||||
|                     break | ||||
|                 j = n | ||||
|             if i < j: self.handle_data(rawdata[i:j]) | ||||
|             i = self.updatepos(i, j) | ||||
|  | @ -250,7 +251,7 @@ def goahead(self, end): | |||
|             else: | ||||
|                 assert 0, "interesting.search() lied" | ||||
|         # end while | ||||
|         if end and i < n: | ||||
|         if end and i < n and not self.cdata_elem: | ||||
|             self.handle_data(rawdata[i:n]) | ||||
|             i = self.updatepos(i, n) | ||||
|         self.rawdata = rawdata[i:] | ||||
|  |  | |||
|  | @ -301,7 +301,27 @@ def test_cdata_content(self): | |||
|                                     ("data", content), | ||||
|                                     ("endtag", element_lower)]) | ||||
| 
 | ||||
|     def test_cdata_with_closing_tags(self): | ||||
|         # see issue #13358 | ||||
|         # make sure that HTMLParser calls handle_data only once for each CDATA. | ||||
|         # The normal event collector normalizes  the events in get_events, | ||||
|         # so we override it to return the original list of events. | ||||
|         class Collector(EventCollector): | ||||
|             def get_events(self): | ||||
|                 return self.events | ||||
| 
 | ||||
|         content = """<!-- not a comment --> &not-an-entity-ref; | ||||
|                   <a href="" /> </p><p> <span></span></style> | ||||
|                   '</script' + '>'""" | ||||
|         for element in [' script', 'script ', ' script ', | ||||
|                         '\nscript', 'script\n', '\nscript\n']: | ||||
|             element_lower = element.lower().strip() | ||||
|             s = '<script>{content}</{element}>'.format(element=element, | ||||
|                                                        content=content) | ||||
|             self._run_check(s, [("starttag", element_lower, []), | ||||
|                                 ("data", content), | ||||
|                                 ("endtag", element_lower)], | ||||
|                             collector=Collector()) | ||||
| 
 | ||||
| class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): | ||||
| 
 | ||||
|  |  | |||
|  | @ -76,6 +76,8 @@ Core and Builtins | |||
| Library | ||||
| ------- | ||||
| 
 | ||||
| - Issue #13358: HTMLParser now calls handle_data only once for each CDATA. | ||||
| 
 | ||||
| - Issue #4147: minidom's toprettyxml no longer adds whitespace around a text | ||||
|   node when it is the only child of an element.  Initial patch by Dan | ||||
|   Kenigsberg. | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Ezio Melotti
						Ezio Melotti