mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 15:11:34 +00:00 
			
		
		
		
	[3.14] gh-86155: Fix data loss after unclosed script or style tag in HTMLParser (GH-22658) (GH-133844)
When calling .close() the HTMLParser should flush all remaining content,
even when that content is in an unclosed script or style tag.
(cherry picked from commit 53383e90e4)
Co-authored-by: Waylan Limberg <waylan.limberg@icloud.com>
			
			
This commit is contained in:
		
							parent
							
								
									856e5903ba
								
							
						
					
					
						commit
						e7e105f51b
					
				
					 3 changed files with 13 additions and 1 deletion
				
			
		| 
						 | 
				
			
			@ -260,7 +260,7 @@ def goahead(self, end):
 | 
			
		|||
            else:
 | 
			
		||||
                assert 0, "interesting.search() lied"
 | 
			
		||||
        # end while
 | 
			
		||||
        if end and i < n and not self.cdata_elem:
 | 
			
		||||
        if end and i < n:
 | 
			
		||||
            if self.convert_charrefs and not self.cdata_elem:
 | 
			
		||||
                self.handle_data(unescape(rawdata[i:n]))
 | 
			
		||||
            else:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -317,6 +317,16 @@ def get_events(self):
 | 
			
		|||
                                ("endtag", element_lower)],
 | 
			
		||||
                            collector=Collector(convert_charrefs=False))
 | 
			
		||||
 | 
			
		||||
    def test_EOF_in_cdata(self):
 | 
			
		||||
        content = """<!-- not a comment --> &not-an-entity-ref;
 | 
			
		||||
                  <a href="" /> </p><p> <span></span></style>
 | 
			
		||||
                  '</script' + '>'"""
 | 
			
		||||
        s = f'<script>{content}'
 | 
			
		||||
        self._run_check(s, [
 | 
			
		||||
            ("starttag", 'script', []),
 | 
			
		||||
            ("data", content)
 | 
			
		||||
        ])
 | 
			
		||||
 | 
			
		||||
    def test_comments(self):
 | 
			
		||||
        html = ("<!-- I'm a valid comment -->"
 | 
			
		||||
                '<!--me too!-->'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,2 @@
 | 
			
		|||
:meth:`html.parser.HTMLParser.close` no longer loses data when the
 | 
			
		||||
``<script>`` tag is not closed. Patch by Waylan Limberg.
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue