mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	#20288: fix handling of invalid numeric charrefs in HTMLParser.
This commit is contained in:
		
							parent
							
								
									a479b7505e
								
							
						
					
					
						commit
						f27b9a741a
					
				
					 3 changed files with 11 additions and 3 deletions
				
			
		| 
						 | 
				
			
			@ -228,9 +228,9 @@ def goahead(self, end):
 | 
			
		|||
                    i = self.updatepos(i, k)
 | 
			
		||||
                    continue
 | 
			
		||||
                else:
 | 
			
		||||
-                    if ";" in rawdata[i:]: #bail by consuming &#
 | 
			
		||||
-                        self.handle_data(rawdata[0:2])
 | 
			
		||||
-                        i = self.updatepos(i, 2)
 | 
			
		||||
+                    if ";" in rawdata[i:]:  # bail by consuming &#
 | 
			
		||||
+                        self.handle_data(rawdata[i:i+2])
 | 
			
		||||
+                        i = self.updatepos(i, i+2)
 | 
			
		||||
                    break
 | 
			
		||||
            elif startswith('&', i):
 | 
			
		||||
                match = entityref.match(rawdata, i)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -151,6 +151,12 @@ def test_malformatted_charref(self):
 | 
			
		|||
            ("data", "&#bad;"),
 | 
			
		||||
            ("endtag", "p"),
 | 
			
		||||
        ])
 | 
			
		||||
+        # add the [] as a workaround to avoid buffering (see #20288)
 | 
			
		||||
+        self._run_check(["<div>&#bad;</div>"], [
 | 
			
		||||
+            ("starttag", "div", []),
 | 
			
		||||
+            ("data", "&#bad;"),
 | 
			
		||||
+            ("endtag", "div"),
 | 
			
		||||
+        ])
 | 
			
		||||
 | 
			
		||||
    def test_unclosed_entityref(self):
 | 
			
		||||
        self._run_check("&entityref foo", [
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -45,6 +45,8 @@ Core and Builtins
 | 
			
		|||
Library
 | 
			
		||||
-------
 | 
			
		||||
 | 
			
		||||
+- Issue #20288: fix handling of invalid numeric charrefs in HTMLParser.
 | 
			
		||||
 | 
			
		||||
- Issue #20424: Python implementation of io.StringIO now supports lone surrogates.
 | 
			
		||||
 | 
			
		||||
- Issue #19456: ntpath.join() now joins relative paths correctly when a drive
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue