mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	Merged revisions 87542 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r87542 | senthil.kumaran | 2010-12-28 23:55:16 +0800 (Tue, 28 Dec 2010) | 3 lines

  Fix Issue10759 - html.parser.unescape() fails on HTML entities with incorrect syntax
........
This commit is contained in:
		
							parent
							
								
									18f6b1987f
								
							
						
					
					
						commit
						6c85838489
					
				
					 2 changed files with 14 additions and 7 deletions
				
			
		| 
						 | 
					@ -367,6 +367,7 @@ def unescape(self, s):
 | 
				
			||||||
            return s
 | 
					            return s
 | 
				
			||||||
        def replaceEntities(s):
 | 
					        def replaceEntities(s):
 | 
				
			||||||
            s = s.groups()[0]
 | 
					            s = s.groups()[0]
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
                if s[0] == "#":
 | 
					                if s[0] == "#":
 | 
				
			||||||
                    s = s[1:]
 | 
					                    s = s[1:]
 | 
				
			||||||
                    if s[0] in ['x','X']:
 | 
					                    if s[0] in ['x','X']:
 | 
				
			||||||
| 
						 | 
					@ -374,6 +375,8 @@ def replaceEntities(s):
 | 
				
			||||||
                    else:
 | 
					                    else:
 | 
				
			||||||
                        c = int(s)
 | 
					                        c = int(s)
 | 
				
			||||||
                    return chr(c)
 | 
					                    return chr(c)
 | 
				
			||||||
 | 
					            except ValueError:
 | 
				
			||||||
 | 
					                return '&#'+ s +';'
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                # Cannot use name2codepoint directly, because HTMLParser
 | 
					                # Cannot use name2codepoint directly, because HTMLParser
 | 
				
			||||||
                # supports apos, which is not part of HTML 4
 | 
					                # supports apos, which is not part of HTML 4
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -319,6 +319,10 @@ def test_entityrefs_in_attributes(self):
 | 
				
			||||||
        self._run_check("<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>", [
 | 
					        self._run_check("<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>", [
 | 
				
			||||||
                ("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])
 | 
					                ("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])
 | 
				
			||||||
                ])
 | 
					                ])
 | 
				
			||||||
 | 
					    def test_unescape_function(self):
 | 
				
			||||||
 | 
					        p = html.parser.HTMLParser()
 | 
				
			||||||
 | 
					        self.assertEqual(p.unescape('&#bad;'),'&#bad;')
 | 
				
			||||||
 | 
					        self.assertEqual(p.unescape('&#0038;'),'&')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_main():
 | 
					def test_main():
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue