mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 03:04:41 +00:00 
			
		
		
		
	Improve handling of declarations in HTMLParser.
This commit is contained in:
		
							parent
							
								
									86f67123be
								
							
						
					
					
						commit
						f4ab491901
					
				
					 2 changed files with 51 additions and 29 deletions
				
			
		|  | @@ -122,7 +122,7 @@ def test_simple_html(self): | |||
| <Img sRc='Bar' isMAP>sample | ||||
| text | ||||
| “ | ||||
| <!--comment2a-- --comment2b--><!> | ||||
| <!--comment2a-- --comment2b--> | ||||
| </Html> | ||||
| """, [ | ||||
|     ("data", "\n"), | ||||
|  | @@ -157,24 +157,6 @@ def test_unclosed_entityref(self): | |||
|             ("data", " foo"), | ||||
|             ]) | ||||
| 
 | ||||
|     def test_doctype_decl(self): | ||||
|         inside = """\ | ||||
| DOCTYPE html [ | ||||
|   <!ELEMENT html - O EMPTY> | ||||
|   <!ATTLIST html | ||||
|       version CDATA #IMPLIED | ||||
|       profile CDATA 'DublinCore'> | ||||
|   <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'> | ||||
|   <!ENTITY myEntity 'internal parsed entity'> | ||||
|   <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'> | ||||
|   <!ENTITY % paramEntity 'name|name|name'> | ||||
|   %paramEntity; | ||||
|   <!-- comment --> | ||||
| ]""" | ||||
|         self._run_check("<!%s>" % inside, [ | ||||
|             ("decl", inside), | ||||
|             ]) | ||||
| 
 | ||||
|     def test_bad_nesting(self): | ||||
|         # Strangely, this *is* supposed to test that overlapping | ||||
|         # elements are allowed.  HTMLParser is more geared toward | ||||
|  | @@ -247,6 +229,30 @@ def test_starttag_junk_chars(self): | |||
|         self._parse_error("<a foo='>'") | ||||
|         self._parse_error("<a foo='>") | ||||
| 
 | ||||
|     def test_valid_doctypes(self): | ||||
|         # from http://www.w3.org/QA/2002/04/valid-dtd-list.html | ||||
|         dtds = ['HTML',  # HTML5 doctype | ||||
|                 ('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' | ||||
|                  '"http://www.w3.org/TR/html4/strict.dtd"'), | ||||
|                 ('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" ' | ||||
|                  '"http://www.w3.org/TR/html4/loose.dtd"'), | ||||
|                 ('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" ' | ||||
|                  '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"'), | ||||
|                 ('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" ' | ||||
|                  '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"'), | ||||
|                 ('math PUBLIC "-//W3C//DTD MathML 2.0//EN" ' | ||||
|                  '"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"'), | ||||
|                 ('html PUBLIC "-//W3C//DTD ' | ||||
|                  'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" ' | ||||
|                  '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"'), | ||||
|                 ('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" ' | ||||
|                  '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"'), | ||||
|                 'html PUBLIC "-//IETF//DTD HTML 2.0//EN"', | ||||
|                 'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"'] | ||||
|         for dtd in dtds: | ||||
|             self._run_check("<!DOCTYPE %s>" % dtd, | ||||
|                             [('decl', 'DOCTYPE ' + dtd)]) | ||||
| 
 | ||||
|     def test_declaration_junk_chars(self): | ||||
|         self._parse_error("<!DOCTYPE foo $ >") | ||||
| 
 | ||||
|  | @@ -384,8 +390,7 @@ def test_starttag_junk_chars(self): | |||
|         self._run_check("<a foo='>", [('data', "<a foo='>")]) | ||||
| 
 | ||||
|     def test_declaration_junk_chars(self): | ||||
|         # XXX this is wrong | ||||
|         self._run_check("<!DOCTYPE foo $ >", [('comment', 'DOCTYPE foo $ ')]) | ||||
|         self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')]) | ||||
| 
 | ||||
|     def test_illegal_declarations(self): | ||||
|         # XXX this might be wrong | ||||
|  | @@ -510,11 +515,14 @@ def test_broken_comments(self): | |||
|         html = ('<! not really a comment >' | ||||
|                 '<! not a comment either -->' | ||||
|                 '<! -- close enough -->' | ||||
|                 '<!><!<-- this was an empty comment>' | ||||
|                 '<!!! another bogus comment !!!>') | ||||
|         expected = [ | ||||
|             ('comment', ' not really a comment '), | ||||
|             ('comment', ' not a comment either --'), | ||||
|             ('comment', ' -- close enough --'), | ||||
|             ('comment', ''), | ||||
|             ('comment', '<-- this was an empty comment'), | ||||
|             ('comment', '!! another bogus comment !!!'), | ||||
|         ] | ||||
|         self._run_check(html, expected) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Ezio Melotti
						Ezio Melotti