| 
									
										
										
										
											2000-03-31 15:44:52 +00:00
										 |  |  | # XXX TypeErrors on calling handlers, or on bad return values from a | 
					
						
							|  |  |  | # handler, are obscure and unhelpful. | 
					
						
							| 
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  | import StringIO | 
					
						
							|  |  |  | import unittest | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-07-30 21:47:25 +00:00
										 |  |  | import pyexpat | 
					
						
							| 
									
										
										
										
											2000-09-23 04:47:56 +00:00
										 |  |  | from xml.parsers import expat | 
					
						
							| 
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  | from test.test_support import sortdict, run_unittest | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SetAttributeTest(unittest.TestCase): | 
					
						
							|  |  |  |     def setUp(self): | 
					
						
							|  |  |  |         self.parser = expat.ParserCreate(namespace_separator='!') | 
					
						
							|  |  |  |         self.set_get_pairs = [ | 
					
						
							|  |  |  |             [0, 0], | 
					
						
							|  |  |  |             [1, 1], | 
					
						
							|  |  |  |             [2, 1], | 
					
						
							|  |  |  |             [0, 0], | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |     def test_returns_unicode(self): | 
					
						
							|  |  |  |         for x, y in self.set_get_pairs: | 
					
						
							|  |  |  |             self.parser.returns_unicode = x | 
					
						
							|  |  |  |             self.assertEquals(self.parser.returns_unicode, y) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |     def test_ordered_attributes(self): | 
					
						
							|  |  |  |         for x, y in self.set_get_pairs: | 
					
						
							|  |  |  |             self.parser.ordered_attributes = x | 
					
						
							|  |  |  |             self.assertEquals(self.parser.ordered_attributes, y) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_specified_attributes(self): | 
					
						
							|  |  |  |         for x, y in self.set_get_pairs: | 
					
						
							|  |  |  |             self.parser.specified_attributes = x | 
					
						
							|  |  |  |             self.assertEquals(self.parser.specified_attributes, y) | 
					
						
							| 
									
										
											  
											
												Get rid of the superstitious "~" in dict hashing's "i = (~hash) & mask".
The comment following used to say:
	/* We use ~hash instead of hash, as degenerate hash functions, such
	   as for ints <sigh>, can have lots of leading zeros. It's not
	   really a performance risk, but better safe than sorry.
	   12-Dec-00 tim:  so ~hash produces lots of leading ones instead --
	   what's the gain? */
That is, there was never a good reason for doing it.  And to the contrary,
as explained on Python-Dev last December, it tended to make the *sum*
(i + incr) & mask (which is the first table index examined in case of
collision) the same "too often" across distinct hashes.
Changing to the simpler "i = hash & mask" reduced the number of string-dict
collisions (== # number of times we go around the lookup for-loop) from about
6 million to 5 million during a full run of the test suite (these are
approximate because the test suite does some random stuff from run to run).
The number of collisions in non-string dicts also decreased, but not as
dramatically.
Note that this may, for a given dict, change the order (wrt previous
releases) of entries exposed by .keys(), .values() and .items().  A number
of std tests suffered bogus failures as a result.  For dicts keyed by
small ints, or (less so) by characters, the order is much more likely to be
in increasing order of key now; e.g.,
>>> d = {}
>>> for i in range(10):
...    d[i] = i
...
>>> d
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
>>>
Unfortunately, people may latch on to that in small examples and draw a
bogus conclusion.
test_support.py
    Moved test_extcall's sortdict() into test_support, made it stronger,
    and imported sortdict into other std tests that needed it.
test_unicode.py
    Excluded cp875 from the "roundtrip over range(128)" test, because
    cp875 doesn't have a well-defined inverse for unicode("?", "cp875").
    See Python-Dev for excruciating details.
Cookie.py
    Changed various output functions to sort dicts before building
    strings from them.
test_extcall
    Fiddled the expected-result file.  This remains sensitive to native
    dict ordering, because, e.g., if there are multiple errors in a
    keyword-arg dict (and test_extcall sets up many cases like that), the
    specific error Python complains about first depends on native dict
    ordering.
											
										 
											2001-05-13 00:19:31 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-03-31 15:44:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-09-21 20:32:13 +00:00
										 |  |  | data = '''\
 | 
					
						
							|  |  |  | <?xml version="1.0" encoding="iso-8859-1" standalone="no"?> | 
					
						
							| 
									
										
										
										
											2000-03-31 15:44:52 +00:00
										 |  |  | <?xml-stylesheet href="stylesheet.css"?> | 
					
						
							|  |  |  | <!-- comment data --> | 
					
						
							|  |  |  | <!DOCTYPE quotations SYSTEM "quotations.dtd" [ | 
					
						
							|  |  |  | <!ELEMENT root ANY> | 
					
						
							|  |  |  | <!NOTATION notation SYSTEM "notation.jpeg"> | 
					
						
							|  |  |  | <!ENTITY acirc "â"> | 
					
						
							|  |  |  | <!ENTITY external_entity SYSTEM "entity.file"> | 
					
						
							|  |  |  | <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation> | 
					
						
							|  |  |  | %unparsed_entity; | 
					
						
							|  |  |  | ]> | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-06-27 00:37:25 +00:00
										 |  |  | <root attr1="value1" attr2="value2ὀ"> | 
					
						
							| 
									
										
										
										
											2000-03-31 15:44:52 +00:00
										 |  |  | <myns:subelement xmlns:myns="http://www.python.org/namespace"> | 
					
						
							|  |  |  |      Contents of subelements | 
					
						
							|  |  |  | </myns:subelement> | 
					
						
							|  |  |  | <sub2><![CDATA[contents of CDATA section]]></sub2> | 
					
						
							|  |  |  | &external_entity; | 
					
						
							|  |  |  | </root> | 
					
						
							| 
									
										
										
										
											2000-09-21 20:32:13 +00:00
										 |  |  | '''
 | 
					
						
							| 
									
										
										
										
											2000-03-31 15:44:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-06-27 00:37:25 +00:00
										 |  |  | # Produce UTF-8 output | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  | class ParseTest(unittest.TestCase): | 
					
						
							|  |  |  |     class Outputter: | 
					
						
							|  |  |  |         def __init__(self): | 
					
						
							|  |  |  |             self.out = [] | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |         def StartElementHandler(self, name, attrs): | 
					
						
							|  |  |  |             self.out.append('Start element: ' + repr(name) + ' ' +  | 
					
						
							|  |  |  |                             sortdict(attrs)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def EndElementHandler(self, name): | 
					
						
							|  |  |  |             self.out.append('End element: ' + repr(name)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def CharacterDataHandler(self, data): | 
					
						
							|  |  |  |             data = data.strip() | 
					
						
							|  |  |  |             if data: | 
					
						
							|  |  |  |                 self.out.append('Character data: ' + repr(data)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def ProcessingInstructionHandler(self, target, data): | 
					
						
							|  |  |  |             self.out.append('PI: ' + repr(target) + ' ' + repr(data)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def StartNamespaceDeclHandler(self, prefix, uri): | 
					
						
							|  |  |  |             self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def EndNamespaceDeclHandler(self, prefix): | 
					
						
							|  |  |  |             self.out.append('End of NS decl: ' + repr(prefix)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def StartCdataSectionHandler(self): | 
					
						
							|  |  |  |             self.out.append('Start of CDATA section') | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def EndCdataSectionHandler(self): | 
					
						
							|  |  |  |             self.out.append('End of CDATA section') | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def CommentHandler(self, text): | 
					
						
							|  |  |  |             self.out.append('Comment: ' + repr(text)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def NotationDeclHandler(self, *args): | 
					
						
							|  |  |  |             name, base, sysid, pubid = args | 
					
						
							|  |  |  |             self.out.append('Notation declared: %s' %(args,)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def UnparsedEntityDeclHandler(self, *args): | 
					
						
							|  |  |  |             entityName, base, systemId, publicId, notationName = args | 
					
						
							|  |  |  |             self.out.append('Unparsed entity decl: %s' %(args,)) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def NotStandaloneHandler(self, userData): | 
					
						
							|  |  |  |             self.out.append('Not standalone') | 
					
						
							|  |  |  |             return 1 | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def ExternalEntityRefHandler(self, *args): | 
					
						
							|  |  |  |             context, base, sysId, pubId = args | 
					
						
							|  |  |  |             self.out.append('External entity ref: %s' %(args[1:],)) | 
					
						
							|  |  |  |             return 1 | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def DefaultHandler(self, userData): | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         def DefaultHandlerExpand(self, userData): | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     handler_names = [ | 
					
						
							|  |  |  |         'StartElementHandler', 'EndElementHandler', | 
					
						
							|  |  |  |         'CharacterDataHandler', 'ProcessingInstructionHandler', | 
					
						
							|  |  |  |         'UnparsedEntityDeclHandler', 'NotationDeclHandler', | 
					
						
							|  |  |  |         'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', | 
					
						
							|  |  |  |         'CommentHandler', 'StartCdataSectionHandler', | 
					
						
							|  |  |  |         'EndCdataSectionHandler', | 
					
						
							|  |  |  |         'DefaultHandler', 'DefaultHandlerExpand', | 
					
						
							|  |  |  |         #'NotStandaloneHandler', | 
					
						
							|  |  |  |         'ExternalEntityRefHandler' | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     def test_utf8(self): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out = self.Outputter() | 
					
						
							|  |  |  |         parser = expat.ParserCreate(namespace_separator='!') | 
					
						
							|  |  |  |         for name in self.handler_names: | 
					
						
							|  |  |  |             setattr(parser, name, getattr(out, name)) | 
					
						
							|  |  |  |         parser.returns_unicode = 0 | 
					
						
							|  |  |  |         parser.Parse(data, 1) | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |         # Verify output | 
					
						
							|  |  |  |         op = out.out | 
					
						
							|  |  |  |         self.assertEquals(op[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'') | 
					
						
							|  |  |  |         self.assertEquals(op[1], "Comment: ' comment data '") | 
					
						
							|  |  |  |         self.assertEquals(op[2], "Notation declared: ('notation', None, 'notation.jpeg', None)") | 
					
						
							|  |  |  |         self.assertEquals(op[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')") | 
					
						
							|  |  |  |         self.assertEquals(op[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}") | 
					
						
							|  |  |  |         self.assertEquals(op[5], "NS decl: 'myns' 'http://www.python.org/namespace'") | 
					
						
							|  |  |  |         self.assertEquals(op[6], "Start element: 'http://www.python.org/namespace!subelement' {}") | 
					
						
							|  |  |  |         self.assertEquals(op[7], "Character data: 'Contents of subelements'") | 
					
						
							|  |  |  |         self.assertEquals(op[8], "End element: 'http://www.python.org/namespace!subelement'") | 
					
						
							|  |  |  |         self.assertEquals(op[9], "End of NS decl: 'myns'") | 
					
						
							|  |  |  |         self.assertEquals(op[10], "Start element: 'sub2' {}") | 
					
						
							|  |  |  |         self.assertEquals(op[11], 'Start of CDATA section') | 
					
						
							|  |  |  |         self.assertEquals(op[12], "Character data: 'contents of CDATA section'") | 
					
						
							|  |  |  |         self.assertEquals(op[13], 'End of CDATA section') | 
					
						
							|  |  |  |         self.assertEquals(op[14], "End element: 'sub2'") | 
					
						
							|  |  |  |         self.assertEquals(op[15], "External entity ref: (None, 'entity.file', None)") | 
					
						
							|  |  |  |         self.assertEquals(op[16], "End element: 'root'") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_unicode(self): | 
					
						
							|  |  |  |         # Try the parse again, this time producing Unicode output | 
					
						
							|  |  |  |         out = self.Outputter() | 
					
						
							|  |  |  |         parser = expat.ParserCreate(namespace_separator='!') | 
					
						
							|  |  |  |         parser.returns_unicode = 1 | 
					
						
							|  |  |  |         for name in self.handler_names: | 
					
						
							|  |  |  |             setattr(parser, name, getattr(out, name)) | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |         parser.Parse(data, 1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         op = out.out | 
					
						
							|  |  |  |         self.assertEquals(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'') | 
					
						
							|  |  |  |         self.assertEquals(op[1], "Comment: u' comment data '") | 
					
						
							|  |  |  |         self.assertEquals(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)") | 
					
						
							|  |  |  |         self.assertEquals(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')") | 
					
						
							|  |  |  |         self.assertEquals(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}") | 
					
						
							|  |  |  |         self.assertEquals(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'") | 
					
						
							|  |  |  |         self.assertEquals(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}") | 
					
						
							|  |  |  |         self.assertEquals(op[7], "Character data: u'Contents of subelements'") | 
					
						
							|  |  |  |         self.assertEquals(op[8], "End element: u'http://www.python.org/namespace!subelement'") | 
					
						
							|  |  |  |         self.assertEquals(op[9], "End of NS decl: u'myns'") | 
					
						
							|  |  |  |         self.assertEquals(op[10], "Start element: u'sub2' {}") | 
					
						
							|  |  |  |         self.assertEquals(op[11], 'Start of CDATA section') | 
					
						
							|  |  |  |         self.assertEquals(op[12], "Character data: u'contents of CDATA section'") | 
					
						
							|  |  |  |         self.assertEquals(op[13], 'End of CDATA section') | 
					
						
							|  |  |  |         self.assertEquals(op[14], "End element: u'sub2'") | 
					
						
							|  |  |  |         self.assertEquals(op[15], "External entity ref: (None, u'entity.file', None)") | 
					
						
							|  |  |  |         self.assertEquals(op[16], "End element: u'root'") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_parse_file(self): | 
					
						
							|  |  |  |         # Try parsing a file | 
					
						
							|  |  |  |         out = self.Outputter() | 
					
						
							|  |  |  |         parser = expat.ParserCreate(namespace_separator='!') | 
					
						
							|  |  |  |         parser.returns_unicode = 1 | 
					
						
							|  |  |  |         for name in self.handler_names: | 
					
						
							|  |  |  |             setattr(parser, name, getattr(out, name)) | 
					
						
							|  |  |  |         file = StringIO.StringIO(data) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         parser.ParseFile(file) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         op = out.out | 
					
						
							|  |  |  |         self.assertEquals(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'') | 
					
						
							|  |  |  |         self.assertEquals(op[1], "Comment: u' comment data '") | 
					
						
							|  |  |  |         self.assertEquals(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)") | 
					
						
							|  |  |  |         self.assertEquals(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')") | 
					
						
							|  |  |  |         self.assertEquals(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}") | 
					
						
							|  |  |  |         self.assertEquals(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'") | 
					
						
							|  |  |  |         self.assertEquals(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}") | 
					
						
							|  |  |  |         self.assertEquals(op[7], "Character data: u'Contents of subelements'") | 
					
						
							|  |  |  |         self.assertEquals(op[8], "End element: u'http://www.python.org/namespace!subelement'") | 
					
						
							|  |  |  |         self.assertEquals(op[9], "End of NS decl: u'myns'") | 
					
						
							|  |  |  |         self.assertEquals(op[10], "Start element: u'sub2' {}") | 
					
						
							|  |  |  |         self.assertEquals(op[11], 'Start of CDATA section') | 
					
						
							|  |  |  |         self.assertEquals(op[12], "Character data: u'contents of CDATA section'") | 
					
						
							|  |  |  |         self.assertEquals(op[13], 'End of CDATA section') | 
					
						
							|  |  |  |         self.assertEquals(op[14], "End element: u'sub2'") | 
					
						
							|  |  |  |         self.assertEquals(op[15], "External entity ref: (None, u'entity.file', None)") | 
					
						
							|  |  |  |         self.assertEquals(op[16], "End element: u'root'") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class NamespaceSeparatorTest(unittest.TestCase): | 
					
						
							|  |  |  |     def test_legal(self): | 
					
						
							|  |  |  |         # Tests that make sure we get errors when the namespace_separator value | 
					
						
							|  |  |  |         # is illegal, and that we don't for good values: | 
					
						
							|  |  |  |         expat.ParserCreate() | 
					
						
							|  |  |  |         expat.ParserCreate(namespace_separator=None) | 
					
						
							|  |  |  |         expat.ParserCreate(namespace_separator=' ') | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |     def test_illegal(self): | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             expat.ParserCreate(namespace_separator=42) | 
					
						
							|  |  |  |             self.fail() | 
					
						
							|  |  |  |         except TypeError, e: | 
					
						
							|  |  |  |             self.assertEquals(str(e),  | 
					
						
							|  |  |  |                 'ParserCreate() argument 2 must be string or None, not int') | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             expat.ParserCreate(namespace_separator='too long') | 
					
						
							|  |  |  |             self.fail() | 
					
						
							|  |  |  |         except ValueError, e: | 
					
						
							|  |  |  |             self.assertEquals(str(e), | 
					
						
							|  |  |  |                 'namespace_separator must be at most one character, omitted, or None') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_zero_length(self): | 
					
						
							|  |  |  |         # ParserCreate() needs to accept a namespace_separator of zero length | 
					
						
							|  |  |  |         # to satisfy the requirements of RDF applications that are required | 
					
						
							|  |  |  |         # to simply glue together the namespace URI and the localname.  Though | 
					
						
							|  |  |  |         # considered a wart of the RDF specifications, it needs to be supported. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # See XML-SIG mailing list thread starting with | 
					
						
							|  |  |  |         # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         expat.ParserCreate(namespace_separator='') # too short | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class InterningTest(unittest.TestCase): | 
					
						
							|  |  |  |     def test(self): | 
					
						
							|  |  |  |         # Test the interning machinery. | 
					
						
							|  |  |  |         p = expat.ParserCreate() | 
					
						
							|  |  |  |         L = [] | 
					
						
							|  |  |  |         def collector(name, *args): | 
					
						
							|  |  |  |             L.append(name) | 
					
						
							|  |  |  |         p.StartElementHandler = collector | 
					
						
							|  |  |  |         p.EndElementHandler = collector | 
					
						
							|  |  |  |         p.Parse("<e> <e/> <e></e> </e>", 1) | 
					
						
							|  |  |  |         tag = L[0] | 
					
						
							|  |  |  |         self.assertEquals(len(L), 6) | 
					
						
							|  |  |  |         for entry in L: | 
					
						
							|  |  |  |             # L should have the same string repeated over and over. | 
					
						
							|  |  |  |             self.assertTrue(tag is entry) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class BufferTextTest(unittest.TestCase): | 
					
						
							|  |  |  |     def setUp(self): | 
					
						
							| 
									
										
										
										
											2002-06-28 22:56:48 +00:00
										 |  |  |         self.stuff = [] | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |         self.parser = expat.ParserCreate() | 
					
						
							|  |  |  |         self.parser.buffer_text = 1 | 
					
						
							|  |  |  |         self.parser.CharacterDataHandler = self.CharacterDataHandler | 
					
						
							|  |  |  |          | 
					
						
							| 
									
										
										
										
											2002-06-28 22:56:48 +00:00
										 |  |  |     def check(self, expected, label): | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |         self.assertEquals(self.stuff, expected, | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |                 "%s\nstuff    = %r\nexpected = %r" | 
					
						
							|  |  |  |                 % (label, self.stuff, map(unicode, expected))) | 
					
						
							| 
									
										
										
										
											2002-06-28 22:56:48 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def CharacterDataHandler(self, text): | 
					
						
							|  |  |  |         self.stuff.append(text) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def StartElementHandler(self, name, attrs): | 
					
						
							|  |  |  |         self.stuff.append("<%s>" % name) | 
					
						
							|  |  |  |         bt = attrs.get("buffer-text") | 
					
						
							|  |  |  |         if bt == "yes": | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |             self.parser.buffer_text = 1 | 
					
						
							| 
									
										
										
										
											2002-06-28 22:56:48 +00:00
										 |  |  |         elif bt == "no": | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |             self.parser.buffer_text = 0 | 
					
						
							| 
									
										
										
										
											2002-06-28 22:56:48 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def EndElementHandler(self, name): | 
					
						
							|  |  |  |         self.stuff.append("</%s>" % name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def CommentHandler(self, data): | 
					
						
							|  |  |  |         self.stuff.append("<!--%s-->" % data) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |     def setHandlers(self, handlers=[]): | 
					
						
							|  |  |  |         for name in handlers: | 
					
						
							|  |  |  |             setattr(self.parser, name, getattr(self, name)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_default_to_disabled(self): | 
					
						
							|  |  |  |         parser = expat.ParserCreate() | 
					
						
							|  |  |  |         self.assertFalse(parser.buffer_text) | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |     def test_buffering_enabled(self): | 
					
						
							|  |  |  |         # Make sure buffering is turned on | 
					
						
							|  |  |  |         self.assertTrue(self.parser.buffer_text) | 
					
						
							|  |  |  |         self.parser.Parse("<a>1<b/>2<c/>3</a>", 1) | 
					
						
							|  |  |  |         self.assertEquals(self.stuff, ['123'],  | 
					
						
							|  |  |  |                           "buffered text not properly collapsed") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test1(self): | 
					
						
							|  |  |  |         # XXX This test exposes more detail of Expat's text chunking than we | 
					
						
							|  |  |  |         # XXX like, but it tests what we need to concisely. | 
					
						
							|  |  |  |         self.setHandlers(["StartElementHandler"]) | 
					
						
							|  |  |  |         self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1) | 
					
						
							|  |  |  |         self.assertEquals(self.stuff,  | 
					
						
							|  |  |  |                           ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"], | 
					
						
							|  |  |  |                           "buffering control not reacting as expected") | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     def test2(self): | 
					
						
							|  |  |  |         self.parser.Parse("<a>1<b/><2><c/> \n 3</a>", 1) | 
					
						
							|  |  |  |         self.assertEquals(self.stuff, ["1<2> \n 3"], | 
					
						
							|  |  |  |                           "buffered text not properly collapsed") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test3(self): | 
					
						
							|  |  |  |         self.setHandlers(["StartElementHandler"]) | 
					
						
							|  |  |  |         self.parser.Parse("<a>1<b/>2<c/>3</a>", 1) | 
					
						
							|  |  |  |         self.assertEquals(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"], | 
					
						
							|  |  |  |                           "buffered text not properly split") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test4(self): | 
					
						
							|  |  |  |         self.setHandlers(["StartElementHandler", "EndElementHandler"]) | 
					
						
							|  |  |  |         self.parser.CharacterDataHandler = None | 
					
						
							|  |  |  |         self.parser.Parse("<a>1<b/>2<c/>3</a>", 1) | 
					
						
							|  |  |  |         self.assertEquals(self.stuff,  | 
					
						
							|  |  |  |                           ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test5(self): | 
					
						
							|  |  |  |         self.setHandlers(["StartElementHandler", "EndElementHandler"]) | 
					
						
							|  |  |  |         self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1) | 
					
						
							|  |  |  |         self.assertEquals(self.stuff, | 
					
						
							|  |  |  |             ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test6(self): | 
					
						
							|  |  |  |         self.setHandlers(["CommentHandler", "EndElementHandler",  | 
					
						
							|  |  |  |                     "StartElementHandler"]) | 
					
						
							|  |  |  |         self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1) | 
					
						
							|  |  |  |         self.assertEquals(self.stuff,  | 
					
						
							|  |  |  |             ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],  | 
					
						
							|  |  |  |             "buffered text not properly split") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test7(self): | 
					
						
							|  |  |  |         self.setHandlers(["CommentHandler", "EndElementHandler", | 
					
						
							|  |  |  |                     "StartElementHandler"]) | 
					
						
							|  |  |  |         self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1) | 
					
						
							|  |  |  |         self.assertEquals(self.stuff, | 
					
						
							|  |  |  |                           ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", | 
					
						
							|  |  |  |                            "<!--abc-->", "4", "<!--def-->", "5", "</a>"], | 
					
						
							|  |  |  |                           "buffered text not properly split") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-08-13 03:09:07 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Test handling of exception from callback: | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  | class HandlerExceptionTest(unittest.TestCase): | 
					
						
							|  |  |  |     def StartElementHandler(self, name, attrs): | 
					
						
							|  |  |  |         raise RuntimeError(name) | 
					
						
							| 
									
										
										
										
											2004-08-13 03:09:07 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |     def test(self): | 
					
						
							|  |  |  |         parser = expat.ParserCreate() | 
					
						
							|  |  |  |         parser.StartElementHandler = self.StartElementHandler | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             parser.Parse("<a><b><c/></b></a>", 1) | 
					
						
							|  |  |  |             self.fail() | 
					
						
							|  |  |  |         except RuntimeError, e: | 
					
						
							|  |  |  |             self.assertEquals(e.args[0], 'a', | 
					
						
							|  |  |  |                               "Expected RuntimeError for element 'a', but" + \ | 
					
						
							|  |  |  |                               " found %r" % e.args[0]) | 
					
						
							| 
									
										
										
										
											2004-08-13 03:09:07 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-08-26 00:37:31 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Test Current* members: | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  | class PositionTest(unittest.TestCase): | 
					
						
							| 
									
										
										
										
											2004-08-26 00:37:31 +00:00
										 |  |  |     def StartElementHandler(self, name, attrs): | 
					
						
							|  |  |  |         self.check_pos('s') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def EndElementHandler(self, name): | 
					
						
							|  |  |  |         self.check_pos('e') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def check_pos(self, event): | 
					
						
							|  |  |  |         pos = (event, | 
					
						
							|  |  |  |                self.parser.CurrentByteIndex, | 
					
						
							|  |  |  |                self.parser.CurrentLineNumber, | 
					
						
							|  |  |  |                self.parser.CurrentColumnNumber) | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |         self.assertTrue(self.upto < len(self.expected_list), | 
					
						
							|  |  |  |                         'too many parser events') | 
					
						
							| 
									
										
										
										
											2004-08-26 00:37:31 +00:00
										 |  |  |         expected = self.expected_list[self.upto] | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |         self.assertEquals(pos, expected,  | 
					
						
							|  |  |  |                 'Expected position %s, got position %s' %(pos, expected)) | 
					
						
							| 
									
										
										
										
											2004-08-26 00:37:31 +00:00
										 |  |  |         self.upto += 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-28 23:34:06 +00:00
										 |  |  |     def test(self): | 
					
						
							|  |  |  |         self.parser = expat.ParserCreate() | 
					
						
							|  |  |  |         self.parser.StartElementHandler = self.StartElementHandler | 
					
						
							|  |  |  |         self.parser.EndElementHandler = self.EndElementHandler | 
					
						
							|  |  |  |         self.upto = 0 | 
					
						
							|  |  |  |         self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2), | 
					
						
							|  |  |  |                               ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         xml = '<a>\n <b>\n  <c/>\n </b>\n</a>' | 
					
						
							|  |  |  |         self.parser.Parse(xml, 1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class sf1296433Test(unittest.TestCase): | 
					
						
							|  |  |  |     def test_parse_only_xml_data(self): | 
					
						
							|  |  |  |         # http://python.org/sf/1296433 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025) | 
					
						
							|  |  |  |         # this one doesn't crash | 
					
						
							|  |  |  |         #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000) | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         class SpecificException(Exception): | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         def handler(text): | 
					
						
							|  |  |  |             raise SpecificException | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         parser = expat.ParserCreate() | 
					
						
							|  |  |  |         parser.CharacterDataHandler = handler | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |         self.assertRaises(Exception, parser.Parse, xml) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_main(): | 
					
						
							|  |  |  |     run_unittest(SetAttributeTest, | 
					
						
							|  |  |  |                  ParseTest, | 
					
						
							|  |  |  |                  NamespaceSeparatorTest, | 
					
						
							|  |  |  |                  InterningTest, | 
					
						
							|  |  |  |                  BufferTextTest, | 
					
						
							|  |  |  |                  HandlerExceptionTest, | 
					
						
							|  |  |  |                  PositionTest, | 
					
						
							|  |  |  |                  sf1296433Test) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     test_main() |