| 
									
										
										
										
											2009-10-11 15:56:06 +00:00
										 |  |  | """
 | 
					
						
							|  |  |  | A simple demo that reads in an XML document and displays the number of | 
					
						
							|  |  |  | elements and attributes as well as a tally of elements and attributes by name. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | import sys | 
					
						
							| 
									
										
										
										
											2009-10-11 15:56:06 +00:00
										 |  |  | from collections import defaultdict | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | from xml.sax import make_parser, handler | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class FancyCounter(handler.ContentHandler): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self._elems = 0 | 
					
						
							|  |  |  |         self._attrs = 0 | 
					
						
							| 
									
										
										
										
											2009-10-11 15:56:06 +00:00
										 |  |  |         self._elem_types = defaultdict(int) | 
					
						
							|  |  |  |         self._attr_types = defaultdict(int) | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def startElement(self, name, attrs): | 
					
						
							| 
									
										
										
										
											2009-10-11 15:56:06 +00:00
										 |  |  |         self._elems += 1 | 
					
						
							|  |  |  |         self._attrs += len(attrs) | 
					
						
							|  |  |  |         self._elem_types[name] += 1 | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-08-06 21:07:53 +00:00
										 |  |  |         for name in attrs.keys(): | 
					
						
							| 
									
										
										
										
											2009-10-11 15:56:06 +00:00
										 |  |  |             self._attr_types[name] += 1 | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def endDocument(self): | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print("There were", self._elems, "elements.") | 
					
						
							|  |  |  |         print("There were", self._attrs, "attributes.") | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print("---ELEMENT TYPES") | 
					
						
							| 
									
										
										
										
											2007-08-06 21:07:53 +00:00
										 |  |  |         for pair in  self._elem_types.items(): | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |             print("%20s %d" % pair) | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print("---ATTRIBUTE TYPES") | 
					
						
							| 
									
										
										
										
											2007-08-06 21:07:53 +00:00
										 |  |  |         for pair in  self._attr_types.items(): | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |             print("%20s %d" % pair) | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-10-11 15:56:06 +00:00
										 |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  |     parser = make_parser() | 
					
						
							|  |  |  |     parser.setContentHandler(FancyCounter()) | 
					
						
							|  |  |  |     parser.parse(sys.argv[1]) |