| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | import sys | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from xml.sax import make_parser, handler | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class FancyCounter(handler.ContentHandler): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self._elems = 0 | 
					
						
							|  |  |  |         self._attrs = 0 | 
					
						
							|  |  |  |         self._elem_types = {} | 
					
						
							|  |  |  |         self._attr_types = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def startElement(self, name, attrs): | 
					
						
							|  |  |  |         self._elems = self._elems + 1 | 
					
						
							|  |  |  |         self._attrs = self._attrs + len(attrs) | 
					
						
							|  |  |  |         self._elem_types[name] = self._elem_types.get(name, 0) + 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         for name in list(attrs.keys()): | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  |             self._attr_types[name] = self._attr_types.get(name, 0) + 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def endDocument(self): | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print("There were", self._elems, "elements.") | 
					
						
							|  |  |  |         print("There were", self._attrs, "attributes.") | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print("---ELEMENT TYPES") | 
					
						
							|  |  |  |         for pair in  list(self._elem_types.items()): | 
					
						
							|  |  |  |             print("%20s %d" % pair) | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print("---ATTRIBUTE TYPES") | 
					
						
							|  |  |  |         for pair in  list(self._attr_types.items()): | 
					
						
							|  |  |  |             print("%20s %d" % pair) | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-07-18 06:16:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-10-16 15:27:05 +00:00
										 |  |  | parser = make_parser() | 
					
						
							|  |  |  | parser.setContentHandler(FancyCounter()) | 
					
						
							|  |  |  | parser.parse(sys.argv[1]) |