| 
									
										
										
										
											2010-03-11 22:53:45 +00:00
										 |  |  | #!/usr/bin/env python3 | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | """ Compare the output of two codecs.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Licensed to PSF under a Contributor Agreement. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def compare_codecs(encoding1, encoding2): | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-08-03 17:06:41 +00:00
										 |  |  |     print('Comparing encoding/decoding of   %r and   %r' % (encoding1, encoding2)) | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |     mismatch = 0 | 
					
						
							|  |  |  |     # Check encoding | 
					
						
							| 
									
										
										
										
											2011-10-04 19:06:00 +03:00
										 |  |  |     for i in range(sys.maxunicode+1): | 
					
						
							| 
									
										
										
										
											2008-05-16 17:02:34 +00:00
										 |  |  |         u = chr(i) | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             c1 = u.encode(encoding1) | 
					
						
							| 
									
										
										
										
											2007-01-10 16:19:56 +00:00
										 |  |  |         except UnicodeError as reason: | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |             c1 = '<undefined>' | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             c2 = u.encode(encoding2) | 
					
						
							| 
									
										
										
										
											2007-01-10 16:19:56 +00:00
										 |  |  |         except UnicodeError as reason: | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |             c2 = '<undefined>' | 
					
						
							|  |  |  |         if c1 != c2: | 
					
						
							| 
									
										
										
										
											2007-08-03 17:06:41 +00:00
										 |  |  |             print(' * encoding mismatch for 0x%04X: %-14r != %r' % \ | 
					
						
							|  |  |  |                   (i, c1, c2)) | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |             mismatch += 1 | 
					
						
							|  |  |  |     # Check decoding | 
					
						
							|  |  |  |     for i in range(256): | 
					
						
							|  |  |  |         c = chr(i) | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             u1 = c.decode(encoding1) | 
					
						
							|  |  |  |         except UnicodeError: | 
					
						
							| 
									
										
										
										
											2008-05-16 17:02:34 +00:00
										 |  |  |             u1 = '<undefined>' | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |         try: | 
					
						
							|  |  |  |             u2 = c.decode(encoding2) | 
					
						
							|  |  |  |         except UnicodeError: | 
					
						
							| 
									
										
										
										
											2008-05-16 17:02:34 +00:00
										 |  |  |             u2 = '<undefined>' | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |         if u1 != u2: | 
					
						
							| 
									
										
										
										
											2007-08-03 17:06:41 +00:00
										 |  |  |             print(' * decoding mismatch for 0x%04X: %-14r != %r' % \ | 
					
						
							|  |  |  |                   (i, u1, u2)) | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |             mismatch += 1 | 
					
						
							|  |  |  |     if mismatch: | 
					
						
							| 
									
										
										
										
											2007-08-03 17:06:41 +00:00
										 |  |  |         print() | 
					
						
							|  |  |  |         print('Found %i mismatches' % mismatch) | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  |     else: | 
					
						
							| 
									
										
										
										
											2007-08-03 17:06:41 +00:00
										 |  |  |         print('-> Codecs are identical.') | 
					
						
							| 
									
										
										
										
											2005-10-21 13:47:03 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  |     compare_codecs(sys.argv[1], sys.argv[2]) |