| 
									
										
										
										
											2010-03-08 22:17:58 +00:00
										 |  |  | """This script generates a Python codec module from a Windows Code Page.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | It uses the function MultiByteToWideChar to generate a decoding table. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ctypes | 
					
						
							|  |  |  | from ctypes import wintypes | 
					
						
							|  |  |  | from gencodec import codegen | 
					
						
							|  |  |  | import unicodedata | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def genwinmap(codepage): | 
					
						
							|  |  |  |     MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar | 
					
						
							|  |  |  |     MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD, | 
					
						
							|  |  |  |                                     wintypes.LPCSTR, ctypes.c_int, | 
					
						
							|  |  |  |                                     wintypes.LPWSTR, ctypes.c_int] | 
					
						
							|  |  |  |     MultiByteToWideChar.restype = ctypes.c_int | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     enc2uni = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for i in list(range(32)) + [127]: | 
					
						
							|  |  |  |         enc2uni[i] = (i, 'CONTROL CHARACTER') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for i in range(256): | 
					
						
							|  |  |  |         buf = ctypes.create_unicode_buffer(2) | 
					
						
							|  |  |  |         ret = MultiByteToWideChar( | 
					
						
							|  |  |  |             codepage, 0, | 
					
						
							|  |  |  |             bytes([i]), 1, | 
					
						
							|  |  |  |             buf, 2) | 
					
						
							|  |  |  |         assert ret == 1, "invalid code page" | 
					
						
							|  |  |  |         assert buf[1] == '\x00' | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             name = unicodedata.name(buf[0]) | 
					
						
							|  |  |  |         except ValueError: | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 name = enc2uni[i][1] | 
					
						
							|  |  |  |             except KeyError: | 
					
						
							|  |  |  |                 name = '' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         enc2uni[i] = (ord(buf[0]), name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return enc2uni | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def genwincodec(codepage): | 
					
						
							|  |  |  |     import platform | 
					
						
							|  |  |  |     map = genwinmap(codepage) | 
					
						
							|  |  |  |     encodingname = 'cp%d' % codepage | 
					
						
							|  |  |  |     code = codegen("", map, encodingname) | 
					
						
							|  |  |  |     # Replace first lines with our own docstring | 
					
						
							|  |  |  |     code = '''\
 | 
					
						
							|  |  |  | """Python Character Mapping Codec %s generated on Windows:
 | 
					
						
							|  |  |  | %s with the command: | 
					
						
							|  |  |  |   python Tools/unicode/genwincodec.py %s | 
					
						
							|  |  |  | """#"
 | 
					
						
							|  |  |  | ''' % (encodingname, ' '.join(platform.win32_ver()), codepage
 | 
					
						
							|  |  |  |       ) + code.split('"""#"', 1)[1] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     print(code) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  |     import sys | 
					
						
							|  |  |  |     genwincodec(int(sys.argv[1])) |