mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Update big5hkscs codec to conform to the HKSCS:2004 revision.
This commit is contained in:
		
							parent
							
								
									27d339446a
								
							
						
					
					
						commit
						01612e7dec
					
				
					 6 changed files with 1868 additions and 1777 deletions
				
			
		|  | @ -64,8 +64,10 @@ | |||
| "\xab\x96\xe7\x9a\x84\xe5\x95\x8f\xe9\xa1\x8c\xe5\xb0\xb1\xe6\x98" | ||||
| "\xaf\x3a\x0a\x0a"), | ||||
| 'big5hkscs': ( | ||||
| "\x88\x45\x88\x5c\x8a\x73\x8b\xda\x8d\xd8\x0a", | ||||
| "\xf0\xa0\x84\x8c\xc4\x9a\xe9\xb5\xae\xe7\xbd\x93\xe6\xb4\x86\x0a"), | ||||
| "\x88\x45\x88\x5c\x8a\x73\x8b\xda\x8d\xd8\x0a\x88\x66\x88\x62\x88" | ||||
| "\xa7\x20\x88\xa7\x88\xa3\x0a", | ||||
| "\xf0\xa0\x84\x8c\xc4\x9a\xe9\xb5\xae\xe7\xbd\x93\xe6\xb4\x86\x0a" | ||||
| "\xc3\x8a\xc3\x8a\xcc\x84\xc3\xaa\x20\xc3\xaa\xc3\xaa\xcc\x84\x0a"), | ||||
| 'cp949': ( | ||||
| "\x8c\x63\xb9\xe6\xb0\xa2\xc7\xcf\x20\xbc\x84\xbd\xc3\xc4\xdd\xb6" | ||||
| "\xf3\x0a\x0a\xa8\xc0\xa8\xc0\xb3\xb3\x21\x21\x20\xec\xd7\xce\xfa" | ||||
|  |  | |||
|  | @ -11,10 +11,11 @@ | |||
| class TestBig5HKSCSMap(test_multibytecodec_support.TestBase_Mapping, | ||||
|                        unittest.TestCase): | ||||
|     encoding = 'big5hkscs' | ||||
|     mapfileurl = 'http://people.freebsd.org/~perky/i18n/BIG5HKSCS.TXT' | ||||
|     mapfileurl = 'http://people.freebsd.org/~perky/i18n/BIG5HKSCS-2004.TXT' | ||||
| 
 | ||||
| def test_main(): | ||||
|     test_support.run_unittest(__name__) | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     test_support.use_resources = ['urlfetch'] | ||||
|     test_main() | ||||
|  |  | |||
|  | @ -323,9 +323,17 @@ def test_mapping_supplemental(self): | |||
| 
 | ||||
|     def _testpoint(self, csetch, unich): | ||||
|         if (csetch, unich) not in self.pass_enctest: | ||||
|             self.assertEqual(unich.encode(self.encoding), csetch) | ||||
|             try: | ||||
|                 self.assertEqual(unich.encode(self.encoding), csetch) | ||||
|             except UnicodeError, exc: | ||||
|                 self.fail('Encoding failed while testing %s -> %s: %s' % ( | ||||
|                             repr(unich), repr(csetch), exc.reason)) | ||||
|         if (csetch, unich) not in self.pass_dectest: | ||||
|             self.assertEqual(unicode(csetch, self.encoding), unich) | ||||
|             try: | ||||
|                 self.assertEqual(csetch.decode(self.encoding), unich) | ||||
|             except UnicodeError, exc: | ||||
|                 self.fail('Decoding failed while testing %s -> %s: %s' % ( | ||||
|                             repr(csetch), repr(unich), exc.reason)) | ||||
| 
 | ||||
| def load_teststring(encoding): | ||||
|     from test import cjkencodings_test | ||||
|  |  | |||
|  | @ -1118,6 +1118,8 @@ Library | |||
| Extension Modules | ||||
| ----------------- | ||||
| 
 | ||||
| - Updated ``big5hkscs`` codec to the HKSCS revision of 2004. | ||||
| 
 | ||||
| - #1940: make it possible to use curses.filter() before curses.initscr() | ||||
|   as the documentation says. | ||||
| 
 | ||||
|  |  | |||
|  | @ -26,6 +26,16 @@ CODEC_INIT(big5hkscs) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * There are four possible Unicode pair -> big5hkscs mappings in HKSCS 2004: | ||||
|  *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866) | ||||
|  *  U+00CA U+030C -> 8864 | ||||
|  *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7) | ||||
|  *  U+00EA U+030C -> 88a5 | ||||
|  * These are handled not by mapping tables but by hand-written code. | ||||
|  */ | ||||
| static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5}; | ||||
| 
 | ||||
| ENCODER(big5hkscs) | ||||
| { | ||||
| 	while (inleft > 0) { | ||||
|  | @ -46,7 +56,27 @@ ENCODER(big5hkscs) | |||
| 		REQUIRE_OUTBUF(2) | ||||
| 
 | ||||
| 		if (c < 0x10000) { | ||||
| 			TRYMAP_ENC(big5hkscs_bmp, code, c); | ||||
| 			TRYMAP_ENC(big5hkscs_bmp, code, c) { | ||||
| 				if (code == MULTIC) { | ||||
| 					if (inleft >= 2 && | ||||
| 					    ((c & 0xffdf) == 0x00ca) && | ||||
| 					    (((*inbuf)[1] & 0xfff7) == 0x0304)) { | ||||
| 						code = big5hkscs_pairenc_table[ | ||||
| 							((c >> 4) | | ||||
| 							 ((*inbuf)[1] >> 3)) & 3]; | ||||
| 						insize = 2; | ||||
| 					} | ||||
| 					else if (inleft < 2 && | ||||
| 						 !(flags & MBENC_FLUSH)) | ||||
| 						return MBERR_TOOFEW; | ||||
| 					else { | ||||
| 						if (c == 0xca) | ||||
| 							code = 0x8866; | ||||
| 						else /* c == 0xea */ | ||||
| 							code = 0x88a7; | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 			else TRYMAP_ENC(big5, code, c); | ||||
| 			else return 1; | ||||
| 		} | ||||
|  | @ -67,7 +97,7 @@ ENCODER(big5hkscs) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| #define BH2S(c1, c2) (((c1) - 0x88) * (0xfe - 0x40 + 1) + ((c2) - 0x40)) | ||||
| #define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40)) | ||||
| 
 | ||||
| DECODER(big5hkscs) | ||||
| { | ||||
|  | @ -96,19 +126,19 @@ hkscsdec:	TRYMAP_DEC(big5hkscs, decoded, c, IN2) { | |||
| 			int s = BH2S(c, IN2); | ||||
| 			const unsigned char *hintbase; | ||||
| 
 | ||||
| 			assert(0x88 <= c && c <= 0xfe); | ||||
| 			assert(0x87 <= c && c <= 0xfe); | ||||
| 			assert(0x40 <= IN2 && IN2 <= 0xfe); | ||||
| 
 | ||||
| 			if (BH2S(0x88, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { | ||||
| 			if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { | ||||
| 				hintbase = big5hkscs_phint_0; | ||||
| 				s -= BH2S(0x88, 0x40); | ||||
| 				s -= BH2S(0x87, 0x40); | ||||
| 			} | ||||
| 			else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ | ||||
| 				hintbase = big5hkscs_phint_11939; | ||||
| 				hintbase = big5hkscs_phint_12130; | ||||
| 				s -= BH2S(0xc6, 0xa1); | ||||
| 			} | ||||
| 			else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ | ||||
| 				hintbase = big5hkscs_phint_21733; | ||||
| 				hintbase = big5hkscs_phint_21924; | ||||
| 				s -= BH2S(0xf9, 0xd6); | ||||
| 			} | ||||
| 			else | ||||
|  | @ -123,7 +153,17 @@ hkscsdec:	TRYMAP_DEC(big5hkscs, decoded, c, IN2) { | |||
| 				NEXT(2, 1) | ||||
| 			} | ||||
| 		} | ||||
| 		else return 2; | ||||
| 		else { | ||||
| 			switch ((c << 8) | IN2) { | ||||
| 			case 0x8862: WRITE2(0x00ca, 0x0304); break; | ||||
| 			case 0x8864: WRITE2(0x00ca, 0x030c); break; | ||||
| 			case 0x88a3: WRITE2(0x00ea, 0x0304); break; | ||||
| 			case 0x88a5: WRITE2(0x00ea, 0x030c); break; | ||||
| 			default: return 2; | ||||
| 			} | ||||
| 
 | ||||
| 			NEXT(2, 2) /* all decoded codepoints are pairs, above. */ | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Hye-Shik Chang
						Hye-Shik Chang