mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	Update big5hkscs codec to conform to the HKSCS:2004 revision.
This commit is contained in:
		
							parent
							
								
									27d339446a
								
							
						
					
					
						commit
						01612e7dec
					
				
					 6 changed files with 1868 additions and 1777 deletions
				
			
		|  | @ -64,8 +64,10 @@ | ||||||
| "\xab\x96\xe7\x9a\x84\xe5\x95\x8f\xe9\xa1\x8c\xe5\xb0\xb1\xe6\x98" | "\xab\x96\xe7\x9a\x84\xe5\x95\x8f\xe9\xa1\x8c\xe5\xb0\xb1\xe6\x98" | ||||||
| "\xaf\x3a\x0a\x0a"), | "\xaf\x3a\x0a\x0a"), | ||||||
| 'big5hkscs': ( | 'big5hkscs': ( | ||||||
| "\x88\x45\x88\x5c\x8a\x73\x8b\xda\x8d\xd8\x0a", | "\x88\x45\x88\x5c\x8a\x73\x8b\xda\x8d\xd8\x0a\x88\x66\x88\x62\x88" | ||||||
| "\xf0\xa0\x84\x8c\xc4\x9a\xe9\xb5\xae\xe7\xbd\x93\xe6\xb4\x86\x0a"), | "\xa7\x20\x88\xa7\x88\xa3\x0a", | ||||||
|  | "\xf0\xa0\x84\x8c\xc4\x9a\xe9\xb5\xae\xe7\xbd\x93\xe6\xb4\x86\x0a" | ||||||
|  | "\xc3\x8a\xc3\x8a\xcc\x84\xc3\xaa\x20\xc3\xaa\xc3\xaa\xcc\x84\x0a"), | ||||||
| 'cp949': ( | 'cp949': ( | ||||||
| "\x8c\x63\xb9\xe6\xb0\xa2\xc7\xcf\x20\xbc\x84\xbd\xc3\xc4\xdd\xb6" | "\x8c\x63\xb9\xe6\xb0\xa2\xc7\xcf\x20\xbc\x84\xbd\xc3\xc4\xdd\xb6" | ||||||
| "\xf3\x0a\x0a\xa8\xc0\xa8\xc0\xb3\xb3\x21\x21\x20\xec\xd7\xce\xfa" | "\xf3\x0a\x0a\xa8\xc0\xa8\xc0\xb3\xb3\x21\x21\x20\xec\xd7\xce\xfa" | ||||||
|  |  | ||||||
|  | @ -11,10 +11,11 @@ | ||||||
| class TestBig5HKSCSMap(test_multibytecodec_support.TestBase_Mapping, | class TestBig5HKSCSMap(test_multibytecodec_support.TestBase_Mapping, | ||||||
|                        unittest.TestCase): |                        unittest.TestCase): | ||||||
|     encoding = 'big5hkscs' |     encoding = 'big5hkscs' | ||||||
|     mapfileurl = 'http://people.freebsd.org/~perky/i18n/BIG5HKSCS.TXT' |     mapfileurl = 'http://people.freebsd.org/~perky/i18n/BIG5HKSCS-2004.TXT' | ||||||
| 
 | 
 | ||||||
| def test_main(): | def test_main(): | ||||||
|     test_support.run_unittest(__name__) |     test_support.run_unittest(__name__) | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|  |     test_support.use_resources = ['urlfetch'] | ||||||
|     test_main() |     test_main() | ||||||
|  |  | ||||||
|  | @ -323,9 +323,17 @@ def test_mapping_supplemental(self): | ||||||
| 
 | 
 | ||||||
|     def _testpoint(self, csetch, unich): |     def _testpoint(self, csetch, unich): | ||||||
|         if (csetch, unich) not in self.pass_enctest: |         if (csetch, unich) not in self.pass_enctest: | ||||||
|  |             try: | ||||||
|                 self.assertEqual(unich.encode(self.encoding), csetch) |                 self.assertEqual(unich.encode(self.encoding), csetch) | ||||||
|  |             except UnicodeError, exc: | ||||||
|  |                 self.fail('Encoding failed while testing %s -> %s: %s' % ( | ||||||
|  |                             repr(unich), repr(csetch), exc.reason)) | ||||||
|         if (csetch, unich) not in self.pass_dectest: |         if (csetch, unich) not in self.pass_dectest: | ||||||
|             self.assertEqual(unicode(csetch, self.encoding), unich) |             try: | ||||||
|  |                 self.assertEqual(csetch.decode(self.encoding), unich) | ||||||
|  |             except UnicodeError, exc: | ||||||
|  |                 self.fail('Decoding failed while testing %s -> %s: %s' % ( | ||||||
|  |                             repr(csetch), repr(unich), exc.reason)) | ||||||
| 
 | 
 | ||||||
| def load_teststring(encoding): | def load_teststring(encoding): | ||||||
|     from test import cjkencodings_test |     from test import cjkencodings_test | ||||||
|  |  | ||||||
|  | @ -1118,6 +1118,8 @@ Library | ||||||
| Extension Modules | Extension Modules | ||||||
| ----------------- | ----------------- | ||||||
| 
 | 
 | ||||||
|  | - Updated ``big5hkscs`` codec to the HKSCS revision of 2004. | ||||||
|  | 
 | ||||||
| - #1940: make it possible to use curses.filter() before curses.initscr() | - #1940: make it possible to use curses.filter() before curses.initscr() | ||||||
|   as the documentation says. |   as the documentation says. | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -26,6 +26,16 @@ CODEC_INIT(big5hkscs) | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * HKSCS 2004 defines four Unicode pair -> big5hkscs mappings: | ||||||
|  |  *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866) | ||||||
|  |  *  U+00CA U+030C -> 8864 | ||||||
|  |  *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7) | ||||||
|  |  *  U+00EA U+030C -> 88a5 | ||||||
|  |  * These are handled not by mapping tables but by hand-written code. | ||||||
|  |  */ | ||||||
|  | static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5}; | ||||||
|  | 
 | ||||||
| ENCODER(big5hkscs) | ENCODER(big5hkscs) | ||||||
| { | { | ||||||
| 	while (inleft > 0) { | 	while (inleft > 0) { | ||||||
|  | @ -46,7 +56,27 @@ ENCODER(big5hkscs) | ||||||
| 		REQUIRE_OUTBUF(2) | 		REQUIRE_OUTBUF(2) | ||||||
| 
 | 
 | ||||||
| 		if (c < 0x10000) { | 		if (c < 0x10000) { | ||||||
| 			TRYMAP_ENC(big5hkscs_bmp, code, c); | 			TRYMAP_ENC(big5hkscs_bmp, code, c) { | ||||||
|  | 				if (code == MULTIC) { | ||||||
|  | 					if (inleft >= 2 && | ||||||
|  | 					    ((c & 0xffdf) == 0x00ca) && | ||||||
|  | 					    (((*inbuf)[1] & 0xfff7) == 0x0304)) { | ||||||
|  | 						code = big5hkscs_pairenc_table[ | ||||||
|  | 							((c >> 4) | | ||||||
|  | 							 ((*inbuf)[1] >> 3)) & 3]; | ||||||
|  | 						insize = 2; | ||||||
|  | 					} | ||||||
|  | 					else if (inleft < 2 && | ||||||
|  | 						 !(flags & MBENC_FLUSH)) | ||||||
|  | 						return MBERR_TOOFEW; | ||||||
|  | 					else { | ||||||
|  | 						if (c == 0xca) | ||||||
|  | 							code = 0x8866; | ||||||
|  | 						else /* c == 0xea */ | ||||||
|  | 							code = 0x88a7; | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
| 			else TRYMAP_ENC(big5, code, c); | 			else TRYMAP_ENC(big5, code, c); | ||||||
| 			else return 1; | 			else return 1; | ||||||
| 		} | 		} | ||||||
|  | @ -67,7 +97,7 @@ ENCODER(big5hkscs) | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define BH2S(c1, c2) (((c1) - 0x88) * (0xfe - 0x40 + 1) + ((c2) - 0x40)) | #define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40)) | ||||||
| 
 | 
 | ||||||
| DECODER(big5hkscs) | DECODER(big5hkscs) | ||||||
| { | { | ||||||
|  | @ -96,19 +126,19 @@ hkscsdec:	TRYMAP_DEC(big5hkscs, decoded, c, IN2) { | ||||||
| 			int s = BH2S(c, IN2); | 			int s = BH2S(c, IN2); | ||||||
| 			const unsigned char *hintbase; | 			const unsigned char *hintbase; | ||||||
| 
 | 
 | ||||||
| 			assert(0x88 <= c && c <= 0xfe); | 			assert(0x87 <= c && c <= 0xfe); | ||||||
| 			assert(0x40 <= IN2 && IN2 <= 0xfe); | 			assert(0x40 <= IN2 && IN2 <= 0xfe); | ||||||
| 
 | 
 | ||||||
| 			if (BH2S(0x88, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { | 			if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { | ||||||
| 				hintbase = big5hkscs_phint_0; | 				hintbase = big5hkscs_phint_0; | ||||||
| 				s -= BH2S(0x88, 0x40); | 				s -= BH2S(0x87, 0x40); | ||||||
| 			} | 			} | ||||||
| 			else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ | 			else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ | ||||||
| 				hintbase = big5hkscs_phint_11939; | 				hintbase = big5hkscs_phint_12130; | ||||||
| 				s -= BH2S(0xc6, 0xa1); | 				s -= BH2S(0xc6, 0xa1); | ||||||
| 			} | 			} | ||||||
| 			else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ | 			else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ | ||||||
| 				hintbase = big5hkscs_phint_21733; | 				hintbase = big5hkscs_phint_21924; | ||||||
| 				s -= BH2S(0xf9, 0xd6); | 				s -= BH2S(0xf9, 0xd6); | ||||||
| 			} | 			} | ||||||
| 			else | 			else | ||||||
|  | @ -123,7 +153,17 @@ hkscsdec:	TRYMAP_DEC(big5hkscs, decoded, c, IN2) { | ||||||
| 				NEXT(2, 1) | 				NEXT(2, 1) | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		else return 2; | 		else { | ||||||
|  | 			switch ((c << 8) | IN2) { | ||||||
|  | 			case 0x8862: WRITE2(0x00ca, 0x0304); break; | ||||||
|  | 			case 0x8864: WRITE2(0x00ca, 0x030c); break; | ||||||
|  | 			case 0x88a3: WRITE2(0x00ea, 0x0304); break; | ||||||
|  | 			case 0x88a5: WRITE2(0x00ea, 0x030c); break; | ||||||
|  | 			default: return 2; | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			NEXT(2, 2) /* all decoded codepoints are pairs, above. */ | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
|  |  | ||||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Hye-Shik Chang
						Hye-Shik Chang