From 5dfe3bb2d9416ab3e77834bd65b6f575cbb49009 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 3 Jun 2011 23:34:09 +0200 Subject: [PATCH 1/2] Issue #12016: Reindent decoders of HK and JP codecs --- Modules/cjkcodecs/_codecs_hk.c | 87 +++++++++++++++++----------------- Modules/cjkcodecs/_codecs_jp.c | 10 ++-- 2 files changed, 49 insertions(+), 48 deletions(-) diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index aaf103db888..558a42f89c8 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -115,55 +115,56 @@ DECODER(big5hkscs) REQUIRE_INBUF(2) - if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1)) - goto hkscsdec; - - TRYMAP_DEC(big5, **outbuf, c, IN2) { - NEXT(2, 1) + if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) { + TRYMAP_DEC(big5, **outbuf, c, IN2) { + NEXT(2, 1) + continue; + } } - else -hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) { - int s = BH2S(c, IN2); - const unsigned char *hintbase; - assert(0x87 <= c && c <= 0xfe); - assert(0x40 <= IN2 && IN2 <= 0xfe); + TRYMAP_DEC(big5hkscs, decoded, c, IN2) + { + int s = BH2S(c, IN2); + const unsigned char *hintbase; - if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { - hintbase = big5hkscs_phint_0; - s -= BH2S(0x87, 0x40); - } - else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ - hintbase = big5hkscs_phint_12130; - s -= BH2S(0xc6, 0xa1); - } - else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ - hintbase = big5hkscs_phint_21924; - s -= BH2S(0xf9, 0xd6); - } - else - return MBERR_INTERNAL; + assert(0x87 <= c && c <= 0xfe); + assert(0x40 <= IN2 && IN2 <= 0xfe); - if (hintbase[s >> 3] & (1 << (s & 7))) { - WRITEUCS4(decoded | 0x20000) - NEXT_IN(2) - } - else { - OUT1(decoded) - NEXT(2, 1) - } - } - else { - switch ((c << 8) | IN2) { - case 0x8862: WRITE2(0x00ca, 0x0304); break; - case 0x8864: WRITE2(0x00ca, 0x030c); break; - case 0x88a3: WRITE2(0x00ea, 0x0304); break; - case 0x88a5: WRITE2(0x00ea, 0x030c); break; - default: return 2; - } + if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { + hintbase = big5hkscs_phint_0; + s -= BH2S(0x87, 0x40); + } + else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){ + hintbase = big5hkscs_phint_12130; + s -= BH2S(0xc6, 0xa1); + } + else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){ + hintbase = big5hkscs_phint_21924; + s -= BH2S(0xf9, 0xd6); + } + else + return MBERR_INTERNAL; - NEXT(2, 2) /* all decoded codepoints are pairs, above. */ + if (hintbase[s >> 3] & (1 << (s & 7))) { + WRITEUCS4(decoded | 0x20000) + NEXT_IN(2) + } + else { + OUT1(decoded) + NEXT(2, 1) + } + continue; } + + switch ((c << 8) | IN2) { + case 0x8862: WRITE2(0x00ca, 0x0304); break; + case 0x8864: WRITE2(0x00ca, 0x030c); break; + case 0x88a3: WRITE2(0x00ea, 0x0304); break; + case 0x88a5: WRITE2(0x00ea, 0x030c); break; + default: return 2; + } + + NEXT(2, 2) /* all decoded codepoints are pairs, above. */ } return 0; diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index 901d3bee47e..a05e01b32e5 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -371,11 +371,11 @@ DECODER(euc_jp) REQUIRE_OUTBUF(1) - if (c < 0x80) { - OUT1(c) - NEXT(1, 1) - continue; - } + if (c < 0x80) { + OUT1(c) + NEXT(1, 1) + continue; + } if (c == 0x8e) { /* JIS X 0201 half-width katakana */ From f5048a40752b8a0197ef9e4b6ace8dab75479fd7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 3 Jun 2011 23:44:39 +0200 Subject: [PATCH 2/2] Issue #12016: Add test_errorhandle() to TestBase_Mapping of test_multibytecodec_support. Improve also error message of the test_errorhandle() of TestBase. --- Lib/test/test_multibytecodec_support.py | 36 ++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index 77359766156..ef63b6934d5 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -58,11 +58,16 @@ def test_errorhandle(self): result = func(source, scheme)[0] if func is self.decode: self.assertTrue(type(result) is str, type(result)) + self.assertEqual(result, expected, + '%a.decode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) else: self.assertTrue(type(result) is bytes, type(result)) - self.assertEqual(result, expected, - '%a.decode(%r)=%a != %a' - % (source, self.encoding, result, expected)) + self.assertEqual(result, expected, + '%a.encode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) else: self.assertRaises(UnicodeError, func, source, scheme) @@ -279,6 +284,7 @@ class TestBase_Mapping(unittest.TestCase): pass_enctest = [] pass_dectest = [] supmaps = [] + codectests = [] def __init__(self, *args, **kw): unittest.TestCase.__init__(self, *args, **kw) @@ -348,6 +354,30 @@ def _testpoint(self, csetch, unich): if (csetch, unich) not in self.pass_dectest: self.assertEqual(str(csetch, self.encoding), unich) + def test_errorhandle(self): + for source, scheme, expected in self.codectests: + if isinstance(source, bytes): + func = source.decode + else: + func = source.encode + if expected: + if isinstance(source, bytes): + result = func(self.encoding, scheme) + self.assertTrue(type(result) is str, type(result)) + self.assertEqual(result, expected, + '%a.decode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) + else: + result = func(self.encoding, scheme) + self.assertTrue(type(result) is bytes, type(result)) + self.assertEqual(result, expected, + '%a.encode(%r, %r)=%a != %a' + % (source, self.encoding, scheme, result, + expected)) + else: + self.assertRaises(UnicodeError, func, self.encoding, scheme) + def load_teststring(name): dir = os.path.join(os.path.dirname(__file__), 'cjkencodings') with open(os.path.join(dir, name + '.txt'), 'rb') as f: