mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	Fix warnings when using -Wimplicit-fallthrough compiler flag. Annotate explicitly "fall through" switch cases with a new _Py_FALLTHROUGH macro which uses __attribute__((fallthrough)) if available. Replace "fall through" comments with _Py_FALLTHROUGH. Add _Py__has_attribute() macro. No longer define __has_attribute() macro if it's not defined. Move also _Py__has_builtin() at the top of pyport.h. Co-Authored-By: Nikita Sobolev <mail@sobolevn.me>
		
			
				
	
	
		
			1150 lines
		
	
	
	
		
			35 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1150 lines
		
	
	
	
		
			35 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.
 | 
						|
 *
 | 
						|
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 | 
						|
 */
 | 
						|
 | 
						|
#define USING_IMPORTED_MAPS
 | 
						|
#define USING_BINARY_PAIR_SEARCH
 | 
						|
#define EXTERN_JISX0213_PAIR
 | 
						|
#define EMULATE_JISX0213_2000_ENCODE_INVALID MAP_UNMAPPABLE
 | 
						|
#define EMULATE_JISX0213_2000_DECODE_INVALID MAP_UNMAPPABLE
 | 
						|
 | 
						|
#define CJK_MOD_SPECIFIC_STATE                  \
 | 
						|
    /* kr */                                    \
 | 
						|
    const encode_map *cp949_encmap;             \
 | 
						|
    const decode_map *ksx1001_decmap;           \
 | 
						|
                                                \
 | 
						|
    /* jp */                                    \
 | 
						|
    const encode_map *jisxcommon_encmap;        \
 | 
						|
    const decode_map *jisx0208_decmap;          \
 | 
						|
    const decode_map *jisx0212_decmap;          \
 | 
						|
    const encode_map *jisx0213_bmp_encmap;      \
 | 
						|
    const decode_map *jisx0213_1_bmp_decmap;    \
 | 
						|
    const decode_map *jisx0213_2_bmp_decmap;    \
 | 
						|
    const encode_map *jisx0213_emp_encmap;      \
 | 
						|
    const decode_map *jisx0213_1_emp_decmap;    \
 | 
						|
    const decode_map *jisx0213_2_emp_decmap;    \
 | 
						|
                                                \
 | 
						|
    /* cn */                                    \
 | 
						|
    const encode_map *gbcommon_encmap;          \
 | 
						|
    const decode_map *gb2312_decmap;
 | 
						|
 | 
						|
 | 
						|
#include "cjkcodecs.h"
 | 
						|
#include "alg_jisx0201.h"
 | 
						|
#include "emu_jisx0213_2000.h"
 | 
						|
#include "mappings_jisx0213_pair.h"
 | 
						|
 | 
						|
/* STATE
 | 
						|
 | 
						|
   state->c[0-3]
 | 
						|
 | 
						|
    00000000
 | 
						|
    ||^^^^^|
 | 
						|
    |+-----+----  G0-3 Character Set
 | 
						|
    +-----------  Is G0-3 double byte?
 | 
						|
 | 
						|
   state->c[4]
 | 
						|
 | 
						|
    00000000
 | 
						|
          ||
 | 
						|
          |+----  Locked-Shift?
 | 
						|
          +-----  ESC Throughout
 | 
						|
*/
 | 
						|
 | 
						|
#define ESC                     0x1B
 | 
						|
#define SO                      0x0E
 | 
						|
#define SI                      0x0F
 | 
						|
#define LF                      0x0A
 | 
						|
 | 
						|
#define MAX_ESCSEQLEN           16
 | 
						|
 | 
						|
#define CHARSET_ISO8859_1       'A'
 | 
						|
#define CHARSET_ASCII           'B'
 | 
						|
#define CHARSET_ISO8859_7       'F'
 | 
						|
#define CHARSET_JISX0201_K      'I'
 | 
						|
#define CHARSET_JISX0201_R      'J'
 | 
						|
 | 
						|
#define CHARSET_GB2312          ('A'|CHARSET_DBCS)
 | 
						|
#define CHARSET_JISX0208        ('B'|CHARSET_DBCS)
 | 
						|
#define CHARSET_KSX1001         ('C'|CHARSET_DBCS)
 | 
						|
#define CHARSET_JISX0212        ('D'|CHARSET_DBCS)
 | 
						|
#define CHARSET_GB2312_8565     ('E'|CHARSET_DBCS)
 | 
						|
#define CHARSET_CNS11643_1      ('G'|CHARSET_DBCS)
 | 
						|
#define CHARSET_CNS11643_2      ('H'|CHARSET_DBCS)
 | 
						|
#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
 | 
						|
#define CHARSET_JISX0213_2      ('P'|CHARSET_DBCS)
 | 
						|
#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
 | 
						|
#define CHARSET_JISX0208_O      ('@'|CHARSET_DBCS)
 | 
						|
 | 
						|
#define CHARSET_DBCS            0x80
 | 
						|
#define ESCMARK(mark)           ((mark) & 0x7f)
 | 
						|
 | 
						|
#define IS_ESCEND(c)    (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
 | 
						|
#define IS_ISO2022ESC(c2) \
 | 
						|
        ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
 | 
						|
         (c2) == '.' || (c2) == '&')
 | 
						|
    /* this is not a complete list of ISO-2022 escape sequence headers.
 | 
						|
     * but, it's enough to implement CJK instances of iso-2022. */
 | 
						|
 | 
						|
#define MAP_UNMAPPABLE          0xFFFF
 | 
						|
#define MAP_MULTIPLE_AVAIL      0xFFFE /* for JIS X 0213 */
 | 
						|
 | 
						|
#define F_SHIFTED               0x01
 | 
						|
#define F_ESCTHROUGHOUT         0x02
 | 
						|
 | 
						|
#define STATE_SETG(dn, v)       do { ((state)->c[dn]) = (v); } while (0)
 | 
						|
#define STATE_GETG(dn)          ((state)->c[dn])
 | 
						|
 | 
						|
#define STATE_G0                STATE_GETG(0)
 | 
						|
#define STATE_G1                STATE_GETG(1)
 | 
						|
#define STATE_G2                STATE_GETG(2)
 | 
						|
#define STATE_G3                STATE_GETG(3)
 | 
						|
#define STATE_SETG0(v)          STATE_SETG(0, v)
 | 
						|
#define STATE_SETG1(v)          STATE_SETG(1, v)
 | 
						|
#define STATE_SETG2(v)          STATE_SETG(2, v)
 | 
						|
#define STATE_SETG3(v)          STATE_SETG(3, v)
 | 
						|
 | 
						|
#define STATE_SETFLAG(f)        do { ((state)->c[4]) |= (f); } while (0)
 | 
						|
#define STATE_GETFLAG(f)        ((state)->c[4] & (f))
 | 
						|
#define STATE_CLEARFLAG(f)      do { ((state)->c[4]) &= ~(f); } while (0)
 | 
						|
#define STATE_CLEARFLAGS()      do { ((state)->c[4]) = 0; } while (0)
 | 
						|
 | 
						|
#define ISO2022_CONFIG          ((const struct iso2022_config *)(codec->config))
 | 
						|
#define CONFIG_ISSET(flag)      (ISO2022_CONFIG->flags & (flag))
 | 
						|
#define CONFIG_DESIGNATIONS     (ISO2022_CONFIG->designations)
 | 
						|
 | 
						|
/* iso2022_config.flags */
 | 
						|
#define NO_SHIFT                0x01
 | 
						|
#define USE_G2                  0x02
 | 
						|
#define USE_JISX0208_EXT        0x04
 | 
						|
 | 
						|
/*-*- internal data structures -*-*/
 | 
						|
 | 
						|
typedef int (*iso2022_init_func)(const MultibyteCodec *codec);
 | 
						|
typedef Py_UCS4 (*iso2022_decode_func)(const MultibyteCodec *codec,
 | 
						|
                                       const unsigned char *data);
 | 
						|
typedef DBCHAR (*iso2022_encode_func)(const MultibyteCodec *codec,
 | 
						|
                                      const Py_UCS4 *data,
 | 
						|
                                      Py_ssize_t *length);
 | 
						|
 | 
						|
struct iso2022_designation {
 | 
						|
    unsigned char mark;
 | 
						|
    unsigned char plane;
 | 
						|
    unsigned char width;
 | 
						|
    iso2022_init_func initializer;
 | 
						|
    iso2022_decode_func decoder;
 | 
						|
    iso2022_encode_func encoder;
 | 
						|
};
 | 
						|
 | 
						|
struct iso2022_config {
 | 
						|
    int flags;
 | 
						|
    const struct iso2022_designation *designations; /* non-ascii desigs */
 | 
						|
};
 | 
						|
 | 
						|
/*-*- iso-2022 codec implementation -*-*/
 | 
						|
 | 
						|
CODEC_INIT(iso2022)
 | 
						|
{
 | 
						|
    const struct iso2022_designation *desig;
 | 
						|
    for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++) {
 | 
						|
        if (desig->initializer != NULL && desig->initializer(codec) != 0) {
 | 
						|
            return -1;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
ENCODER_INIT(iso2022)
 | 
						|
{
 | 
						|
    STATE_CLEARFLAGS();
 | 
						|
    STATE_SETG0(CHARSET_ASCII);
 | 
						|
    STATE_SETG1(CHARSET_ASCII);
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
ENCODER_RESET(iso2022)
 | 
						|
{
 | 
						|
    if (STATE_GETFLAG(F_SHIFTED)) {
 | 
						|
        WRITEBYTE1(SI);
 | 
						|
        NEXT_OUT(1);
 | 
						|
        STATE_CLEARFLAG(F_SHIFTED);
 | 
						|
    }
 | 
						|
    if (STATE_G0 != CHARSET_ASCII) {
 | 
						|
        WRITEBYTE3(ESC, '(', 'B');
 | 
						|
        NEXT_OUT(3);
 | 
						|
        STATE_SETG0(CHARSET_ASCII);
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
ENCODER(iso2022)
 | 
						|
{
 | 
						|
    while (*inpos < inlen) {
 | 
						|
        const struct iso2022_designation *dsg;
 | 
						|
        DBCHAR encoded;
 | 
						|
        Py_UCS4 c = INCHAR1;
 | 
						|
        Py_ssize_t insize;
 | 
						|
 | 
						|
        if (c < 0x80) {
 | 
						|
            if (STATE_G0 != CHARSET_ASCII) {
 | 
						|
                WRITEBYTE3(ESC, '(', 'B');
 | 
						|
                STATE_SETG0(CHARSET_ASCII);
 | 
						|
                NEXT_OUT(3);
 | 
						|
            }
 | 
						|
            if (STATE_GETFLAG(F_SHIFTED)) {
 | 
						|
                WRITEBYTE1(SI);
 | 
						|
                STATE_CLEARFLAG(F_SHIFTED);
 | 
						|
                NEXT_OUT(1);
 | 
						|
            }
 | 
						|
            WRITEBYTE1((unsigned char)c);
 | 
						|
            NEXT(1, 1);
 | 
						|
            continue;
 | 
						|
        }
 | 
						|
 | 
						|
        insize = 1;
 | 
						|
 | 
						|
        encoded = MAP_UNMAPPABLE;
 | 
						|
        for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
 | 
						|
            Py_UCS4 buf[2] = {c, 0};
 | 
						|
            Py_ssize_t length = 1;
 | 
						|
            encoded = dsg->encoder(codec, buf, &length);
 | 
						|
            if (encoded == MAP_MULTIPLE_AVAIL) {
 | 
						|
                /* this implementation won't work for pair
 | 
						|
                 * of non-bmp characters. */
 | 
						|
                if (inlen - *inpos < 2) {
 | 
						|
                    if (!(flags & MBENC_FLUSH))
 | 
						|
                        return MBERR_TOOFEW;
 | 
						|
                    length = -1;
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    buf[1] = INCHAR2;
 | 
						|
                    length = 2;
 | 
						|
                }
 | 
						|
                encoded = dsg->encoder(codec, buf, &length);
 | 
						|
                if (encoded != MAP_UNMAPPABLE) {
 | 
						|
                    insize = length;
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            else if (encoded != MAP_UNMAPPABLE)
 | 
						|
                break;
 | 
						|
        }
 | 
						|
 | 
						|
        if (!dsg->mark)
 | 
						|
            return 1;
 | 
						|
        assert(dsg->width == 1 || dsg->width == 2);
 | 
						|
 | 
						|
        switch (dsg->plane) {
 | 
						|
        case 0: /* G0 */
 | 
						|
            if (STATE_GETFLAG(F_SHIFTED)) {
 | 
						|
                WRITEBYTE1(SI);
 | 
						|
                STATE_CLEARFLAG(F_SHIFTED);
 | 
						|
                NEXT_OUT(1);
 | 
						|
            }
 | 
						|
            if (STATE_G0 != dsg->mark) {
 | 
						|
                if (dsg->width == 1) {
 | 
						|
                    WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark));
 | 
						|
                    STATE_SETG0(dsg->mark);
 | 
						|
                    NEXT_OUT(3);
 | 
						|
                }
 | 
						|
                else if (dsg->mark == CHARSET_JISX0208) {
 | 
						|
                    WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark));
 | 
						|
                    STATE_SETG0(dsg->mark);
 | 
						|
                    NEXT_OUT(3);
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    WRITEBYTE4(ESC, '$', '(',
 | 
						|
                        ESCMARK(dsg->mark));
 | 
						|
                    STATE_SETG0(dsg->mark);
 | 
						|
                    NEXT_OUT(4);
 | 
						|
                }
 | 
						|
            }
 | 
						|
            break;
 | 
						|
        case 1: /* G1 */
 | 
						|
            if (STATE_G1 != dsg->mark) {
 | 
						|
                if (dsg->width == 1) {
 | 
						|
                    WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark));
 | 
						|
                    STATE_SETG1(dsg->mark);
 | 
						|
                    NEXT_OUT(3);
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark));
 | 
						|
                    STATE_SETG1(dsg->mark);
 | 
						|
                    NEXT_OUT(4);
 | 
						|
                }
 | 
						|
            }
 | 
						|
            if (!STATE_GETFLAG(F_SHIFTED)) {
 | 
						|
                WRITEBYTE1(SO);
 | 
						|
                STATE_SETFLAG(F_SHIFTED);
 | 
						|
                NEXT_OUT(1);
 | 
						|
            }
 | 
						|
            break;
 | 
						|
        default: /* G2 and G3 is not supported: no encoding in
 | 
						|
                  * CJKCodecs are using them yet */
 | 
						|
            return MBERR_INTERNAL;
 | 
						|
        }
 | 
						|
 | 
						|
        if (dsg->width == 1) {
 | 
						|
            WRITEBYTE1((unsigned char)encoded);
 | 
						|
            NEXT_OUT(1);
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            WRITEBYTE2(encoded >> 8, encoded & 0xff);
 | 
						|
            NEXT_OUT(2);
 | 
						|
        }
 | 
						|
        NEXT_INCHAR(insize);
 | 
						|
    }
 | 
						|
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
DECODER_INIT(iso2022)
 | 
						|
{
 | 
						|
    STATE_CLEARFLAGS();
 | 
						|
    STATE_SETG0(CHARSET_ASCII);
 | 
						|
    STATE_SETG1(CHARSET_ASCII);
 | 
						|
    STATE_SETG2(CHARSET_ASCII);
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
DECODER_RESET(iso2022)
 | 
						|
{
 | 
						|
    STATE_SETG0(CHARSET_ASCII);
 | 
						|
    STATE_CLEARFLAG(F_SHIFTED);
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static Py_ssize_t
 | 
						|
iso2022processesc(const MultibyteCodec *codec, MultibyteCodec_State *state,
 | 
						|
                  const unsigned char **inbuf, Py_ssize_t *inleft)
 | 
						|
{
 | 
						|
    unsigned char charset, designation;
 | 
						|
    Py_ssize_t i, esclen = 0;
 | 
						|
 | 
						|
    for (i = 1;i < MAX_ESCSEQLEN;i++) {
 | 
						|
        if (i >= *inleft)
 | 
						|
            return MBERR_TOOFEW;
 | 
						|
        if (IS_ESCEND((*inbuf)[i])) {
 | 
						|
            esclen = i + 1;
 | 
						|
            break;
 | 
						|
        }
 | 
						|
        else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
 | 
						|
                 (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') {
 | 
						|
            i += 2;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    switch (esclen) {
 | 
						|
    case 0:
 | 
						|
        return 1; /* unterminated escape sequence */
 | 
						|
    case 3:
 | 
						|
        if (INBYTE2 == '$') {
 | 
						|
            charset = INBYTE3 | CHARSET_DBCS;
 | 
						|
            designation = 0;
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            charset = INBYTE3;
 | 
						|
            if (INBYTE2 == '(')
 | 
						|
                designation = 0;
 | 
						|
            else if (INBYTE2 == ')')
 | 
						|
                designation = 1;
 | 
						|
            else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.')
 | 
						|
                designation = 2;
 | 
						|
            else
 | 
						|
                return 3;
 | 
						|
        }
 | 
						|
        break;
 | 
						|
    case 4:
 | 
						|
        if (INBYTE2 != '$')
 | 
						|
            return 4;
 | 
						|
 | 
						|
        charset = INBYTE4 | CHARSET_DBCS;
 | 
						|
        if (INBYTE3 == '(')
 | 
						|
            designation = 0;
 | 
						|
        else if (INBYTE3 == ')')
 | 
						|
            designation = 1;
 | 
						|
        else
 | 
						|
            return 4;
 | 
						|
        break;
 | 
						|
    case 6: /* designation with prefix */
 | 
						|
        if (CONFIG_ISSET(USE_JISX0208_EXT) &&
 | 
						|
            (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&
 | 
						|
            (*inbuf)[5] == 'B') {
 | 
						|
            charset = 'B' | CHARSET_DBCS;
 | 
						|
            designation = 0;
 | 
						|
        }
 | 
						|
        else
 | 
						|
            return 6;
 | 
						|
        break;
 | 
						|
    default:
 | 
						|
        return esclen;
 | 
						|
    }
 | 
						|
 | 
						|
    /* raise error when the charset is not designated for this encoding */
 | 
						|
    if (charset != CHARSET_ASCII) {
 | 
						|
        const struct iso2022_designation *dsg;
 | 
						|
 | 
						|
        for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
 | 
						|
            if (dsg->mark == charset)
 | 
						|
                break;
 | 
						|
        }
 | 
						|
        if (!dsg->mark)
 | 
						|
            return esclen;
 | 
						|
    }
 | 
						|
 | 
						|
    STATE_SETG(designation, charset);
 | 
						|
    *inleft -= esclen;
 | 
						|
    (*inbuf) += esclen;
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
#define ISO8859_7_DECODE(c, writer)                                \
 | 
						|
    if ((c) < 0xa0) {                                              \
 | 
						|
        OUTCHAR(c);                                                \
 | 
						|
    } else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) { \
 | 
						|
        OUTCHAR(c);                                                \
 | 
						|
    } else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||       \
 | 
						|
             (0xbffffd77L & (1L << ((c)-0xb4))))) {                \
 | 
						|
        OUTCHAR(0x02d0 + (c));                                     \
 | 
						|
    } else if ((c) == 0xa1) {                                      \
 | 
						|
        OUTCHAR(0x2018);                                           \
 | 
						|
    } else if ((c) == 0xa2) {                                      \
 | 
						|
        OUTCHAR(0x2019);                                           \
 | 
						|
    } else if ((c) == 0xaf) {                                      \
 | 
						|
        OUTCHAR(0x2015);                                           \
 | 
						|
    }
 | 
						|
 | 
						|
static Py_ssize_t
 | 
						|
iso2022processg2(const MultibyteCodec *codec, MultibyteCodec_State *state,
 | 
						|
                 const unsigned char **inbuf, Py_ssize_t *inleft,
 | 
						|
                 _PyUnicodeWriter *writer)
 | 
						|
{
 | 
						|
    /* not written to use encoder, decoder functions because only few
 | 
						|
     * encodings use G2 designations in CJKCodecs */
 | 
						|
    if (STATE_G2 == CHARSET_ISO8859_1) {
 | 
						|
        if (INBYTE3 < 0x80)
 | 
						|
            OUTCHAR(INBYTE3 + 0x80);
 | 
						|
        else
 | 
						|
            return 3;
 | 
						|
    }
 | 
						|
    else if (STATE_G2 == CHARSET_ISO8859_7) {
 | 
						|
        ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer)
 | 
						|
        else
 | 
						|
            return 3;
 | 
						|
    }
 | 
						|
    else if (STATE_G2 == CHARSET_ASCII) {
 | 
						|
        if (INBYTE3 & 0x80)
 | 
						|
            return 3;
 | 
						|
        else
 | 
						|
            OUTCHAR(INBYTE3);
 | 
						|
    }
 | 
						|
    else
 | 
						|
        return MBERR_INTERNAL;
 | 
						|
 | 
						|
    (*inbuf) += 3;
 | 
						|
    *inleft -= 3;
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
DECODER(iso2022)
 | 
						|
{
 | 
						|
    const struct iso2022_designation *dsgcache = NULL;
 | 
						|
 | 
						|
    while (inleft > 0) {
 | 
						|
        unsigned char c = INBYTE1;
 | 
						|
        Py_ssize_t err;
 | 
						|
 | 
						|
        if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
 | 
						|
            /* ESC throughout mode:
 | 
						|
             * for non-iso2022 escape sequences */
 | 
						|
            OUTCHAR(c); /* assume as ISO-8859-1 */
 | 
						|
            NEXT_IN(1);
 | 
						|
            if (IS_ESCEND(c)) {
 | 
						|
                STATE_CLEARFLAG(F_ESCTHROUGHOUT);
 | 
						|
            }
 | 
						|
            continue;
 | 
						|
        }
 | 
						|
 | 
						|
        switch (c) {
 | 
						|
        case ESC:
 | 
						|
            REQUIRE_INBUF(2);
 | 
						|
            if (IS_ISO2022ESC(INBYTE2)) {
 | 
						|
                err = iso2022processesc(codec, state,
 | 
						|
                                        inbuf, &inleft);
 | 
						|
                if (err != 0)
 | 
						|
                    return err;
 | 
						|
            }
 | 
						|
            else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */
 | 
						|
                REQUIRE_INBUF(3);
 | 
						|
                err = iso2022processg2(codec, state,
 | 
						|
                                       inbuf, &inleft, writer);
 | 
						|
                if (err != 0)
 | 
						|
                    return err;
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                OUTCHAR(ESC);
 | 
						|
                STATE_SETFLAG(F_ESCTHROUGHOUT);
 | 
						|
                NEXT_IN(1);
 | 
						|
            }
 | 
						|
            break;
 | 
						|
        case SI:
 | 
						|
            if (CONFIG_ISSET(NO_SHIFT))
 | 
						|
                goto bypass;
 | 
						|
            STATE_CLEARFLAG(F_SHIFTED);
 | 
						|
            NEXT_IN(1);
 | 
						|
            break;
 | 
						|
        case SO:
 | 
						|
            if (CONFIG_ISSET(NO_SHIFT))
 | 
						|
                goto bypass;
 | 
						|
            STATE_SETFLAG(F_SHIFTED);
 | 
						|
            NEXT_IN(1);
 | 
						|
            break;
 | 
						|
        case LF:
 | 
						|
            STATE_CLEARFLAG(F_SHIFTED);
 | 
						|
            OUTCHAR(LF);
 | 
						|
            NEXT_IN(1);
 | 
						|
            break;
 | 
						|
        default:
 | 
						|
            if (c < 0x20) /* C0 */
 | 
						|
                goto bypass;
 | 
						|
            else if (c >= 0x80)
 | 
						|
                return 1;
 | 
						|
            else {
 | 
						|
                const struct iso2022_designation *dsg;
 | 
						|
                unsigned char charset;
 | 
						|
                Py_UCS4 decoded;
 | 
						|
 | 
						|
                if (STATE_GETFLAG(F_SHIFTED))
 | 
						|
                    charset = STATE_G1;
 | 
						|
                else
 | 
						|
                    charset = STATE_G0;
 | 
						|
 | 
						|
                if (charset == CHARSET_ASCII) {
 | 
						|
bypass:
 | 
						|
                    OUTCHAR(c);
 | 
						|
                    NEXT_IN(1);
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
 | 
						|
                if (dsgcache != NULL &&
 | 
						|
                    dsgcache->mark == charset)
 | 
						|
                        dsg = dsgcache;
 | 
						|
                else {
 | 
						|
                    for (dsg = CONFIG_DESIGNATIONS;
 | 
						|
                         dsg->mark != charset
 | 
						|
#ifdef Py_DEBUG
 | 
						|
                            && dsg->mark != '\0'
 | 
						|
#endif
 | 
						|
                         ; dsg++)
 | 
						|
                    {
 | 
						|
                        /* noop */
 | 
						|
                    }
 | 
						|
                    assert(dsg->mark != '\0');
 | 
						|
                    dsgcache = dsg;
 | 
						|
                }
 | 
						|
 | 
						|
                REQUIRE_INBUF(dsg->width);
 | 
						|
                decoded = dsg->decoder(codec, *inbuf);
 | 
						|
                if (decoded == MAP_UNMAPPABLE)
 | 
						|
                    return dsg->width;
 | 
						|
 | 
						|
                if (decoded < 0x10000) {
 | 
						|
                    OUTCHAR(decoded);
 | 
						|
                }
 | 
						|
                else if (decoded < 0x30000) {
 | 
						|
                    OUTCHAR(decoded);
 | 
						|
                }
 | 
						|
                else { /* JIS X 0213 pairs */
 | 
						|
                    OUTCHAR2(decoded >> 16, decoded & 0xffff);
 | 
						|
                }
 | 
						|
                NEXT_IN(dsg->width);
 | 
						|
            }
 | 
						|
            break;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*-*- mapping access functions -*-*/
 | 
						|
 | 
						|
static int
 | 
						|
ksx1001_init(const MultibyteCodec *codec)
 | 
						|
{
 | 
						|
    cjkcodecs_module_state *st = codec->modstate;
 | 
						|
    if (IMPORT_MAP(kr, cp949, &st->cp949_encmap, NULL) ||
 | 
						|
        IMPORT_MAP(kr, ksx1001, NULL, &st->ksx1001_decmap))
 | 
						|
    {
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
ksx1001_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    if (TRYMAP_DEC_ST(ksx1001, u, data[0], data[1]))
 | 
						|
        return u;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
ksx1001_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
    assert(*length == 1);
 | 
						|
    if (*data < 0x10000) {
 | 
						|
        if (TRYMAP_ENC_ST(cp949, coded, *data)) {
 | 
						|
            if (!(coded & 0x8000))
 | 
						|
                return coded;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
jisx0208_init(const MultibyteCodec *codec)
 | 
						|
{
 | 
						|
    cjkcodecs_module_state *st = codec->modstate;
 | 
						|
    if (IMPORT_MAP(jp, jisxcommon, &st->jisxcommon_encmap, NULL) ||
 | 
						|
        IMPORT_MAP(jp, jisx0208, NULL, &st->jisx0208_decmap))
 | 
						|
    {
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
jisx0208_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
 | 
						|
        return 0xff3c;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0208, u, data[0], data[1]))
 | 
						|
        return u;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0208_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                 Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
    assert(*length == 1);
 | 
						|
    if (*data < 0x10000) {
 | 
						|
        if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
 | 
						|
            return 0x2140;
 | 
						|
        else if (TRYMAP_ENC_ST(jisxcommon, coded, *data)) {
 | 
						|
            if (!(coded & 0x8000))
 | 
						|
                return coded;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
jisx0212_init(const MultibyteCodec *codec)
 | 
						|
{
 | 
						|
    cjkcodecs_module_state *st = codec->modstate;
 | 
						|
    if (IMPORT_MAP(jp, jisxcommon, &st->jisxcommon_encmap, NULL) ||
 | 
						|
        IMPORT_MAP(jp, jisx0212, NULL, &st->jisx0212_decmap))
 | 
						|
    {
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
jisx0212_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    if (TRYMAP_DEC_ST(jisx0212, u, data[0], data[1]))
 | 
						|
        return u;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0212_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                 Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
    assert(*length == 1);
 | 
						|
    if (*data < 0x10000) {
 | 
						|
        if (TRYMAP_ENC_ST(jisxcommon, coded, *data)) {
 | 
						|
            if (coded & 0x8000)
 | 
						|
                return coded & 0x7fff;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
jisx0213_init(const MultibyteCodec *codec)
 | 
						|
{
 | 
						|
    cjkcodecs_module_state *st = codec->modstate;
 | 
						|
    if (jisx0208_init(codec) ||
 | 
						|
        IMPORT_MAP(jp, jisx0213_bmp, &st->jisx0213_bmp_encmap, NULL) ||
 | 
						|
        IMPORT_MAP(jp, jisx0213_1_bmp, NULL, &st->jisx0213_1_bmp_decmap) ||
 | 
						|
        IMPORT_MAP(jp, jisx0213_2_bmp, NULL, &st->jisx0213_2_bmp_decmap) ||
 | 
						|
        IMPORT_MAP(jp, jisx0213_emp, &st->jisx0213_emp_encmap, NULL) ||
 | 
						|
        IMPORT_MAP(jp, jisx0213_1_emp, NULL, &st->jisx0213_1_emp_decmap) ||
 | 
						|
        IMPORT_MAP(jp, jisx0213_2_emp, NULL, &st->jisx0213_2_emp_decmap) ||
 | 
						|
        IMPORT_MAP(jp, jisx0213_pair,
 | 
						|
                   &jisx0213_pair_encmap, &jisx0213_pair_decmap))
 | 
						|
    {
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
#define config ((void *)2000)
 | 
						|
static Py_UCS4
 | 
						|
jisx0213_2000_1_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    EMULATE_JISX0213_2000_DECODE_PLANE1(config, u, data[0], data[1])
 | 
						|
    else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
 | 
						|
        return 0xff3c;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0208, u, data[0], data[1]))
 | 
						|
        ;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0213_1_bmp, u, data[0], data[1]))
 | 
						|
        ;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0213_1_emp, u, data[0], data[1]))
 | 
						|
        u |= 0x20000;
 | 
						|
    else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]))
 | 
						|
        ;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    return u;
 | 
						|
}
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
jisx0213_2000_2_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(config, u, data[0], data[1])
 | 
						|
    if (TRYMAP_DEC_ST(jisx0213_2_bmp, u, data[0], data[1]))
 | 
						|
        ;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0213_2_emp, u, data[0], data[1]))
 | 
						|
        u |= 0x20000;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    return u;
 | 
						|
}
 | 
						|
#undef config
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
jisx0213_2004_1_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
 | 
						|
        return 0xff3c;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0208, u, data[0], data[1]))
 | 
						|
        ;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0213_1_bmp, u, data[0], data[1]))
 | 
						|
        ;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0213_1_emp, u, data[0], data[1]))
 | 
						|
        u |= 0x20000;
 | 
						|
    else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]))
 | 
						|
        ;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    return u;
 | 
						|
}
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
jisx0213_2004_2_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    if (TRYMAP_DEC_ST(jisx0213_2_bmp, u, data[0], data[1]))
 | 
						|
        ;
 | 
						|
    else if (TRYMAP_DEC_ST(jisx0213_2_emp, u, data[0], data[1]))
 | 
						|
        u |= 0x20000;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    return u;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                 Py_ssize_t *length, const void *config)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
 | 
						|
    switch (*length) {
 | 
						|
    case 1: /* first character */
 | 
						|
        if (*data >= 0x10000) {
 | 
						|
            if ((*data) >> 16 == 0x20000 >> 16) {
 | 
						|
                EMULATE_JISX0213_2000_ENCODE_EMP(config, coded, *data)
 | 
						|
                else if (TRYMAP_ENC_ST(jisx0213_emp, coded, (*data) & 0xffff))
 | 
						|
                    return coded;
 | 
						|
            }
 | 
						|
            return MAP_UNMAPPABLE;
 | 
						|
        }
 | 
						|
 | 
						|
        EMULATE_JISX0213_2000_ENCODE_BMP(config, coded, *data)
 | 
						|
        else if (TRYMAP_ENC_ST(jisx0213_bmp, coded, *data)) {
 | 
						|
            if (coded == MULTIC)
 | 
						|
                return MAP_MULTIPLE_AVAIL;
 | 
						|
        }
 | 
						|
        else if (TRYMAP_ENC_ST(jisxcommon, coded, *data)) {
 | 
						|
            if (coded & 0x8000)
 | 
						|
                return MAP_UNMAPPABLE;
 | 
						|
        }
 | 
						|
        else
 | 
						|
            return MAP_UNMAPPABLE;
 | 
						|
        return coded;
 | 
						|
 | 
						|
    case 2: /* second character of unicode pair */
 | 
						|
        coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
 | 
						|
                                jisx0213_pair_encmap, JISX0213_ENCPAIRS);
 | 
						|
        if (coded != DBCINV)
 | 
						|
            return coded;
 | 
						|
        _Py_FALLTHROUGH;
 | 
						|
 | 
						|
    case -1: /* flush unterminated */
 | 
						|
        *length = 1;
 | 
						|
        coded = find_pairencmap((ucs2_t)data[0], 0,
 | 
						|
                                jisx0213_pair_encmap, JISX0213_ENCPAIRS);
 | 
						|
        if (coded == DBCINV)
 | 
						|
            return MAP_UNMAPPABLE;
 | 
						|
        else
 | 
						|
            return coded;
 | 
						|
        break;
 | 
						|
 | 
						|
    default:
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0213_2000_1_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                        Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded = jisx0213_encoder(codec, data, length, (void *)2000);
 | 
						|
    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
 | 
						|
        return coded;
 | 
						|
    else if (coded & 0x8000)
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    else
 | 
						|
        return coded;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0213_2000_1_encoder_paironly(const MultibyteCodec *codec,
 | 
						|
                                 const Py_UCS4 *data, Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
    Py_ssize_t ilength = *length;
 | 
						|
 | 
						|
    coded = jisx0213_encoder(codec, data, length, (void *)2000);
 | 
						|
    switch (ilength) {
 | 
						|
    case 1:
 | 
						|
        if (coded == MAP_MULTIPLE_AVAIL)
 | 
						|
            return MAP_MULTIPLE_AVAIL;
 | 
						|
        else
 | 
						|
            return MAP_UNMAPPABLE;
 | 
						|
    case 2:
 | 
						|
        if (*length != 2)
 | 
						|
            return MAP_UNMAPPABLE;
 | 
						|
        else
 | 
						|
            return coded;
 | 
						|
    default:
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0213_2000_2_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                        Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded = jisx0213_encoder(codec, data, length, (void *)2000);
 | 
						|
    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
 | 
						|
        return coded;
 | 
						|
    else if (coded & 0x8000)
 | 
						|
        return coded & 0x7fff;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0213_2004_1_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                        Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded = jisx0213_encoder(codec, data, length, NULL);
 | 
						|
    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
 | 
						|
        return coded;
 | 
						|
    else if (coded & 0x8000)
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    else
 | 
						|
        return coded;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0213_2004_1_encoder_paironly(const MultibyteCodec *codec,
 | 
						|
                                 const Py_UCS4 *data, Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
    Py_ssize_t ilength = *length;
 | 
						|
 | 
						|
    coded = jisx0213_encoder(codec, data, length, NULL);
 | 
						|
    switch (ilength) {
 | 
						|
    case 1:
 | 
						|
        if (coded == MAP_MULTIPLE_AVAIL)
 | 
						|
            return MAP_MULTIPLE_AVAIL;
 | 
						|
        else
 | 
						|
            return MAP_UNMAPPABLE;
 | 
						|
    case 2:
 | 
						|
        if (*length != 2)
 | 
						|
            return MAP_UNMAPPABLE;
 | 
						|
        else
 | 
						|
            return coded;
 | 
						|
    default:
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0213_2004_2_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                        Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded = jisx0213_encoder(codec, data, length, NULL);
 | 
						|
    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
 | 
						|
        return coded;
 | 
						|
    else if (coded & 0x8000)
 | 
						|
        return coded & 0x7fff;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
jisx0201_r_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    JISX0201_R_DECODE_CHAR(*data, u)
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    return u;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0201_r_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                   Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
    JISX0201_R_ENCODE(*data, coded)
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    return coded;
 | 
						|
}
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
jisx0201_k_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    JISX0201_K_DECODE_CHAR(*data ^ 0x80, u)
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    return u;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
jisx0201_k_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
                   Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
    JISX0201_K_ENCODE(*data, coded)
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
    return coded - 0x80;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
gb2312_init(const MultibyteCodec *codec)
 | 
						|
{
 | 
						|
    cjkcodecs_module_state *st = codec->modstate;
 | 
						|
    if (IMPORT_MAP(cn, gbcommon, &st->gbcommon_encmap, NULL) ||
 | 
						|
        IMPORT_MAP(cn, gb2312, NULL, &st->gb2312_decmap))
 | 
						|
    {
 | 
						|
        return -1;
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
gb2312_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    Py_UCS4 u;
 | 
						|
    if (TRYMAP_DEC_ST(gb2312, u, data[0], data[1]))
 | 
						|
        return u;
 | 
						|
    else
 | 
						|
        return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
gb2312_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
               Py_ssize_t *length)
 | 
						|
{
 | 
						|
    DBCHAR coded;
 | 
						|
    assert(*length == 1);
 | 
						|
    if (*data < 0x10000) {
 | 
						|
        if (TRYMAP_ENC_ST(gbcommon, coded, *data)) {
 | 
						|
            if (!(coded & 0x8000))
 | 
						|
                return coded;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static Py_UCS4
 | 
						|
dummy_decoder(const MultibyteCodec *codec, const unsigned char *data)
 | 
						|
{
 | 
						|
    return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
static DBCHAR
 | 
						|
dummy_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
 | 
						|
              Py_ssize_t *length)
 | 
						|
{
 | 
						|
    return MAP_UNMAPPABLE;
 | 
						|
}
 | 
						|
 | 
						|
/*-*- registry tables -*-*/
 | 
						|
 | 
						|
#define REGISTRY_KSX1001_G0     { CHARSET_KSX1001, 0, 2,                \
 | 
						|
                  ksx1001_init,                                         \
 | 
						|
                  ksx1001_decoder, ksx1001_encoder }
 | 
						|
#define REGISTRY_KSX1001_G1     { CHARSET_KSX1001, 1, 2,                \
 | 
						|
                  ksx1001_init,                                         \
 | 
						|
                  ksx1001_decoder, ksx1001_encoder }
 | 
						|
#define REGISTRY_JISX0201_R     { CHARSET_JISX0201_R, 0, 1,             \
 | 
						|
                  NULL,                                                 \
 | 
						|
                  jisx0201_r_decoder, jisx0201_r_encoder }
 | 
						|
#define REGISTRY_JISX0201_K     { CHARSET_JISX0201_K, 0, 1,             \
 | 
						|
                  NULL,                                                 \
 | 
						|
                  jisx0201_k_decoder, jisx0201_k_encoder }
 | 
						|
#define REGISTRY_JISX0208       { CHARSET_JISX0208, 0, 2,               \
 | 
						|
                  jisx0208_init,                                        \
 | 
						|
                  jisx0208_decoder, jisx0208_encoder }
 | 
						|
#define REGISTRY_JISX0208_O     { CHARSET_JISX0208_O, 0, 2,             \
 | 
						|
                  jisx0208_init,                                        \
 | 
						|
                  jisx0208_decoder, jisx0208_encoder }
 | 
						|
#define REGISTRY_JISX0212       { CHARSET_JISX0212, 0, 2,               \
 | 
						|
                  jisx0212_init,                                        \
 | 
						|
                  jisx0212_decoder, jisx0212_encoder }
 | 
						|
#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2,       \
 | 
						|
                  jisx0213_init,                                        \
 | 
						|
                  jisx0213_2000_1_decoder,                              \
 | 
						|
                  jisx0213_2000_1_encoder }
 | 
						|
#define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \
 | 
						|
                  jisx0213_init,                                        \
 | 
						|
                  jisx0213_2000_1_decoder,                              \
 | 
						|
                  jisx0213_2000_1_encoder_paironly }
 | 
						|
#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2,            \
 | 
						|
                  jisx0213_init,                                        \
 | 
						|
                  jisx0213_2000_2_decoder,                              \
 | 
						|
                  jisx0213_2000_2_encoder }
 | 
						|
#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2,       \
 | 
						|
                  jisx0213_init,                                        \
 | 
						|
                  jisx0213_2004_1_decoder,                              \
 | 
						|
                  jisx0213_2004_1_encoder }
 | 
						|
#define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \
 | 
						|
                  jisx0213_init,                                        \
 | 
						|
                  jisx0213_2004_1_decoder,                              \
 | 
						|
                  jisx0213_2004_1_encoder_paironly }
 | 
						|
#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2,            \
 | 
						|
                  jisx0213_init,                                        \
 | 
						|
                  jisx0213_2004_2_decoder,                              \
 | 
						|
                  jisx0213_2004_2_encoder }
 | 
						|
#define REGISTRY_GB2312         { CHARSET_GB2312, 0, 2,                 \
 | 
						|
                  gb2312_init,                                          \
 | 
						|
                  gb2312_decoder, gb2312_encoder }
 | 
						|
#define REGISTRY_CNS11643_1     { CHARSET_CNS11643_1, 1, 2,             \
 | 
						|
                  cns11643_init,                                        \
 | 
						|
                  cns11643_1_decoder, cns11643_1_encoder }
 | 
						|
#define REGISTRY_CNS11643_2     { CHARSET_CNS11643_2, 2, 2,             \
 | 
						|
                  cns11643_init,                                        \
 | 
						|
                  cns11643_2_decoder, cns11643_2_encoder }
 | 
						|
#define REGISTRY_ISO8859_1      { CHARSET_ISO8859_1, 2, 1,              \
 | 
						|
                  NULL, dummy_decoder, dummy_encoder }
 | 
						|
#define REGISTRY_ISO8859_7      { CHARSET_ISO8859_7, 2, 1,              \
 | 
						|
                  NULL, dummy_decoder, dummy_encoder }
 | 
						|
#define REGISTRY_SENTINEL       { 0, }
 | 
						|
#define CONFIGDEF(var, attrs)                                           \
 | 
						|
    static const struct iso2022_config iso2022_##var##_config = {       \
 | 
						|
        attrs, iso2022_##var##_designations                             \
 | 
						|
    };
 | 
						|
 | 
						|
static const struct iso2022_designation iso2022_kr_designations[] = {
 | 
						|
    REGISTRY_KSX1001_G1, REGISTRY_SENTINEL
 | 
						|
};
 | 
						|
CONFIGDEF(kr, 0)
 | 
						|
 | 
						|
static const struct iso2022_designation iso2022_jp_designations[] = {
 | 
						|
    REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
 | 
						|
    REGISTRY_SENTINEL
 | 
						|
};
 | 
						|
CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT)
 | 
						|
 | 
						|
static const struct iso2022_designation iso2022_jp_1_designations[] = {
 | 
						|
    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
 | 
						|
    REGISTRY_JISX0208_O, REGISTRY_SENTINEL
 | 
						|
};
 | 
						|
CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT)
 | 
						|
 | 
						|
static const struct iso2022_designation iso2022_jp_2_designations[] = {
 | 
						|
    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,
 | 
						|
    REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
 | 
						|
    REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL
 | 
						|
};
 | 
						|
CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT)
 | 
						|
 | 
						|
static const struct iso2022_designation iso2022_jp_2004_designations[] = {
 | 
						|
    REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,
 | 
						|
    REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL
 | 
						|
};
 | 
						|
CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT)
 | 
						|
 | 
						|
static const struct iso2022_designation iso2022_jp_3_designations[] = {
 | 
						|
    REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,
 | 
						|
    REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL
 | 
						|
};
 | 
						|
CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT)
 | 
						|
 | 
						|
static const struct iso2022_designation iso2022_jp_ext_designations[] = {
 | 
						|
    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
 | 
						|
    REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL
 | 
						|
};
 | 
						|
CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
 | 
						|
 | 
						|
 | 
						|
BEGIN_MAPPINGS_LIST(0)
 | 
						|
  /* no mapping table here */
 | 
						|
END_MAPPINGS_LIST
 | 
						|
 | 
						|
#define ISO2022_CODEC(variation)                \
 | 
						|
NEXT_CODEC = (MultibyteCodec){                  \
 | 
						|
    "iso2022_" #variation,                      \
 | 
						|
    &iso2022_##variation##_config,              \
 | 
						|
    iso2022_codec_init,                         \
 | 
						|
    _STATEFUL_METHODS(iso2022)                  \
 | 
						|
};
 | 
						|
 | 
						|
BEGIN_CODECS_LIST(7)
 | 
						|
  ISO2022_CODEC(kr)
 | 
						|
  ISO2022_CODEC(jp)
 | 
						|
  ISO2022_CODEC(jp_1)
 | 
						|
  ISO2022_CODEC(jp_2)
 | 
						|
  ISO2022_CODEC(jp_2004)
 | 
						|
  ISO2022_CODEC(jp_3)
 | 
						|
  ISO2022_CODEC(jp_ext)
 | 
						|
END_CODECS_LIST
 | 
						|
 | 
						|
I_AM_A_MODULE_FOR(iso2022)
 |