mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			705 lines
		
	
	
	
		
			24 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			705 lines
		
	
	
	
		
			24 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * _iconv_codec.c
 | |
|  *
 | |
|  * libiconv adaptor for Python iconvcodec
 | |
|  *
 | |
|  * Author  : Hye-Shik Chang <perky@FreeBSD.org>
 | |
|  * Created : 17 January 2003
 | |
|  */
 | |
| 
 | |
| #include "Python.h"
 | |
| #include <string.h>
 | |
| #include <iconv.h>
 | |
| 
 | |
| static const char *__version__ = "$Revision$";
 | |
| 
 | |
| #if Py_USING_UNICODE
 | |
| # if Py_UNICODE_SIZE == 2
 | |
| #  ifdef __GNU_LIBRARY__
 | |
| #   define UNICODE_ENCODING        "ucs-2"
 | |
| #  else
 | |
| #   define UNICODE_ENCODING        "ucs-2-internal"
 | |
| #  endif
 | |
| #  define MBENCODED_LENGTH_MAX    4
 | |
| # elif Py_UNICODE_SIZE == 4
 | |
| #  ifdef __GNU_LIBRARY__
 | |
| #   define UNICODE_ENCODING        "ucs-4"
 | |
| #  else
 | |
| #   define UNICODE_ENCODING        "ucs-4-internal"
 | |
| #  endif
 | |
| #  define MBENCODED_LENGTH_MAX    6
 | |
| # endif
 | |
| #else
 | |
| # error "Unicode is not available"
 | |
| #endif
 | |
| 
 | |
| typedef struct {
 | |
|     PyObject_HEAD
 | |
|     iconv_t  enchdl, dechdl;
 | |
|     char    *encoding;
 | |
| } iconvcodecObject;
 | |
| PyDoc_STRVAR(iconvcodec_doc, "iconvcodec object");
 | |
| 
 | |
| /* does the chosen internal encoding require
 | |
|  * byteswapping to get native endianness?
 | |
|  * 0=no, 1=yes, -1=unknown */
 | |
| static int byteswap = -1;
 | |
| 
 | |
| #define ERROR_STRICT                (PyObject *)(1)
 | |
| #define ERROR_IGNORE                (PyObject *)(2)
 | |
| #define ERROR_REPLACE               (PyObject *)(3)
 | |
| #define ERROR_MAX                   ERROR_REPLACE
 | |
| 
 | |
| #define REPLACEMENT_CHAR_DECODE     0xFFFD
 | |
| #define REPLACEMENT_CHAR_ENCODE     '?'
 | |
| 
 | |
| #define DEFAULT_ENCODING            "utf-8"
 | |
| 
 | |
| 
 | |
| static PyObject *
 | |
| get_errorcallback(const char *errors)
 | |
| {
 | |
|     if (errors == NULL || strcmp(errors, "strict") == 0)
 | |
|         return ERROR_STRICT;
 | |
|     else if (strcmp(errors, "ignore") == 0)
 | |
|         return ERROR_IGNORE;
 | |
|     else if (strcmp(errors, "replace") == 0)
 | |
|         return ERROR_REPLACE;
 | |
|     else
 | |
|         return PyCodec_LookupError(errors);
 | |
| }
 | |
| 
 | |
| 
 | |
| PyDoc_STRVAR(iconvcodec_encode__doc__,
 | |
| "I.encode(unicode, [,errors]) -> (string, length consumed)\n\
 | |
| \n\
 | |
| Return an encoded string version of `unicode'. errors may be given to\n\
 | |
| set a different error handling scheme. Default is 'strict' meaning that\n\
 | |
| encoding errors raise a UnicodeEncodeError. Other possible values are\n\
 | |
| 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
 | |
| registered with codecs.register_error that can handle UnicodeEncodeErrors.");
 | |
| 
 | |
| static PyObject *
 | |
| iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
 | |
| {
 | |
|     static char         *kwlist[] = { "input", "errors", NULL };
 | |
|     Py_UNICODE          *input;
 | |
|     int                  inputlen;
 | |
|     char                *errors = NULL/*strict*/, *out, *out_top;
 | |
|     const char          *inp, *inp_top;
 | |
|     size_t               inplen, inplen_total, outlen, outlen_total, estep;
 | |
|     PyObject            *outputobj = NULL, *errorcb = NULL,
 | |
|                         *exceptionobj = NULL;
 | |
|     Py_UNICODE          *swappedinput = NULL;
 | |
|     int                  swapi;
 | |
| 
 | |
|     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode",
 | |
|                 kwlist, &input, &inputlen, &errors))
 | |
|         return NULL; /* TypeError */
 | |
| 
 | |
|     errorcb = get_errorcallback(errors);
 | |
|     if (errorcb == NULL)
 | |
|         return NULL; /* LookupError or something else from error handler */
 | |
| 
 | |
|     inp = inp_top = (char *)input;
 | |
|     inplen = inplen_total = (size_t)(inputlen * Py_UNICODE_SIZE);
 | |
| 
 | |
|     outlen = inputlen * MBENCODED_LENGTH_MAX;
 | |
|     if (outlen < 16)
 | |
|         outlen = 16; /* for iso-2022 codecs */
 | |
| 
 | |
|     outputobj = PyString_FromStringAndSize(NULL, outlen);
 | |
|     if (outputobj == NULL)
 | |
|         return NULL;
 | |
|     out = out_top = PyString_AS_STRING(outputobj);
 | |
|     outlen_total = outlen;
 | |
| 
 | |
|     estep = inputlen * Py_UNICODE_SIZE / 2;
 | |
| 
 | |
| #define RESIZE_OUTBUFFER(size) {                            \
 | |
|     size_t   toadd = (size);                                \
 | |
|     outlen_total += toadd;                                  \
 | |
|     outlen += toadd;                                        \
 | |
|     if (_PyString_Resize(&outputobj, outlen_total) == -1)   \
 | |
|         goto errorexit;                                     \
 | |
|     out = PyString_AS_STRING(outputobj) + (out - out_top);  \
 | |
|     out_top = PyString_AS_STRING(outputobj);                \
 | |
| }
 | |
|     if (byteswap) {
 | |
|         swappedinput = PyMem_Malloc(inplen);
 | |
|         if (swappedinput == NULL)
 | |
|             return NULL;
 | |
|         for (swapi = 0; swapi<inputlen; ++swapi)
 | |
|         {
 | |
|            Py_UNICODE c = input[swapi];
 | |
| #if Py_UNICODE_SIZE == 2
 | |
|            c = ((char *)&c)[0]<<8 | ((char *)&c)[1];
 | |
| #else
 | |
|            c = ((char *)&c)[0]<<24 | ((char *)&c)[1]<<16 |
 | |
|                ((char *)&c)[2]<<8 | ((char *)&c)[3];
 | |
| #endif
 | |
|            swappedinput[swapi] = c;
 | |
|         }
 | |
|         inp = inp_top = (char *)swappedinput;
 | |
|     }
 | |
| 
 | |
|     while (inplen > 0) {
 | |
|         if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen) == (size_t)-1) {
 | |
|             char         reason[128];
 | |
|             int          errpos;
 | |
| 
 | |
|             if (errno == E2BIG) {
 | |
|                 RESIZE_OUTBUFFER(estep);
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) {
 | |
|                 inplen -= Py_UNICODE_SIZE;
 | |
|                 inp += Py_UNICODE_SIZE;
 | |
|                 if (errorcb == ERROR_REPLACE) {
 | |
|                     if (outlen < 1)
 | |
|                         RESIZE_OUTBUFFER(errno == EINVAL ?  1 : estep);
 | |
|                     outlen--;
 | |
|                     *out++ = REPLACEMENT_CHAR_ENCODE;
 | |
|                 }
 | |
|                 if (errno == EINVAL) break;
 | |
|                 else continue;
 | |
|             }
 | |
| 
 | |
|             errpos = (int)(inp - inp_top) / Py_UNICODE_SIZE;
 | |
|             sprintf(reason, "Undefined character map from "
 | |
| #if Py_UNICODE_SIZE == 2
 | |
|                         "\\u%04x"
 | |
| #elif Py_UNICODE_SIZE == 4
 | |
|                         "\\u%08x"
 | |
| #endif
 | |
|                         , *(Py_UNICODE *)inp);
 | |
| 
 | |
|             if (exceptionobj == NULL) {
 | |
|                 if ((exceptionobj = PyUnicodeEncodeError_Create(
 | |
|                                         self->encoding, input, inputlen,
 | |
|                                         errpos, errpos + 1, reason)) == NULL)
 | |
|                     goto errorexit;
 | |
|             } else {
 | |
|                 if (PyUnicodeEncodeError_SetStart(exceptionobj, errpos) != 0)
 | |
|                     goto errorexit;
 | |
|                 if (PyUnicodeEncodeError_SetEnd(exceptionobj, errpos + 1) != 0)
 | |
|                     goto errorexit;
 | |
|                 if (PyUnicodeEncodeError_SetReason(exceptionobj, reason) != 0)
 | |
|                     goto errorexit;
 | |
|             }
 | |
| 
 | |
|             if (errorcb == ERROR_STRICT) {
 | |
|                 PyCodec_StrictErrors(exceptionobj);
 | |
|                 goto errorexit;
 | |
|             } else {
 | |
|                 PyObject        *argsobj, *retobj, *retuni;
 | |
|                 long             newpos;
 | |
| 
 | |
|                 argsobj = PyTuple_New(1);
 | |
|                 if (argsobj == NULL)
 | |
|                     goto errorexit;
 | |
|                 PyTuple_SET_ITEM(argsobj, 0, exceptionobj);
 | |
|                 Py_INCREF(exceptionobj);
 | |
|                 retobj = PyObject_CallObject(errorcb, argsobj);
 | |
|                 Py_DECREF(argsobj);
 | |
|                 if (retobj == NULL)
 | |
|                     goto errorexit;
 | |
| 
 | |
|                 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
 | |
|                     !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
 | |
|                     !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
 | |
|                     Py_DECREF(retobj);
 | |
|                     PyErr_SetString(PyExc_ValueError, "encoding error handler "
 | |
|                                     "must return (unicode, int) tuple");
 | |
|                     goto errorexit;
 | |
|                 }
 | |
|                 if (PyUnicode_GET_SIZE(retuni) > 0) {
 | |
| #define errorexit errorexit_cbpad
 | |
|                     PyObject    *retstr = NULL;
 | |
|                     int          retstrsize;
 | |
| 
 | |
|                     retstr = PyUnicode_AsEncodedString(
 | |
|                                     retuni, self->encoding, NULL);
 | |
|                     if (retstr == NULL || !PyString_Check(retstr))
 | |
|                         goto errorexit;
 | |
| 
 | |
|                     retstrsize = PyString_GET_SIZE(retstr);
 | |
|                     if (outlen < retstrsize)
 | |
|                         RESIZE_OUTBUFFER(errno == EINVAL || retstrsize > estep
 | |
|                                          ? retstrsize - outlen : estep);
 | |
| 
 | |
|                     memcpy(out, PyString_AS_STRING(retstr), retstrsize);
 | |
|                     out += retstrsize;
 | |
|                     outlen -= retstrsize;
 | |
| #undef errorexit
 | |
|                     if (0) {
 | |
| errorexit_cbpad:        Py_XDECREF(retobj);
 | |
|                         Py_XDECREF(retstr);
 | |
|                         goto errorexit;
 | |
|                     }
 | |
|                     Py_DECREF(retstr);
 | |
|                 }
 | |
| 
 | |
|                 newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
 | |
|                 Py_DECREF(retobj);
 | |
| 
 | |
|                 if (newpos < 0)
 | |
|                     newpos = inputlen + newpos;
 | |
|                 if (newpos < 0 || newpos > inputlen) {
 | |
|                     PyErr_Format(PyExc_IndexError, "position %ld from error handler"
 | |
|                           " out of bounds", newpos);
 | |
|                     goto errorexit;
 | |
|                 }
 | |
|                 if (newpos == inputlen)
 | |
|                     break;
 | |
|                 inp = inp_top + Py_UNICODE_SIZE * newpos;
 | |
|                 inplen = inplen_total - Py_UNICODE_SIZE * newpos;
 | |
|             }
 | |
|         } else
 | |
|             break;
 | |
|     }
 | |
| #undef RESIZE_OUTBUFFER
 | |
| 
 | |
|     {
 | |
|         PyObject    *rettup;
 | |
|         int          finalsize;
 | |
| 
 | |
|         finalsize = (int)(out - out_top);
 | |
| 
 | |
|         if (finalsize != outlen_total) {
 | |
|             if (_PyString_Resize(&outputobj, finalsize) == -1)
 | |
|                 goto errorexit;
 | |
|         }
 | |
| 
 | |
|         if (errorcb > ERROR_MAX) {
 | |
|             Py_DECREF(errorcb);
 | |
|         }
 | |
|         Py_XDECREF(exceptionobj);
 | |
| 
 | |
|         rettup = PyTuple_New(2);
 | |
|         if (rettup == NULL) {
 | |
|             Py_DECREF(outputobj);
 | |
|             if (byteswap)
 | |
|                 PyMem_Free(swappedinput);
 | |
|             return NULL;
 | |
|         }
 | |
|         PyTuple_SET_ITEM(rettup, 0, outputobj);
 | |
|         PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inputlen));
 | |
|         return rettup;
 | |
|     }
 | |
| 
 | |
| errorexit:
 | |
|     Py_XDECREF(outputobj);
 | |
|     if (errorcb > ERROR_MAX) {
 | |
|         Py_DECREF(errorcb);
 | |
|     }
 | |
|     Py_XDECREF(exceptionobj);
 | |
|     if (byteswap)
 | |
|         PyMem_Free(swappedinput);
 | |
| 
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| PyDoc_STRVAR(iconvcodec_decode__doc__,
 | |
| "I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\
 | |
| \n\
 | |
| Decodes `string' using I, an iconvcodec instance. errors may be given\n\
 | |
| to set a different error handling scheme. Default is 'strict' meaning\n\
 | |
| that encoding errors raise a UnicodeDecodeError. Other possible values\n\
 | |
| are 'ignore' and 'replace' as well as any other name registerd with\n\
 | |
| codecs.register_error that is able to handle UnicodeDecodeErrors.");
 | |
| 
 | |
| static PyObject *
 | |
| iconvcodec_decode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
 | |
| {
 | |
|     static char         *kwlist[] = { "input", "errors", NULL };
 | |
|     char                *errors = NULL/*strict*/, *out, *out_top;
 | |
|     const char          *inp, *inp_top;
 | |
|     int                  inplen_int;
 | |
|     size_t               inplen, inplen_total, outlen, outlen_total, estep;
 | |
|     PyObject            *outputobj = NULL, *errorcb = NULL,
 | |
|                         *exceptionobj = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|s:decode",
 | |
|                 kwlist, &inp, &inplen_int, &errors))
 | |
|         return NULL; /* TypeError */
 | |
| 
 | |
|     errorcb = get_errorcallback(errors);
 | |
|     if (errorcb == NULL)
 | |
|         return NULL; /* LookupError or something else from error handler */
 | |
| 
 | |
|     inp_top = inp;
 | |
|     inplen_total = inplen = (size_t)inplen_int;
 | |
| 
 | |
|     outputobj = PyUnicode_FromUnicode(NULL, inplen);
 | |
|     if (outputobj == NULL)
 | |
|         return NULL;
 | |
|     outlen_total = outlen = PyUnicode_GET_DATA_SIZE(outputobj);
 | |
|     out = out_top = (char *)PyUnicode_AS_UNICODE(outputobj);
 | |
| 
 | |
|     estep = outlen / 2;
 | |
| 
 | |
| #define RESIZE_OUTBUFFER(size) {                                            \
 | |
|     size_t   toadd = (size);                                                \
 | |
|     outlen_total += toadd;                                                  \
 | |
|     outlen += toadd;                                                        \
 | |
|     if (PyUnicode_Resize(&outputobj, outlen_total/Py_UNICODE_SIZE) == -1)   \
 | |
|         goto errorexit;                                                     \
 | |
|     out = (char *)PyUnicode_AS_UNICODE(outputobj) + (out - out_top);        \
 | |
|     out_top = (char *)PyUnicode_AS_UNICODE(outputobj);                      \
 | |
| }
 | |
|     while (inplen > 0) {
 | |
|         char *oldout = out;
 | |
|         size_t res = iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen);
 | |
| 
 | |
|         if (byteswap) {
 | |
|             while (oldout < out)
 | |
|             {
 | |
|                 char c0 = oldout[0];
 | |
| #if Py_UNICODE_SIZE == 2
 | |
|                 oldout[0] = oldout[1];
 | |
|                 oldout[1] = c0;
 | |
| #else
 | |
|                 char c1 = oldout[1];
 | |
|                 oldout[0] = oldout[3];
 | |
|                 oldout[1] = oldout[2];
 | |
|                 oldout[2] = c1;
 | |
|                 oldout[3] = c0;
 | |
| #endif
 | |
|                 oldout += sizeof(Py_UNICODE);
 | |
|             }
 | |
|         }
 | |
|         if (res == (size_t)-1) {
 | |
|             char         reason[128], *reasonpos = (char *)reason;
 | |
|             int          errpos;
 | |
| 
 | |
|             if (errno == E2BIG) {
 | |
|                 RESIZE_OUTBUFFER(estep);
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) {
 | |
|                 inplen--; inp++;
 | |
|                 if (errorcb == ERROR_REPLACE) {
 | |
|                     Py_UNICODE      *replp;
 | |
| 
 | |
|                     if (outlen < Py_UNICODE_SIZE)
 | |
|                         RESIZE_OUTBUFFER(
 | |
|                             errno == EINVAL || Py_UNICODE_SIZE > estep
 | |
|                             ? Py_UNICODE_SIZE : estep);
 | |
| 
 | |
|                     /* some compilers hate casted lvalue */
 | |
|                     replp   = (Py_UNICODE *)out;
 | |
|                     assert((long)replp % Py_UNICODE_SIZE == 0);/* aligned? */
 | |
|                     *replp  = REPLACEMENT_CHAR_DECODE;
 | |
| 
 | |
|                     out     += Py_UNICODE_SIZE;
 | |
|                     outlen  -= Py_UNICODE_SIZE;
 | |
|                 }
 | |
|                 if (errno == EINVAL) break;
 | |
|                 else continue;
 | |
|             }
 | |
| 
 | |
|             errpos = (int)(inp - inp_top);
 | |
|             reasonpos += sprintf(reason, "Invalid multibyte sequence \\x%02x",
 | |
|                                             (unsigned char)*inp);
 | |
|             if (inplen > 1) {
 | |
|                 reasonpos += sprintf(reasonpos,
 | |
|                                      "\\x%02x", (unsigned char)*(inp+1));
 | |
|                 if (inplen > 2)
 | |
|                     sprintf(reasonpos, "\\x%02x", (unsigned char)*(inp+2));
 | |
|             }
 | |
| 
 | |
|             if (exceptionobj == NULL) {
 | |
|                 exceptionobj = PyUnicodeDecodeError_Create(
 | |
|                                     self->encoding, inp_top, inplen_total,
 | |
|                                     errpos, errpos + 1, reason);
 | |
|                 if (exceptionobj == NULL)
 | |
|                     goto errorexit;
 | |
|             } else {
 | |
|                 if (PyUnicodeDecodeError_SetStart(exceptionobj, errpos) != 0)
 | |
|                     goto errorexit;
 | |
|                 if (PyUnicodeDecodeError_SetEnd(exceptionobj, errpos + 1) != 0)
 | |
|                     goto errorexit;
 | |
|                 if (PyUnicodeDecodeError_SetReason(exceptionobj, reason) != 0)
 | |
|                     goto errorexit;
 | |
|             }
 | |
| 
 | |
|             if (errorcb == ERROR_STRICT) {
 | |
|                 PyCodec_StrictErrors(exceptionobj);
 | |
|                 goto errorexit;
 | |
|             } else {
 | |
|                 PyObject        *argsobj, *retobj, *retuni;
 | |
|                 long             newpos;
 | |
| 
 | |
|                 argsobj = PyTuple_New(1);
 | |
|                 if (argsobj == NULL)
 | |
|                     goto errorexit;
 | |
|                 PyTuple_SET_ITEM(argsobj, 0, exceptionobj);
 | |
|                 Py_INCREF(exceptionobj);
 | |
|                 retobj = PyObject_CallObject(errorcb, argsobj);
 | |
|                 Py_DECREF(argsobj);
 | |
|                 if (retobj == NULL)
 | |
|                     goto errorexit;
 | |
| 
 | |
|                 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
 | |
|                     !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
 | |
|                     !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
 | |
|                     Py_DECREF(retobj);
 | |
|                     PyErr_SetString(PyExc_ValueError, "decoding error handler "
 | |
|                                     "must return (unicode, int) tuple");
 | |
|                     goto errorexit;
 | |
|                 }
 | |
|                 if (PyUnicode_GET_SIZE(retuni) > 0) {
 | |
| #define errorexit errorexit_cbpad
 | |
|                     size_t       retunisize;
 | |
| 
 | |
|                     retunisize = PyUnicode_GET_DATA_SIZE(retuni);
 | |
|                     if (outlen < retunisize)
 | |
|                         RESIZE_OUTBUFFER(errno == EINVAL || retunisize > estep
 | |
|                                          ? retunisize - outlen : estep);
 | |
| 
 | |
|                     memcpy(out, PyUnicode_AS_DATA(retuni), retunisize);
 | |
|                     out += retunisize;
 | |
|                     outlen -= retunisize;
 | |
| #undef errorexit
 | |
|                     if (0) {
 | |
| errorexit_cbpad:        Py_DECREF(retobj);
 | |
|                         goto errorexit;
 | |
|                     }
 | |
|                 }
 | |
| 
 | |
|                 newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
 | |
|                 Py_DECREF(retobj);
 | |
| 
 | |
|                 if (newpos < 0)
 | |
|                     newpos = inplen_total + newpos;
 | |
|                 if (newpos < 0 || newpos > inplen_total) {
 | |
|                     PyErr_Format(PyExc_IndexError, "position %ld from error handler"
 | |
|                           " out of bounds", newpos);
 | |
|                     goto errorexit;
 | |
|                 }
 | |
|                 if (newpos == inplen_total)
 | |
|                     break;
 | |
|                 inp = inp_top + newpos;
 | |
|                 inplen = inplen_total - newpos;
 | |
|             }
 | |
|         } else
 | |
|             break;
 | |
|     }
 | |
| #undef RESIZE_OUTBUFFER
 | |
| 
 | |
|     {
 | |
|         PyObject    *rettup;
 | |
|         int          finalsize;
 | |
| 
 | |
|         finalsize = (int)(out - out_top);
 | |
|         if (finalsize != outlen_total) {
 | |
|             if (PyUnicode_Resize(&outputobj, finalsize / Py_UNICODE_SIZE) == -1)
 | |
|                 goto errorexit;
 | |
|         }
 | |
| 
 | |
|         if (errorcb > ERROR_MAX) {
 | |
|             Py_DECREF(errorcb);
 | |
|         }
 | |
|         Py_XDECREF(exceptionobj);
 | |
| 
 | |
|         rettup = PyTuple_New(2);
 | |
|         if (rettup == NULL) {
 | |
|             Py_DECREF(outputobj);
 | |
|             return NULL;
 | |
|         }
 | |
|         PyTuple_SET_ITEM(rettup, 0, outputobj);
 | |
|         PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inplen_total));
 | |
|         return rettup;
 | |
|     }
 | |
| 
 | |
| errorexit:
 | |
|     Py_XDECREF(outputobj);
 | |
|     if (errorcb > ERROR_MAX) {
 | |
|         Py_DECREF(errorcb);
 | |
|     }
 | |
|     Py_XDECREF(exceptionobj);
 | |
| 
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static struct PyMethodDef iconvcodec_methods[] = {
 | |
|     {"encode",      (PyCFunction)iconvcodec_encode,
 | |
|                     METH_VARARGS | METH_KEYWORDS,
 | |
|                     iconvcodec_encode__doc__},
 | |
|     {"decode",      (PyCFunction)iconvcodec_decode,
 | |
|                     METH_VARARGS | METH_KEYWORDS,
 | |
|                     iconvcodec_decode__doc__},
 | |
|     {NULL, NULL},
 | |
| };
 | |
| 
 | |
| static PyObject *
 | |
| iconvcodec_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 | |
| {
 | |
|     PyObject            *encobj = NULL;
 | |
|     iconvcodecObject    *new = NULL;
 | |
| 
 | |
|     new = (iconvcodecObject *)type->tp_alloc(type, 0);
 | |
|     if (new == NULL)
 | |
|         return NULL;
 | |
| 
 | |
|     new->encoding = NULL;
 | |
|     new->enchdl = new->dechdl = (iconv_t)(-1);
 | |
| 
 | |
|     encobj = PyObject_GetAttrString((PyObject *)new, "encoding");
 | |
|     if (encobj == NULL) {
 | |
|         PyErr_Clear();
 | |
|         new->encoding = PyMem_Malloc(sizeof(DEFAULT_ENCODING));
 | |
|         strcpy(new->encoding, DEFAULT_ENCODING);
 | |
|     } else if (!PyString_Check(encobj)) {
 | |
|         Py_DECREF(encobj);
 | |
|         PyErr_SetString(PyExc_TypeError, 
 | |
|                         "`encoding' attribute must be a string.");
 | |
|         goto errorexit;
 | |
|     } else {
 | |
|         new->encoding = PyMem_Malloc(PyString_GET_SIZE(encobj) + 1);
 | |
|         strcpy(new->encoding, PyString_AS_STRING(encobj));
 | |
|         Py_DECREF(encobj);
 | |
|     }
 | |
| 
 | |
|     new->dechdl = iconv_open(UNICODE_ENCODING, new->encoding);
 | |
|     if (new->dechdl == (iconv_t)(-1)) {
 | |
|         PyErr_SetString(PyExc_ValueError, "unsupported decoding");
 | |
|         goto errorexit;
 | |
|     }
 | |
| 
 | |
|     new->enchdl = iconv_open(new->encoding, UNICODE_ENCODING);
 | |
|     if (new->enchdl == (iconv_t)(-1)) {
 | |
|         PyErr_SetString(PyExc_ValueError, "unsupported encoding");
 | |
|         iconv_close(new->dechdl);
 | |
|         new->dechdl = (iconv_t)(-1);
 | |
|         goto errorexit;
 | |
|     }
 | |
| 
 | |
|     return (PyObject *)new;
 | |
| 
 | |
| errorexit:
 | |
|     Py_XDECREF(new);
 | |
| 
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static void
 | |
| iconvcodec_dealloc(iconvcodecObject *self)
 | |
| {
 | |
|     if (self->enchdl != (iconv_t)-1)
 | |
|         iconv_close(self->enchdl);
 | |
|     if (self->dechdl != (iconv_t)-1)
 | |
|         iconv_close(self->dechdl);
 | |
|     if (self->encoding != NULL)
 | |
|         PyMem_Free(self->encoding);
 | |
| 
 | |
|     self->ob_type->tp_free((PyObject *)self);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| iconvcodec_repr(PyObject *self)
 | |
| {
 | |
|     return PyString_FromFormat("<iconvcodec encoding='%s'>",
 | |
|                     ((iconvcodecObject *)self)->encoding);
 | |
| }
 | |
| 
 | |
| static PyTypeObject iconvcodec_Type = {
 | |
|     PyObject_HEAD_INIT(&PyType_Type)
 | |
|     0,                              /* Number of items for varobject */
 | |
|     "iconvcodec",                   /* Name of this type */
 | |
|     sizeof(iconvcodecObject),       /* Basic object size */
 | |
|     0,                              /* Item size for varobject */
 | |
|     (destructor)iconvcodec_dealloc, /* tp_dealloc */
 | |
|     0,                              /* tp_print */
 | |
|     0,                              /* tp_getattr */
 | |
|     0,                              /* tp_setattr */
 | |
|     0,                              /* tp_compare */
 | |
|     iconvcodec_repr,                /* tp_repr */
 | |
|     0,                              /* tp_as_number */
 | |
|     0,                              /* tp_as_sequence */
 | |
|     0,                              /* tp_as_mapping */
 | |
|     0,                              /* tp_hash */
 | |
|     0,                              /* tp_call */
 | |
|     0,                              /* tp_str */
 | |
|     PyObject_GenericGetAttr,        /* tp_getattro */
 | |
|     0,                              /* tp_setattro */
 | |
|     0,                              /* tp_as_buffer */
 | |
|     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,    /* tp_flags */
 | |
|     iconvcodec_doc,                 /* tp_doc */
 | |
|     0,                              /* tp_traverse */
 | |
|     0,                              /* tp_clear */
 | |
|     0,                              /* tp_richcompare */
 | |
|     0,                              /* tp_weaklistoffset */
 | |
|     0,                              /* tp_iter */
 | |
|     0,                              /* tp_iterext */
 | |
|     iconvcodec_methods,             /* tp_methods */
 | |
|     0,                              /* tp_members */
 | |
|     0,                              /* tp_getset */
 | |
|     0,                              /* tp_base */
 | |
|     0,                              /* tp_dict */
 | |
|     0,                              /* tp_descr_get */
 | |
|     0,                              /* tp_descr_set */
 | |
|     0,                              /* tp_dictoffset */
 | |
|     0,                              /* tp_init */
 | |
|     PyType_GenericAlloc,            /* tp_alloc */
 | |
|     iconvcodec_new,                 /* tp_new */
 | |
|     PyObject_Del,                   /* tp_free */
 | |
| };
 | |
| 
 | |
| static struct PyMethodDef _iconv_codec_methods[] = {
 | |
|     {NULL, NULL},
 | |
| };
 | |
| 
 | |
| void
 | |
| init_iconv_codec(void)
 | |
| {
 | |
|     PyObject *m;
 | |
| 
 | |
|     char in = 1;
 | |
|     char *inptr = ∈
 | |
|     size_t insize = 1;
 | |
|     Py_UNICODE out = 0;
 | |
|     char *outptr = (char *)&out;
 | |
|     size_t outsize = sizeof(out);
 | |
|     size_t res;
 | |
| 
 | |
|     iconv_t hdl = iconv_open(UNICODE_ENCODING, "ASCII");
 | |
| 
 | |
|     if (hdl == (iconv_t)-1)
 | |
|         Py_FatalError("can't initialize the _iconv_codec module: iconv_open() failed");
 | |
| 
 | |
|     res = iconv(hdl, &inptr, &insize, &outptr, &outsize);
 | |
|     if (res == (size_t)-1)
 | |
|         Py_FatalError("can't initialize the _iconv_codec module: iconv() failed");
 | |
| 
 | |
|     /* Check whether conv() returned native endianess or not for the chosen encoding */
 | |
|     if (out == 0x1)
 | |
|        byteswap = 0;
 | |
| #if Py_UNICODE_SIZE == 2
 | |
|     else if (out == 0x0100)
 | |
| #else
 | |
|     else if (out == 0x01000000)
 | |
| #endif
 | |
|        byteswap = 1;
 | |
|     else
 | |
|         Py_FatalError("can't initialize the _iconv_codec module: mixed endianess");
 | |
|     iconv_close(hdl);
 | |
| 
 | |
|     m = Py_InitModule("_iconv_codec", _iconv_codec_methods);
 | |
| 
 | |
|     PyModule_AddStringConstant(m, "__version__", (char*)__version__);
 | |
|     Py_INCREF(&iconvcodec_Type);
 | |
|     PyModule_AddObject(m, "iconvcodec", (PyObject *)(&iconvcodec_Type));
 | |
|     PyModule_AddStringConstant(m, "internal_encoding", UNICODE_ENCODING);
 | |
| 
 | |
|     if (PyErr_Occurred())
 | |
|         Py_FatalError("can't initialize the _iconv_codec module");
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * ex: ts=8 sts=4 et
 | |
|  * $Id$
 | |
|  */
 | 
