mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 03:04:41 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			529 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			529 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* ------------------------------------------------------------------------
 | |
| 
 | |
|    _codecs -- Provides access to the codec registry and the builtin
 | |
|               codecs.
 | |
| 
 | |
|    This module should never be imported directly. The standard library
 | |
|    module "codecs" wraps this builtin module for use within Python.
 | |
| 
 | |
|    The codec registry is accessible via:
 | |
| 
 | |
|      register(search_function) -> None
 | |
| 
 | |
|      lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
 | |
| 
 | |
|    The builtin Unicode codecs use the following interface:
 | |
| 
 | |
|      <encoding>_encode(Unicode_object[,errors='strict']) ->
 | |
|         (string object, Unicode characters consumed)
 | |
| 
 | |
|      <encoding>_decode(char_buffer_obj[,errors='strict']) -> 
 | |
|         (Unicode object, bytes consumed)
 | |
| 
 | |
|    These <encoding>s are available: utf_8, utf_16, utf_16_le, utf_16_be,
 | |
|    unicode_escape, raw_unicode_escape, unicode_internal, latin_1,
 | |
|    ascii (7-bit), charmap
 | |
| 
 | |
| Written by Marc-Andre Lemburg (mal@lemburg.com).
 | |
| 
 | |
| (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 | |
| 
 | |
|    ------------------------------------------------------------------------ */
 | |
| 
 | |
| #include "Python.h"
 | |
| 
 | |
| /* --- Registry ----------------------------------------------------------- */
 | |
| 
 | |
| static
 | |
| PyObject *codecregister(PyObject *self, PyObject *args)
 | |
| {
 | |
|     PyObject *search_function;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "O:register", &search_function))
 | |
|         goto onError;
 | |
| 
 | |
|     if (PyCodec_Register(search_function))
 | |
| 	goto onError;
 | |
|     
 | |
|     Py_INCREF(Py_None);
 | |
|     return Py_None;
 | |
| 
 | |
|  onError:
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static
 | |
| PyObject *codeclookup(PyObject *self, PyObject *args)
 | |
| {
 | |
|     char *encoding;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
 | |
|         goto onError;
 | |
| 
 | |
|     return _PyCodec_Lookup(encoding);
 | |
| 
 | |
|  onError:
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| /* --- Helpers ------------------------------------------------------------ */
 | |
| 
 | |
| static
 | |
| PyObject *codec_tuple(PyObject *unicode,
 | |
| 		      int len)
 | |
| {
 | |
|     PyObject *v,*w;
 | |
|     
 | |
|     if (unicode == NULL)
 | |
| 	return NULL;
 | |
|     v = PyTuple_New(2);
 | |
|     if (v == NULL) {
 | |
| 	Py_DECREF(unicode);
 | |
| 	return NULL;
 | |
|     }
 | |
|     PyTuple_SET_ITEM(v,0,unicode);
 | |
|     w = PyInt_FromLong(len);
 | |
|     if (w == NULL) {
 | |
| 	Py_DECREF(v);
 | |
| 	return NULL;
 | |
|     }
 | |
|     PyTuple_SET_ITEM(v,1,w);
 | |
|     return v;
 | |
| }
 | |
| 
 | |
| /* --- Decoder ------------------------------------------------------------ */
 | |
| 
 | |
| static PyObject *
 | |
| unicode_internal_decode(PyObject *self,
 | |
| 			PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "s#|z:unicode_internal_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data, 
 | |
| 					       size / sizeof(Py_UNICODE)),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| utf_8_decode(PyObject *self,
 | |
| 	    PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| utf_16_decode(PyObject *self,
 | |
| 	    PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     int byteorder = 0;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
|     return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| utf_16_le_decode(PyObject *self,
 | |
| 		 PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     int byteorder = -1;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
|     return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| utf_16_be_decode(PyObject *self,
 | |
| 		 PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     int byteorder = 1;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
|     return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| /* This non-standard version also provides access to the byteorder
 | |
|    parameter of the builtin UTF-16 codec.
 | |
| 
 | |
|    It returns a tuple (unicode, bytesread, byteorder) with byteorder
 | |
|    being the value in effect at the end of data.
 | |
| 
 | |
| */
 | |
| 
 | |
| static PyObject *
 | |
| utf_16_ex_decode(PyObject *self,
 | |
| 		 PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     int byteorder = 0;
 | |
|     PyObject *unicode, *tuple;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
 | |
| 			  &data, &size, &errors, &byteorder))
 | |
| 	return NULL;
 | |
| 
 | |
|     unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
 | |
|     if (unicode == NULL)
 | |
| 	return NULL;
 | |
|     tuple = Py_BuildValue("Oii", unicode, size, byteorder);
 | |
|     Py_DECREF(unicode);
 | |
|     return tuple;
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| unicode_escape_decode(PyObject *self,
 | |
| 		     PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| raw_unicode_escape_decode(PyObject *self,
 | |
| 			PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| latin_1_decode(PyObject *self,
 | |
| 	       PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| ascii_decode(PyObject *self,
 | |
| 	     PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| charmap_decode(PyObject *self,
 | |
| 	       PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
|     PyObject *mapping = NULL;
 | |
|     
 | |
|     if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
 | |
| 			  &data, &size, &errors, &mapping))
 | |
| 	return NULL;
 | |
|     if (mapping == Py_None)
 | |
| 	mapping = NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| /* --- Encoder ------------------------------------------------------------ */
 | |
| 
 | |
| static PyObject *
 | |
| readbuffer_encode(PyObject *self,
 | |
| 		  PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyString_FromStringAndSize(data, size),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| charbuffer_encode(PyObject *self,
 | |
| 		  PyObject *args)
 | |
| {
 | |
|     const char *data;
 | |
|     int size;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
 | |
| 			  &data, &size, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyString_FromStringAndSize(data, size),
 | |
| 		       size);
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| utf_8_encode(PyObject *self,
 | |
| 	    PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|z:utf_8_encode",
 | |
| 			  &str, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
 | |
| 					    PyUnicode_GET_SIZE(str),
 | |
| 					    errors),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| /* This version provides access to the byteorder parameter of the
 | |
|    builtin UTF-16 codecs as optional third argument. It defaults to 0
 | |
|    which means: use the native byte order and prepend the data with a
 | |
|    BOM mark.  
 | |
| 
 | |
| */
 | |
| 
 | |
| static PyObject *
 | |
| utf_16_encode(PyObject *self,
 | |
| 	    PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
|     int byteorder = 0;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|zi:utf_16_encode",
 | |
| 			  &str, &errors, &byteorder))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 | |
| 					     PyUnicode_GET_SIZE(str),
 | |
| 					     errors,
 | |
| 					     byteorder),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| utf_16_le_encode(PyObject *self,
 | |
| 		 PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|zi:utf_16_le_encode",
 | |
| 			  &str, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 | |
| 					     PyUnicode_GET_SIZE(str),
 | |
| 					     errors,
 | |
| 					     -1),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| utf_16_be_encode(PyObject *self,
 | |
| 		 PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|zi:utf_16_be_encode",
 | |
| 			  &str, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 | |
| 					     PyUnicode_GET_SIZE(str),
 | |
| 					     errors,
 | |
| 					     +1),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| unicode_escape_encode(PyObject *self,
 | |
| 		     PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|z:unicode_escape_encode",
 | |
| 			  &str, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeUnicodeEscape(
 | |
| 			       PyUnicode_AS_UNICODE(str), 
 | |
| 			       PyUnicode_GET_SIZE(str)),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| raw_unicode_escape_encode(PyObject *self,
 | |
| 			PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|z:raw_unicode_escape_encode",
 | |
| 			  &str, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
 | |
| 			       PyUnicode_AS_UNICODE(str), 
 | |
| 			       PyUnicode_GET_SIZE(str)),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| latin_1_encode(PyObject *self,
 | |
| 	       PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|z:latin_1_encode",
 | |
| 			  &str, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeLatin1(
 | |
| 			       PyUnicode_AS_UNICODE(str), 
 | |
| 			       PyUnicode_GET_SIZE(str),
 | |
| 			       errors),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| ascii_encode(PyObject *self,
 | |
| 	     PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|z:ascii_encode",
 | |
| 			  &str, &errors))
 | |
| 	return NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeASCII(
 | |
| 			       PyUnicode_AS_UNICODE(str), 
 | |
| 			       PyUnicode_GET_SIZE(str),
 | |
| 			       errors),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| static PyObject *
 | |
| charmap_encode(PyObject *self,
 | |
| 	     PyObject *args)
 | |
| {
 | |
|     PyObject *str;
 | |
|     const char *errors = NULL;
 | |
|     PyObject *mapping = NULL;
 | |
| 
 | |
|     if (!PyArg_ParseTuple(args, "U|zO:charmap_encode",
 | |
| 			  &str, &errors, &mapping))
 | |
| 	return NULL;
 | |
|     if (mapping == Py_None)
 | |
| 	mapping = NULL;
 | |
| 
 | |
|     return codec_tuple(PyUnicode_EncodeCharmap(
 | |
| 			       PyUnicode_AS_UNICODE(str), 
 | |
| 			       PyUnicode_GET_SIZE(str),
 | |
| 			       mapping, 
 | |
| 			       errors),
 | |
| 		       PyUnicode_GET_SIZE(str));
 | |
| }
 | |
| 
 | |
| /* --- Module API --------------------------------------------------------- */
 | |
| 
 | |
| static PyMethodDef _codecs_functions[] = {
 | |
|     {"register",		codecregister,			1},
 | |
|     {"lookup",			codeclookup, 			1},
 | |
|     {"utf_8_encode",		utf_8_encode,			1},
 | |
|     {"utf_8_decode",		utf_8_decode,			1},
 | |
|     {"utf_16_encode",		utf_16_encode,			1},
 | |
|     {"utf_16_le_encode",	utf_16_le_encode,		1},
 | |
|     {"utf_16_be_encode",	utf_16_be_encode,		1},
 | |
|     {"utf_16_decode",		utf_16_decode,			1},
 | |
|     {"utf_16_le_decode",	utf_16_le_decode,		1},
 | |
|     {"utf_16_be_decode",	utf_16_be_decode,		1},
 | |
|     {"utf_16_ex_decode",	utf_16_ex_decode,		1},
 | |
|     {"unicode_escape_encode",	unicode_escape_encode,		1},
 | |
|     {"unicode_escape_decode",	unicode_escape_decode,		1},
 | |
|     {"unicode_internal_encode",	readbuffer_encode,		1},
 | |
|     {"unicode_internal_decode",	unicode_internal_decode,	1},
 | |
|     {"raw_unicode_escape_encode", raw_unicode_escape_encode,	1},
 | |
|     {"raw_unicode_escape_decode", raw_unicode_escape_decode,	1},
 | |
|     {"latin_1_encode", 		latin_1_encode,			1},
 | |
|     {"latin_1_decode", 		latin_1_decode,			1},
 | |
|     {"ascii_encode", 		ascii_encode,			1},
 | |
|     {"ascii_decode", 		ascii_decode,			1},
 | |
|     {"charmap_encode", 		charmap_encode,			1},
 | |
|     {"charmap_decode", 		charmap_decode,			1},
 | |
|     {"readbuffer_encode",	readbuffer_encode,		1},
 | |
|     {"charbuffer_encode",	charbuffer_encode,		1},
 | |
|     {NULL, NULL}		/* sentinel */
 | |
| };
 | |
| 
 | |
| DL_EXPORT(void)
 | |
| init_codecs()
 | |
| {
 | |
|     Py_InitModule("_codecs", _codecs_functions);
 | |
| }
 | 
