mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	array module uses the new Unicode API
* Use Py_UCS4* buffer instead of Py_UNICODE*
* Use "I" or "L" format instead of "u" format
This commit is contained in:
		
							parent
							
								
									f8bb7d02f6
								
							
						
					
					
						commit
						8dba4e004f
					
				
					 2 changed files with 36 additions and 35 deletions
				
			
		|  | @ -218,10 +218,14 @@ def test_buffer_info(self): | ||||||
|         self.assertEqual(bi[1], len(a)) |         self.assertEqual(bi[1], len(a)) | ||||||
| 
 | 
 | ||||||
|     def test_byteswap(self): |     def test_byteswap(self): | ||||||
|         a = array.array(self.typecode, self.example) |         if self.typecode == 'u': | ||||||
|  |             example = '\U00100100' | ||||||
|  |         else: | ||||||
|  |             example = self.example | ||||||
|  |         a = array.array(self.typecode, example) | ||||||
|         self.assertRaises(TypeError, a.byteswap, 42) |         self.assertRaises(TypeError, a.byteswap, 42) | ||||||
|         if a.itemsize in (1, 2, 4, 8): |         if a.itemsize in (1, 2, 4, 8): | ||||||
|             b = array.array(self.typecode, self.example) |             b = array.array(self.typecode, example) | ||||||
|             b.byteswap() |             b.byteswap() | ||||||
|             if a.itemsize==1: |             if a.itemsize==1: | ||||||
|                 self.assertEqual(a, b) |                 self.assertEqual(a, b) | ||||||
|  |  | ||||||
|  | @ -174,24 +174,25 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) | ||||||
| static PyObject * | static PyObject * | ||||||
| u_getitem(arrayobject *ap, Py_ssize_t i) | u_getitem(arrayobject *ap, Py_ssize_t i) | ||||||
| { | { | ||||||
|     return PyUnicode_FromUnicode(&((Py_UNICODE *) ap->ob_item)[i], 1); |     return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int | static int | ||||||
| u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) | u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) | ||||||
| { | { | ||||||
|     Py_UNICODE *p; |     PyObject *p; | ||||||
|     Py_ssize_t len; |  | ||||||
| 
 | 
 | ||||||
|     if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len)) |     if (!PyArg_Parse(v, "U;array item must be unicode character", &p)) | ||||||
|         return -1; |         return -1; | ||||||
|     if (len != 1) { |     if (PyUnicode_READY(p)) | ||||||
|  |         return -1; | ||||||
|  |     if (PyUnicode_GET_LENGTH(p) != 1) { | ||||||
|         PyErr_SetString(PyExc_TypeError, |         PyErr_SetString(PyExc_TypeError, | ||||||
|                         "array item must be unicode character"); |                         "array item must be unicode character"); | ||||||
|         return -1; |         return -1; | ||||||
|     } |     } | ||||||
|     if (i >= 0) |     if (i >= 0) | ||||||
|         ((Py_UNICODE *)ap->ob_item)[i] = p[0]; |         ((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0); | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -443,6 +444,13 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #if SIZEOF_INT == 4 | ||||||
|  | #  define STRUCT_LONG_FORMAT "I" | ||||||
|  | #elif SIZEOF_LONG == 4 | ||||||
|  | #  define STRUCT_LONG_FORMAT "L" | ||||||
|  | #else | ||||||
|  | #  error "Unable to get struct format for Py_UCS4" | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| /* Description of types.
 | /* Description of types.
 | ||||||
|  * |  * | ||||||
|  | @ -452,7 +460,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) | ||||||
| static struct arraydescr descriptors[] = { | static struct arraydescr descriptors[] = { | ||||||
|     {'b', 1, b_getitem, b_setitem, "b", 1, 1}, |     {'b', 1, b_getitem, b_setitem, "b", 1, 1}, | ||||||
|     {'B', 1, BB_getitem, BB_setitem, "B", 1, 0}, |     {'B', 1, BB_getitem, BB_setitem, "B", 1, 0}, | ||||||
|     {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0}, |     {'u', sizeof(Py_UCS4), u_getitem, u_setitem, STRUCT_LONG_FORMAT, 0, 0}, | ||||||
|     {'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1}, |     {'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1}, | ||||||
|     {'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0}, |     {'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0}, | ||||||
|     {'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1}, |     {'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1}, | ||||||
|  | @ -1508,25 +1516,26 @@ This method is deprecated. Use tobytes instead."); | ||||||
| static PyObject * | static PyObject * | ||||||
| array_fromunicode(arrayobject *self, PyObject *args) | array_fromunicode(arrayobject *self, PyObject *args) | ||||||
| { | { | ||||||
|     Py_UNICODE *ustr; |     PyObject *ustr; | ||||||
|     Py_ssize_t n; |     Py_ssize_t n; | ||||||
|     char typecode; |  | ||||||
| 
 | 
 | ||||||
|     if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n)) |     if (!PyArg_ParseTuple(args, "U:fromunicode", &ustr)) | ||||||
|         return NULL; |         return NULL; | ||||||
|     typecode = self->ob_descr->typecode; |     if (self->ob_descr->typecode != 'u') { | ||||||
|     if ((typecode != 'u')) { |  | ||||||
|         PyErr_SetString(PyExc_ValueError, |         PyErr_SetString(PyExc_ValueError, | ||||||
|             "fromunicode() may only be called on " |             "fromunicode() may only be called on " | ||||||
|             "unicode type arrays"); |             "unicode type arrays"); | ||||||
|         return NULL; |         return NULL; | ||||||
|     } |     } | ||||||
|  |     if (PyUnicode_READY(ustr)) | ||||||
|  |         return NULL; | ||||||
|  |     n = PyUnicode_GET_LENGTH(ustr); | ||||||
|     if (n > 0) { |     if (n > 0) { | ||||||
|         Py_ssize_t old_size = Py_SIZE(self); |         Py_ssize_t old_size = Py_SIZE(self); | ||||||
|         if (array_resize(self, old_size + n) == -1) |         if (array_resize(self, old_size + n) == -1) | ||||||
|             return NULL; |             return NULL; | ||||||
|         memcpy(self->ob_item + old_size * sizeof(Py_UNICODE), |         if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0)) | ||||||
|                ustr, n * sizeof(Py_UNICODE)); |             return NULL; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Py_INCREF(Py_None); |     Py_INCREF(Py_None); | ||||||
|  | @ -1545,14 +1554,14 @@ append Unicode data to an array of some other type."); | ||||||
| static PyObject * | static PyObject * | ||||||
| array_tounicode(arrayobject *self, PyObject *unused) | array_tounicode(arrayobject *self, PyObject *unused) | ||||||
| { | { | ||||||
|     char typecode; |     if (self->ob_descr->typecode != 'u') { | ||||||
|     typecode = self->ob_descr->typecode; |  | ||||||
|     if ((typecode != 'u')) { |  | ||||||
|         PyErr_SetString(PyExc_ValueError, |         PyErr_SetString(PyExc_ValueError, | ||||||
|              "tounicode() may only be called on unicode type arrays"); |              "tounicode() may only be called on unicode type arrays"); | ||||||
|         return NULL; |         return NULL; | ||||||
|     } |     } | ||||||
|     return PyUnicode_FromUnicode((Py_UNICODE *) self->ob_item, Py_SIZE(self)); |     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, | ||||||
|  |                                      (Py_UCS4 *) self->ob_item, | ||||||
|  |                                      Py_SIZE(self)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| PyDoc_STRVAR(tounicode_doc, | PyDoc_STRVAR(tounicode_doc, | ||||||
|  | @ -1659,13 +1668,7 @@ typecode_to_mformat_code(char typecode) | ||||||
|         return UNSIGNED_INT8; |         return UNSIGNED_INT8; | ||||||
| 
 | 
 | ||||||
|     case 'u': |     case 'u': | ||||||
|         if (sizeof(Py_UNICODE) == 2) { |  | ||||||
|             return UTF16_LE + is_big_endian; |  | ||||||
|         } |  | ||||||
|         if (sizeof(Py_UNICODE) == 4) { |  | ||||||
|         return UTF32_LE + is_big_endian; |         return UTF32_LE + is_big_endian; | ||||||
|         } |  | ||||||
|         return UNKNOWN_FORMAT; |  | ||||||
| 
 | 
 | ||||||
|     case 'f': |     case 'f': | ||||||
|         if (sizeof(float) == 4) { |         if (sizeof(float) == 4) { | ||||||
|  | @ -2411,14 +2414,8 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags) | ||||||
|         view->strides = &(view->itemsize); |         view->strides = &(view->itemsize); | ||||||
|     view->format = NULL; |     view->format = NULL; | ||||||
|     view->internal = NULL; |     view->internal = NULL; | ||||||
|     if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) { |     if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) | ||||||
|         view->format = self->ob_descr->formats; |         view->format = self->ob_descr->formats; | ||||||
| #ifdef Py_UNICODE_WIDE |  | ||||||
|         if (self->ob_descr->typecode == 'u') { |  | ||||||
|             view->format = "w"; |  | ||||||
|         } |  | ||||||
| #endif |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|  finish: |  finish: | ||||||
|     self->ob_exports++; |     self->ob_exports++; | ||||||
|  | @ -2543,7 +2540,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | ||||||
|                         return NULL; |                         return NULL; | ||||||
|                     } |                     } | ||||||
|                     self->ob_item = item; |                     self->ob_item = item; | ||||||
|                     Py_SIZE(self) = n / sizeof(Py_UNICODE); |                     Py_SIZE(self) = n / sizeof(Py_UCS4); | ||||||
|                     memcpy(item, PyUnicode_AS_DATA(initial), n); |                     memcpy(item, PyUnicode_AS_DATA(initial), n); | ||||||
|                     self->allocated = Py_SIZE(self); |                     self->allocated = Py_SIZE(self); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Victor Stinner
						Victor Stinner