mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Change PyUnicode_KIND to 1,2,4. Drop _KIND_SIZE and _CHARACTER_SIZE.
This commit is contained in:
		
							parent
							
								
									dd07732af5
								
							
						
					
					
						commit
						c47adb04b3
					
				
					 8 changed files with 84 additions and 123 deletions
				
			
		|  | @ -99,7 +99,7 @@ access internal read-only data of Unicode objects: | |||
| 
 | ||||
|    .. deprecated-removed:: 3.3 4.0 | ||||
|       Part of the old-style Unicode API, please migrate to using | ||||
|       :c:func:`PyUnicode_GET_LENGTH` or :c:func:`PyUnicode_KIND_SIZE`. | ||||
|       :c:func:`PyUnicode_GET_LENGTH`. | ||||
| 
 | ||||
| 
 | ||||
| .. c:function:: Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *o) | ||||
|  | @ -149,9 +149,8 @@ access internal read-only data of Unicode objects: | |||
|    Return a pointer to the canonical representation cast to UCS1, UCS2 or UCS4 | ||||
|    integer types for direct character access.  No checks are performed if the | ||||
|    canonical representation has the correct character size; use | ||||
|    :c:func:`PyUnicode_CHARACTER_SIZE` or :c:func:`PyUnicode_KIND` to select the | ||||
|    right macro.  Make sure :c:func:`PyUnicode_READY` has been called before | ||||
|    accessing this. | ||||
|    :c:func:`PyUnicode_KIND` to select the right macro.  Make sure  | ||||
|    :c:func:`PyUnicode_READY` has been called before accessing this. | ||||
| 
 | ||||
|    .. versionadded:: 3.3 | ||||
| 
 | ||||
|  | @ -176,15 +175,6 @@ access internal read-only data of Unicode objects: | |||
|    .. versionadded:: 3.3 | ||||
| 
 | ||||
| 
 | ||||
| .. c:function:: int PyUnicode_CHARACTER_SIZE(PyObject *o) | ||||
| 
 | ||||
|    Return the number of bytes the string uses to represent single characters; | ||||
|    this can be 1, 2 or 4.  *o* has to be a Unicode object in the "canonical" | ||||
|    representation (not checked). | ||||
| 
 | ||||
|    .. versionadded:: 3.3 | ||||
| 
 | ||||
| 
 | ||||
| .. c:function:: void* PyUnicode_DATA(PyObject *o) | ||||
| 
 | ||||
|    Return a void pointer to the raw unicode buffer.  *o* has to be a Unicode | ||||
|  | @ -193,14 +183,6 @@ access internal read-only data of Unicode objects: | |||
|    .. versionadded:: 3.3 | ||||
| 
 | ||||
| 
 | ||||
| .. c:function:: int PyUnicode_KIND_SIZE(int kind, Py_ssize_t index) | ||||
| 
 | ||||
|    Compute ``index * char_size`` where ``char_size`` is ``2**(kind - 1)``.  The | ||||
|    index is a character index, the result is a size in bytes. | ||||
| 
 | ||||
|    .. versionadded:: 3.3 | ||||
| 
 | ||||
| 
 | ||||
| .. c:function:: void PyUnicode_WRITE(int kind, void *data, Py_ssize_t index, \ | ||||
|                                      Py_UCS4 value) | ||||
| 
 | ||||
|  |  | |||
|  | @ -305,12 +305,12 @@ typedef struct { | |||
|              * character type = Py_UCS2 (16 bits, unsigned) | ||||
|              * at least one character must be in range U+0100-U+FFFF | ||||
| 
 | ||||
|            - PyUnicode_4BYTE_KIND (3): | ||||
|            - PyUnicode_4BYTE_KIND (4): | ||||
| 
 | ||||
|              * character type = Py_UCS4 (32 bits, unsigned) | ||||
|              * at least one character must be in range U+10000-U+10FFFF | ||||
|          */ | ||||
|         unsigned int kind:2; | ||||
|         unsigned int kind:3; | ||||
|         /* Compact is with respect to the allocation scheme. Compact unicode
 | ||||
|            objects only require one memory block while non-compact objects use | ||||
|            one block for the PyUnicodeObject struct and another for its data | ||||
|  | @ -424,29 +424,21 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; | |||
| #define PyUnicode_IS_COMPACT_ASCII(op)                 \ | ||||
|     (PyUnicode_IS_ASCII(op) && PyUnicode_IS_COMPACT(op)) | ||||
| 
 | ||||
| enum PyUnicode_Kind { | ||||
| /* String contains only wstr byte characters.  This is only possible
 | ||||
|    when the string was created with a legacy API and _PyUnicode_Ready() | ||||
|    has not been called yet.  */ | ||||
| #define PyUnicode_WCHAR_KIND 0 | ||||
| 
 | ||||
|     PyUnicode_WCHAR_KIND = 0, | ||||
| /* Return values of the PyUnicode_KIND() macro: */ | ||||
| 
 | ||||
| #define PyUnicode_1BYTE_KIND 1 | ||||
| #define PyUnicode_2BYTE_KIND 2 | ||||
| #define PyUnicode_4BYTE_KIND 3 | ||||
| 
 | ||||
| 
 | ||||
| /* Return the number of bytes the string uses to represent single characters,
 | ||||
|    this can be 1, 2 or 4. | ||||
| 
 | ||||
|    See also PyUnicode_KIND_SIZE(). */ | ||||
| #define PyUnicode_CHARACTER_SIZE(op) \ | ||||
|     (((Py_ssize_t)1 << (PyUnicode_KIND(op) - 1))) | ||||
|     PyUnicode_1BYTE_KIND = 1, | ||||
|     PyUnicode_2BYTE_KIND = 2, | ||||
|     PyUnicode_4BYTE_KIND = 4 | ||||
| }; | ||||
| 
 | ||||
| /* Return pointers to the canonical representation cast to unsigned char,
 | ||||
|    Py_UCS2, or Py_UCS4 for direct character access. | ||||
|    No checks are performed, use PyUnicode_CHARACTER_SIZE or | ||||
|    PyUnicode_KIND() before to ensure these will work correctly. */ | ||||
|    No checks are performed, use PyUnicode_KIND() before to ensure | ||||
|    these will work correctly. */ | ||||
| 
 | ||||
| #define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op)) | ||||
| #define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op)) | ||||
|  | @ -473,13 +465,6 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; | |||
|      PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \ | ||||
|      _PyUnicode_NONCOMPACT_DATA(op)) | ||||
| 
 | ||||
| /* Compute (index * char_size) where char_size is 2 ** (kind - 1).
 | ||||
|    The index is a character index, the result is a size in bytes. | ||||
| 
 | ||||
|    See also PyUnicode_CHARACTER_SIZE(). */ | ||||
| #define PyUnicode_KIND_SIZE(kind, index) \ | ||||
|     (((Py_ssize_t)(index)) << ((kind) - 1)) | ||||
| 
 | ||||
| /* In the access macros below, "kind" may be evaluated more than once.
 | ||||
|    All other macro parameters are evaluated exactly once, so it is safe | ||||
|    to put side effects into them (such as increasing the index). */ | ||||
|  |  | |||
|  | @ -291,9 +291,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, | |||
|         kind = PyUnicode_KIND(modified); | ||||
|         out = PyUnicode_DATA(modified); | ||||
|         PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r'); | ||||
|         memcpy(out + PyUnicode_KIND_SIZE(kind, 1), | ||||
|                PyUnicode_DATA(output), | ||||
|                PyUnicode_KIND_SIZE(kind, output_len)); | ||||
|         memcpy(out + kind, PyUnicode_DATA(output), kind * output_len); | ||||
|         Py_DECREF(output); | ||||
|         output = modified; /* output remains ready */ | ||||
|         self->pendingcr = 0; | ||||
|  | @ -336,7 +334,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, | |||
|            for the \r *byte* with the libc's optimized memchr. | ||||
|            */ | ||||
|         if (seennl == SEEN_LF || seennl == 0) { | ||||
|             only_lf = (memchr(in_str, '\r', PyUnicode_KIND_SIZE(kind, len)) == NULL); | ||||
|             only_lf = (memchr(in_str, '\r', kind * len) == NULL); | ||||
|         } | ||||
| 
 | ||||
|         if (only_lf) { | ||||
|  | @ -344,7 +342,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, | |||
|                (there's nothing else to be done, even when in translation mode) | ||||
|             */ | ||||
|             if (seennl == 0 && | ||||
|                 memchr(in_str, '\n', PyUnicode_KIND_SIZE(kind, len)) != NULL) { | ||||
|                 memchr(in_str, '\n', kind * len) != NULL) { | ||||
|                 Py_ssize_t i = 0; | ||||
|                 for (;;) { | ||||
|                     Py_UCS4 c; | ||||
|  | @ -403,7 +401,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self, | |||
|                when there is something to translate. On the other hand, | ||||
|                we already know there is a \r byte, so chances are high | ||||
|                that something needs to be done. */ | ||||
|             translated = PyMem_Malloc(PyUnicode_KIND_SIZE(kind, len)); | ||||
|             translated = PyMem_Malloc(kind * len); | ||||
|             if (translated == NULL) { | ||||
|                 PyErr_NoMemory(); | ||||
|                 goto error; | ||||
|  | @ -1576,15 +1574,14 @@ textiowrapper_read(textio *self, PyObject *args) | |||
| static char * | ||||
| find_control_char(int kind, char *s, char *end, Py_UCS4 ch) | ||||
| { | ||||
|     int size = PyUnicode_KIND_SIZE(kind, 1); | ||||
|     for (;;) { | ||||
|         while (PyUnicode_READ(kind, s, 0) > ch) | ||||
|             s += size; | ||||
|             s += kind; | ||||
|         if (PyUnicode_READ(kind, s, 0) == ch) | ||||
|             return s; | ||||
|         if (s == end) | ||||
|             return NULL; | ||||
|         s += size; | ||||
|         s += kind; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | @ -1593,14 +1590,13 @@ _PyIO_find_line_ending( | |||
|     int translated, int universal, PyObject *readnl, | ||||
|     int kind, char *start, char *end, Py_ssize_t *consumed) | ||||
| { | ||||
|     int size = PyUnicode_KIND_SIZE(kind, 1); | ||||
|     Py_ssize_t len = ((char*)end - (char*)start)/size; | ||||
|     Py_ssize_t len = ((char*)end - (char*)start)/kind; | ||||
| 
 | ||||
|     if (translated) { | ||||
|         /* Newlines are already translated, only search for \n */ | ||||
|         char *pos = find_control_char(kind, start, end, '\n'); | ||||
|         if (pos != NULL) | ||||
|             return (pos - start)/size + 1; | ||||
|             return (pos - start)/kind + 1; | ||||
|         else { | ||||
|             *consumed = len; | ||||
|             return -1; | ||||
|  | @ -1616,20 +1612,20 @@ _PyIO_find_line_ending( | |||
|             /* Fast path for non-control chars. The loop always ends
 | ||||
|                since the Unicode string is NUL-terminated. */ | ||||
|             while (PyUnicode_READ(kind, s, 0) > '\r') | ||||
|                 s += size; | ||||
|                 s += kind; | ||||
|             if (s >= end) { | ||||
|                 *consumed = len; | ||||
|                 return -1; | ||||
|             } | ||||
|             ch = PyUnicode_READ(kind, s, 0); | ||||
|             s += size; | ||||
|             s += kind; | ||||
|             if (ch == '\n') | ||||
|                 return (s - start)/size; | ||||
|                 return (s - start)/kind; | ||||
|             if (ch == '\r') { | ||||
|                 if (PyUnicode_READ(kind, s, 0) == '\n') | ||||
|                     return (s - start)/size + 1; | ||||
|                     return (s - start)/kind + 1; | ||||
|                 else | ||||
|                     return (s - start)/size; | ||||
|                     return (s - start)/kind; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | @ -1642,13 +1638,13 @@ _PyIO_find_line_ending( | |||
|         if (readnl_len == 1) { | ||||
|             char *pos = find_control_char(kind, start, end, nl[0]); | ||||
|             if (pos != NULL) | ||||
|                 return (pos - start)/size + 1; | ||||
|                 return (pos - start)/kind + 1; | ||||
|             *consumed = len; | ||||
|             return -1; | ||||
|         } | ||||
|         else { | ||||
|             char *s = start; | ||||
|             char *e = end - (readnl_len - 1)*size; | ||||
|             char *e = end - (readnl_len - 1)*kind; | ||||
|             char *pos; | ||||
|             if (e < s) | ||||
|                 e = s; | ||||
|  | @ -1662,14 +1658,14 @@ _PyIO_find_line_ending( | |||
|                         break; | ||||
|                 } | ||||
|                 if (i == readnl_len) | ||||
|                     return (pos - start)/size + readnl_len; | ||||
|                 s = pos + size; | ||||
|                     return (pos - start)/kind + readnl_len; | ||||
|                 s = pos + kind; | ||||
|             } | ||||
|             pos = find_control_char(kind, e, end, nl[0]); | ||||
|             if (pos == NULL) | ||||
|                 *consumed = len; | ||||
|             else | ||||
|                 *consumed = (pos - start)/size; | ||||
|                 *consumed = (pos - start)/kind; | ||||
|             return -1; | ||||
|         } | ||||
|     } | ||||
|  | @ -1738,8 +1734,8 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) | |||
|         endpos = _PyIO_find_line_ending( | ||||
|             self->readtranslate, self->readuniversal, self->readnl, | ||||
|             kind, | ||||
|             ptr + PyUnicode_KIND_SIZE(kind, start), | ||||
|             ptr + PyUnicode_KIND_SIZE(kind, line_len), | ||||
|             ptr + kind * start, | ||||
|             ptr + kind * line_len, | ||||
|             &consumed); | ||||
|         if (endpos >= 0) { | ||||
|             endpos += start; | ||||
|  |  | |||
|  | @ -365,7 +365,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next | |||
|             APPEND_OLD_CHUNK | ||||
|                 chunk = PyUnicode_FromKindAndData( | ||||
|                     kind, | ||||
|                     (char*)buf + PyUnicode_KIND_SIZE(kind, end), | ||||
|                     (char*)buf + kind * end, | ||||
|                     next - end); | ||||
|             if (chunk == NULL) { | ||||
|                 goto bail; | ||||
|  | @ -931,7 +931,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ | |||
|     if (custom_func) { | ||||
|         /* copy the section we determined to be a number */ | ||||
|         numstr = PyUnicode_FromKindAndData(kind, | ||||
|                                            (char*)str + PyUnicode_KIND_SIZE(kind, start), | ||||
|                                            (char*)str + kind * start, | ||||
|                                            idx - start); | ||||
|         if (numstr == NULL) | ||||
|             return NULL; | ||||
|  |  | |||
|  | @ -1669,7 +1669,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, | |||
|             return NULL; | ||||
|         ptr = PyUnicode_DATA(string); | ||||
|         *p_length = PyUnicode_GET_LENGTH(string); | ||||
|         *p_charsize = PyUnicode_CHARACTER_SIZE(string); | ||||
|         *p_charsize = PyUnicode_KIND(string); | ||||
|         *p_logical_charsize = 4; | ||||
|         return ptr; | ||||
|     } | ||||
|  |  | |||
|  | @ -30,5 +30,5 @@ unicode_eq(PyObject *aa, PyObject *bb) | |||
|         PyUnicode_GET_LENGTH(a) == 1) | ||||
|         return 1; | ||||
|     return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b), | ||||
|                   PyUnicode_GET_LENGTH(a) * PyUnicode_CHARACTER_SIZE(a)) == 0; | ||||
|                   PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0; | ||||
| } | ||||
|  |  | |||
|  | @ -470,12 +470,12 @@ Py_LOCAL_INLINE(char *) findchar(void *s, int kind, | |||
|     if (direction == 1) { | ||||
|         for(i = 0; i < size; i++) | ||||
|             if (PyUnicode_READ(kind, s, i) == ch) | ||||
|                 return (char*)s + PyUnicode_KIND_SIZE(kind, i); | ||||
|                 return (char*)s + kind * i; | ||||
|     } | ||||
|     else { | ||||
|         for(i = size-1; i >= 0; i--) | ||||
|             if (PyUnicode_READ(kind, s, i) == ch) | ||||
|                 return (char*)s + PyUnicode_KIND_SIZE(kind, i); | ||||
|                 return (char*)s + kind * i; | ||||
|     } | ||||
|     return NULL; | ||||
| } | ||||
|  | @ -489,7 +489,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) | |||
|     int share_wstr; | ||||
| 
 | ||||
|     assert(PyUnicode_IS_READY(unicode)); | ||||
|     char_size = PyUnicode_CHARACTER_SIZE(unicode); | ||||
|     char_size = PyUnicode_KIND(unicode); | ||||
|     if (PyUnicode_IS_COMPACT_ASCII(unicode)) | ||||
|         struct_size = sizeof(PyASCIIObject); | ||||
|     else | ||||
|  | @ -540,7 +540,7 @@ resize_inplace(PyUnicodeObject *unicode, Py_ssize_t length) | |||
| 
 | ||||
|         data = _PyUnicode_DATA_ANY(unicode); | ||||
|         assert(data != NULL); | ||||
|         char_size = PyUnicode_CHARACTER_SIZE(unicode); | ||||
|         char_size = PyUnicode_KIND(unicode); | ||||
|         share_wstr = _PyUnicode_SHARE_WSTR(unicode); | ||||
|         share_utf8 = _PyUnicode_SHARE_UTF8(unicode); | ||||
|         if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) | ||||
|  | @ -1005,11 +1005,9 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, | |||
|     } | ||||
| 
 | ||||
|     if (fast) { | ||||
|         Py_MEMCPY((char*)to_data | ||||
|                       + PyUnicode_KIND_SIZE(to_kind, to_start), | ||||
|                   (char*)from_data | ||||
|                       + PyUnicode_KIND_SIZE(from_kind, from_start), | ||||
|                   PyUnicode_KIND_SIZE(to_kind, how_many)); | ||||
|         Py_MEMCPY((char*)to_data + to_kind * to_start, | ||||
|                   (char*)from_data + from_kind * from_start, | ||||
|                   to_kind * how_many); | ||||
|     } | ||||
|     else if (from_kind == PyUnicode_1BYTE_KIND | ||||
|              && to_kind == PyUnicode_2BYTE_KIND) | ||||
|  | @ -8760,7 +8758,7 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, | |||
|         end = PyUnicode_GET_LENGTH(str); | ||||
|     kind = PyUnicode_KIND(str); | ||||
|     result = findchar(PyUnicode_1BYTE_DATA(str) | ||||
|                       + PyUnicode_KIND_SIZE(kind, start), | ||||
|                       + kind*start, | ||||
|                       kind, | ||||
|                       end-start, ch, direction); | ||||
|     if (!result) | ||||
|  | @ -8813,10 +8811,10 @@ tailmatch(PyUnicodeObject *self, | |||
|         /* If both are of the same kind, memcmp is sufficient */ | ||||
|         if (kind_self == kind_sub) { | ||||
|             return ! memcmp((char *)data_self + | ||||
|                                 (offset * PyUnicode_CHARACTER_SIZE(substring)), | ||||
|                                 (offset * PyUnicode_KIND(substring)), | ||||
|                             data_sub, | ||||
|                             PyUnicode_GET_LENGTH(substring) * | ||||
|                                 PyUnicode_CHARACTER_SIZE(substring)); | ||||
|                                 PyUnicode_KIND(substring)); | ||||
|         } | ||||
|         /* otherwise we have to compare each character by first accessing it */ | ||||
|         else { | ||||
|  | @ -8881,7 +8879,7 @@ fixup(PyObject *self, | |||
|         return NULL; | ||||
| 
 | ||||
|     Py_MEMCPY(PyUnicode_1BYTE_DATA(u), PyUnicode_1BYTE_DATA(self), | ||||
|               PyUnicode_GET_LENGTH(u) * PyUnicode_CHARACTER_SIZE(u)); | ||||
|               PyUnicode_GET_LENGTH(u) * PyUnicode_KIND(u)); | ||||
| 
 | ||||
|     /* fix functions return the new maximum character in a string,
 | ||||
|        if the kind of the resulting unicode object does not change, | ||||
|  | @ -9262,8 +9260,8 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) | |||
|             if (use_memcpy) { | ||||
|                 Py_MEMCPY(res_data, | ||||
|                           sep_data, | ||||
|                           PyUnicode_KIND_SIZE(kind, seplen)); | ||||
|                 res_data += PyUnicode_KIND_SIZE(kind, seplen); | ||||
|                           kind * seplen); | ||||
|                 res_data += kind * seplen; | ||||
|             } | ||||
|             else { | ||||
|                 copy_characters(res, res_offset, sep, 0, seplen); | ||||
|  | @ -9275,8 +9273,8 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) | |||
|             if (use_memcpy) { | ||||
|                 Py_MEMCPY(res_data, | ||||
|                           PyUnicode_DATA(item), | ||||
|                           PyUnicode_KIND_SIZE(kind, itemlen)); | ||||
|                 res_data += PyUnicode_KIND_SIZE(kind, itemlen); | ||||
|                           kind * itemlen); | ||||
|                 res_data += kind * itemlen; | ||||
|             } | ||||
|             else { | ||||
|                 copy_characters(res, res_offset, item, 0, itemlen); | ||||
|  | @ -9286,7 +9284,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) | |||
|     } | ||||
|     if (use_memcpy) | ||||
|         assert(res_data == PyUnicode_1BYTE_DATA(res) | ||||
|                            + PyUnicode_KIND_SIZE(kind, PyUnicode_GET_LENGTH(res))); | ||||
|                            + kind * PyUnicode_GET_LENGTH(res)); | ||||
|     else | ||||
|         assert(res_offset == PyUnicode_GET_LENGTH(res)); | ||||
| 
 | ||||
|  | @ -9735,22 +9733,22 @@ replace(PyObject *self, PyObject *str1, | |||
|                 goto error; | ||||
|             res = PyUnicode_DATA(rstr); | ||||
| 
 | ||||
|             memcpy(res, sbuf, PyUnicode_KIND_SIZE(rkind, slen)); | ||||
|             memcpy(res, sbuf, rkind * slen); | ||||
|             /* change everything in-place, starting with this one */ | ||||
|             memcpy(res + PyUnicode_KIND_SIZE(rkind, i), | ||||
|             memcpy(res + rkind * i, | ||||
|                    buf2, | ||||
|                    PyUnicode_KIND_SIZE(rkind, len2)); | ||||
|                    rkind * len2); | ||||
|             i += len1; | ||||
| 
 | ||||
|             while ( --maxcount > 0) { | ||||
|                 i = anylib_find(rkind, self, | ||||
|                                 sbuf+PyUnicode_KIND_SIZE(rkind, i), slen-i, | ||||
|                                 sbuf+rkind*i, slen-i, | ||||
|                                 str1, buf1, len1, i); | ||||
|                 if (i == -1) | ||||
|                     break; | ||||
|                 memcpy(res + PyUnicode_KIND_SIZE(rkind, i), | ||||
|                 memcpy(res + rkind * i, | ||||
|                        buf2, | ||||
|                        PyUnicode_KIND_SIZE(rkind, len2)); | ||||
|                        rkind * len2); | ||||
|                 i += len1; | ||||
|             } | ||||
| 
 | ||||
|  | @ -9816,49 +9814,49 @@ replace(PyObject *self, PyObject *str1, | |||
|             while (n-- > 0) { | ||||
|                 /* look for next match */ | ||||
|                 j = anylib_find(rkind, self, | ||||
|                                 sbuf + PyUnicode_KIND_SIZE(rkind, i), slen-i, | ||||
|                                 sbuf + rkind * i, slen-i, | ||||
|                                 str1, buf1, len1, i); | ||||
|                 if (j == -1) | ||||
|                     break; | ||||
|                 else if (j > i) { | ||||
|                     /* copy unchanged part [i:j] */ | ||||
|                     memcpy(res + PyUnicode_KIND_SIZE(rkind, ires), | ||||
|                            sbuf + PyUnicode_KIND_SIZE(rkind, i), | ||||
|                            PyUnicode_KIND_SIZE(rkind, j-i)); | ||||
|                     memcpy(res + rkind * ires, | ||||
|                            sbuf + rkind * i, | ||||
|                            rkind * (j-i)); | ||||
|                     ires += j - i; | ||||
|                 } | ||||
|                 /* copy substitution string */ | ||||
|                 if (len2 > 0) { | ||||
|                     memcpy(res + PyUnicode_KIND_SIZE(rkind, ires), | ||||
|                     memcpy(res + rkind * ires, | ||||
|                            buf2, | ||||
|                            PyUnicode_KIND_SIZE(rkind, len2)); | ||||
|                            rkind * len2); | ||||
|                     ires += len2; | ||||
|                 } | ||||
|                 i = j + len1; | ||||
|             } | ||||
|             if (i < slen) | ||||
|                 /* copy tail [i:] */ | ||||
|                 memcpy(res + PyUnicode_KIND_SIZE(rkind, ires), | ||||
|                        sbuf + PyUnicode_KIND_SIZE(rkind, i), | ||||
|                        PyUnicode_KIND_SIZE(rkind, slen-i)); | ||||
|                 memcpy(res + rkind * ires, | ||||
|                        sbuf + rkind * i, | ||||
|                        rkind * (slen-i)); | ||||
|         } else { | ||||
|             /* interleave */ | ||||
|             while (n > 0) { | ||||
|                 memcpy(res + PyUnicode_KIND_SIZE(rkind, ires), | ||||
|                 memcpy(res + rkind * ires, | ||||
|                        buf2, | ||||
|                        PyUnicode_KIND_SIZE(rkind, len2)); | ||||
|                        rkind * len2); | ||||
|                 ires += len2; | ||||
|                 if (--n <= 0) | ||||
|                     break; | ||||
|                 memcpy(res + PyUnicode_KIND_SIZE(rkind, ires), | ||||
|                        sbuf + PyUnicode_KIND_SIZE(rkind, i), | ||||
|                        PyUnicode_KIND_SIZE(rkind, 1)); | ||||
|                 memcpy(res + rkind * ires, | ||||
|                        sbuf + rkind * i, | ||||
|                        rkind); | ||||
|                 ires++; | ||||
|                 i++; | ||||
|             } | ||||
|             memcpy(res + PyUnicode_KIND_SIZE(rkind, ires), | ||||
|                    sbuf + PyUnicode_KIND_SIZE(rkind, i), | ||||
|                    PyUnicode_KIND_SIZE(rkind, slen-i)); | ||||
|             memcpy(res + rkind * ires, | ||||
|                    sbuf + rkind * i, | ||||
|                    rkind * (slen-i)); | ||||
|         } | ||||
|         u = rstr; | ||||
|         unicode_adjust_maxchar(&u); | ||||
|  | @ -11341,7 +11339,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) | |||
|         kind = PyUnicode_KIND(self); | ||||
|         data = PyUnicode_1BYTE_DATA(self); | ||||
|         return PyUnicode_FromKindAndData(kind, | ||||
|                                          data + PyUnicode_KIND_SIZE(kind, start), | ||||
|                                          data + kind * start, | ||||
|                                          length); | ||||
|     } | ||||
| } | ||||
|  | @ -11497,7 +11495,7 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len) | |||
|     else { | ||||
|         /* number of characters copied so far */ | ||||
|         Py_ssize_t done = PyUnicode_GET_LENGTH(str); | ||||
|         const Py_ssize_t char_size = PyUnicode_CHARACTER_SIZE(str); | ||||
|         const Py_ssize_t char_size = PyUnicode_KIND(str); | ||||
|         char *to = (char *) PyUnicode_DATA(u); | ||||
|         Py_MEMCPY(to, PyUnicode_DATA(str), | ||||
|                   PyUnicode_GET_LENGTH(str) * char_size); | ||||
|  | @ -12488,14 +12486,14 @@ unicode__sizeof__(PyUnicodeObject *v) | |||
|         size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(v) + 1; | ||||
|     else if (PyUnicode_IS_COMPACT(v)) | ||||
|         size = sizeof(PyCompactUnicodeObject) + | ||||
|             (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_CHARACTER_SIZE(v); | ||||
|             (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_KIND(v); | ||||
|     else { | ||||
|         /* If it is a two-block object, account for base object, and
 | ||||
|            for character block if present. */ | ||||
|         size = sizeof(PyUnicodeObject); | ||||
|         if (_PyUnicode_DATA_ANY(v)) | ||||
|             size += (PyUnicode_GET_LENGTH(v) + 1) * | ||||
|                 PyUnicode_CHARACTER_SIZE(v); | ||||
|                 PyUnicode_KIND(v); | ||||
|     } | ||||
|     /* If the wstr pointer is present, account for it unless it is shared
 | ||||
|        with the data pointer. Check if the data is not shared. */ | ||||
|  | @ -13246,7 +13244,7 @@ PyUnicode_Format(PyObject *format, PyObject *args) | |||
|             else { | ||||
|                 const char *p = (const char *) pbuf; | ||||
|                 assert(pbuf != NULL); | ||||
|                 p = p + PyUnicode_KIND_SIZE(kind, pindex); | ||||
|                 p += kind * pindex; | ||||
|                 v = PyUnicode_FromKindAndData(kind, p, len); | ||||
|             } | ||||
|             if (v == NULL) | ||||
|  | @ -13399,7 +13397,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |||
|     } | ||||
| 
 | ||||
|     Py_MEMCPY(data, PyUnicode_DATA(unicode), | ||||
|               PyUnicode_KIND_SIZE(kind, length + 1)); | ||||
|               kind * (length + 1)); | ||||
|     Py_DECREF(unicode); | ||||
|     assert(_PyUnicode_CheckConsistency(self, 1)); | ||||
| #ifdef Py_DEBUG | ||||
|  |  | |||
|  | @ -604,9 +604,9 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, | |||
| #endif | ||||
|             _PyUnicode_InsertThousandsGrouping( | ||||
|                 out, kind, | ||||
|                 (char*)data + PyUnicode_KIND_SIZE(kind, pos), | ||||
|                 (char*)data + kind * pos, | ||||
|                 spec->n_grouped_digits, | ||||
|                 pdigits + PyUnicode_KIND_SIZE(kind, d_pos), | ||||
|                 pdigits + kind * d_pos, | ||||
|                 spec->n_digits, spec->n_min_width, | ||||
|                 locale->grouping, locale->thousands_sep); | ||||
| #ifndef NDEBUG | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Martin v. Löwis
						Martin v. Löwis