mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Unicode replace() avoids calling unicode_adjust_maxchar() when it's useless
Add also a special case if the result is an empty string.
This commit is contained in:
		
							parent
							
								
									69f55cc033
								
							
						
					
					
						commit
						49a0a21f37
					
				
					 1 changed files with 38 additions and 40 deletions
				
			
		|  | @ -9686,6 +9686,8 @@ replace(PyObject *self, PyObject *str1, | |||
|     Py_ssize_t slen = PyUnicode_GET_LENGTH(self); | ||||
|     Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1); | ||||
|     Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2); | ||||
|     int mayshrink; | ||||
|     Py_UCS4 maxchar, maxchar_str2; | ||||
| 
 | ||||
|     if (maxcount < 0) | ||||
|         maxcount = PY_SSIZE_T_MAX; | ||||
|  | @ -9698,6 +9700,13 @@ replace(PyObject *self, PyObject *str1, | |||
|         /* substring too wide to be present */ | ||||
|         goto nothing; | ||||
| 
 | ||||
|     maxchar = PyUnicode_MAX_CHAR_VALUE(self); | ||||
|     maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2); | ||||
|     /* Replacing str1 with str2 may cause a maxchar reduction in the
 | ||||
|        result string. */ | ||||
|     mayshrink = (maxchar_str2 < maxchar); | ||||
|     maxchar = Py_MAX(maxchar, maxchar_str2); | ||||
| 
 | ||||
|     if (len1 == len2) { | ||||
|         Py_ssize_t i; | ||||
|         /* same length */ | ||||
|  | @ -9705,22 +9714,13 @@ replace(PyObject *self, PyObject *str1, | |||
|             goto nothing; | ||||
|         if (len1 == 1) { | ||||
|             /* replace characters */ | ||||
|             Py_UCS4 u1, u2, maxchar; | ||||
|             int mayshrink, rkind; | ||||
|             Py_UCS4 u1, u2; | ||||
|             int rkind; | ||||
|             u1 = PyUnicode_READ_CHAR(str1, 0); | ||||
|             if (!findchar(sbuf, PyUnicode_KIND(self), | ||||
|                           slen, u1, 1)) | ||||
|                 goto nothing; | ||||
|             u2 = PyUnicode_READ_CHAR(str2, 0); | ||||
|             maxchar = PyUnicode_MAX_CHAR_VALUE(self); | ||||
|             /* Replacing u1 with u2 may cause a maxchar reduction in the
 | ||||
|                result string. */ | ||||
|             if (u2 > maxchar) { | ||||
|                 maxchar = u2; | ||||
|                 mayshrink = 0; | ||||
|             } | ||||
|             else | ||||
|                 mayshrink = maxchar > 127; | ||||
|             u = PyUnicode_New(slen, maxchar); | ||||
|             if (!u) | ||||
|                 goto error; | ||||
|  | @ -9732,16 +9732,10 @@ replace(PyObject *self, PyObject *str1, | |||
|                         break; | ||||
|                     PyUnicode_WRITE(rkind, PyUnicode_DATA(u), i, u2); | ||||
|                 } | ||||
|             if (mayshrink) { | ||||
|                 unicode_adjust_maxchar(&u); | ||||
|                 if (u == NULL) | ||||
|                     goto error; | ||||
|             } | ||||
|         } else { | ||||
|         } | ||||
|         else { | ||||
|             int rkind = skind; | ||||
|             char *res; | ||||
|             PyObject *rstr; | ||||
|             Py_UCS4 maxchar; | ||||
| 
 | ||||
|             if (kind1 < rkind) { | ||||
|                 /* widen substring */ | ||||
|  | @ -9769,12 +9763,11 @@ replace(PyObject *self, PyObject *str1, | |||
|                 if (!buf1) goto error; | ||||
|                 release1 = 1; | ||||
|             } | ||||
|             maxchar = PyUnicode_MAX_CHAR_VALUE(self); | ||||
|             maxchar = Py_MAX(maxchar, PyUnicode_MAX_CHAR_VALUE(str2)); | ||||
|             rstr = PyUnicode_New(slen, maxchar); | ||||
|             if (!rstr) | ||||
|             u = PyUnicode_New(slen, maxchar); | ||||
|             if (!u) | ||||
|                 goto error; | ||||
|             res = PyUnicode_DATA(rstr); | ||||
|             assert(PyUnicode_KIND(u) == rkind); | ||||
|             res = PyUnicode_DATA(u); | ||||
| 
 | ||||
|             memcpy(res, sbuf, rkind * slen); | ||||
|             /* change everything in-place, starting with this one */ | ||||
|  | @ -9794,22 +9787,16 @@ replace(PyObject *self, PyObject *str1, | |||
|                        rkind * len2); | ||||
|                 i += len1; | ||||
|             } | ||||
| 
 | ||||
|             u = rstr; | ||||
|             unicode_adjust_maxchar(&u); | ||||
|             if (!u) | ||||
|                 goto error; | ||||
|         } | ||||
|     } else { | ||||
| 
 | ||||
|     } | ||||
|     else { | ||||
|         Py_ssize_t n, i, j, ires; | ||||
|         Py_ssize_t product, new_size; | ||||
|         int rkind = skind; | ||||
|         PyObject *rstr; | ||||
|         char *res; | ||||
|         Py_UCS4 maxchar; | ||||
| 
 | ||||
|         if (kind1 < rkind) { | ||||
|             /* widen substring */ | ||||
|             buf1 = _PyUnicode_AsKind(str1, rkind); | ||||
|             if (!buf1) goto error; | ||||
|             release1 = 1; | ||||
|  | @ -9818,11 +9805,13 @@ replace(PyObject *self, PyObject *str1, | |||
|         if (n == 0) | ||||
|             goto nothing; | ||||
|         if (kind2 < rkind) { | ||||
|             /* widen replacement */ | ||||
|             buf2 = _PyUnicode_AsKind(str2, rkind); | ||||
|             if (!buf2) goto error; | ||||
|             release2 = 1; | ||||
|         } | ||||
|         else if (kind2 > rkind) { | ||||
|             /* widen self and buf1 */ | ||||
|             rkind = kind2; | ||||
|             sbuf = _PyUnicode_AsKind(self, rkind); | ||||
|             if (!sbuf) goto error; | ||||
|  | @ -9841,17 +9830,21 @@ replace(PyObject *self, PyObject *str1, | |||
|                 goto error; | ||||
|         } | ||||
|         new_size = slen + product; | ||||
|         if (new_size == 0) { | ||||
|             Py_INCREF(unicode_empty); | ||||
|             u = unicode_empty; | ||||
|             goto done; | ||||
|         } | ||||
|         if (new_size < 0 || new_size > (PY_SSIZE_T_MAX >> (rkind-1))) { | ||||
|             PyErr_SetString(PyExc_OverflowError, | ||||
|                             "replace string is too long"); | ||||
|             goto error; | ||||
|         } | ||||
|         maxchar = PyUnicode_MAX_CHAR_VALUE(self); | ||||
|         maxchar = Py_MAX(maxchar, PyUnicode_MAX_CHAR_VALUE(str2)); | ||||
|         rstr = PyUnicode_New(new_size, maxchar); | ||||
|         if (!rstr) | ||||
|         u = PyUnicode_New(new_size, maxchar); | ||||
|         if (!u) | ||||
|             goto error; | ||||
|         res = PyUnicode_DATA(rstr); | ||||
|         assert(PyUnicode_KIND(u) == rkind); | ||||
|         res = PyUnicode_DATA(u); | ||||
|         ires = i = 0; | ||||
|         if (len1 > 0) { | ||||
|             while (n-- > 0) { | ||||
|  | @ -9882,7 +9875,8 @@ replace(PyObject *self, PyObject *str1, | |||
|                 memcpy(res + rkind * ires, | ||||
|                        sbuf + rkind * i, | ||||
|                        rkind * (slen-i)); | ||||
|         } else { | ||||
|         } | ||||
|         else { | ||||
|             /* interleave */ | ||||
|             while (n > 0) { | ||||
|                 memcpy(res + rkind * ires, | ||||
|  | @ -9901,11 +9895,15 @@ replace(PyObject *self, PyObject *str1, | |||
|                    sbuf + rkind * i, | ||||
|                    rkind * (slen-i)); | ||||
|         } | ||||
|         u = rstr; | ||||
|     } | ||||
| 
 | ||||
|     if (mayshrink) { | ||||
|         unicode_adjust_maxchar(&u); | ||||
|         if (u == NULL) | ||||
|             goto error; | ||||
|     } | ||||
| 
 | ||||
|   done: | ||||
|     if (srelease) | ||||
|         PyMem_FREE(sbuf); | ||||
|     if (release1) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Victor Stinner
						Victor Stinner