mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	Close #14648: Compute correctly maxchar in str.format() for substrings
This commit is contained in:
		
							parent
							
								
									0b7d7c9544
								
							
						
					
					
						commit
						ece58deb9f
					
				
					 4 changed files with 50 additions and 6 deletions
				
			
		|  | @ -710,6 +710,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_Substring( | |||
|     Py_ssize_t start, | ||||
|     Py_ssize_t end); | ||||
| 
 | ||||
| #ifndef Py_LIMITED_API | ||||
| /* Compute the maximum character of the substring unicode[start:end].
 | ||||
|    Return 127 for an empty string. */ | ||||
| PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( | ||||
|     PyObject *unicode, | ||||
|     Py_ssize_t start, | ||||
|     Py_ssize_t end); | ||||
| #endif | ||||
| 
 | ||||
| /* Copy the string into a UCS4 buffer including the null character if copy_null
 | ||||
|    is set. Return NULL and raise an exception on error. Raise a ValueError if | ||||
|    the buffer is smaller than the string. Return buffer on success. | ||||
|  |  | |||
|  | @ -924,6 +924,14 @@ def __format__(self, format_spec): | |||
|         self.assertRaises(ValueError, format, '', '#') | ||||
|         self.assertRaises(ValueError, format, '', '#20') | ||||
| 
 | ||||
|         # Non-ASCII | ||||
|         self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"), | ||||
|                          'ABC\u0410\u0411\u0412') | ||||
|         self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"), | ||||
|                          'ABC') | ||||
|         self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"), | ||||
|                          '') | ||||
| 
 | ||||
|     def test_format_map(self): | ||||
|         self.assertEqual(''.format_map({}), '') | ||||
|         self.assertEqual('a'.format_map({}), 'a') | ||||
|  | @ -1056,8 +1064,6 @@ def __str__(self): | |||
|         self.assertEqual('%f' % INF, 'inf') | ||||
|         self.assertEqual('%F' % INF, 'INF') | ||||
| 
 | ||||
|         self.assertEqual(format("\u0410\u0411\u0412", "s"), "АБВ") | ||||
| 
 | ||||
|     def test_startswith_endswith_errors(self): | ||||
|         for meth in ('foo'.startswith, 'foo'.endswith): | ||||
|             with self.assertRaises(TypeError) as cm: | ||||
|  |  | |||
|  | @ -1957,6 +1957,37 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) | |||
|     } | ||||
| } | ||||
| 
 | ||||
| Py_UCS4 | ||||
| _PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end) | ||||
| { | ||||
|     enum PyUnicode_Kind kind; | ||||
|     void *startptr, *endptr; | ||||
| 
 | ||||
|     assert(PyUnicode_IS_READY(unicode)); | ||||
|     assert(0 <= start); | ||||
|     assert(end <= PyUnicode_GET_LENGTH(unicode)); | ||||
|     assert(start <= end); | ||||
| 
 | ||||
|     if (start == 0 && end == PyUnicode_GET_LENGTH(unicode)) | ||||
|         return PyUnicode_MAX_CHAR_VALUE(unicode); | ||||
| 
 | ||||
|     if (start == end) | ||||
|         return 127; | ||||
| 
 | ||||
|     kind = PyUnicode_KIND(unicode); | ||||
|     startptr = PyUnicode_DATA(unicode); | ||||
|     endptr = (char*)startptr + end * kind; | ||||
|     if (start) | ||||
|         startptr = (char*)startptr + start * kind; | ||||
|     switch(kind) | ||||
|     { | ||||
|     case PyUnicode_1BYTE_KIND: return ucs1lib_find_max_char(startptr, endptr); | ||||
|     case PyUnicode_2BYTE_KIND: return ucs2lib_find_max_char(startptr, endptr); | ||||
|     default: | ||||
|     case PyUnicode_4BYTE_KIND: return ucs4lib_find_max_char(startptr, endptr); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /* Ensure that a string uses the most efficient storage, if it is not the
 | ||||
|    case: create a new string of the right kind. Write NULL into *p_unicode | ||||
|    on error. */ | ||||
|  |  | |||
|  | @ -716,7 +716,7 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format) | |||
|     Py_ssize_t pos; | ||||
|     Py_ssize_t len = PyUnicode_GET_LENGTH(value); | ||||
|     PyObject *result = NULL; | ||||
|     Py_UCS4 maxchar = 127; | ||||
|     Py_UCS4 maxchar; | ||||
| 
 | ||||
|     /* sign is not allowed on strings */ | ||||
|     if (format->sign != '\0') { | ||||
|  | @ -747,11 +747,9 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format) | |||
|         len = format->precision; | ||||
|     } | ||||
| 
 | ||||
|     if (len) | ||||
|         maxchar = PyUnicode_MAX_CHAR_VALUE(value); | ||||
| 
 | ||||
|     calc_padding(len, format->width, format->align, &lpad, &rpad, &total); | ||||
| 
 | ||||
|     maxchar = _PyUnicode_FindMaxChar(value, 0, len); | ||||
|     if (lpad != 0 || rpad != 0) | ||||
|         maxchar = Py_MAX(maxchar, format->fill_char); | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Victor Stinner
						Victor Stinner