mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	Close #14648: Compute correctly maxchar in str.format() for substrings
This commit is contained in:
		
							parent
							
								
									0b7d7c9544
								
							
						
					
					
						commit
						ece58deb9f
					
				
					 4 changed files with 50 additions and 6 deletions
				
			
		|  | @ -710,6 +710,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_Substring( | ||||||
|     Py_ssize_t start, |     Py_ssize_t start, | ||||||
|     Py_ssize_t end); |     Py_ssize_t end); | ||||||
| 
 | 
 | ||||||
|  | #ifndef Py_LIMITED_API | ||||||
|  | /* Compute the maximum character of the substring unicode[start:end].
 | ||||||
|  |    Return 127 for an empty string. */ | ||||||
|  | PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( | ||||||
|  |     PyObject *unicode, | ||||||
|  |     Py_ssize_t start, | ||||||
|  |     Py_ssize_t end); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| /* Copy the string into a UCS4 buffer including the null character if copy_null
 | /* Copy the string into a UCS4 buffer including the null character if copy_null
 | ||||||
|    is set. Return NULL and raise an exception on error. Raise a ValueError if |    is set. Return NULL and raise an exception on error. Raise a ValueError if | ||||||
|    the buffer is smaller than the string. Return buffer on success. |    the buffer is smaller than the string. Return buffer on success. | ||||||
|  |  | ||||||
|  | @ -924,6 +924,14 @@ def __format__(self, format_spec): | ||||||
|         self.assertRaises(ValueError, format, '', '#') |         self.assertRaises(ValueError, format, '', '#') | ||||||
|         self.assertRaises(ValueError, format, '', '#20') |         self.assertRaises(ValueError, format, '', '#20') | ||||||
| 
 | 
 | ||||||
|  |         # Non-ASCII | ||||||
|  |         self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"), | ||||||
|  |                          'ABC\u0410\u0411\u0412') | ||||||
|  |         self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"), | ||||||
|  |                          'ABC') | ||||||
|  |         self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"), | ||||||
|  |                          '') | ||||||
|  | 
 | ||||||
|     def test_format_map(self): |     def test_format_map(self): | ||||||
|         self.assertEqual(''.format_map({}), '') |         self.assertEqual(''.format_map({}), '') | ||||||
|         self.assertEqual('a'.format_map({}), 'a') |         self.assertEqual('a'.format_map({}), 'a') | ||||||
|  | @ -1056,8 +1064,6 @@ def __str__(self): | ||||||
|         self.assertEqual('%f' % INF, 'inf') |         self.assertEqual('%f' % INF, 'inf') | ||||||
|         self.assertEqual('%F' % INF, 'INF') |         self.assertEqual('%F' % INF, 'INF') | ||||||
| 
 | 
 | ||||||
|         self.assertEqual(format("\u0410\u0411\u0412", "s"), "АБВ") |  | ||||||
| 
 |  | ||||||
|     def test_startswith_endswith_errors(self): |     def test_startswith_endswith_errors(self): | ||||||
|         for meth in ('foo'.startswith, 'foo'.endswith): |         for meth in ('foo'.startswith, 'foo'.endswith): | ||||||
|             with self.assertRaises(TypeError) as cm: |             with self.assertRaises(TypeError) as cm: | ||||||
|  |  | ||||||
|  | @ -1957,6 +1957,37 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | Py_UCS4 | ||||||
|  | _PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end) | ||||||
|  | { | ||||||
|  |     enum PyUnicode_Kind kind; | ||||||
|  |     void *startptr, *endptr; | ||||||
|  | 
 | ||||||
|  |     assert(PyUnicode_IS_READY(unicode)); | ||||||
|  |     assert(0 <= start); | ||||||
|  |     assert(end <= PyUnicode_GET_LENGTH(unicode)); | ||||||
|  |     assert(start <= end); | ||||||
|  | 
 | ||||||
|  |     if (start == 0 && end == PyUnicode_GET_LENGTH(unicode)) | ||||||
|  |         return PyUnicode_MAX_CHAR_VALUE(unicode); | ||||||
|  | 
 | ||||||
|  |     if (start == end) | ||||||
|  |         return 127; | ||||||
|  | 
 | ||||||
|  |     kind = PyUnicode_KIND(unicode); | ||||||
|  |     startptr = PyUnicode_DATA(unicode); | ||||||
|  |     endptr = (char*)startptr + end * kind; | ||||||
|  |     if (start) | ||||||
|  |         startptr = (char*)startptr + start * kind; | ||||||
|  |     switch(kind) | ||||||
|  |     { | ||||||
|  |     case PyUnicode_1BYTE_KIND: return ucs1lib_find_max_char(startptr, endptr); | ||||||
|  |     case PyUnicode_2BYTE_KIND: return ucs2lib_find_max_char(startptr, endptr); | ||||||
|  |     default: | ||||||
|  |     case PyUnicode_4BYTE_KIND: return ucs4lib_find_max_char(startptr, endptr); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* Ensure that a string uses the most efficient storage, if it is not the
 | /* Ensure that a string uses the most efficient storage, if it is not the
 | ||||||
|    case: create a new string of the right kind. Write NULL into *p_unicode |    case: create a new string of the right kind. Write NULL into *p_unicode | ||||||
|    on error. */ |    on error. */ | ||||||
|  |  | ||||||
|  | @ -716,7 +716,7 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format) | ||||||
|     Py_ssize_t pos; |     Py_ssize_t pos; | ||||||
|     Py_ssize_t len = PyUnicode_GET_LENGTH(value); |     Py_ssize_t len = PyUnicode_GET_LENGTH(value); | ||||||
|     PyObject *result = NULL; |     PyObject *result = NULL; | ||||||
|     Py_UCS4 maxchar = 127; |     Py_UCS4 maxchar; | ||||||
| 
 | 
 | ||||||
|     /* sign is not allowed on strings */ |     /* sign is not allowed on strings */ | ||||||
|     if (format->sign != '\0') { |     if (format->sign != '\0') { | ||||||
|  | @ -747,11 +747,9 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format) | ||||||
|         len = format->precision; |         len = format->precision; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (len) |  | ||||||
|         maxchar = PyUnicode_MAX_CHAR_VALUE(value); |  | ||||||
| 
 |  | ||||||
|     calc_padding(len, format->width, format->align, &lpad, &rpad, &total); |     calc_padding(len, format->width, format->align, &lpad, &rpad, &total); | ||||||
| 
 | 
 | ||||||
|  |     maxchar = _PyUnicode_FindMaxChar(value, 0, len); | ||||||
|     if (lpad != 0 || rpad != 0) |     if (lpad != 0 || rpad != 0) | ||||||
|         maxchar = Py_MAX(maxchar, format->fill_char); |         maxchar = Py_MAX(maxchar, format->fill_char); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Victor Stinner
						Victor Stinner