mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	Fix str.translate()
Issue #26464: Fix str.translate() when the string is ASCII and the first replacement removes a character, but the next replacement uses a non-ASCII character or a string longer than 1 character. Regression introduced in Python 3.5.0.
This commit is contained in:
		
							parent
							
								
									bb0dbd583b
								
							
						
					
					
						commit
						6c9aa8f2bf
					
				
					 3 changed files with 12 additions and 3 deletions
				
			
		| 
						 | 
					@ -347,6 +347,10 @@ def test_maketrans_translate(self):
 | 
				
			||||||
                         "[a]")
 | 
					                         "[a]")
 | 
				
			||||||
        self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})),
 | 
					        self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})),
 | 
				
			||||||
                         "[]")
 | 
					                         "[]")
 | 
				
			||||||
 | 
					        self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '123'})),
 | 
				
			||||||
 | 
					                         "x123")
 | 
				
			||||||
 | 
					        self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '\xe9'})),
 | 
				
			||||||
 | 
					                         "x\xe9")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # invalid Unicode characters
 | 
					        # invalid Unicode characters
 | 
				
			||||||
        invalid_char = 0x10ffff+1
 | 
					        invalid_char = 0x10ffff+1
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,6 +10,10 @@ Release date: tba
 | 
				
			||||||
Core and Builtins
 | 
					Core and Builtins
 | 
				
			||||||
-----------------
 | 
					-----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Issue #26464: Fix str.translate() when the string is ASCII and the first replacement
 | 
				
			||||||
 | 
					  removes a character, but the next replacement uses a non-ASCII character or a
 | 
				
			||||||
 | 
					  string longer than 1 character. Regression introduced in Python 3.5.0.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Issue #22836: Ensure exception reports from PyErr_Display() and
 | 
					- Issue #22836: Ensure exception reports from PyErr_Display() and
 | 
				
			||||||
  PyErr_WriteUnraisable() are sensible even when formatting them produces
 | 
					  PyErr_WriteUnraisable() are sensible even when formatting them produces
 | 
				
			||||||
  secondary errors.  This affects the reports produced by
 | 
					  secondary errors.  This affects the reports produced by
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8574,7 +8574,8 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
 | 
				
			||||||
   translated into writer, raise an exception and return -1 on error. */
 | 
					   translated into writer, raise an exception and return -1 on error. */
 | 
				
			||||||
static int
 | 
					static int
 | 
				
			||||||
unicode_fast_translate(PyObject *input, PyObject *mapping,
 | 
					unicode_fast_translate(PyObject *input, PyObject *mapping,
 | 
				
			||||||
                       _PyUnicodeWriter *writer, int ignore)
 | 
					                       _PyUnicodeWriter *writer, int ignore,
 | 
				
			||||||
 | 
					                       Py_ssize_t *input_pos)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    Py_UCS1 ascii_table[128], ch, ch2;
 | 
					    Py_UCS1 ascii_table[128], ch, ch2;
 | 
				
			||||||
    Py_ssize_t len;
 | 
					    Py_ssize_t len;
 | 
				
			||||||
| 
						 | 
					@ -8621,6 +8622,7 @@ unicode_fast_translate(PyObject *input, PyObject *mapping,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
exit:
 | 
					exit:
 | 
				
			||||||
    writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer);
 | 
					    writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer);
 | 
				
			||||||
 | 
					    *input_pos = in - PyUnicode_1BYTE_DATA(input);
 | 
				
			||||||
    return res;
 | 
					    return res;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8666,7 +8668,7 @@ _PyUnicode_TranslateCharmap(PyObject *input,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
 | 
					    ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    res = unicode_fast_translate(input, mapping, &writer, ignore);
 | 
					    res = unicode_fast_translate(input, mapping, &writer, ignore, &i);
 | 
				
			||||||
    if (res < 0) {
 | 
					    if (res < 0) {
 | 
				
			||||||
        _PyUnicodeWriter_Dealloc(&writer);
 | 
					        _PyUnicodeWriter_Dealloc(&writer);
 | 
				
			||||||
        return NULL;
 | 
					        return NULL;
 | 
				
			||||||
| 
						 | 
					@ -8674,7 +8676,6 @@ _PyUnicode_TranslateCharmap(PyObject *input,
 | 
				
			||||||
    if (res == 1)
 | 
					    if (res == 1)
 | 
				
			||||||
        return _PyUnicodeWriter_Finish(&writer);
 | 
					        return _PyUnicodeWriter_Finish(&writer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    i = writer.pos;
 | 
					 | 
				
			||||||
    while (i<size) {
 | 
					    while (i<size) {
 | 
				
			||||||
        /* try to encode it */
 | 
					        /* try to encode it */
 | 
				
			||||||
        int translate;
 | 
					        int translate;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue