mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	Fixed #449964: sre.sub raised an exception if the template contained a \g<x> group reference followed by a character escape. (Also restructured a few things on the way to fixing #449000.)
This commit is contained in:
		
							parent
							
								
									ab3b0343b8
								
							
						
					
					
						commit
						59b68656f8
					
				
					 4 changed files with 30 additions and 21 deletions
				
			
		| 
						 | 
					@ -251,9 +251,11 @@ def _subn(pattern, template, text, count=0, sub=0):
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        template = _compile_repl(template, pattern)
 | 
					        template = _compile_repl(template, pattern)
 | 
				
			||||||
        literals = template[1]
 | 
					        literals = template[1]
 | 
				
			||||||
        sub = 0 # temporarily disabled, see bug #449000
 | 
					        if sub and not count:
 | 
				
			||||||
        if (sub and not count and pattern._isliteral() and
 | 
					            literal = pattern._getliteral()
 | 
				
			||||||
            len(literals) == 1 and literals[0]):
 | 
					            if literal and "\\" in literal:
 | 
				
			||||||
 | 
					                literal = None # may contain untranslated escapes
 | 
				
			||||||
 | 
					            if literal is not None and len(literals) == 1 and literals[0]:
 | 
				
			||||||
                # shortcut: both pattern and string are literals
 | 
					                # shortcut: both pattern and string are literals
 | 
				
			||||||
                return string.replace(text, pattern.pattern, literals[0]), 0
 | 
					                return string.replace(text, pattern.pattern, literals[0]), 0
 | 
				
			||||||
        def filter(match, template=template):
 | 
					        def filter(match, template=template):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -647,9 +647,9 @@ def literal(literal, p=p):
 | 
				
			||||||
            p.append((LITERAL, literal))
 | 
					            p.append((LITERAL, literal))
 | 
				
			||||||
    sep = source[:0]
 | 
					    sep = source[:0]
 | 
				
			||||||
    if type(sep) is type(""):
 | 
					    if type(sep) is type(""):
 | 
				
			||||||
        char = chr
 | 
					        makechar = chr
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        char = unichr
 | 
					        makechar = unichr
 | 
				
			||||||
    while 1:
 | 
					    while 1:
 | 
				
			||||||
        this = s.get()
 | 
					        this = s.get()
 | 
				
			||||||
        if this is None:
 | 
					        if this is None:
 | 
				
			||||||
| 
						 | 
					@ -693,14 +693,14 @@ def literal(literal, p=p):
 | 
				
			||||||
                        break
 | 
					                        break
 | 
				
			||||||
                if not code:
 | 
					                if not code:
 | 
				
			||||||
                    this = this[1:]
 | 
					                    this = this[1:]
 | 
				
			||||||
                    code = LITERAL, char(atoi(this[-6:], 8) & 0xff)
 | 
					                    code = LITERAL, makechar(atoi(this[-6:], 8) & 0xff)
 | 
				
			||||||
                if code[0] is LITERAL:
 | 
					                if code[0] is LITERAL:
 | 
				
			||||||
                    literal(code[1])
 | 
					                    literal(code[1])
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    a(code)
 | 
					                    a(code)
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    this = char(ESCAPES[this][1])
 | 
					                    this = makechar(ESCAPES[this][1])
 | 
				
			||||||
                except KeyError:
 | 
					                except KeyError:
 | 
				
			||||||
                    pass
 | 
					                    pass
 | 
				
			||||||
                literal(this)
 | 
					                literal(this)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -104,6 +104,9 @@ def bump_num(matchobj):
 | 
				
			||||||
test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx')
 | 
					test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx')
 | 
				
			||||||
test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx')
 | 
					test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# bug 449964: fails for group followed by other escape
 | 
				
			||||||
 | 
					test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx')""", 'xx\bxx\b')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
 | 
					test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
 | 
				
			||||||
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a')
 | 
					test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a')
 | 
				
			||||||
test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
 | 
					test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -31,7 +31,7 @@
 | 
				
			||||||
 * 2001-04-28 fl  added __copy__ methods (work in progress)
 | 
					 * 2001-04-28 fl  added __copy__ methods (work in progress)
 | 
				
			||||||
 * 2001-05-14 fl  fixes for 1.5.2
 | 
					 * 2001-05-14 fl  fixes for 1.5.2
 | 
				
			||||||
 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
 | 
					 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
 | 
				
			||||||
 * 2001-09-18 fl  
 | 
					 * 2001-09-18 fl  added _getliteral helper
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 | 
					 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					@ -1959,25 +1959,29 @@ pattern_deepcopy(PatternObject* self, PyObject* args)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyObject*
 | 
					static PyObject*
 | 
				
			||||||
pattern_isliteral(PatternObject* self, PyObject* args)
 | 
					pattern_getliteral(PatternObject* self, PyObject* args)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    /* internal: return true if pattern consists of literal text only */
 | 
					    /* internal: if the pattern is a literal string, return that
 | 
				
			||||||
 | 
					       string.  otherwise, return None */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    SRE_CODE* code;
 | 
					    SRE_CODE* code;
 | 
				
			||||||
    PyObject* isliteral;
 | 
					    PyObject* literal;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!PyArg_ParseTuple(args, ":_isliteral"))
 | 
					    if (!PyArg_ParseTuple(args, ":_getliteral"))
 | 
				
			||||||
        return NULL;
 | 
					        return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    code = PatternObject_GetCode(self);
 | 
					    code = PatternObject_GetCode(self);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL)
 | 
					    if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL) {
 | 
				
			||||||
        isliteral = Py_True;
 | 
					        /* FIXME: extract literal string from code buffer.  we can't
 | 
				
			||||||
    else
 | 
					           use the pattern member, since it may contain untranslated
 | 
				
			||||||
        isliteral = Py_False;
 | 
					           escape codes (see SF bug 449000) */
 | 
				
			||||||
 | 
					        literal = Py_None;
 | 
				
			||||||
 | 
					    } else
 | 
				
			||||||
 | 
					        literal = Py_None; /* no literal */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Py_INCREF(isliteral);
 | 
					    Py_INCREF(literal);
 | 
				
			||||||
    return isliteral;
 | 
					    return literal;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyMethodDef pattern_methods[] = {
 | 
					static PyMethodDef pattern_methods[] = {
 | 
				
			||||||
| 
						 | 
					@ -1990,7 +1994,7 @@ static PyMethodDef pattern_methods[] = {
 | 
				
			||||||
    {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
 | 
					    {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
 | 
				
			||||||
    {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
 | 
					    {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
 | 
				
			||||||
    {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
 | 
					    {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
 | 
				
			||||||
    {"_isliteral", (PyCFunction) pattern_isliteral, METH_VARARGS},
 | 
					    {"_getliteral", (PyCFunction) pattern_getliteral, METH_VARARGS},
 | 
				
			||||||
    {NULL, NULL}
 | 
					    {NULL, NULL}
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue