mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
[3.13] gh-137314: Fix incorrect treatment of format specs in raw fstrings (GH-137328) (#137345)
gh-137314: Fix incorrect treatment of format specs in raw fstrings (GH-137328)
(cherry picked from commit 0153d82a5a)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
This commit is contained in:
parent 7adea430c1
commit 23a3e892ee

5 changed files with 59 additions and 17 deletions
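For context, the fixed behavior is easy to demonstrate (a minimal sketch; Spec is a hypothetical helper mirroring the UnchangedFormat class added to the tests below):

    class Spec:
        def __format__(self, spec):
            # Echo the format spec so we can see what the parser produced.
            return spec

    # Non-raw f-string: \xFF in the spec is decoded to the single character 'ÿ'.
    assert f"{Spec():\xFF}" == "ÿ"
    # Raw f-string: after this fix, the backslash stays literal (four characters).
    assert rf"{Spec():\xFF}" == "\\xFF"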
@@ -1821,6 +1821,34 @@ def test_newlines_in_format_specifiers(self):
         for case in valid_cases:
             compile(case, "<string>", "exec")
 
+    def test_raw_fstring_format_spec(self):
+        # Test raw f-string format spec behavior (Issue #137314).
+        #
+        # Raw f-strings should preserve literal backslashes in format specifications,
+        # not interpret them as escape sequences.
+        class UnchangedFormat:
+            """Test helper that returns the format spec unchanged."""
+            def __format__(self, format):
+                return format
+
+        # Test basic escape sequences
+        self.assertEqual(f"{UnchangedFormat():\xFF}", 'ÿ')
+        self.assertEqual(rf"{UnchangedFormat():\xFF}", '\\xFF')
+
+        # Test nested expressions with raw/non-raw combinations
+        self.assertEqual(rf"{UnchangedFormat():{'\xFF'}}", 'ÿ')
+        self.assertEqual(f"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
+        self.assertEqual(rf"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
+
+        # Test continuation character in format specs
+        self.assertEqual(f"""{UnchangedFormat():{'a'\
+'b'}}""", 'ab')
+        self.assertEqual(rf"""{UnchangedFormat():{'a'\
+'b'}}""", 'ab')
+
+        # Test multiple format specs in same raw f-string
+        self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n')
+
 
 if __name__ == '__main__':
     unittest.main()
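The same distinction is visible without f-strings through the built-in format(), since the escape decoding at issue happens while the f-string literal is being compiled, not inside __format__ (a small usage sketch reusing the UnchangedFormat helper from the test above):

    helper = UnchangedFormat()
    # Four characters: the r prefix keeps the backslash in the spec string.
    assert format(helper, r"\xFF") == "\\xFF"
    # One character: the escape was decoded when this literal was compiled.
    assert format(helper, "\xFF") == "ÿ"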
@@ -0,0 +1,5 @@
+Fixed a regression where raw f-strings incorrectly interpreted
+escape sequences in format specifications. Raw f-strings now properly preserve
+literal backslashes in format specs, matching the behavior of Python 3.11.
+For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead of
+``'ÿ'``. Patch by Pablo Galindo.
@@ -1,6 +1,7 @@
 #include <Python.h>
 
 #include "pegen.h"
+#include "lexer/state.h"
 #include "string_parser.h"
 #include "pycore_runtime.h"   // _PyRuntime
 #include "pycore_pystate.h"   // _PyInterpreterState_GET()
@@ -1369,7 +1370,15 @@ expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
     if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) {
         return NULL;
     }
-    PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok);
+
+    // Check if we're inside a raw f-string for format spec decoding
+    int is_raw = 0;
+    if (INSIDE_FSTRING(p->tok)) {
+        tokenizer_mode *mode = TOK_GET_MODE(p->tok);
+        is_raw = mode->f_string_raw;
+    }
+
+    PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok);
     if (str == NULL) {
         return NULL;
     }
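In Python terms, the change above stops hardwiring is_raw to 0 and instead asks the tokenizer's innermost f-string mode whether the enclosing literal is raw. A rough model of that logic (an illustrative sketch only; Mode, ModeStack, and decode_spec_constant are hypothetical names, not the parser's API):

    from dataclasses import dataclass

    @dataclass
    class Mode:
        f_string_raw: bool          # analogue of tokenizer_mode.f_string_raw

    class ModeStack:
        def __init__(self):
            self.stack = []         # analogue of tok->tok_mode_stack
        def inside_fstring(self):   # analogue of INSIDE_FSTRING(p->tok)
            return bool(self.stack)
        def top(self):              # analogue of TOK_GET_MODE(p->tok)
            return self.stack[-1]

    def decode_spec_constant(text, modes):
        # Old behavior: escapes were always decoded (is_raw fixed at 0).
        # New behavior: honor the raw flag of the enclosing f-string.
        is_raw = modes.inside_fstring() and modes.top().f_string_raw
        if is_raw:
            return text
        # Simplified stand-in for _PyPegen_decode_string(p, 0, ...).
        return text.encode("latin-1").decode("unicode_escape")

    modes = ModeStack()
    modes.stack.append(Mode(f_string_raw=True))   # tokenizing an rf"..." literal
    assert decode_spec_constant(r"\xFF", modes) == "\\xFF"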
@@ -22,22 +22,6 @@
         || c == '_'\
         || (c >= 128))
 
-#ifdef Py_DEBUG
-static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
-    assert(tok->tok_mode_stack_index >= 0);
-    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
-    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
-}
-static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
-    assert(tok->tok_mode_stack_index >= 0);
-    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
-    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
-}
-#else
-#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
-#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
-#endif
-
 #define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
 #define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                 _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
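Note that these TOK_GET_MODE/TOK_NEXT_MODE helpers are not deleted outright: the hunks below re-add them, unchanged, in the shared lexer state header, which is what lets action_helpers.c call TOK_GET_MODE through its new include.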
@@ -1,6 +1,7 @@
 #ifndef _PY_LEXER_H_
 #define _PY_LEXER_H_
 
+#include "Python.h"
 #include "object.h"
 
 #define MAXINDENT 100   /* Max indentation level */
@@ -138,5 +139,20 @@ void _PyTokenizer_Free(struct tok_state *);
 void _PyToken_Free(struct token *);
 void _PyToken_Init(struct token *);
 
+#ifdef Py_DEBUG
+static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
+    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
+}
+static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
+    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
+}
+#else
+#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
+#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
+#endif
 
 #endif
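As before the move, Py_DEBUG builds get inline functions that assert tok_mode_stack_index stays within bounds (0 to MAXFSTRINGLEVEL), while release builds use the unchecked macro forms; relocating both variants keeps that split intact.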