bpo-46503: Prevent an assert from firing when parsing some invalid \N sequences in f-strings. (GH-30865) (GH-30867)

* bpo-46503: Prevent an assert from firing.  Also fix one nearby tiny PEP 7 nit.

* Added blurb.
(cherry picked from commit 0daf72194b)

Co-authored-by: Eric V. Smith <ericvsmith@users.noreply.github.com>

Co-authored-by: Eric V. Smith <ericvsmith@users.noreply.github.com>
This commit is contained in:
Miss Islington (bot) 2022-01-24 19:08:42 -08:00 committed by GitHub
parent 3f1ea163ea
commit c314e3e829
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 2 deletions

View file

@ -747,12 +747,16 @@ def test_misformed_unicode_character_name(self):
# differently inside f-strings.
self.assertAllRaise(SyntaxError, r"\(unicode error\) 'unicodeescape' codec can't decode bytes in position .*: malformed \\N character escape",
[r"f'\N'",
r"f'\N '",
r"f'\N '", # See bpo-46503.
r"f'\N{'",
r"f'\N{GREEK CAPITAL LETTER DELTA'",
# Here are the non-f-string versions,
# which should give the same errors.
r"'\N'",
r"'\N '",
r"'\N '",
r"'\N{'",
r"'\N{GREEK CAPITAL LETTER DELTA'",
])

View file

@ -0,0 +1 @@
Fix an assertion failure when parsing some invalid \N escape sequences in f-strings.

View file

@ -444,12 +444,23 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
if (!raw && ch == '\\' && s < end) {
ch = *s++;
if (ch == 'N') {
/* We need to look at and skip matching braces for "\N{name}"
sequences because otherwise we'll think the opening '{'
starts an expression, which is not the case with "\N".
Keep looking for either a matched '{' '}' pair, or the end
of the string. */
if (s < end && *s++ == '{') {
while (s < end && *s++ != '}') {
}
continue;
}
break;
/* This is an invalid "\N" sequence, since it's a "\N" not
followed by a "{". Just keep parsing this literal. This
error will be caught later by
decode_unicode_with_escapes(). */
continue;
}
if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
return -1;
@ -493,7 +504,8 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
s - literal_start,
NULL, NULL);
} else {
}
else {
*literal = decode_unicode_with_escapes(p, literal_start,
s - literal_start, t);
}