mirror of
https://github.com/python/cpython.git
synced 2025-10-26 19:24:34 +00:00
[3.14] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133942)
If the error handler is used, a new bytes object is created to set as
the object attribute of UnicodeDecodeError, and that bytes object then
replaces the original data. A pointer to the decoded data will become invalid
after destroying that temporary bytes object. So we need another way to return
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
_PyBytes_DecodeEscape() does not have such an issue, because it does not
use the error handlers registry, but it should be changed for compatibility
with _PyUnicode_DecodeUnicodeEscapeInternal().
(cherry picked from commit 9f69a58623)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
f0a7a6c2cc
commit
69b4387f78
9 changed files with 160 additions and 80 deletions
|
|
@ -1196,23 +1196,39 @@ def test_escape(self):
|
|||
check(br"[\1010]", b"[A0]")
|
||||
check(br"[\x41]", b"[A]")
|
||||
check(br"[\x410]", b"[A0]")
|
||||
|
||||
def test_warnings(self):
|
||||
decode = codecs.escape_decode
|
||||
check = coding_checker(self, decode)
|
||||
for i in range(97, 123):
|
||||
b = bytes([i])
|
||||
if b not in b'abfnrtvx':
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\%c" is an invalid escape sequence' % i):
|
||||
check(b"\\" + b, b"\\" + b)
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\%c" is an invalid escape sequence' % (i-32)):
|
||||
check(b"\\" + b.upper(), b"\\" + b.upper())
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\8" is an invalid escape sequence'):
|
||||
check(br"\8", b"\\8")
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(br"\9", b"\\9")
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\\xfa" is an invalid escape sequence') as cm:
|
||||
check(b"\\\xfa", b"\\\xfa")
|
||||
for i in range(0o400, 0o1000):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\%o" is an invalid octal escape sequence' % i):
|
||||
check(rb'\%o' % i, bytes([i & 0o377]))
|
||||
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\z" is an invalid escape sequence'):
|
||||
self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\501" is an invalid octal escape sequence'):
|
||||
self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6))
|
||||
|
||||
def test_errors(self):
|
||||
decode = codecs.escape_decode
|
||||
self.assertRaises(ValueError, decode, br"\x")
|
||||
|
|
@ -2661,24 +2677,40 @@ def test_escape_decode(self):
|
|||
check(br"[\x410]", "[A0]")
|
||||
check(br"\u20ac", "\u20ac")
|
||||
check(br"\U0001d120", "\U0001d120")
|
||||
|
||||
def test_decode_warnings(self):
|
||||
decode = codecs.unicode_escape_decode
|
||||
check = coding_checker(self, decode)
|
||||
for i in range(97, 123):
|
||||
b = bytes([i])
|
||||
if b not in b'abfnrtuvx':
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\%c" is an invalid escape sequence' % i):
|
||||
check(b"\\" + b, "\\" + chr(i))
|
||||
if b.upper() not in b'UN':
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\%c" is an invalid escape sequence' % (i-32)):
|
||||
check(b"\\" + b.upper(), "\\" + chr(i-32))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\8" is an invalid escape sequence'):
|
||||
check(br"\8", "\\8")
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(br"\9", "\\9")
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\\xfa" is an invalid escape sequence') as cm:
|
||||
check(b"\\\xfa", "\\\xfa")
|
||||
for i in range(0o400, 0o1000):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\%o" is an invalid octal escape sequence' % i):
|
||||
check(rb'\%o' % i, chr(i))
|
||||
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\z" is an invalid escape sequence'):
|
||||
self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
|
||||
with self.assertWarnsRegex(DeprecationWarning,
|
||||
r'"\\501" is an invalid octal escape sequence'):
|
||||
self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6))
|
||||
|
||||
def test_decode_errors(self):
|
||||
decode = codecs.unicode_escape_decode
|
||||
for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue