mirror of
https://github.com/python/cpython.git
synced 2025-11-02 22:51:25 +00:00
Issue #8559: improve unicode support of (gdb) libpython.py
* Escape non-printable characters (use locale.getpreferredencoding()) * Fix support of surrogate pairs * test_gdb.py: use ascii() instead of repr() in gdb program arguments to avoid encoding issues * Fix test_strings() of test_gdb.py for encodings other than UTF-8 (e.g. ASCII)
This commit is contained in:
parent
06710a8421
commit
150016fd24
2 changed files with 61 additions and 39 deletions
|
|
@ -8,6 +8,7 @@
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
import locale
|
||||||
|
|
||||||
from test.support import run_unittest, findfile
|
from test.support import run_unittest, findfile
|
||||||
|
|
||||||
|
|
@ -177,7 +178,7 @@ def test_getting_backtrace(self):
|
||||||
def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None):
|
def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None):
|
||||||
# Ensure that gdb's rendering of the value in a debugged process
|
# Ensure that gdb's rendering of the value in a debugged process
|
||||||
# matches repr(value) in this process:
|
# matches repr(value) in this process:
|
||||||
gdb_repr, gdb_output = self.get_gdb_repr('id(' + repr(val) + ')',
|
gdb_repr, gdb_output = self.get_gdb_repr('id(' + ascii(val) + ')',
|
||||||
cmds_after_breakpoint)
|
cmds_after_breakpoint)
|
||||||
if not exp_repr:
|
if not exp_repr:
|
||||||
exp_repr = repr(val)
|
exp_repr = repr(val)
|
||||||
|
|
@ -226,31 +227,35 @@ def test_bytes(self):
|
||||||
|
|
||||||
def test_strings(self):
|
def test_strings(self):
|
||||||
'Verify the pretty-printing of unicode strings'
|
'Verify the pretty-printing of unicode strings'
|
||||||
|
encoding = locale.getpreferredencoding()
|
||||||
|
def check_repr(text):
|
||||||
|
try:
|
||||||
|
text.encode(encoding)
|
||||||
|
printable = True
|
||||||
|
except UnicodeEncodeError:
|
||||||
|
self.assertGdbRepr(text, ascii(text))
|
||||||
|
else:
|
||||||
|
self.assertGdbRepr(text)
|
||||||
|
|
||||||
self.assertGdbRepr('')
|
self.assertGdbRepr('')
|
||||||
self.assertGdbRepr('And now for something hopefully the same')
|
self.assertGdbRepr('And now for something hopefully the same')
|
||||||
self.assertGdbRepr('string with embedded NUL here \0 and then some more text')
|
self.assertGdbRepr('string with embedded NUL here \0 and then some more text')
|
||||||
|
|
||||||
# Test printing a single character:
|
# Test printing a single character:
|
||||||
# U+2620 SKULL AND CROSSBONES
|
# U+2620 SKULL AND CROSSBONES
|
||||||
self.assertGdbRepr('\u2620')
|
check_repr('\u2620')
|
||||||
|
|
||||||
# Test printing a Japanese unicode string
|
# Test printing a Japanese unicode string
|
||||||
# (I believe this reads "mojibake", using 3 characters from the CJK
|
# (I believe this reads "mojibake", using 3 characters from the CJK
|
||||||
# Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE)
|
# Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE)
|
||||||
self.assertGdbRepr('\u6587\u5b57\u5316\u3051')
|
check_repr('\u6587\u5b57\u5316\u3051')
|
||||||
|
|
||||||
# Test a character outside the BMP:
|
# Test a character outside the BMP:
|
||||||
# U+1D121 MUSICAL SYMBOL C CLEF
|
# U+1D121 MUSICAL SYMBOL C CLEF
|
||||||
# This is:
|
# This is:
|
||||||
# UTF-8: 0xF0 0x9D 0x84 0xA1
|
# UTF-8: 0xF0 0x9D 0x84 0xA1
|
||||||
# UTF-16: 0xD834 0xDD21
|
# UTF-16: 0xD834 0xDD21
|
||||||
if sys.maxunicode == 0x10FFFF:
|
check_repr(chr(0x1D121))
|
||||||
# wide unicode:
|
|
||||||
self.assertGdbRepr(chr(0x1D121))
|
|
||||||
else:
|
|
||||||
# narrow unicode:
|
|
||||||
self.assertGdbRepr(chr(0x1D121),
|
|
||||||
"'\\U0000d834\\U0000dd21'")
|
|
||||||
|
|
||||||
def test_tuples(self):
|
def test_tuples(self):
|
||||||
'Verify the pretty-printing of tuples'
|
'Verify the pretty-printing of tuples'
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@
|
||||||
'''
|
'''
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement
|
||||||
import gdb
|
import gdb
|
||||||
|
import locale
|
||||||
|
|
||||||
# Look up the gdb.Type for some standard types:
|
# Look up the gdb.Type for some standard types:
|
||||||
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
|
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
|
||||||
|
|
@ -69,6 +70,7 @@
|
||||||
|
|
||||||
hexdigits = "0123456789abcdef"
|
hexdigits = "0123456789abcdef"
|
||||||
|
|
||||||
|
ENCODING = locale.getpreferredencoding()
|
||||||
|
|
||||||
class NullPyObjectPtr(RuntimeError):
|
class NullPyObjectPtr(RuntimeError):
|
||||||
pass
|
pass
|
||||||
|
|
@ -1128,52 +1130,67 @@ def write_repr(self, out, visited):
|
||||||
|
|
||||||
# Non-ASCII characters
|
# Non-ASCII characters
|
||||||
else:
|
else:
|
||||||
ucs = ch;
|
ucs = ch
|
||||||
|
orig_ucs = None
|
||||||
if self.char_width == 2:
|
if self.char_width() == 2:
|
||||||
ch2 = 0
|
|
||||||
# Get code point from surrogate pair
|
# Get code point from surrogate pair
|
||||||
if i < len(proxy):
|
if (i < len(proxy)
|
||||||
|
and 0xD800 <= ord(ch) < 0xDC00 \
|
||||||
|
and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
|
||||||
ch2 = proxy[i]
|
ch2 = proxy[i]
|
||||||
if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00
|
code = (ord(ch) & 0x03FF) << 10
|
||||||
and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF):
|
code |= ord(ch2) & 0x03FF
|
||||||
ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
|
code += 0x00010000
|
||||||
|
orig_ucs = ucs
|
||||||
|
ucs = unichr(code)
|
||||||
i += 1
|
i += 1
|
||||||
|
else:
|
||||||
|
ch2 = None
|
||||||
|
|
||||||
|
printable = _unichr_is_printable(ucs)
|
||||||
|
if printable:
|
||||||
|
try:
|
||||||
|
ucs.encode(ENCODING)
|
||||||
|
except UnicodeEncodeError:
|
||||||
|
printable = False
|
||||||
|
if orig_ucs is not None:
|
||||||
|
ucs = orig_ucs
|
||||||
|
i -= 1
|
||||||
|
|
||||||
# Map Unicode whitespace and control characters
|
# Map Unicode whitespace and control characters
|
||||||
# (categories Z* and C* except ASCII space)
|
# (categories Z* and C* except ASCII space)
|
||||||
if not _unichr_is_printable(ucs):
|
if not printable:
|
||||||
# Unfortuately, Python 2's unicode type doesn't seem
|
# Unfortuately, Python 2's unicode type doesn't seem
|
||||||
# to expose the "isprintable" method
|
# to expose the "isprintable" method
|
||||||
|
code = ord(ucs)
|
||||||
|
|
||||||
# Map 8-bit characters to '\\xhh'
|
# Map 8-bit characters to '\\xhh'
|
||||||
if ucs <= 0xff:
|
if code <= 0xff:
|
||||||
out.write('\\x')
|
out.write('\\x')
|
||||||
out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
|
out.write(hexdigits[(code >> 4) & 0x000F])
|
||||||
out.write(hexdigits[ord(ucs) & 0x000F])
|
out.write(hexdigits[code & 0x000F])
|
||||||
# Map 21-bit characters to '\U00xxxxxx'
|
# Map 21-bit characters to '\U00xxxxxx'
|
||||||
elif ucs >= 0x10000:
|
elif code >= 0x10000:
|
||||||
out.write('\\U')
|
out.write('\\U')
|
||||||
out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F])
|
out.write(hexdigits[(code >> 28) & 0x0000000F])
|
||||||
out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F])
|
out.write(hexdigits[(code >> 24) & 0x0000000F])
|
||||||
out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F])
|
out.write(hexdigits[(code >> 20) & 0x0000000F])
|
||||||
out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F])
|
out.write(hexdigits[(code >> 16) & 0x0000000F])
|
||||||
out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F])
|
out.write(hexdigits[(code >> 12) & 0x0000000F])
|
||||||
out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F])
|
out.write(hexdigits[(code >> 8) & 0x0000000F])
|
||||||
out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F])
|
out.write(hexdigits[(code >> 4) & 0x0000000F])
|
||||||
out.write(hexdigits[ord(ucs) & 0x0000000F])
|
out.write(hexdigits[code & 0x0000000F])
|
||||||
# Map 16-bit characters to '\uxxxx'
|
# Map 16-bit characters to '\uxxxx'
|
||||||
else:
|
else:
|
||||||
out.write('\\u')
|
out.write('\\u')
|
||||||
out.write(hexdigits[(ord(ucs) >> 12) & 0x000F])
|
out.write(hexdigits[(code >> 12) & 0x000F])
|
||||||
out.write(hexdigits[(ord(ucs) >> 8) & 0x000F])
|
out.write(hexdigits[(code >> 8) & 0x000F])
|
||||||
out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
|
out.write(hexdigits[(code >> 4) & 0x000F])
|
||||||
out.write(hexdigits[ord(ucs) & 0x000F])
|
out.write(hexdigits[code & 0x000F])
|
||||||
else:
|
else:
|
||||||
# Copy characters as-is
|
# Copy characters as-is
|
||||||
out.write(ch)
|
out.write(ch)
|
||||||
if self.char_width == 2:
|
if self.char_width() == 2 and (ch2 is not None):
|
||||||
if ord(ucs) >= 0x10000:
|
|
||||||
out.write(ch2)
|
out.write(ch2)
|
||||||
|
|
||||||
out.write(quote)
|
out.write(quote)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue