mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	Issue #8559: improve unicode support of (gdb) libpython.py
* Escape non-printable characters (use locale.getpreferredencoding())
* Fix support of surrogate pairs
* test_gdb.py: use ascii() instead of repr() in gdb program arguments to avoid encoding issues
* Fix test_strings() of test_gdb.py for encodings other than UTF-8 (e.g. ASCII)
This commit is contained in:
		
							parent
							
								
									06710a8421
								
							
						
					
					
						commit
						150016fd24
					
				
					 2 changed files with 61 additions and 39 deletions
				
			
		| 
						 | 
					@ -8,6 +8,7 @@
 | 
				
			||||||
import subprocess
 | 
					import subprocess
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import unittest
 | 
					import unittest
 | 
				
			||||||
 | 
					import locale
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from test.support import run_unittest, findfile
 | 
					from test.support import run_unittest, findfile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -177,7 +178,7 @@ def test_getting_backtrace(self):
 | 
				
			||||||
    def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None):
 | 
					    def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None):
 | 
				
			||||||
        # Ensure that gdb's rendering of the value in a debugged process
 | 
					        # Ensure that gdb's rendering of the value in a debugged process
 | 
				
			||||||
        # matches repr(value) in this process:
 | 
					        # matches repr(value) in this process:
 | 
				
			||||||
        gdb_repr, gdb_output = self.get_gdb_repr('id(' + repr(val) + ')',
 | 
					        gdb_repr, gdb_output = self.get_gdb_repr('id(' + ascii(val) + ')',
 | 
				
			||||||
                                                 cmds_after_breakpoint)
 | 
					                                                 cmds_after_breakpoint)
 | 
				
			||||||
        if not exp_repr:
 | 
					        if not exp_repr:
 | 
				
			||||||
            exp_repr = repr(val)
 | 
					            exp_repr = repr(val)
 | 
				
			||||||
| 
						 | 
					@ -226,31 +227,35 @@ def test_bytes(self):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_strings(self):
 | 
					    def test_strings(self):
 | 
				
			||||||
        'Verify the pretty-printing of unicode strings'
 | 
					        'Verify the pretty-printing of unicode strings'
 | 
				
			||||||
 | 
					        encoding = locale.getpreferredencoding()
 | 
				
			||||||
 | 
					        def check_repr(text):
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                text.encode(encoding)
 | 
				
			||||||
 | 
					                printable = True
 | 
				
			||||||
 | 
					            except UnicodeEncodeError:
 | 
				
			||||||
 | 
					                self.assertGdbRepr(text, ascii(text))
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                self.assertGdbRepr(text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.assertGdbRepr('')
 | 
					        self.assertGdbRepr('')
 | 
				
			||||||
        self.assertGdbRepr('And now for something hopefully the same')
 | 
					        self.assertGdbRepr('And now for something hopefully the same')
 | 
				
			||||||
        self.assertGdbRepr('string with embedded NUL here \0 and then some more text')
 | 
					        self.assertGdbRepr('string with embedded NUL here \0 and then some more text')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Test printing a single character:
 | 
					        # Test printing a single character:
 | 
				
			||||||
        #    U+2620 SKULL AND CROSSBONES
 | 
					        #    U+2620 SKULL AND CROSSBONES
 | 
				
			||||||
        self.assertGdbRepr('\u2620')
 | 
					        check_repr('\u2620')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Test printing a Japanese unicode string
 | 
					        # Test printing a Japanese unicode string
 | 
				
			||||||
        # (I believe this reads "mojibake", using 3 characters from the CJK
 | 
					        # (I believe this reads "mojibake", using 3 characters from the CJK
 | 
				
			||||||
        # Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE)
 | 
					        # Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE)
 | 
				
			||||||
        self.assertGdbRepr('\u6587\u5b57\u5316\u3051')
 | 
					        check_repr('\u6587\u5b57\u5316\u3051')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Test a character outside the BMP:
 | 
					        # Test a character outside the BMP:
 | 
				
			||||||
        #    U+1D121 MUSICAL SYMBOL C CLEF
 | 
					        #    U+1D121 MUSICAL SYMBOL C CLEF
 | 
				
			||||||
        # This is:
 | 
					        # This is:
 | 
				
			||||||
        # UTF-8: 0xF0 0x9D 0x84 0xA1
 | 
					        # UTF-8: 0xF0 0x9D 0x84 0xA1
 | 
				
			||||||
        # UTF-16: 0xD834 0xDD21
 | 
					        # UTF-16: 0xD834 0xDD21
 | 
				
			||||||
        if sys.maxunicode == 0x10FFFF:
 | 
					        check_repr(chr(0x1D121))
 | 
				
			||||||
            # wide unicode:
 | 
					 | 
				
			||||||
            self.assertGdbRepr(chr(0x1D121))
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            # narrow unicode:
 | 
					 | 
				
			||||||
            self.assertGdbRepr(chr(0x1D121),
 | 
					 | 
				
			||||||
                               "'\\U0000d834\\U0000dd21'")
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_tuples(self):
 | 
					    def test_tuples(self):
 | 
				
			||||||
        'Verify the pretty-printing of tuples'
 | 
					        'Verify the pretty-printing of tuples'
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -42,6 +42,7 @@
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
from __future__ import with_statement
 | 
					from __future__ import with_statement
 | 
				
			||||||
import gdb
 | 
					import gdb
 | 
				
			||||||
 | 
					import locale
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Look up the gdb.Type for some standard types:
 | 
					# Look up the gdb.Type for some standard types:
 | 
				
			||||||
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
 | 
					_type_char_ptr = gdb.lookup_type('char').pointer() # char*
 | 
				
			||||||
| 
						 | 
					@ -69,6 +70,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
hexdigits = "0123456789abcdef"
 | 
					hexdigits = "0123456789abcdef"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ENCODING = locale.getpreferredencoding()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class NullPyObjectPtr(RuntimeError):
 | 
					class NullPyObjectPtr(RuntimeError):
 | 
				
			||||||
    pass
 | 
					    pass
 | 
				
			||||||
| 
						 | 
					@ -1128,52 +1130,67 @@ def write_repr(self, out, visited):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # Non-ASCII characters
 | 
					            # Non-ASCII characters
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                ucs = ch;
 | 
					                ucs = ch
 | 
				
			||||||
 | 
					                orig_ucs = None
 | 
				
			||||||
                if self.char_width == 2:
 | 
					                if self.char_width() == 2:
 | 
				
			||||||
                    ch2 = 0
 | 
					 | 
				
			||||||
                    # Get code point from surrogate pair
 | 
					                    # Get code point from surrogate pair
 | 
				
			||||||
                    if i < len(proxy):
 | 
					                    if (i < len(proxy)
 | 
				
			||||||
 | 
					                    and 0xD800 <= ord(ch) < 0xDC00 \
 | 
				
			||||||
 | 
					                    and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
 | 
				
			||||||
                        ch2 = proxy[i]
 | 
					                        ch2 = proxy[i]
 | 
				
			||||||
                        if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00
 | 
					                        code = (ord(ch) & 0x03FF) << 10
 | 
				
			||||||
                            and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF):
 | 
					                        code |= ord(ch2) & 0x03FF
 | 
				
			||||||
                            ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
 | 
					                        code += 0x00010000
 | 
				
			||||||
 | 
					                        orig_ucs = ucs
 | 
				
			||||||
 | 
					                        ucs = unichr(code)
 | 
				
			||||||
                        i += 1
 | 
					                        i += 1
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        ch2 = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                printable = _unichr_is_printable(ucs)
 | 
				
			||||||
 | 
					                if printable:
 | 
				
			||||||
 | 
					                    try:
 | 
				
			||||||
 | 
					                        ucs.encode(ENCODING)
 | 
				
			||||||
 | 
					                    except UnicodeEncodeError:
 | 
				
			||||||
 | 
					                        printable = False
 | 
				
			||||||
 | 
					                        if orig_ucs is not None:
 | 
				
			||||||
 | 
					                            ucs = orig_ucs
 | 
				
			||||||
 | 
					                            i -= 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                # Map Unicode whitespace and control characters
 | 
					                # Map Unicode whitespace and control characters
 | 
				
			||||||
                # (categories Z* and C* except ASCII space)
 | 
					                # (categories Z* and C* except ASCII space)
 | 
				
			||||||
                if not _unichr_is_printable(ucs):
 | 
					                if not printable:
 | 
				
			||||||
                    # Unfortuately, Python 2's unicode type doesn't seem
 | 
					                    # Unfortuately, Python 2's unicode type doesn't seem
 | 
				
			||||||
                    # to expose the "isprintable" method
 | 
					                    # to expose the "isprintable" method
 | 
				
			||||||
 | 
					                    code = ord(ucs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    # Map 8-bit characters to '\\xhh'
 | 
					                    # Map 8-bit characters to '\\xhh'
 | 
				
			||||||
                    if ucs <= 0xff:
 | 
					                    if code <= 0xff:
 | 
				
			||||||
                        out.write('\\x')
 | 
					                        out.write('\\x')
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
 | 
					                        out.write(hexdigits[(code >> 4) & 0x000F])
 | 
				
			||||||
                        out.write(hexdigits[ord(ucs) & 0x000F])
 | 
					                        out.write(hexdigits[code & 0x000F])
 | 
				
			||||||
                    # Map 21-bit characters to '\U00xxxxxx'
 | 
					                    # Map 21-bit characters to '\U00xxxxxx'
 | 
				
			||||||
                    elif ucs >= 0x10000:
 | 
					                    elif code >= 0x10000:
 | 
				
			||||||
                        out.write('\\U')
 | 
					                        out.write('\\U')
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F])
 | 
					                        out.write(hexdigits[(code >> 28) & 0x0000000F])
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F])
 | 
					                        out.write(hexdigits[(code >> 24) & 0x0000000F])
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F])
 | 
					                        out.write(hexdigits[(code >> 20) & 0x0000000F])
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F])
 | 
					                        out.write(hexdigits[(code >> 16) & 0x0000000F])
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F])
 | 
					                        out.write(hexdigits[(code >> 12) & 0x0000000F])
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F])
 | 
					                        out.write(hexdigits[(code >> 8) & 0x0000000F])
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F])
 | 
					                        out.write(hexdigits[(code >> 4) & 0x0000000F])
 | 
				
			||||||
                        out.write(hexdigits[ord(ucs) & 0x0000000F])
 | 
					                        out.write(hexdigits[code & 0x0000000F])
 | 
				
			||||||
                    # Map 16-bit characters to '\uxxxx'
 | 
					                    # Map 16-bit characters to '\uxxxx'
 | 
				
			||||||
                    else:
 | 
					                    else:
 | 
				
			||||||
                        out.write('\\u')
 | 
					                        out.write('\\u')
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 12) & 0x000F])
 | 
					                        out.write(hexdigits[(code >> 12) & 0x000F])
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 8) & 0x000F])
 | 
					                        out.write(hexdigits[(code >> 8) & 0x000F])
 | 
				
			||||||
                        out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
 | 
					                        out.write(hexdigits[(code >> 4) & 0x000F])
 | 
				
			||||||
                        out.write(hexdigits[ord(ucs) & 0x000F])
 | 
					                        out.write(hexdigits[code & 0x000F])
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    # Copy characters as-is
 | 
					                    # Copy characters as-is
 | 
				
			||||||
                    out.write(ch)
 | 
					                    out.write(ch)
 | 
				
			||||||
                    if self.char_width == 2:
 | 
					                    if self.char_width() == 2 and (ch2 is not None):
 | 
				
			||||||
                        if ord(ucs) >= 0x10000:
 | 
					 | 
				
			||||||
                        out.write(ch2)
 | 
					                        out.write(ch2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        out.write(quote)
 | 
					        out.write(quote)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue