mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			918 lines
		
	
	
	
		
			39 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			918 lines
		
	
	
	
		
			39 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import unittest
 | 
						||
import sys
 | 
						||
from test import support
 | 
						||
from test.support import import_helper
 | 
						||
 | 
						||
try:
 | 
						||
    import _testcapi
 | 
						||
except ImportError:
 | 
						||
    _testcapi = None
 | 
						||
 | 
						||
 | 
						||
# Placeholder the tests pass where they want a NULL argument (see the
# "CRASHES ...(NULL)" notes in the tests); presumably the _testcapi wrappers
# turn None into a C NULL pointer -- TODO confirm against the C helpers.
NULL = None
 | 
						||
 | 
						||
class Str(str):
    """Plain ``str`` subclass, used to feed non-exact-str objects to the C API."""
 | 
						||
 | 
						||
 | 
						||
class CAPITest(unittest.TestCase):
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_fromobject(self):
        """Check PyUnicode_FromObject() with str, str subclasses and non-str."""
        from _testcapi import unicode_fromobject as fromobject

        samples = (
            'abc',
            '\xa1\xa2',
            '\u4f60\u597d',
            'a\U0001f600',
            'a\ud800b\udfffc',
            '\ud834\udd1e',
        )
        for text in samples:
            # An exact str compares equal after the call.
            self.assertEqual(fromobject(text), text)
            # A subclass instance comes back as a distinct, exact str.
            converted = fromobject(Str(text))
            self.assertEqual(converted, text)
            self.assertIs(type(converted), str)
            self.assertIsNot(converted, text)

        for not_a_str in (b'abc', []):
            with self.assertRaises(TypeError):
                fromobject(not_a_str)
        # CRASHES fromobject(NULL)
 | 
						||
 | 
						||
    def test_from_format(self):
        """Check PyUnicode_FromFormat() by calling it through ctypes.

        Most cases live in tables of ``(expected, format, *args)`` tuples that
        are executed in the order they are listed.
        """
        import_helper.import_module('ctypes')
        from ctypes import (
            c_char_p, c_void_p, py_object, pythonapi, sizeof,
            c_int, c_long, c_longlong, c_ssize_t,
            c_uint, c_ulong, c_ulonglong, c_size_t)

        raw_from_format = pythonapi.PyUnicode_FromFormat
        raw_from_format.argtypes = (c_char_p,)
        raw_from_format.restype = py_object

        def PyUnicode_FromFormat(fmt, *fmt_args):
            # str arguments are wrapped in py_object; everything else is
            # handed to ctypes unchanged.
            converted = []
            for arg in fmt_args:
                if isinstance(arg, str):
                    arg = py_object(arg)
                converted.append(arg)
            return raw_from_format(fmt, *converted)

        def check_format(expected, fmt, *fmt_args):
            self.assertEqual(expected, PyUnicode_FromFormat(fmt, *fmt_args))

        def check_all(*cases):
            for expected, fmt, *fmt_args in cases:
                check_format(expected, fmt, *fmt_args)

        # ascii format, non-ascii argument
        check_format('ascii\x7f=unicode\xe9',
                     b'ascii\x7f=%U', 'unicode\xe9')

        # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
        # raises an error
        with self.assertRaisesRegex(
                ValueError,
                r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
                'string, got a non-ASCII byte: 0xe9$'):
            PyUnicode_FromFormat(b'unicode\xe9=%s', 'ascii')

        # test "%c"
        check_all(
            ('\uabcd', b'%c', c_int(0xabcd)),
            ('\U0010ffff', b'%c', c_int(0x10ffff)),
        )
        self.assertRaises(OverflowError,
                          PyUnicode_FromFormat, b'%c', c_int(0x110000))
        # Issue #18183
        check_format('\U00010000\U00100000',
                     b'%c%c', c_int(0x10000), c_int(0x100000))

        check_all(
            # test "%"
            ('%', b'%%'),
            ('%s', b'%%s'),
            ('[%]', b'[%%]'),
            ('%abc', b'%%%s', b'abc'),

            # truncated string
            ('abc', b'%.3s', b'abcdef'),
            ('abc[\ufffd', b'%.5s', 'abc[\u20ac]'.encode('utf8')),
            ("'\\u20acABC'", b'%A', '\u20acABC'),
            ("'\\u20", b'%.5A', '\u20acABCDEF'),
            ("'\u20acABC'", b'%R', '\u20acABC'),
            ("'\u20acA", b'%.3R', '\u20acABCDEF'),
            ('\u20acAB', b'%.3S', '\u20acABCDEF'),
            ('\u20acAB', b'%.3U', '\u20acABCDEF'),
            ('\u20acAB', b'%.3V', '\u20acABCDEF', None),
            ('abc[\ufffd', b'%.5V', None, 'abc[\u20ac]'.encode('utf8')),

            # following tests comes from #7330
            # test width modifier and precision modifier with %S
            ("repr=  abc", b'repr=%5S', 'abc'),
            ("repr=ab", b'repr=%.2S', 'abc'),
            ("repr=   ab", b'repr=%5.2S', 'abc'),

            # test width modifier and precision modifier with %R
            ("repr=   'abc'", b'repr=%8R', 'abc'),
            ("repr='ab", b'repr=%.3R', 'abc'),
            ("repr=  'ab", b'repr=%5.3R', 'abc'),

            # test width modifier and precision modifier with %A
            ("repr=   'abc'", b'repr=%8A', 'abc'),
            ("repr='ab", b'repr=%.3A', 'abc'),
            ("repr=  'ab", b'repr=%5.3A', 'abc'),

            # test width modifier and precision modifier with %s
            ("repr=  abc", b'repr=%5s', b'abc'),
            ("repr=ab", b'repr=%.2s', b'abc'),
            ("repr=   ab", b'repr=%5.2s', b'abc'),

            # test width modifier and precision modifier with %U
            ("repr=  abc", b'repr=%5U', 'abc'),
            ("repr=ab", b'repr=%.2U', 'abc'),
            ("repr=   ab", b'repr=%5.2U', 'abc'),

            # test width modifier and precision modifier with %V
            ("repr=  abc", b'repr=%5V', 'abc', b'123'),
            ("repr=ab", b'repr=%.2V', 'abc', b'123'),
            ("repr=   ab", b'repr=%5.2V', 'abc', b'123'),
            ("repr=  123", b'repr=%5V', None, b'123'),
            ("repr=12", b'repr=%.2V', None, b'123'),
            ("repr=   12", b'repr=%5.2V', None, b'123'),

            # test integer formats (%i, %d, %u)
            ('010', b'%03i', c_int(10)),
            ('0010', b'%0.4i', c_int(10)),
            ('-123', b'%i', c_int(-123)),
            ('-123', b'%li', c_long(-123)),
            ('-123', b'%lli', c_longlong(-123)),
            ('-123', b'%zi', c_ssize_t(-123)),

            ('-123', b'%d', c_int(-123)),
            ('-123', b'%ld', c_long(-123)),
            ('-123', b'%lld', c_longlong(-123)),
            ('-123', b'%zd', c_ssize_t(-123)),

            ('123', b'%u', c_uint(123)),
            ('123', b'%lu', c_ulong(123)),
            ('123', b'%llu', c_ulonglong(123)),
            ('123', b'%zu', c_size_t(123)),
        )

        # test long output
        min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
        max_longlong = -min_longlong - 1
        max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
        check_all(
            (str(min_longlong), b'%lld', c_longlong(min_longlong)),
            (str(max_longlong), b'%lld', c_longlong(max_longlong)),
            (str(max_ulonglong), b'%llu', c_ulonglong(max_ulonglong)),
        )
        # Only has to complete without raising; the result is not inspected.
        PyUnicode_FromFormat(b'%p', c_void_p(-1))

        check_all(
            # test padding (width and/or precision)
            ('123'.rjust(10, '0'), b'%010i', c_int(123)),
            ('123'.rjust(100), b'%100i', c_int(123)),
            ('123'.rjust(100, '0'), b'%.100i', c_int(123)),
            ('123'.rjust(80, '0').rjust(100), b'%100.80i', c_int(123)),

            ('123'.rjust(10, '0'), b'%010u', c_uint(123)),
            ('123'.rjust(100), b'%100u', c_uint(123)),
            ('123'.rjust(100, '0'), b'%.100u', c_uint(123)),
            ('123'.rjust(80, '0').rjust(100), b'%100.80u', c_uint(123)),

            ('123'.rjust(10, '0'), b'%010x', c_int(0x123)),
            ('123'.rjust(100), b'%100x', c_int(0x123)),
            ('123'.rjust(100, '0'), b'%.100x', c_int(0x123)),
            ('123'.rjust(80, '0').rjust(100), b'%100.80x', c_int(0x123)),

            # test %A
            (r"%A:'abc\xe9\uabcd\U0010ffff'",
             b'%%A:%A', 'abc\xe9\uabcd\U0010ffff'),

            # test %V
            ('repr=abc', b'repr=%V', 'abc', b'xyz'),
        )

        # test %p
        # We cannot test the exact result,
        # because it returns a hex representation of a C pointer,
        # which is going to be different each time. But, we can test the format.
        p_format_regex = r'^0x[a-zA-Z0-9]{3,}$'
        pointer_cases = (
            (p_format_regex, b'%p', 'abc'),
            (r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}',
             b'%p %p', '123456', b'xyz'),
            # Extra args are ignored:
            (p_format_regex, b'%p', '123456', None, b'xyz'),
        )
        for pattern, fmt, *fmt_args in pointer_cases:
            rendered = PyUnicode_FromFormat(fmt, *fmt_args)
            self.assertIsInstance(rendered, str)
            self.assertRegex(rendered, pattern)

        check_all(
            # Test string decode from parameter of %s using utf-8.
            # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
            # '\u4eba\u6c11'
            ('repr=\u4eba\u6c11', b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91'),

            # Test replace error handler.
            ('repr=abc\ufffd', b'repr=%V', None, b'abc\xff'),

            # Issue #33817: empty strings
            ('', b''),
            ('', b'%s', b''),
        )

        # check for crashes
        malformed = (
            b'%', b'%0', b'%01', b'%.', b'%.1',
            b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc',
            b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs',
        )
        for fmt in malformed:
            with self.subTest(fmt=fmt):
                with self.assertRaisesRegex(SystemError,
                                            'invalid format string'):
                    PyUnicode_FromFormat(fmt, b'abc')
        with self.assertRaisesRegex(SystemError, 'invalid format string'):
            PyUnicode_FromFormat(b'%+i', c_int(10))
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_aswidechar(self):
        """Check PyUnicode_AsWideChar() with several buffer lengths."""
        from _testcapi import unicode_aswidechar
        import_helper.import_module('ctypes')
        from ctypes import c_wchar, sizeof

        # (input, buffer length, expected size, expected buffer contents)
        cases = (
            ('abcdef', 2, 2, 'ab'),
            ('abc', 3, 3, 'abc'),
            ('abc', 4, 3, 'abc\0'),
            ('abc', 10, 3, 'abc\0'),
            ('abc\0def', 20, 7, 'abc\0def\0'),
        )
        for text, buflen, expected_size, expected_wchar in cases:
            wchar, size = unicode_aswidechar(text, buflen)
            self.assertEqual(size, expected_size)
            self.assertEqual(wchar, expected_wchar)

        # A non-BMP character takes two wchar_t units when wchar_t is
        # 16 bits wide and one otherwise.
        nonbmp = chr(0x10ffff)
        buflen, nchar = (3, 2) if sizeof(c_wchar) == 2 else (2, 1)
        wchar, size = unicode_aswidechar(nonbmp, buflen)
        self.assertEqual(size, nchar)
        self.assertEqual(wchar, nonbmp + '\0')
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_aswidecharstring(self):
        """Check PyUnicode_AsWideCharString() results and reported sizes."""
        from _testcapi import unicode_aswidecharstring
        import_helper.import_module('ctypes')
        from ctypes import c_wchar, sizeof

        nonbmp = chr(0x10ffff)
        # One non-BMP character needs two units with a 16-bit wchar_t.
        nonbmp_units = 2 if sizeof(c_wchar) == 2 else 1

        # (input, expected size, expected contents)
        cases = (
            ('abc', 3, 'abc\0'),
            ('abc\0def', 7, 'abc\0def\0'),
            (nonbmp, nonbmp_units, nonbmp + '\0'),
        )
        for text, expected_size, expected_wchar in cases:
            wchar, size = unicode_aswidecharstring(text)
            self.assertEqual(size, expected_size)
            self.assertEqual(wchar, expected_wchar)
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_asucs4(self):
        """Check PyUnicode_AsUCS4() with exact, oversized and short lengths."""
        from _testcapi import unicode_asucs4

        samples = (
            'abc',
            '\xa1\xa2',
            '\u4f60\u597d',
            'a\U0001f600',
            'a\ud800b\udfffc',
            '\ud834\udd1e',
        )
        for text in samples:
            length = len(text)
            # (length argument, flag argument, expected suffix after the text)
            expectations = (
                (length, True, '\0'),
                (length, False, '\uffff'),
                (length + 1, True, '\0\uffff'),
                (length + 1, False, '\0\uffff'),
            )
            for size_arg, flag, suffix in expectations:
                self.assertEqual(unicode_asucs4(text, size_arg, flag),
                                 text + suffix)

            # Lengths that are too small are rejected.
            with self.assertRaises(SystemError):
                unicode_asucs4(text, length - 1, True)
            with self.assertRaises(SystemError):
                unicode_asucs4(text, length - 2, False)

            # Same checks on a string with an embedded NUL.
            doubled = text + '\0' + text
            self.assertEqual(unicode_asucs4(doubled, len(doubled), True),
                             doubled + '\0')
            self.assertEqual(unicode_asucs4(doubled, len(doubled), False),
                             doubled + '\uffff')
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_asutf8(self):
        """Check PyUnicode_AsUTF8() on 2-, 3- and 4-byte encodings."""
        from _testcapi import unicode_asutf8

        encodings = (
            ('\u0100', b'\xc4\x80'),
            ('\uffff', b'\xef\xbf\xbf'),
            (chr(0x10ffff), b'\xf4\x8f\xbf\xbf'),
        )
        for text, encoded in encodings:
            self.assertEqual(unicode_asutf8(text), encoded)

        # Lone surrogates cannot be encoded.
        with self.assertRaises(UnicodeEncodeError):
            unicode_asutf8('a\ud800b\udfffc')
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_asutf8andsize(self):
        """Check PyUnicode_AsUTF8AndSize() returns the bytes and their count."""
        from _testcapi import unicode_asutf8andsize

        encodings = (
            ('\u0100', (b'\xc4\x80', 2)),
            ('\uffff', (b'\xef\xbf\xbf', 3)),
            (chr(0x10ffff), (b'\xf4\x8f\xbf\xbf', 4)),
        )
        for text, encoded_and_size in encodings:
            self.assertEqual(unicode_asutf8andsize(text), encoded_and_size)

        # Lone surrogates cannot be encoded.
        with self.assertRaises(UnicodeEncodeError):
            unicode_asutf8andsize('a\ud800b\udfffc')
 | 
						||
 | 
						||
    @support.cpython_only
 | 
						||
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | 
						||
    def test_concat(self):
 | 
						||
        """Test PyUnicode_Concat()"""
 | 
						||
        from _testcapi import unicode_concat as concat
 | 
						||
 | 
						||
        self.assertEqual(concat('abc', 'def'), 'abcdef')
 | 
						||
        self.assertEqual(concat('abc', 'Π³Π΄Π΅'), 'abcΠ³Π΄Π΅')
 | 
						||
        self.assertEqual(concat('Π°Π±Π²', 'def'), 'Π°Π±Π²def')
 | 
						||
        self.assertEqual(concat('Π°Π±Π²', 'Π³Π΄Π΅'), 'Π°Π±Π²Π³Π΄Π΅')
 | 
						||
        self.assertEqual(concat('a\0b', 'c\0d'), 'a\0bc\0d')
 | 
						||
 | 
						||
        self.assertRaises(TypeError, concat, b'abc', 'def')
 | 
						||
        self.assertRaises(TypeError, concat, 'abc', b'def')
 | 
						||
        self.assertRaises(TypeError, concat, b'abc', b'def')
 | 
						||
        self.assertRaises(TypeError, concat, [], 'def')
 | 
						||
        self.assertRaises(TypeError, concat, 'abc', [])
 | 
						||
        self.assertRaises(TypeError, concat, [], [])
 | 
						||
        # CRASHES concat(NULL, 'def')
 | 
						||
        # CRASHES concat('abc', NULL)
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_split(self):
        """Test PyUnicode_Split()"""
        from _testcapi import unicode_split as split

        self.assertEqual(split('a|b|c|d', '|'), ['a', 'b', 'c', 'd'])
        self.assertEqual(split('a|b|c|d', '|', 2), ['a', 'b', 'c|d'])
        self.assertEqual(split('a|b|c|d', '\u20ac'), ['a|b|c|d'])
        self.assertEqual(split('a||b|c||d', '||'), ['a', 'b|c', 'd'])
        # Cyrillic data: the separator 'а' below is a single non-ASCII
        # character (the mis-decoded form was a two-character sequence).
        self.assertEqual(split('а|б|в|г', '|'), ['а', 'б', 'в', 'г'])
        self.assertEqual(split('абабагаламага', 'а'),
                         ['', 'б', 'б', 'г', 'л', 'м', 'г', ''])
        # A NULL separator splits on runs of whitespace.
        self.assertEqual(split(' a\tb\nc\rd\ve\f', NULL),
                         ['a', 'b', 'c', 'd', 'e'])
        self.assertEqual(split('a\x85b\xa0c\u1680d\u2000e', NULL),
                         ['a', 'b', 'c', 'd', 'e'])

        self.assertRaises(ValueError, split, 'a|b|c|d', '')
        self.assertRaises(TypeError, split, 'a|b|c|d', ord('|'))
        self.assertRaises(TypeError, split, [], '|')
        # CRASHES split(NULL, '|')
 | 
						||
 | 
						||
    @support.cpython_only
 | 
						||
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | 
						||
    def test_rsplit(self):
 | 
						||
        """Test PyUnicode_RSplit()"""
 | 
						||
        from _testcapi import unicode_rsplit as rsplit
 | 
						||
 | 
						||
        self.assertEqual(rsplit('a|b|c|d', '|'), ['a', 'b', 'c', 'd'])
 | 
						||
        self.assertEqual(rsplit('a|b|c|d', '|', 2), ['a|b', 'c', 'd'])
 | 
						||
        self.assertEqual(rsplit('a|b|c|d', '\u20ac'), ['a|b|c|d'])
 | 
						||
        self.assertEqual(rsplit('a||b|c||d', '||'), ['a', 'b|c', 'd'])
 | 
						||
        self.assertEqual(rsplit('Π°|Π±|Π²|Π³', '|'), ['Π°', 'Π±', 'Π²', 'Π³'])
 | 
						||
        self.assertEqual(rsplit('Π°Π±Π°Π±Π°Π³Π°Π»Π°ΠΌΠ°Π³Π°', 'Π°'),
 | 
						||
                         ['', 'Π±', 'Π±', 'Π³', 'Π»', 'ΠΌ', 'Π³', ''])
 | 
						||
        self.assertEqual(rsplit('aΠΆbΠΆcΠΆd', 'ΠΆ'), ['a', 'b', 'c', 'd'])
 | 
						||
        self.assertEqual(rsplit(' a\tb\nc\rd\ve\f', NULL),
 | 
						||
                         ['a', 'b', 'c', 'd', 'e'])
 | 
						||
        self.assertEqual(rsplit('a\x85b\xa0c\u1680d\u2000e', NULL),
 | 
						||
                         ['a', 'b', 'c', 'd', 'e'])
 | 
						||
 | 
						||
        self.assertRaises(ValueError, rsplit, 'a|b|c|d', '')
 | 
						||
        self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|'))
 | 
						||
        self.assertRaises(TypeError, rsplit, [], '|')
 | 
						||
        # CRASHES rsplit(NULL, '|')
 | 
						||
 | 
						||
    @support.cpython_only
 | 
						||
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | 
						||
    def test_partition(self):
 | 
						||
        """Test PyUnicode_Partition()"""
 | 
						||
        from _testcapi import unicode_partition as partition
 | 
						||
 | 
						||
        self.assertEqual(partition('a|b|c', '|'), ('a', '|', 'b|c'))
 | 
						||
        self.assertEqual(partition('a||b||c', '||'), ('a', '||', 'b||c'))
 | 
						||
        self.assertEqual(partition('Π°|Π±|Π²', '|'), ('Π°', '|', 'Π±|Π²'))
 | 
						||
        self.assertEqual(partition('ΠΊΠ°Π±Π°Π½', 'Π°'), ('ΠΊ', 'Π°', 'Π±Π°Π½'))
 | 
						||
        self.assertEqual(partition('aΠΆbΠΆc', 'ΠΆ'), ('a', 'ΠΆ', 'bΠΆc'))
 | 
						||
 | 
						||
        self.assertRaises(ValueError, partition, 'a|b|c', '')
 | 
						||
        self.assertRaises(TypeError, partition, b'a|b|c', '|')
 | 
						||
        self.assertRaises(TypeError, partition, 'a|b|c', b'|')
 | 
						||
        self.assertRaises(TypeError, partition, 'a|b|c', ord('|'))
 | 
						||
        self.assertRaises(TypeError, partition, [], '|')
 | 
						||
        # CRASHES partition(NULL, '|')
 | 
						||
        # CRASHES partition('a|b|c', NULL)
 | 
						||
 | 
						||
    @support.cpython_only
 | 
						||
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | 
						||
    def test_rpartition(self):
 | 
						||
        """Test PyUnicode_RPartition()"""
 | 
						||
        from _testcapi import unicode_rpartition as rpartition
 | 
						||
 | 
						||
        self.assertEqual(rpartition('a|b|c', '|'), ('a|b', '|', 'c'))
 | 
						||
        self.assertEqual(rpartition('a||b||c', '||'), ('a||b', '||', 'c'))
 | 
						||
        self.assertEqual(rpartition('Π°|Π±|Π²', '|'), ('Π°|Π±', '|', 'Π²'))
 | 
						||
        self.assertEqual(rpartition('ΠΊΠ°Π±Π°Π½', 'Π°'), ('ΠΊΠ°Π±', 'Π°', 'Π½'))
 | 
						||
        self.assertEqual(rpartition('aΠΆbΠΆc', 'ΠΆ'), ('aΠΆb', 'ΠΆ', 'c'))
 | 
						||
 | 
						||
        self.assertRaises(ValueError, rpartition, 'a|b|c', '')
 | 
						||
        self.assertRaises(TypeError, rpartition, b'a|b|c', '|')
 | 
						||
        self.assertRaises(TypeError, rpartition, 'a|b|c', b'|')
 | 
						||
        self.assertRaises(TypeError, rpartition, 'a|b|c', ord('|'))
 | 
						||
        self.assertRaises(TypeError, rpartition, [], '|')
 | 
						||
        # CRASHES rpartition(NULL, '|')
 | 
						||
        # CRASHES rpartition('a|b|c', NULL)
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_splitlines(self):
        """Test PyUnicode_SplitLines()"""
        from _testcapi import unicode_splitlines as splitlines

        # ASCII line breaks, without and with the line ends kept.
        self.assertEqual(splitlines('a\nb\rc\r\nd'), ['a', 'b', 'c', 'd'])
        self.assertEqual(splitlines('a\nb\rc\r\nd', True),
                         ['a\n', 'b\r', 'c\r\n', 'd'])
        # Non-ASCII line breaks.
        self.assertEqual(splitlines('a\x85b\u2028c\u2029d'),
                         ['a', 'b', 'c', 'd'])
        self.assertEqual(splitlines('a\x85b\u2028c\u2029d', True),
                         ['a\x85', 'b\u2028', 'c\u2029', 'd'])
        # Cyrillic content (restored from mis-decoded text).
        self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г'])

        self.assertRaises(TypeError, splitlines, b'a\nb\rc\r\nd')
        # CRASHES splitlines(NULL)
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_translate(self):
        """Test PyUnicode_Translate()"""
        from _testcapi import unicode_translate as translate

        # Mapping values may be a str, a code point or a longer replacement.
        self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d')
        # Same with Cyrillic text.  These literals must be single characters:
        # ord() raises TypeError on the two-character mis-decoded forms.
        self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г')
        self.assertEqual(translate('abc', {}), 'abc')
        self.assertEqual(translate('abc', []), 'abc')

        # A character mapped to None is reported through the error handler.
        self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None})
        self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict')
        self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
        self.assertEqual(translate('abc', {ord('b'): None}, 'ignore'), 'ac')
        self.assertEqual(translate('abc', {ord('b'): None}, 'replace'), 'a\ufffdc')
        self.assertEqual(translate('abc', {ord('b'): None}, 'backslashreplace'), r'a\x62c')
        # XXX Other error handlers do not support UnicodeTranslateError

        # Wrong argument types.
        self.assertRaises(TypeError, translate, b'abc', [])
        self.assertRaises(TypeError, translate, 123, [])
        self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
        self.assertRaises(TypeError, translate, 'abc', 123)
        self.assertRaises(TypeError, translate, 'abc', NULL)
        # CRASHES translate(NULL, [])
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_join(self):
        """Test PyUnicode_Join()"""
        from _testcapi import unicode_join as join

        self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c')
        self.assertEqual(join('|', ['a', '', 'c']), 'a||c')
        self.assertEqual(join('', ['a', 'b', 'c']), 'abc')
        # A NULL separator joins with a single space.
        self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c')
        # Cyrillic items and separator (restored from mis-decoded text).
        self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в')
        self.assertEqual(join('ж', ['а', 'б', 'в']), 'ажбжв')

        self.assertRaises(TypeError, join, b'|', ['a', 'b', 'c'])
        self.assertRaises(TypeError, join, '|', [b'a', b'b', b'c'])
        self.assertRaises(TypeError, join, NULL, [b'a', b'b', b'c'])
        self.assertRaises(TypeError, join, '|', b'123')
        self.assertRaises(TypeError, join, '|', 123)
        self.assertRaises(SystemError, join, '|', NULL)
 | 
						||
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_count(self):
        """Test PyUnicode_Count()"""
        from _testcapi import unicode_count

        # Each character of every sample occurs exactly once in that sample.
        for text in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
            for ch in text:
                self.assertEqual(unicode_count(text, ch, 0, len(text)), 1)

        # Named `text` rather than `str` so the builtin is not shadowed.
        text = "!>_<!"
        size = len(text)
        self.assertEqual(unicode_count(text, 'z', 0, size), 0)
        self.assertEqual(unicode_count(text, '', 0, size), size+1)
        # start < end
        self.assertEqual(unicode_count(text, '!', 1, size+1), 1)
        # start >= end
        self.assertEqual(unicode_count(text, '!', 0, 0), 0)
        self.assertEqual(unicode_count(text, '!', size, 0), 0)
        # negative
        self.assertEqual(unicode_count(text, '!', -size, -1), 1)
        # bad arguments
        self.assertRaises(TypeError, unicode_count, text, b'!', 0, size)
        self.assertRaises(TypeError, unicode_count, b"!>_<!", '!', 0, size)
        self.assertRaises(TypeError, unicode_count, text, ord('!'), 0, size)
        # Four arguments, like every other call here.  This call used to pass
        # a stray fifth argument, which presumably made the wrapper fail on
        # the argument count instead of on the non-str object under test.
        self.assertRaises(TypeError, unicode_count, [], '!', 0, size)
        # CRASHES unicode_count(NULL, '!', 0, len(str))
        # CRASHES unicode_count(str, NULL, 0, len(str))
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_tailmatch(self):
        """Test PyUnicode_Tailmatch()"""
        from _testcapi import unicode_tailmatch as tailmatch

        text = 'ababahalamaha'
        size = len(text)
        lowest = -sys.maxsize-1

        # Rows are (substr, start, end, direction, expected).  Judging by the
        # expected values, direction -1 matches at the start of the slice and
        # direction 1 matches at its end.
        checks = (
            ('aba', 0, size, -1, 1),
            ('aha', 0, size, 1, 1),

            # bounds far outside the string
            ('aba', 0, sys.maxsize, -1, 1),
            ('aba', -size, sys.maxsize, -1, 1),
            ('aba', lowest, size, -1, 1),
            ('aha', 0, sys.maxsize, 1, 1),
            ('aha', lowest, size, 1, 1),

            # absent and empty substrings
            ('z', 0, size, 1, 0),
            ('z', 0, size, -1, 0),
            ('', 0, size, 1, 1),
            ('', 0, size, -1, 1),

            # narrowed slices, positive and negative indices
            ('ba', 0, size-1, -1, 0),
            ('ba', 1, size-1, -1, 1),
            ('aba', 1, size-1, -1, 0),
            ('ba', -size+1, -1, -1, 1),
            ('ah', 0, size, 1, 0),
            ('ah', 0, size-1, 1, 1),
            ('ah', -size, -1, 1, 1),
        )
        for substr, start, end, direction, expected in checks:
            self.assertEqual(tailmatch(text, substr, start, end, direction), expected)

        # bad arguments
        for direction in (-1, 1):
            self.assertRaises(TypeError, tailmatch, text, ('aba', 'aha'), 0, size, direction)
        # CRASHES tailmatch(NULL, 'aba', 0, len(text), -1)
        # CRASHES tailmatch(text, NULL, 0, len(text), -1)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_find(self):
        """Test PyUnicode_Find()"""
        from _testcapi import unicode_find as find

        # Every character is unique within its string, so searching in either
        # direction must report the character's own index.
        for text in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
            for index, ch in enumerate(text):
                for direction in (1, -1):
                    self.assertEqual(find(text, ch, 0, len(text), direction), index)

        text = "!>_<!"
        size = len(text)
        # Rows are (substr, start, end, direction, expected).
        checks = (
            ('z', 0, size, 1, -1),
            ('z', 0, size, -1, -1),
            ('', 0, size, 1, 0),
            ('', 0, size, -1, size),
            # start < end
            ('!', 1, size+1, 1, 4),
            ('!', 1, size+1, -1, 4),
            # start >= end
            ('!', 0, 0, 1, -1),
            ('!', size, 0, 1, -1),
            # negative
            ('!', -size, -1, 1, 0),
            ('!', -size, -1, -1, 0),
        )
        for substr, start, end, direction, expected in checks:
            self.assertEqual(find(text, substr, start, end, direction), expected)

        # bad arguments
        bad_pairs = (
            (text, b'!'),
            (b"!>_<!", '!'),
            (text, ord('!')),
            ([], '!'),
        )
        for haystack, needle in bad_pairs:
            self.assertRaises(TypeError, find, haystack, needle, 0, size, 1)
        # CRASHES find(NULL, '!', 0, len(text), 1)
        # CRASHES find(text, NULL, 0, len(text), 1)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_findchar(self):
        """Test PyUnicode_FindChar()"""
        from _testcapi import unicode_findchar

        # Every character is unique within its string, so both search
        # directions must report the character's own index.
        for text in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
            for index, ch in enumerate(text):
                for direction in (1, -1):
                    self.assertEqual(
                        unicode_findchar(text, ord(ch), 0, len(text), direction),
                        index)

        text = "!>_<!"
        size = len(text)
        bang = ord('!')
        # Rows are (code point, start, end, direction, expected).
        checks = (
            # 0x110000 is one past the last valid code point
            (0x110000, 0, size, 1, -1),
            (0x110000, 0, size, -1, -1),
            # start < end
            (bang, 1, size+1, 1, 4),
            (bang, 1, size+1, -1, 4),
            # start >= end
            (bang, 0, 0, 1, -1),
            (bang, size, 0, 1, -1),
            # negative
            (bang, -size, -1, 1, 0),
            (bang, -size, -1, -1, 0),
        )
        for code, start, end, direction, expected in checks:
            self.assertEqual(unicode_findchar(text, code, start, end, direction), expected)

        # bad arguments
        # CRASHES unicode_findchar(b"!>_<!", ord('!'), 0, len(text), 1)
        # CRASHES unicode_findchar([], ord('!'), 0, len(text), 1)
        # CRASHES unicode_findchar(NULL, ord('!'), 0, len(text), 1)
    @support.cpython_only
 | 
						||
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | 
						||
    def test_replace(self):
 | 
						||
        """Test PyUnicode_Replace()"""
 | 
						||
        from _testcapi import unicode_replace as replace
 | 
						||
 | 
						||
        str = 'abracadabra'
 | 
						||
        self.assertEqual(replace(str, 'a', '='), '=br=c=d=br=')
 | 
						||
        self.assertEqual(replace(str, 'a', '<>'), '<>br<>c<>d<>br<>')
 | 
						||
        self.assertEqual(replace(str, 'abra', '='), '=cad=')
 | 
						||
        self.assertEqual(replace(str, 'a', '=', 2), '=br=cadabra')
 | 
						||
        self.assertEqual(replace(str, 'a', '=', 0), str)
 | 
						||
        self.assertEqual(replace(str, 'a', '=', sys.maxsize), '=br=c=d=br=')
 | 
						||
        self.assertEqual(replace(str, 'z', '='), str)
 | 
						||
        self.assertEqual(replace(str, '', '='), '=a=b=r=a=c=a=d=a=b=r=a=')
 | 
						||
        self.assertEqual(replace(str, 'a', 'ΠΆ'), 'ΠΆbrΠΆcΠΆdΠΆbrΠΆ')
 | 
						||
        self.assertEqual(replace('Π°Π±Π°Π±Π°Π³Π°Π»Π°ΠΌΠ°Π³Π°', 'Π°', '='), '=Π±=Π±=Π³=Π»=ΠΌ=Π³=')
 | 
						||
        self.assertEqual(replace('ΠΠ°Π΄Π΅Π½-ΠΠ°Π΄Π΅Π½', 'ΠΠ°Π΄Π΅Π½', 'Baden'), 'Baden-Baden')
 | 
						||
        # bad arguments
 | 
						||
        self.assertRaises(TypeError, replace, 'a', 'a', b'=')
 | 
						||
        self.assertRaises(TypeError, replace, 'a', b'a', '=')
 | 
						||
        self.assertRaises(TypeError, replace, b'a', 'a', '=')
 | 
						||
        self.assertRaises(TypeError, replace, 'a', 'a', ord('='))
 | 
						||
        self.assertRaises(TypeError, replace, 'a', ord('a'), '=')
 | 
						||
        self.assertRaises(TypeError, replace, [], 'a', '=')
 | 
						||
        # CRASHES replace('a', 'a', NULL)
 | 
						||
        # CRASHES replace('a', NULL, '=')
 | 
						||
        # CRASHES replace(NULL, 'a', '=')
 | 
						||
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_compare(self):
        """Test PyUnicode_Compare()"""
        from _testcapi import unicode_compare as compare

        self.assertEqual(compare('abc', 'abc'), 0)
        self.assertEqual(compare('abc', 'def'), -1)
        self.assertEqual(compare('def', 'abc'), 1)
        # Embedded null characters take part in the comparison.
        self.assertEqual(compare('abc', 'abc\0def'), -1)
        self.assertEqual(compare('abc\0def', 'abc\0def'), 0)
        # Non-ASCII (Cyrillic) against ASCII.
        self.assertEqual(compare('абв', 'abc'), 1)

        # bad arguments
        self.assertRaises(TypeError, compare, b'abc', 'abc')
        self.assertRaises(TypeError, compare, 'abc', b'abc')
        self.assertRaises(TypeError, compare, b'abc', b'abc')
        self.assertRaises(TypeError, compare, [], 'abc')
        self.assertRaises(TypeError, compare, 'abc', [])
        self.assertRaises(TypeError, compare, [], [])
        # CRASHES compare(NULL, 'abc')
        # CRASHES compare('abc', NULL)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_comparewithasciistring(self):
        """Test PyUnicode_CompareWithASCIIString()"""
        from _testcapi import unicode_comparewithasciistring as comparewithasciistring

        self.assertEqual(comparewithasciistring('abc', b'abc'), 0)
        self.assertEqual(comparewithasciistring('abc', b'def'), -1)
        self.assertEqual(comparewithasciistring('def', b'abc'), 1)
        # The expected values show the byte string being cut at its first
        # null byte, while the str operand keeps its embedded null.
        self.assertEqual(comparewithasciistring('abc', b'abc\0def'), 0)
        self.assertEqual(comparewithasciistring('abc\0def', b'abc\0def'), 1)
        # Non-ASCII (Cyrillic) str against ASCII bytes.
        self.assertEqual(comparewithasciistring('абв', b'abc'), 1)

        # CRASHES comparewithasciistring(b'abc', b'abc')
        # CRASHES comparewithasciistring([], b'abc')
        # CRASHES comparewithasciistring(NULL, b'abc')
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_richcompare(self):
        """Test PyUnicode_RichCompare()"""
        from _testcapi import unicode_richcompare as richcompare

        # Operation codes passed to the C function, numbered 0..5 in this order.
        LT, LE, EQ, NE, GT, GE = range(6)
        # ASCII, Cyrillic, a non-BMP character, and a trailing null character.
        strings = ('abc', 'абв', '\U0001f600', 'abc\0')
        for s1 in strings:
            for s2 in strings:
                # assertIs: the result must be the very bool singleton that
                # the Python-level operator produces.
                self.assertIs(richcompare(s1, s2, LT), s1 < s2)
                self.assertIs(richcompare(s1, s2, LE), s1 <= s2)
                self.assertIs(richcompare(s1, s2, EQ), s1 == s2)
                self.assertIs(richcompare(s1, s2, NE), s1 != s2)
                self.assertIs(richcompare(s1, s2, GT), s1 > s2)
                self.assertIs(richcompare(s1, s2, GE), s1 >= s2)

        # A non-str operand on either side yields NotImplemented, not an error.
        for op in LT, LE, EQ, NE, GT, GE:
            self.assertIs(richcompare(b'abc', 'abc', op), NotImplemented)
            self.assertIs(richcompare('abc', b'abc', op), NotImplemented)
            self.assertIs(richcompare(b'abc', b'abc', op), NotImplemented)
            self.assertIs(richcompare([], 'abc', op), NotImplemented)
            self.assertIs(richcompare('abc', [], op), NotImplemented)
            self.assertIs(richcompare([], [], op), NotImplemented)

            # CRASHES richcompare(NULL, 'abc', op)
            # CRASHES richcompare('abc', NULL, op)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_format(self):
        """Test PyUnicode_Format()"""
        from _testcapi import unicode_format

        # Rows are (format string, arguments, expected result); a lone value
        # and a one-element tuple are expected to format identically.
        checks = (
            ('x=%d!', 42, 'x=42!'),
            ('x=%d!', (42,), 'x=42!'),
            ('x=%d y=%s!', (42, []), 'x=42 y=[]!'),
        )
        for template, arguments, expected in checks:
            self.assertEqual(unicode_format(template, arguments), expected)

        # A NULL in either position is reported as SystemError.
        for template, arguments in (('x=%d!', NULL), (NULL, 42)):
            self.assertRaises(SystemError, unicode_format, template, arguments)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_contains(self):
        """Test PyUnicode_Contains()"""
        from _testcapi import unicode_contains as contains

        # The empty string is contained in any string.
        self.assertEqual(contains('abcd', ''), 1)
        self.assertEqual(contains('abcd', 'b'), 1)
        self.assertEqual(contains('abcd', 'x'), 0)
        # Non-ASCII (Cyrillic) needle in an ASCII haystack.
        self.assertEqual(contains('abcd', 'ж'), 0)
        # A null character is found only when it is really embedded.
        self.assertEqual(contains('abcd', '\0'), 0)
        self.assertEqual(contains('abc\0def', '\0'), 1)
        self.assertEqual(contains('abcd', 'bc'), 1)

        # bad arguments
        self.assertRaises(TypeError, contains, b'abcd', 'b')
        self.assertRaises(TypeError, contains, 'abcd', b'b')
        self.assertRaises(TypeError, contains, b'abcd', b'b')
        self.assertRaises(TypeError, contains, [], 'b')
        self.assertRaises(TypeError, contains, 'abcd', ord('b'))
        # CRASHES contains(NULL, 'b')
        # CRASHES contains('abcd', NULL)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_isidentifier(self):
        """Test PyUnicode_IsIdentifier()"""
        from _testcapi import unicode_isidentifier as isidentifier

        # Valid identifiers: ASCII, U+00B5 MICRO SIGN, and a word written in
        # non-BMP Mathematical Fraktur letters.
        self.assertEqual(isidentifier("a"), 1)
        self.assertEqual(isidentifier("b0"), 1)
        self.assertEqual(isidentifier("µ"), 1)
        self.assertEqual(isidentifier("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"), 1)

        # Not identifiers: empty, whitespace, punctuation, U+00A9 COPYRIGHT
        # SIGN, and anything starting with a digit.
        self.assertEqual(isidentifier(""), 0)
        self.assertEqual(isidentifier(" "), 0)
        self.assertEqual(isidentifier("["), 0)
        self.assertEqual(isidentifier("©"), 0)
        self.assertEqual(isidentifier("0"), 0)
        self.assertEqual(isidentifier("32M"), 0)

        # CRASHES isidentifier(b"a")
        # CRASHES isidentifier([])
        # CRASHES isidentifier(NULL)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_copycharacters(self):
        """Test PyUnicode_CopyCharacters()"""
        from _testcapi import unicode_copycharacters as copychars

        # Five-character samples ordered by growing character width:
        # ASCII, Latin-1, BMP, non-BMP.
        size = 5
        samples = [
            'abcde',
            '\xa1\xa2\xa3\xa4\xa5',
            '\u4f60\u597d\u4e16\u754c\uff01',
            '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604',
        ]

        for position, source in enumerate(samples):
            # wide -> narrow: exceed maxchar limitation
            for narrower in samples[:position]:
                with self.assertRaises(SystemError):
                    copychars(narrower, 0, source, 0, size)
            # same kind: copy the tail of the source to the front of the
            # target; the untouched remainder reads back as null characters
            for offset in range(size):
                tail = source[offset:offset+size]
                self.assertEqual(copychars(source, 0, source, offset, size),
                                 (tail.ljust(size, '\0'), size-offset))
            # same kind: copy the tail onto the same offset of the target,
            # leaving null characters in front of it
            for offset in range(size):
                tail = source[offset:offset+size]
                self.assertEqual(copychars(source, offset, source, offset, size),
                                 (tail.rjust(size, '\0'), size-offset))
            # narrow -> wide
            # Tests omitted since this creates invalid strings.

        s = samples[0]
        # Out-of-range start index on either side.
        for to_start, from_start in ((6, 0), (-1, 0), (0, 6), (0, -1)):
            self.assertRaises(IndexError, copychars, s, to_start, s, from_start, size)
        # Target too small, negative count, and a non-str source.
        self.assertRaises(SystemError, copychars, s, 1, s, 0, size)
        self.assertRaises(SystemError, copychars, s, 0, s, 0, -1)
        self.assertRaises(SystemError, copychars, s, 0, b'', 0, 0)
    @support.cpython_only
    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
    def test_pep393_utf8_caching_bug(self):
        # Issue #25709: Problem with string concatenation and utf-8 cache
        from _testcapi import getargs_s_hash

        # One code point per UTF-8 length: 1, 2, 3 and 4 bytes.
        for code_point in 0x24, 0xa4, 0x20ac, 0x1f40d:
            s = ''
            for count in range(1, 6):
                # Due to CPython specific optimization the 's' string can be
                # resized in-place.
                s += chr(code_point)
                expected = chr(code_point).encode() * count
                # Parsing with the "s#" format code calls indirectly
                # PyUnicode_AsUTF8AndSize() which creates the UTF-8
                # encoded string cached in the Unicode object.
                self.assertEqual(getargs_s_hash(s), expected)
                # Check that the second call returns the same result
                self.assertEqual(getargs_s_hash(s), expected)


if __name__ == "__main__":
    # Run this module's tests when it is executed directly as a script.
    unittest.main()