mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 18:54:53 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1941 lines
		
	
	
	
		
			87 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1941 lines
		
	
	
	
		
			87 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import unittest
 | ||
| import sys
 | ||
| from test import support
 | ||
| from test.support import import_helper
 | ||
| 
 | ||
| try:
 | ||
|     import _testcapi
 | ||
|     from _testcapi import PY_SSIZE_T_MIN, PY_SSIZE_T_MAX
 | ||
| except ImportError:
 | ||
|     _testcapi = None
 | ||
| try:
 | ||
|     import _testlimitedcapi
 | ||
| except ImportError:
 | ||
|     _testlimitedcapi = None
 | ||
| try:
 | ||
|     import _testinternalcapi
 | ||
| except ImportError:
 | ||
|     _testinternalcapi = None
 | ||
| try:
 | ||
|     import ctypes
 | ||
| except ImportError:
 | ||
|     ctypes = None
 | ||
| 
 | ||
| 
 | ||
| NULL = None
 | ||
| 
 | ||
| class Str(str):
 | ||
|     pass
 | ||
| 
 | ||
| 
 | ||
| class CAPITest(unittest.TestCase):
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | ||
|     def test_new(self):
 | ||
|         """Test PyUnicode_New()"""
 | ||
|         from _testcapi import unicode_new as new
 | ||
| 
 | ||
|         for maxchar in 0, 0x61, 0xa1, 0x4f60, 0x1f600, 0x10ffff:
 | ||
|             self.assertEqual(new(0, maxchar), '')
 | ||
|             self.assertEqual(new(5, maxchar), chr(maxchar)*5)
 | ||
|             self.assertRaises(MemoryError, new, PY_SSIZE_T_MAX, maxchar)
 | ||
|         self.assertEqual(new(0, 0x110000), '')
 | ||
|         self.assertRaises(MemoryError, new, PY_SSIZE_T_MAX//2, 0x4f60)
 | ||
|         self.assertRaises(MemoryError, new, PY_SSIZE_T_MAX//2+1, 0x4f60)
 | ||
|         self.assertRaises(MemoryError, new, PY_SSIZE_T_MAX//2, 0x1f600)
 | ||
|         self.assertRaises(MemoryError, new, PY_SSIZE_T_MAX//2+1, 0x1f600)
 | ||
|         self.assertRaises(MemoryError, new, PY_SSIZE_T_MAX//4, 0x1f600)
 | ||
|         self.assertRaises(MemoryError, new, PY_SSIZE_T_MAX//4+1, 0x1f600)
 | ||
|         self.assertRaises(SystemError, new, 5, 0x110000)
 | ||
|         self.assertRaises(SystemError, new, -1, 0)
 | ||
|         self.assertRaises(SystemError, new, PY_SSIZE_T_MIN, 0)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | ||
|     def test_fill(self):
 | ||
|         """Test PyUnicode_Fill()"""
 | ||
|         from _testcapi import unicode_fill as fill
 | ||
| 
 | ||
|         strings = [
 | ||
|             # all strings have exactly 5 characters
 | ||
|             'abcde', '\xa1\xa2\xa3\xa4\xa5',
 | ||
|             '\u4f60\u597d\u4e16\u754c\uff01',
 | ||
|             '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
 | ||
|         ]
 | ||
|         chars = [0x78, 0xa9, 0x20ac, 0x1f638]
 | ||
| 
 | ||
|         for idx, fill_char in enumerate(chars):
 | ||
|             # wide -> narrow: exceed maxchar limitation
 | ||
|             for to in strings[:idx]:
 | ||
|                 self.assertRaises(ValueError, fill, to, 0, 0, fill_char)
 | ||
|             for to in strings[idx:]:
 | ||
|                 for start in [*range(7), PY_SSIZE_T_MAX]:
 | ||
|                     for length in [*range(-1, 7 - start), PY_SSIZE_T_MIN, PY_SSIZE_T_MAX]:
 | ||
|                         filled = max(min(length, 5 - start), 0)
 | ||
|                         if filled == 5 and to != strings[idx]:
 | ||
|                             # narrow -> wide
 | ||
|                             # Tests omitted since this creates invalid strings.
 | ||
|                             continue
 | ||
|                         expected = to[:start] + chr(fill_char) * filled + to[start + filled:]
 | ||
|                         self.assertEqual(fill(to, start, length, fill_char),
 | ||
|                                         (expected, filled))
 | ||
| 
 | ||
|         s = strings[0]
 | ||
|         self.assertRaises(IndexError, fill, s, -1, 0, 0x78)
 | ||
|         self.assertRaises(IndexError, fill, s, PY_SSIZE_T_MIN, 0, 0x78)
 | ||
|         self.assertRaises(ValueError, fill, s, 0, 0, 0x110000)
 | ||
|         self.assertRaises(SystemError, fill, b'abc', 0, 0, 0x78)
 | ||
|         self.assertRaises(SystemError, fill, [], 0, 0, 0x78)
 | ||
|         # CRASHES fill(s, 0, NULL, 0, 0)
 | ||
|         # CRASHES fill(NULL, 0, 0, 0x78)
 | ||
|         # TODO: Test PyUnicode_Fill() with non-modifiable unicode.
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_writechar(self):
 | ||
|         """Test PyUnicode_WriteChar()"""
 | ||
|         from _testlimitedcapi import unicode_writechar as writechar
 | ||
| 
 | ||
|         strings = [
 | ||
|             # one string for every kind
 | ||
|             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
 | ||
|             '\U0001f600\U0001f601\U0001f602'
 | ||
|         ]
 | ||
|         # one character for every kind + out of range code
 | ||
|         chars = [0x78, 0xa9, 0x20ac, 0x1f638, 0x110000]
 | ||
|         for i, s in enumerate(strings):
 | ||
|             for j, c in enumerate(chars):
 | ||
|                 if j <= i:
 | ||
|                     self.assertEqual(writechar(s, 1, c),
 | ||
|                                      (s[:1] + chr(c) + s[2:], 0))
 | ||
|                 else:
 | ||
|                     self.assertRaises(ValueError, writechar, s, 1, c)
 | ||
| 
 | ||
|         self.assertRaises(IndexError, writechar, 'abc', 3, 0x78)
 | ||
|         self.assertRaises(IndexError, writechar, 'abc', -1, 0x78)
 | ||
|         self.assertRaises(IndexError, writechar, 'abc', PY_SSIZE_T_MAX, 0x78)
 | ||
|         self.assertRaises(IndexError, writechar, 'abc', PY_SSIZE_T_MIN, 0x78)
 | ||
|         self.assertRaises(TypeError, writechar, b'abc', 0, 0x78)
 | ||
|         self.assertRaises(TypeError, writechar, [], 0, 0x78)
 | ||
|         # CRASHES writechar(NULL, 0, 0x78)
 | ||
|         # TODO: Test PyUnicode_WriteChar() with non-modifiable and legacy
 | ||
|         # unicode.
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_resize(self):
 | ||
|         """Test PyUnicode_Resize()"""
 | ||
|         from _testlimitedcapi import unicode_resize as resize
 | ||
| 
 | ||
|         strings = [
 | ||
|             # all strings have exactly 3 characters
 | ||
|             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
 | ||
|             '\U0001f600\U0001f601\U0001f602'
 | ||
|         ]
 | ||
|         for s in strings:
 | ||
|             self.assertEqual(resize(s, 3), (s, 0))
 | ||
|             self.assertEqual(resize(s, 2), (s[:2], 0))
 | ||
|             self.assertEqual(resize(s, 4), (s + '\0', 0))
 | ||
|             self.assertEqual(resize(s, 10), (s + '\0'*7, 0))
 | ||
|             self.assertEqual(resize(s, 0), ('', 0))
 | ||
|             self.assertRaises(MemoryError, resize, s, PY_SSIZE_T_MAX)
 | ||
|             self.assertRaises(SystemError, resize, s, -1)
 | ||
|             self.assertRaises(SystemError, resize, s, PY_SSIZE_T_MIN)
 | ||
|         self.assertRaises(SystemError, resize, b'abc', 0)
 | ||
|         self.assertRaises(SystemError, resize, [], 0)
 | ||
|         self.assertRaises(SystemError, resize, NULL, 0)
 | ||
|         # TODO: Test PyUnicode_Resize() with non-modifiable and legacy unicode
 | ||
|         # and with NULL as the address.
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_append(self):
 | ||
|         """Test PyUnicode_Append()"""
 | ||
|         from _testlimitedcapi import unicode_append as append
 | ||
| 
 | ||
|         strings = [
 | ||
|             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
 | ||
|             '\U0001f600\U0001f601\U0001f602'
 | ||
|         ]
 | ||
|         for left in strings:
 | ||
|             left = left[::-1]
 | ||
|             for right in strings:
 | ||
|                 expected = left + right
 | ||
|                 self.assertEqual(append(left, right), expected)
 | ||
| 
 | ||
|         self.assertRaises(SystemError, append, 'abc', b'abc')
 | ||
|         self.assertRaises(SystemError, append, b'abc', 'abc')
 | ||
|         self.assertRaises(SystemError, append, b'abc', b'abc')
 | ||
|         self.assertRaises(SystemError, append, 'abc', [])
 | ||
|         self.assertRaises(SystemError, append, [], 'abc')
 | ||
|         self.assertRaises(SystemError, append, [], [])
 | ||
|         self.assertRaises(SystemError, append, NULL, 'abc')
 | ||
|         self.assertRaises(SystemError, append, 'abc', NULL)
 | ||
|         # TODO: Test PyUnicode_Append() with modifiable unicode
 | ||
|         # and with NULL as the address.
 | ||
|         # TODO: Check reference counts.
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_appendanddel(self):
 | ||
|         """Test PyUnicode_AppendAndDel()"""
 | ||
|         from _testlimitedcapi import unicode_appendanddel as appendanddel
 | ||
| 
 | ||
|         strings = [
 | ||
|             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
 | ||
|             '\U0001f600\U0001f601\U0001f602'
 | ||
|         ]
 | ||
|         for left in strings:
 | ||
|             left = left[::-1]
 | ||
|             for right in strings:
 | ||
|                 self.assertEqual(appendanddel(left, right), left + right)
 | ||
| 
 | ||
|         self.assertRaises(SystemError, appendanddel, 'abc', b'abc')
 | ||
|         self.assertRaises(SystemError, appendanddel, b'abc', 'abc')
 | ||
|         self.assertRaises(SystemError, appendanddel, b'abc', b'abc')
 | ||
|         self.assertRaises(SystemError, appendanddel, 'abc', [])
 | ||
|         self.assertRaises(SystemError, appendanddel, [], 'abc')
 | ||
|         self.assertRaises(SystemError, appendanddel, [], [])
 | ||
|         self.assertRaises(SystemError, appendanddel, NULL, 'abc')
 | ||
|         self.assertRaises(SystemError, appendanddel, 'abc', NULL)
 | ||
|         # TODO: Test PyUnicode_AppendAndDel() with modifiable unicode
 | ||
|         # and with NULL as the address.
 | ||
|         # TODO: Check reference counts.
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_fromstringandsize(self):
 | ||
|         """Test PyUnicode_FromStringAndSize()"""
 | ||
|         from _testlimitedcapi import unicode_fromstringandsize as fromstringandsize
 | ||
| 
 | ||
|         self.assertEqual(fromstringandsize(b'abc'), 'abc')
 | ||
|         self.assertEqual(fromstringandsize(b'abc', 2), 'ab')
 | ||
|         self.assertEqual(fromstringandsize(b'abc\0def'), 'abc\0def')
 | ||
|         self.assertEqual(fromstringandsize(b'\xc2\xa1\xc2\xa2'), '\xa1\xa2')
 | ||
|         self.assertEqual(fromstringandsize(b'\xe4\xbd\xa0'), '\u4f60')
 | ||
|         self.assertEqual(fromstringandsize(b'\xf0\x9f\x98\x80'), '\U0001f600')
 | ||
|         self.assertRaises(UnicodeDecodeError, fromstringandsize, b'\xc2\xa1', 1)
 | ||
|         self.assertRaises(UnicodeDecodeError, fromstringandsize, b'\xa1', 1)
 | ||
|         self.assertEqual(fromstringandsize(b'', 0), '')
 | ||
|         self.assertEqual(fromstringandsize(NULL, 0), '')
 | ||
| 
 | ||
|         self.assertRaises(MemoryError, fromstringandsize, b'abc', PY_SSIZE_T_MAX)
 | ||
|         self.assertRaises(SystemError, fromstringandsize, b'abc', -1)
 | ||
|         self.assertRaises(SystemError, fromstringandsize, b'abc', PY_SSIZE_T_MIN)
 | ||
|         self.assertRaises(SystemError, fromstringandsize, NULL, -1)
 | ||
|         self.assertRaises(SystemError, fromstringandsize, NULL, PY_SSIZE_T_MIN)
 | ||
|         self.assertRaises(SystemError, fromstringandsize, NULL, 3)
 | ||
|         self.assertRaises(SystemError, fromstringandsize, NULL, PY_SSIZE_T_MAX)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_fromstring(self):
 | ||
|         """Test PyUnicode_FromString()"""
 | ||
|         from _testlimitedcapi import unicode_fromstring as fromstring
 | ||
| 
 | ||
|         self.assertEqual(fromstring(b'abc'), 'abc')
 | ||
|         self.assertEqual(fromstring(b'\xc2\xa1\xc2\xa2'), '\xa1\xa2')
 | ||
|         self.assertEqual(fromstring(b'\xe4\xbd\xa0'), '\u4f60')
 | ||
|         self.assertEqual(fromstring(b'\xf0\x9f\x98\x80'), '\U0001f600')
 | ||
|         self.assertRaises(UnicodeDecodeError, fromstring, b'\xc2')
 | ||
|         self.assertRaises(UnicodeDecodeError, fromstring, b'\xa1')
 | ||
|         self.assertEqual(fromstring(b''), '')
 | ||
| 
 | ||
|         # CRASHES fromstring(NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | ||
|     def test_fromkindanddata(self):
 | ||
|         """Test PyUnicode_FromKindAndData()"""
 | ||
|         from _testcapi import unicode_fromkindanddata as fromkindanddata
 | ||
| 
 | ||
|         strings = [
 | ||
|             'abcde', '\xa1\xa2\xa3\xa4\xa5',
 | ||
|             '\u4f60\u597d\u4e16\u754c\uff01',
 | ||
|             '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
 | ||
|         ]
 | ||
|         enc1 = 'latin1'
 | ||
|         for s in strings[:2]:
 | ||
|             self.assertEqual(fromkindanddata(1, s.encode(enc1)), s)
 | ||
|         enc2 = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
 | ||
|         for s in strings[:3]:
 | ||
|             self.assertEqual(fromkindanddata(2, s.encode(enc2)), s)
 | ||
|         enc4 = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
 | ||
|         for s in strings:
 | ||
|             self.assertEqual(fromkindanddata(4, s.encode(enc4)), s)
 | ||
|         self.assertEqual(fromkindanddata(2, '\U0001f600'.encode(enc2)),
 | ||
|                          '\ud83d\ude00')
 | ||
|         for kind in 1, 2, 4:
 | ||
|             self.assertEqual(fromkindanddata(kind, b''), '')
 | ||
|             self.assertEqual(fromkindanddata(kind, b'\0'*kind), '\0')
 | ||
|             self.assertEqual(fromkindanddata(kind, NULL, 0), '')
 | ||
| 
 | ||
|         for kind in -1, 0, 3, 5, 8:
 | ||
|             self.assertRaises(SystemError, fromkindanddata, kind, b'')
 | ||
|         self.assertRaises(ValueError, fromkindanddata, 1, b'abc', -1)
 | ||
|         self.assertRaises(ValueError, fromkindanddata, 1, b'abc', PY_SSIZE_T_MIN)
 | ||
|         self.assertRaises(ValueError, fromkindanddata, 1, NULL, -1)
 | ||
|         self.assertRaises(ValueError, fromkindanddata, 1, NULL, PY_SSIZE_T_MIN)
 | ||
|         # CRASHES fromkindanddata(1, NULL, 1)
 | ||
|         # CRASHES fromkindanddata(4, b'\xff\xff\xff\xff')
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_substring(self):
 | ||
|         """Test PyUnicode_Substring()"""
 | ||
|         from _testlimitedcapi import unicode_substring as substring
 | ||
| 
 | ||
|         strings = [
 | ||
|             'ab', 'ab\xa1\xa2',
 | ||
|             'ab\xa1\xa2\u4f60\u597d',
 | ||
|             'ab\xa1\xa2\u4f60\u597d\U0001f600\U0001f601'
 | ||
|         ]
 | ||
|         for s in strings:
 | ||
|             for start in [*range(0, len(s) + 2), PY_SSIZE_T_MAX]:
 | ||
|                 for end in [*range(max(start-1, 0), len(s) + 2), PY_SSIZE_T_MAX]:
 | ||
|                     self.assertEqual(substring(s, start, end), s[start:end])
 | ||
| 
 | ||
|         self.assertRaises(IndexError, substring, 'abc', -1, 0)
 | ||
|         self.assertRaises(IndexError, substring, 'abc', PY_SSIZE_T_MIN, 0)
 | ||
|         self.assertRaises(IndexError, substring, 'abc', 0, -1)
 | ||
|         self.assertRaises(IndexError, substring, 'abc', 0, PY_SSIZE_T_MIN)
 | ||
|         # CRASHES substring(b'abc', 0, 0)
 | ||
|         # CRASHES substring([], 0, 0)
 | ||
|         # CRASHES substring(NULL, 0, 0)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_getlength(self):
 | ||
|         """Test PyUnicode_GetLength()"""
 | ||
|         from _testlimitedcapi import unicode_getlength as getlength
 | ||
| 
 | ||
|         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
 | ||
|                   'a\ud800b\udfffc', '\ud834\udd1e']:
 | ||
|             self.assertEqual(getlength(s), len(s))
 | ||
| 
 | ||
|         self.assertRaises(TypeError, getlength, b'abc')
 | ||
|         self.assertRaises(TypeError, getlength, [])
 | ||
|         # CRASHES getlength(NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_readchar(self):
 | ||
|         """Test PyUnicode_ReadChar()"""
 | ||
|         from _testlimitedcapi import unicode_readchar as readchar
 | ||
| 
 | ||
|         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
 | ||
|                   'a\ud800b\udfffc', '\ud834\udd1e']:
 | ||
|             for i, c in enumerate(s):
 | ||
|                 self.assertEqual(readchar(s, i), ord(c))
 | ||
|             self.assertRaises(IndexError, readchar, s, len(s))
 | ||
|             self.assertRaises(IndexError, readchar, s, PY_SSIZE_T_MAX)
 | ||
|             self.assertRaises(IndexError, readchar, s, -1)
 | ||
|             self.assertRaises(IndexError, readchar, s, PY_SSIZE_T_MIN)
 | ||
| 
 | ||
|         self.assertRaises(TypeError, readchar, b'abc', 0)
 | ||
|         self.assertRaises(TypeError, readchar, [], 0)
 | ||
|         # CRASHES readchar(NULL, 0)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_fromobject(self):
 | ||
|         """Test PyUnicode_FromObject()"""
 | ||
|         from _testlimitedcapi import unicode_fromobject as fromobject
 | ||
| 
 | ||
|         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
 | ||
|                   'a\ud800b\udfffc', '\ud834\udd1e']:
 | ||
|             self.assertEqual(fromobject(s), s)
 | ||
|             o = Str(s)
 | ||
|             s2 = fromobject(o)
 | ||
|             self.assertEqual(s2, s)
 | ||
|             self.assertIs(type(s2), str)
 | ||
|             self.assertIsNot(s2, s)
 | ||
| 
 | ||
|         self.assertRaises(TypeError, fromobject, b'abc')
 | ||
|         self.assertRaises(TypeError, fromobject, [])
 | ||
|         # CRASHES fromobject(NULL)
 | ||
| 
 | ||
|     @unittest.skipIf(ctypes is None, 'need ctypes')
 | ||
|     def test_from_format(self):
 | ||
|         """Test PyUnicode_FromFormat()"""
 | ||
|         # Length modifiers "j" and "t" are not tested here because ctypes does
 | ||
|         # not expose types for intmax_t and ptrdiff_t.
 | ||
|         # _testlimitedcapi.test_string_from_format() has a wider coverage of all
 | ||
|         # formats.
 | ||
|         from ctypes import (
 | ||
|             c_char_p,
 | ||
|             pythonapi, py_object, sizeof,
 | ||
|             c_int, c_long, c_longlong, c_ssize_t,
 | ||
|             c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p,
 | ||
|             c_wchar, c_wchar_p)
 | ||
|         name = "PyUnicode_FromFormat"
 | ||
|         _PyUnicode_FromFormat = getattr(pythonapi, name)
 | ||
|         _PyUnicode_FromFormat.argtypes = (c_char_p,)
 | ||
|         _PyUnicode_FromFormat.restype = py_object
 | ||
| 
 | ||
|         def PyUnicode_FromFormat(format, *args):
 | ||
|             cargs = tuple(
 | ||
|                 py_object(arg) if isinstance(arg, str) else arg
 | ||
|                 for arg in args)
 | ||
|             return _PyUnicode_FromFormat(format, *cargs)
 | ||
| 
 | ||
|         def check_format(expected, format, *args):
 | ||
|             text = PyUnicode_FromFormat(format, *args)
 | ||
|             self.assertEqual(expected, text)
 | ||
| 
 | ||
|         # ascii format, non-ascii argument
 | ||
|         check_format('ascii\x7f=unicode\xe9',
 | ||
|                      b'ascii\x7f=%U', 'unicode\xe9')
 | ||
| 
 | ||
|         # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
 | ||
|         # raises an error
 | ||
|         self.assertRaisesRegex(ValueError,
 | ||
|             r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
 | ||
|             'string, got a non-ASCII byte: 0xe9$',
 | ||
|             PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
 | ||
| 
 | ||
|         # test "%c"
 | ||
|         check_format('\uabcd',
 | ||
|                      b'%c', c_int(0xabcd))
 | ||
|         check_format('\U0010ffff',
 | ||
|                      b'%c', c_int(0x10ffff))
 | ||
|         with self.assertRaises(OverflowError):
 | ||
|             PyUnicode_FromFormat(b'%c', c_int(0x110000))
 | ||
|         # Issue #18183
 | ||
|         check_format('\U00010000\U00100000',
 | ||
|                      b'%c%c', c_int(0x10000), c_int(0x100000))
 | ||
| 
 | ||
|         # test "%"
 | ||
|         check_format('%',
 | ||
|                      b'%%')
 | ||
|         check_format('%s',
 | ||
|                      b'%%s')
 | ||
|         check_format('[%]',
 | ||
|                      b'[%%]')
 | ||
|         check_format('%abc',
 | ||
|                      b'%%%s', b'abc')
 | ||
| 
 | ||
|         # truncated string
 | ||
|         check_format('abc',
 | ||
|                      b'%.3s', b'abcdef')
 | ||
|         check_format('abc[',
 | ||
|                      b'%.6s', 'abc[\u20ac]'.encode('utf8'))
 | ||
|         check_format('abc[\u20ac',
 | ||
|                      b'%.7s', 'abc[\u20ac]'.encode('utf8'))
 | ||
|         check_format('abc[\ufffd',
 | ||
|                      b'%.5s', b'abc[\xff]')
 | ||
|         check_format('abc[',
 | ||
|                      b'%.6s', b'abc[\xe2\x82]')
 | ||
|         check_format('abc[\ufffd]',
 | ||
|                      b'%.7s', b'abc[\xe2\x82]')
 | ||
|         check_format('abc[\ufffd',
 | ||
|                      b'%.7s', b'abc[\xe2\x82\0')
 | ||
|         check_format('      abc[',
 | ||
|                      b'%10.6s', 'abc[\u20ac]'.encode('utf8'))
 | ||
|         check_format('     abc[\u20ac',
 | ||
|                      b'%10.7s', 'abc[\u20ac]'.encode('utf8'))
 | ||
|         check_format('     abc[\ufffd',
 | ||
|                      b'%10.5s', b'abc[\xff]')
 | ||
|         check_format('      abc[',
 | ||
|                      b'%10.6s', b'abc[\xe2\x82]')
 | ||
|         check_format('    abc[\ufffd]',
 | ||
|                      b'%10.7s', b'abc[\xe2\x82]')
 | ||
| 
 | ||
|         check_format("'\\u20acABC'",
 | ||
|                      b'%A', '\u20acABC')
 | ||
|         check_format("'\\u20",
 | ||
|                      b'%.5A', '\u20acABCDEF')
 | ||
|         check_format("'\u20acABC'",
 | ||
|                      b'%R', '\u20acABC')
 | ||
|         check_format("'\u20acA",
 | ||
|                      b'%.3R', '\u20acABCDEF')
 | ||
|         check_format('\u20acAB',
 | ||
|                      b'%.3S', '\u20acABCDEF')
 | ||
|         check_format('\u20acAB',
 | ||
|                      b'%.3U', '\u20acABCDEF')
 | ||
| 
 | ||
|         check_format('\u20acAB',
 | ||
|                      b'%.3V', '\u20acABCDEF', None)
 | ||
|         check_format('abc[',
 | ||
|                      b'%.6V', None, 'abc[\u20ac]'.encode('utf8'))
 | ||
|         check_format('abc[\u20ac',
 | ||
|                      b'%.7V', None, 'abc[\u20ac]'.encode('utf8'))
 | ||
|         check_format('abc[\ufffd',
 | ||
|                      b'%.5V', None, b'abc[\xff]')
 | ||
|         check_format('abc[',
 | ||
|                      b'%.6V', None, b'abc[\xe2\x82]')
 | ||
|         check_format('abc[\ufffd]',
 | ||
|                      b'%.7V', None, b'abc[\xe2\x82]')
 | ||
|         check_format('      abc[',
 | ||
|                      b'%10.6V', None, 'abc[\u20ac]'.encode('utf8'))
 | ||
|         check_format('     abc[\u20ac',
 | ||
|                      b'%10.7V', None, 'abc[\u20ac]'.encode('utf8'))
 | ||
|         check_format('     abc[\ufffd',
 | ||
|                      b'%10.5V', None, b'abc[\xff]')
 | ||
|         check_format('      abc[',
 | ||
|                      b'%10.6V', None, b'abc[\xe2\x82]')
 | ||
|         check_format('    abc[\ufffd]',
 | ||
|                      b'%10.7V', None, b'abc[\xe2\x82]')
 | ||
|         check_format('     abc[\ufffd',
 | ||
|                      b'%10.7V', None, b'abc[\xe2\x82\0')
 | ||
| 
 | ||
|         # following tests comes from #7330
 | ||
|         # test width modifier and precision modifier with %S
 | ||
|         check_format("repr=  abc",
 | ||
|                      b'repr=%5S', 'abc')
 | ||
|         check_format("repr=ab",
 | ||
|                      b'repr=%.2S', 'abc')
 | ||
|         check_format("repr=   ab",
 | ||
|                      b'repr=%5.2S', 'abc')
 | ||
| 
 | ||
|         # test width modifier and precision modifier with %R
 | ||
|         check_format("repr=   'abc'",
 | ||
|                      b'repr=%8R', 'abc')
 | ||
|         check_format("repr='ab",
 | ||
|                      b'repr=%.3R', 'abc')
 | ||
|         check_format("repr=  'ab",
 | ||
|                      b'repr=%5.3R', 'abc')
 | ||
| 
 | ||
|         # test width modifier and precision modifier with %A
 | ||
|         check_format("repr=   'abc'",
 | ||
|                      b'repr=%8A', 'abc')
 | ||
|         check_format("repr='ab",
 | ||
|                      b'repr=%.3A', 'abc')
 | ||
|         check_format("repr=  'ab",
 | ||
|                      b'repr=%5.3A', 'abc')
 | ||
| 
 | ||
|         # test width modifier and precision modifier with %s
 | ||
|         check_format("repr=  abc",
 | ||
|                      b'repr=%5s', b'abc')
 | ||
|         check_format("repr=ab",
 | ||
|                      b'repr=%.2s', b'abc')
 | ||
|         check_format("repr=   ab",
 | ||
|                      b'repr=%5.2s', b'abc')
 | ||
| 
 | ||
|         # test width modifier and precision modifier with %U
 | ||
|         check_format("repr=  abc",
 | ||
|                      b'repr=%5U', 'abc')
 | ||
|         check_format("repr=ab",
 | ||
|                      b'repr=%.2U', 'abc')
 | ||
|         check_format("repr=   ab",
 | ||
|                      b'repr=%5.2U', 'abc')
 | ||
| 
 | ||
|         # test width modifier and precision modifier with %V
 | ||
|         check_format("repr=  abc",
 | ||
|                      b'repr=%5V', 'abc', b'123')
 | ||
|         check_format("repr=ab",
 | ||
|                      b'repr=%.2V', 'abc', b'123')
 | ||
|         check_format("repr=   ab",
 | ||
|                      b'repr=%5.2V', 'abc', b'123')
 | ||
|         check_format("repr=  123",
 | ||
|                      b'repr=%5V', None, b'123')
 | ||
|         check_format("repr=12",
 | ||
|                      b'repr=%.2V', None, b'123')
 | ||
|         check_format("repr=   12",
 | ||
|                      b'repr=%5.2V', None, b'123')
 | ||
| 
 | ||
|         # test integer formats (%i, %d, %u, %o, %x, %X)
 | ||
|         check_format('010',
 | ||
|                      b'%03i', c_int(10))
 | ||
|         check_format('0010',
 | ||
|                      b'%0.4i', c_int(10))
 | ||
|         for conv, signed, value, expected in [
 | ||
|             (b'i', True, -123, '-123'),
 | ||
|             (b'd', True, -123, '-123'),
 | ||
|             (b'u', False, 123, '123'),
 | ||
|             (b'o', False, 0o123, '123'),
 | ||
|             (b'x', False, 0xabc, 'abc'),
 | ||
|             (b'X', False, 0xabc, 'ABC'),
 | ||
|         ]:
 | ||
|             for mod, ctype in [
 | ||
|                 (b'', c_int if signed else c_uint),
 | ||
|                 (b'l', c_long if signed else c_ulong),
 | ||
|                 (b'll', c_longlong if signed else c_ulonglong),
 | ||
|                 (b'z', c_ssize_t if signed else c_size_t),
 | ||
|             ]:
 | ||
|                 with self.subTest(format=b'%' + mod + conv):
 | ||
|                     check_format(expected,
 | ||
|                                  b'%' + mod + conv, ctype(value))
 | ||
| 
 | ||
|         # test long output
 | ||
|         min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
 | ||
|         max_longlong = -min_longlong - 1
 | ||
|         check_format(str(min_longlong),
 | ||
|                      b'%lld', c_longlong(min_longlong))
 | ||
|         check_format(str(max_longlong),
 | ||
|                      b'%lld', c_longlong(max_longlong))
 | ||
|         max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
 | ||
|         check_format(str(max_ulonglong),
 | ||
|                      b'%llu', c_ulonglong(max_ulonglong))
 | ||
|         PyUnicode_FromFormat(b'%p', c_void_p(-1))
 | ||
| 
 | ||
|         # test padding (width and/or precision)
 | ||
|         check_format('123',        b'%2i', c_int(123))
 | ||
|         check_format('       123', b'%10i', c_int(123))
 | ||
|         check_format('0000000123', b'%010i', c_int(123))
 | ||
|         check_format('123       ', b'%-10i', c_int(123))
 | ||
|         check_format('123       ', b'%-010i', c_int(123))
 | ||
|         check_format('123',        b'%.2i', c_int(123))
 | ||
|         check_format('0000123',    b'%.7i', c_int(123))
 | ||
|         check_format('       123', b'%10.2i', c_int(123))
 | ||
|         check_format('   0000123', b'%10.7i', c_int(123))
 | ||
|         check_format('0000000123', b'%010.7i', c_int(123))
 | ||
|         check_format('0000123   ', b'%-10.7i', c_int(123))
 | ||
|         check_format('0000123   ', b'%-010.7i', c_int(123))
 | ||
| 
 | ||
|         check_format('-123',       b'%2i', c_int(-123))
 | ||
|         check_format('      -123', b'%10i', c_int(-123))
 | ||
|         check_format('-000000123', b'%010i', c_int(-123))
 | ||
|         check_format('-123      ', b'%-10i', c_int(-123))
 | ||
|         check_format('-123      ', b'%-010i', c_int(-123))
 | ||
|         check_format('-123',       b'%.2i', c_int(-123))
 | ||
|         check_format('-0000123',   b'%.7i', c_int(-123))
 | ||
|         check_format('      -123', b'%10.2i', c_int(-123))
 | ||
|         check_format('  -0000123', b'%10.7i', c_int(-123))
 | ||
|         check_format('-000000123', b'%010.7i', c_int(-123))
 | ||
|         check_format('-0000123  ', b'%-10.7i', c_int(-123))
 | ||
|         check_format('-0000123  ', b'%-010.7i', c_int(-123))
 | ||
| 
 | ||
|         check_format('123',        b'%2u', c_uint(123))
 | ||
|         check_format('       123', b'%10u', c_uint(123))
 | ||
|         check_format('0000000123', b'%010u', c_uint(123))
 | ||
|         check_format('123       ', b'%-10u', c_uint(123))
 | ||
|         check_format('123       ', b'%-010u', c_uint(123))
 | ||
|         check_format('123',        b'%.2u', c_uint(123))
 | ||
|         check_format('0000123',    b'%.7u', c_uint(123))
 | ||
|         check_format('       123', b'%10.2u', c_uint(123))
 | ||
|         check_format('   0000123', b'%10.7u', c_uint(123))
 | ||
|         check_format('0000000123', b'%010.7u', c_uint(123))
 | ||
|         check_format('0000123   ', b'%-10.7u', c_uint(123))
 | ||
|         check_format('0000123   ', b'%-010.7u', c_uint(123))
 | ||
| 
 | ||
|         check_format('123',        b'%2o', c_uint(0o123))
 | ||
|         check_format('       123', b'%10o', c_uint(0o123))
 | ||
|         check_format('0000000123', b'%010o', c_uint(0o123))
 | ||
|         check_format('123       ', b'%-10o', c_uint(0o123))
 | ||
|         check_format('123       ', b'%-010o', c_uint(0o123))
 | ||
|         check_format('123',        b'%.2o', c_uint(0o123))
 | ||
|         check_format('0000123',    b'%.7o', c_uint(0o123))
 | ||
|         check_format('       123', b'%10.2o', c_uint(0o123))
 | ||
|         check_format('   0000123', b'%10.7o', c_uint(0o123))
 | ||
|         check_format('0000000123', b'%010.7o', c_uint(0o123))
 | ||
|         check_format('0000123   ', b'%-10.7o', c_uint(0o123))
 | ||
|         check_format('0000123   ', b'%-010.7o', c_uint(0o123))
 | ||
| 
 | ||
|         check_format('abc',        b'%2x', c_uint(0xabc))
 | ||
|         check_format('       abc', b'%10x', c_uint(0xabc))
 | ||
|         check_format('0000000abc', b'%010x', c_uint(0xabc))
 | ||
|         check_format('abc       ', b'%-10x', c_uint(0xabc))
 | ||
|         check_format('abc       ', b'%-010x', c_uint(0xabc))
 | ||
|         check_format('abc',        b'%.2x', c_uint(0xabc))
 | ||
|         check_format('0000abc',    b'%.7x', c_uint(0xabc))
 | ||
|         check_format('       abc', b'%10.2x', c_uint(0xabc))
 | ||
|         check_format('   0000abc', b'%10.7x', c_uint(0xabc))
 | ||
|         check_format('0000000abc', b'%010.7x', c_uint(0xabc))
 | ||
|         check_format('0000abc   ', b'%-10.7x', c_uint(0xabc))
 | ||
|         check_format('0000abc   ', b'%-010.7x', c_uint(0xabc))
 | ||
| 
 | ||
|         check_format('ABC',        b'%2X', c_uint(0xabc))
 | ||
|         check_format('       ABC', b'%10X', c_uint(0xabc))
 | ||
|         check_format('0000000ABC', b'%010X', c_uint(0xabc))
 | ||
|         check_format('ABC       ', b'%-10X', c_uint(0xabc))
 | ||
|         check_format('ABC       ', b'%-010X', c_uint(0xabc))
 | ||
|         check_format('ABC',        b'%.2X', c_uint(0xabc))
 | ||
|         check_format('0000ABC',    b'%.7X', c_uint(0xabc))
 | ||
|         check_format('       ABC', b'%10.2X', c_uint(0xabc))
 | ||
|         check_format('   0000ABC', b'%10.7X', c_uint(0xabc))
 | ||
|         check_format('0000000ABC', b'%010.7X', c_uint(0xabc))
 | ||
|         check_format('0000ABC   ', b'%-10.7X', c_uint(0xabc))
 | ||
|         check_format('0000ABC   ', b'%-010.7X', c_uint(0xabc))
 | ||
| 
 | ||
|         # test %A
 | ||
|         check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
 | ||
|                      b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
 | ||
| 
 | ||
|         # test %V
 | ||
|         check_format('abc',
 | ||
|                      b'%V', 'abc', b'xyz')
 | ||
|         check_format('xyz',
 | ||
|                      b'%V', None, b'xyz')
 | ||
| 
 | ||
|         # test %ls
 | ||
|         check_format('abc', b'%ls', c_wchar_p('abc'))
 | ||
|         check_format('\u4eba\u6c11', b'%ls', c_wchar_p('\u4eba\u6c11'))
 | ||
|         check_format('\U0001f4bb+\U0001f40d',
 | ||
|                      b'%ls', c_wchar_p('\U0001f4bb+\U0001f40d'))
 | ||
|         check_format('   ab', b'%5.2ls', c_wchar_p('abc'))
 | ||
|         check_format('   \u4eba\u6c11', b'%5ls', c_wchar_p('\u4eba\u6c11'))
 | ||
|         check_format('  \U0001f4bb+\U0001f40d',
 | ||
|                      b'%5ls', c_wchar_p('\U0001f4bb+\U0001f40d'))
 | ||
|         check_format('\u4eba', b'%.1ls', c_wchar_p('\u4eba\u6c11'))
 | ||
|         check_format('\U0001f4bb' if sizeof(c_wchar) > 2 else '\ud83d',
 | ||
|                      b'%.1ls', c_wchar_p('\U0001f4bb+\U0001f40d'))
 | ||
|         check_format('\U0001f4bb+' if sizeof(c_wchar) > 2 else '\U0001f4bb',
 | ||
|                      b'%.2ls', c_wchar_p('\U0001f4bb+\U0001f40d'))
 | ||
| 
 | ||
|         # test %lV
 | ||
|         check_format('abc',
 | ||
|                      b'%lV', 'abc', c_wchar_p('xyz'))
 | ||
|         check_format('xyz',
 | ||
|                      b'%lV', None, c_wchar_p('xyz'))
 | ||
|         check_format('\u4eba\u6c11',
 | ||
|                      b'%lV', None, c_wchar_p('\u4eba\u6c11'))
 | ||
|         check_format('\U0001f4bb+\U0001f40d',
 | ||
|                      b'%lV', None, c_wchar_p('\U0001f4bb+\U0001f40d'))
 | ||
|         check_format('   ab',
 | ||
|                      b'%5.2lV', None, c_wchar_p('abc'))
 | ||
|         check_format('   \u4eba\u6c11',
 | ||
|                      b'%5lV', None, c_wchar_p('\u4eba\u6c11'))
 | ||
|         check_format('  \U0001f4bb+\U0001f40d',
 | ||
|                      b'%5lV', None, c_wchar_p('\U0001f4bb+\U0001f40d'))
 | ||
|         check_format('\u4eba',
 | ||
|                      b'%.1lV', None, c_wchar_p('\u4eba\u6c11'))
 | ||
|         check_format('\U0001f4bb' if sizeof(c_wchar) > 2 else '\ud83d',
 | ||
|                      b'%.1lV', None, c_wchar_p('\U0001f4bb+\U0001f40d'))
 | ||
|         check_format('\U0001f4bb+' if sizeof(c_wchar) > 2 else '\U0001f4bb',
 | ||
|                      b'%.2lV', None, c_wchar_p('\U0001f4bb+\U0001f40d'))
 | ||
| 
 | ||
|         # test %T
 | ||
|         check_format('type: str',
 | ||
|                      b'type: %T', py_object("abc"))
 | ||
|         check_format(f'type: st',
 | ||
|                      b'type: %.2T', py_object("abc"))
 | ||
|         check_format(f'type:        str',
 | ||
|                      b'type: %10T', py_object("abc"))
 | ||
| 
 | ||
|         class LocalType:
 | ||
|             pass
 | ||
|         obj = LocalType()
 | ||
|         fullname = f'{__name__}.{LocalType.__qualname__}'
 | ||
|         check_format(f'type: {fullname}',
 | ||
|                      b'type: %T', py_object(obj))
 | ||
|         fullname_alt = f'{__name__}:{LocalType.__qualname__}'
 | ||
|         check_format(f'type: {fullname_alt}',
 | ||
|                      b'type: %#T', py_object(obj))
 | ||
| 
 | ||
|         # test %N
 | ||
|         check_format('type: str',
 | ||
|                      b'type: %N', py_object(str))
 | ||
|         check_format(f'type: st',
 | ||
|                      b'type: %.2N', py_object(str))
 | ||
|         check_format(f'type:        str',
 | ||
|                      b'type: %10N', py_object(str))
 | ||
| 
 | ||
|         check_format(f'type: {fullname}',
 | ||
|                      b'type: %N', py_object(type(obj)))
 | ||
|         check_format(f'type: {fullname_alt}',
 | ||
|                      b'type: %#N', py_object(type(obj)))
 | ||
|         with self.assertRaisesRegex(TypeError, "%N argument must be a type"):
 | ||
|             check_format('type: str',
 | ||
|                          b'type: %N', py_object("abc"))
 | ||
| 
 | ||
|         # test variable width and precision
 | ||
|         check_format('  abc', b'%*s', c_int(5), b'abc')
 | ||
|         check_format('ab', b'%.*s', c_int(2), b'abc')
 | ||
|         check_format('   ab', b'%*.*s', c_int(5), c_int(2), b'abc')
 | ||
|         check_format('  abc', b'%*U', c_int(5), 'abc')
 | ||
|         check_format('ab', b'%.*U', c_int(2), 'abc')
 | ||
|         check_format('   ab', b'%*.*U', c_int(5), c_int(2), 'abc')
 | ||
|         check_format('   ab', b'%*.*V', c_int(5), c_int(2), None, b'abc')
 | ||
|         check_format('   ab', b'%*.*lV', c_int(5), c_int(2),
 | ||
|                      None, c_wchar_p('abc'))
 | ||
|         check_format('     123', b'%*i', c_int(8), c_int(123))
 | ||
|         check_format('00123', b'%.*i', c_int(5), c_int(123))
 | ||
|         check_format('   00123', b'%*.*i', c_int(8), c_int(5), c_int(123))
 | ||
| 
 | ||
|         # test %p
 | ||
|         # We cannot test the exact result,
 | ||
|         # because it returns a hex representation of a C pointer,
 | ||
|         # which is going to be different each time. But, we can test the format.
 | ||
|         p_format_regex = r'^0x[a-zA-Z0-9]{3,}$'
 | ||
|         p_format1 = PyUnicode_FromFormat(b'%p', 'abc')
 | ||
|         self.assertIsInstance(p_format1, str)
 | ||
|         self.assertRegex(p_format1, p_format_regex)
 | ||
| 
 | ||
|         p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz')
 | ||
|         self.assertIsInstance(p_format2, str)
 | ||
|         self.assertRegex(p_format2,
 | ||
|                          r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}')
 | ||
| 
 | ||
|         # Extra args are ignored:
 | ||
|         p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz')
 | ||
|         self.assertIsInstance(p_format3, str)
 | ||
|         self.assertRegex(p_format3, p_format_regex)
 | ||
| 
 | ||
|         # Test string decode from parameter of %s using utf-8.
 | ||
|         # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
 | ||
|         # '\u4eba\u6c11'
 | ||
|         check_format('repr=\u4eba\u6c11',
 | ||
|                      b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
 | ||
| 
 | ||
|         #Test replace error handler.
 | ||
|         check_format('repr=abc\ufffd',
 | ||
|                      b'repr=%V', None, b'abc\xff')
 | ||
| 
 | ||
|         # Issue #33817: empty strings
 | ||
|         check_format('',
 | ||
|                      b'')
 | ||
|         check_format('',
 | ||
|                      b'%s', b'')
 | ||
| 
 | ||
|         # test invalid format strings. these tests are just here
 | ||
|         # to check for crashes and should not be considered as specifications
 | ||
|         for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1',
 | ||
|                     b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc',
 | ||
|                     b'%l', b'%ll', b'%z', b'%lls', b'%zs'):
 | ||
|             with self.subTest(fmt=fmt):
 | ||
|                 self.assertRaisesRegex(SystemError, 'invalid format string',
 | ||
|                     PyUnicode_FromFormat, fmt, b'abc')
 | ||
|         self.assertRaisesRegex(SystemError, 'invalid format string',
 | ||
|             PyUnicode_FromFormat, b'%+i', c_int(10))
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_interninplace(self):
 | ||
|         """Test PyUnicode_InternInPlace()"""
 | ||
|         from _testlimitedcapi import unicode_interninplace as interninplace
 | ||
| 
 | ||
|         s = b'abc'.decode()
 | ||
|         r = interninplace(s)
 | ||
|         self.assertEqual(r, 'abc')
 | ||
| 
 | ||
|         # CRASHES interninplace(b'abc')
 | ||
|         # CRASHES interninplace(NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_internfromstring(self):
 | ||
|         """Test PyUnicode_InternFromString()"""
 | ||
|         from _testlimitedcapi import unicode_internfromstring as internfromstring
 | ||
| 
 | ||
|         self.assertEqual(internfromstring(b'abc'), 'abc')
 | ||
|         self.assertEqual(internfromstring(b'\xf0\x9f\x98\x80'), '\U0001f600')
 | ||
|         self.assertRaises(UnicodeDecodeError, internfromstring, b'\xc2')
 | ||
|         self.assertRaises(UnicodeDecodeError, internfromstring, b'\xa1')
 | ||
|         self.assertEqual(internfromstring(b''), '')
 | ||
| 
 | ||
|         # CRASHES internfromstring(NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_fromwidechar(self):
 | ||
|         """Test PyUnicode_FromWideChar()"""
 | ||
|         from _testlimitedcapi import unicode_fromwidechar as fromwidechar
 | ||
|         from _testcapi import SIZEOF_WCHAR_T
 | ||
| 
 | ||
|         if SIZEOF_WCHAR_T == 2:
 | ||
|             encoding = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
 | ||
|         elif SIZEOF_WCHAR_T == 4:
 | ||
|             encoding = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
 | ||
| 
 | ||
|         for s in '', 'abc', '\xa1\xa2', '\u4f60', '\U0001f600':
 | ||
|             b = s.encode(encoding)
 | ||
|             self.assertEqual(fromwidechar(b), s)
 | ||
|             self.assertEqual(fromwidechar(b + b'\0'*SIZEOF_WCHAR_T, -1), s)
 | ||
|         for s in '\ud83d', '\ude00':
 | ||
|             b = s.encode(encoding, 'surrogatepass')
 | ||
|             self.assertEqual(fromwidechar(b), s)
 | ||
|             self.assertEqual(fromwidechar(b + b'\0'*SIZEOF_WCHAR_T, -1), s)
 | ||
| 
 | ||
|         self.assertEqual(fromwidechar('abc'.encode(encoding), 2), 'ab')
 | ||
|         if SIZEOF_WCHAR_T == 2:
 | ||
|             self.assertEqual(fromwidechar('a\U0001f600'.encode(encoding), 2), 'a\ud83d')
 | ||
| 
 | ||
|         self.assertRaises(MemoryError, fromwidechar, b'', PY_SSIZE_T_MAX)
 | ||
|         self.assertRaises(SystemError, fromwidechar, b'\0'*SIZEOF_WCHAR_T, -2)
 | ||
|         self.assertRaises(SystemError, fromwidechar, b'\0'*SIZEOF_WCHAR_T, PY_SSIZE_T_MIN)
 | ||
|         self.assertEqual(fromwidechar(NULL, 0), '')
 | ||
|         self.assertRaises(SystemError, fromwidechar, NULL, 1)
 | ||
|         self.assertRaises(SystemError, fromwidechar, NULL, PY_SSIZE_T_MAX)
 | ||
|         self.assertRaises(SystemError, fromwidechar, NULL, -1)
 | ||
|         self.assertRaises(SystemError, fromwidechar, NULL, -2)
 | ||
|         self.assertRaises(SystemError, fromwidechar, NULL, PY_SSIZE_T_MIN)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_aswidechar(self):
 | ||
|         """Test PyUnicode_AsWideChar()"""
 | ||
|         from _testlimitedcapi import unicode_aswidechar
 | ||
|         from _testlimitedcapi import unicode_aswidechar_null
 | ||
|         from _testcapi import SIZEOF_WCHAR_T
 | ||
| 
 | ||
|         wchar, size = unicode_aswidechar('abcdef', 2)
 | ||
|         self.assertEqual(size, 2)
 | ||
|         self.assertEqual(wchar, 'ab')
 | ||
| 
 | ||
|         wchar, size = unicode_aswidechar('abc', 3)
 | ||
|         self.assertEqual(size, 3)
 | ||
|         self.assertEqual(wchar, 'abc')
 | ||
|         self.assertEqual(unicode_aswidechar_null('abc', 10), 4)
 | ||
|         self.assertEqual(unicode_aswidechar_null('abc', 0), 4)
 | ||
| 
 | ||
|         wchar, size = unicode_aswidechar('abc', 4)
 | ||
|         self.assertEqual(size, 3)
 | ||
|         self.assertEqual(wchar, 'abc\0')
 | ||
| 
 | ||
|         wchar, size = unicode_aswidechar('abc', 10)
 | ||
|         self.assertEqual(size, 3)
 | ||
|         self.assertEqual(wchar, 'abc\0')
 | ||
| 
 | ||
|         wchar, size = unicode_aswidechar('abc\0def', 20)
 | ||
|         self.assertEqual(size, 7)
 | ||
|         self.assertEqual(wchar, 'abc\0def\0')
 | ||
|         self.assertEqual(unicode_aswidechar_null('abc\0def', 20), 8)
 | ||
| 
 | ||
|         nonbmp = chr(0x10ffff)
 | ||
|         if SIZEOF_WCHAR_T == 2:
 | ||
|             nchar = 2
 | ||
|         else: # SIZEOF_WCHAR_T == 4
 | ||
|             nchar = 1
 | ||
|         wchar, size = unicode_aswidechar(nonbmp, 10)
 | ||
|         self.assertEqual(size, nchar)
 | ||
|         self.assertEqual(wchar, nonbmp + '\0')
 | ||
|         self.assertEqual(unicode_aswidechar_null(nonbmp, 10), nchar + 1)
 | ||
| 
 | ||
|         self.assertRaises(TypeError, unicode_aswidechar, b'abc', 10)
 | ||
|         self.assertRaises(TypeError, unicode_aswidechar, [], 10)
 | ||
|         self.assertRaises(SystemError, unicode_aswidechar, NULL, 10)
 | ||
|         self.assertRaises(TypeError, unicode_aswidechar_null, b'abc', 10)
 | ||
|         self.assertRaises(TypeError, unicode_aswidechar_null, [], 10)
 | ||
|         self.assertRaises(SystemError, unicode_aswidechar_null, NULL, 10)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_aswidecharstring(self):
 | ||
|         """Test PyUnicode_AsWideCharString()"""
 | ||
|         from _testlimitedcapi import unicode_aswidecharstring
 | ||
|         from _testlimitedcapi import unicode_aswidecharstring_null
 | ||
|         from _testcapi import SIZEOF_WCHAR_T
 | ||
| 
 | ||
|         wchar, size = unicode_aswidecharstring('abc')
 | ||
|         self.assertEqual(size, 3)
 | ||
|         self.assertEqual(wchar, 'abc\0')
 | ||
|         self.assertEqual(unicode_aswidecharstring_null('abc'), 'abc')
 | ||
| 
 | ||
|         wchar, size = unicode_aswidecharstring('abc\0def')
 | ||
|         self.assertEqual(size, 7)
 | ||
|         self.assertEqual(wchar, 'abc\0def\0')
 | ||
|         self.assertRaises(ValueError, unicode_aswidecharstring_null, 'abc\0def')
 | ||
| 
 | ||
|         nonbmp = chr(0x10ffff)
 | ||
|         if SIZEOF_WCHAR_T == 2:
 | ||
|             nchar = 2
 | ||
|         else: # SIZEOF_WCHAR_T == 4
 | ||
|             nchar = 1
 | ||
|         wchar, size = unicode_aswidecharstring(nonbmp)
 | ||
|         self.assertEqual(size, nchar)
 | ||
|         self.assertEqual(wchar, nonbmp + '\0')
 | ||
|         self.assertEqual(unicode_aswidecharstring_null(nonbmp), nonbmp)
 | ||
| 
 | ||
|         self.assertRaises(TypeError, unicode_aswidecharstring, b'abc')
 | ||
|         self.assertRaises(TypeError, unicode_aswidecharstring, [])
 | ||
|         self.assertRaises(SystemError, unicode_aswidecharstring, NULL)
 | ||
|         self.assertRaises(TypeError, unicode_aswidecharstring_null, b'abc')
 | ||
|         self.assertRaises(TypeError, unicode_aswidecharstring_null, [])
 | ||
|         self.assertRaises(SystemError, unicode_aswidecharstring_null, NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | ||
|     def test_asucs4(self):
 | ||
|         """Test PyUnicode_AsUCS4()"""
 | ||
|         from _testcapi import unicode_asucs4
 | ||
| 
 | ||
|         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
 | ||
|                   'a\ud800b\udfffc', '\ud834\udd1e']:
 | ||
|             l = len(s)
 | ||
|             self.assertEqual(unicode_asucs4(s, l, 1), s+'\0')
 | ||
|             self.assertEqual(unicode_asucs4(s, l, 0), s+'\uffff')
 | ||
|             self.assertEqual(unicode_asucs4(s, l+1, 1), s+'\0\uffff')
 | ||
|             self.assertEqual(unicode_asucs4(s, l+1, 0), s+'\0\uffff')
 | ||
|             self.assertRaises(SystemError, unicode_asucs4, s, l-1, 1)
 | ||
|             self.assertRaises(SystemError, unicode_asucs4, s, l-2, 0)
 | ||
|             s = '\0'.join([s, s])
 | ||
|             self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
 | ||
|             self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
 | ||
| 
 | ||
|         # CRASHES unicode_asucs4(b'abc', 1, 0)
 | ||
|         # CRASHES unicode_asucs4(b'abc', 1, 1)
 | ||
|         # CRASHES unicode_asucs4([], 1, 1)
 | ||
|         # CRASHES unicode_asucs4(NULL, 1, 0)
 | ||
|         # CRASHES unicode_asucs4(NULL, 1, 1)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | ||
|     def test_asucs4copy(self):
 | ||
|         """Test PyUnicode_AsUCS4Copy()"""
 | ||
|         from _testcapi import unicode_asucs4copy as asucs4copy
 | ||
| 
 | ||
|         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
 | ||
|                   'a\ud800b\udfffc', '\ud834\udd1e']:
 | ||
|             self.assertEqual(asucs4copy(s), s+'\0')
 | ||
|             s = '\0'.join([s, s])
 | ||
|             self.assertEqual(asucs4copy(s), s+'\0')
 | ||
| 
 | ||
|         # CRASHES asucs4copy(b'abc')
 | ||
|         # CRASHES asucs4copy([])
 | ||
|         # CRASHES asucs4copy(NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_fromordinal(self):
 | ||
|         """Test PyUnicode_FromOrdinal()"""
 | ||
|         from _testlimitedcapi import unicode_fromordinal as fromordinal
 | ||
| 
 | ||
|         self.assertEqual(fromordinal(0x61), 'a')
 | ||
|         self.assertEqual(fromordinal(0x20ac), '\u20ac')
 | ||
|         self.assertEqual(fromordinal(0x1f600), '\U0001f600')
 | ||
| 
 | ||
|         self.assertRaises(ValueError, fromordinal, 0x110000)
 | ||
|         self.assertRaises(ValueError, fromordinal, -1)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | ||
|     def test_asutf8(self):
 | ||
|         """Test PyUnicode_AsUTF8()"""
 | ||
|         from _testcapi import unicode_asutf8
 | ||
| 
 | ||
|         self.assertEqual(unicode_asutf8('abc', 4), b'abc\0')
 | ||
|         self.assertEqual(unicode_asutf8('абв', 7), b'\xd0\xb0\xd0\xb1\xd0\xb2\0')
 | ||
|         self.assertEqual(unicode_asutf8('\U0001f600', 5), b'\xf0\x9f\x98\x80\0')
 | ||
|         self.assertEqual(unicode_asutf8('abc\0def', 8), b'abc\0def\0')
 | ||
| 
 | ||
|         self.assertRaises(UnicodeEncodeError, unicode_asutf8, '\ud8ff', 0)
 | ||
|         self.assertRaises(TypeError, unicode_asutf8, b'abc', 0)
 | ||
|         self.assertRaises(TypeError, unicode_asutf8, [], 0)
 | ||
|         # CRASHES unicode_asutf8(NULL, 0)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_asutf8andsize(self):
 | ||
|         """Test PyUnicode_AsUTF8AndSize()"""
 | ||
|         from _testlimitedcapi import unicode_asutf8andsize
 | ||
|         from _testlimitedcapi import unicode_asutf8andsize_null
 | ||
| 
 | ||
|         self.assertEqual(unicode_asutf8andsize('abc', 4), (b'abc\0', 3))
 | ||
|         self.assertEqual(unicode_asutf8andsize('абв', 7), (b'\xd0\xb0\xd0\xb1\xd0\xb2\0', 6))
 | ||
|         self.assertEqual(unicode_asutf8andsize('\U0001f600', 5), (b'\xf0\x9f\x98\x80\0', 4))
 | ||
|         self.assertEqual(unicode_asutf8andsize('abc\0def', 8), (b'abc\0def\0', 7))
 | ||
|         self.assertEqual(unicode_asutf8andsize_null('abc', 4), b'abc\0')
 | ||
|         self.assertEqual(unicode_asutf8andsize_null('abc\0def', 8), b'abc\0def\0')
 | ||
| 
 | ||
|         self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, '\ud8ff', 0)
 | ||
|         self.assertRaises(TypeError, unicode_asutf8andsize, b'abc', 0)
 | ||
|         self.assertRaises(TypeError, unicode_asutf8andsize, [], 0)
 | ||
|         self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize_null, '\ud8ff', 0)
 | ||
|         self.assertRaises(TypeError, unicode_asutf8andsize_null, b'abc', 0)
 | ||
|         self.assertRaises(TypeError, unicode_asutf8andsize_null, [], 0)
 | ||
|         # CRASHES unicode_asutf8andsize(NULL, 0)
 | ||
|         # CRASHES unicode_asutf8andsize_null(NULL, 0)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_getdefaultencoding(self):
 | ||
|         """Test PyUnicode_GetDefaultEncoding()"""
 | ||
|         from _testlimitedcapi import unicode_getdefaultencoding as getdefaultencoding
 | ||
| 
 | ||
|         self.assertEqual(getdefaultencoding(), b'utf-8')
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testinternalcapi is None, 'need _testinternalcapi module')
 | ||
|     def test_transform_decimal_and_space(self):
 | ||
|         """Test _PyUnicode_TransformDecimalAndSpaceToASCII()"""
 | ||
|         from _testinternalcapi import _PyUnicode_TransformDecimalAndSpaceToASCII as transform_decimal
 | ||
| 
 | ||
|         self.assertEqual(transform_decimal('123'),
 | ||
|                          '123')
 | ||
|         self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
 | ||
|                          '3.14')
 | ||
|         self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
 | ||
|                          " 3.14 ")
 | ||
|         self.assertEqual(transform_decimal('12\u20ac3'),
 | ||
|                          '12?')
 | ||
|         self.assertEqual(transform_decimal(''), '')
 | ||
| 
 | ||
|         self.assertRaises(SystemError, transform_decimal, b'123')
 | ||
|         self.assertRaises(SystemError, transform_decimal, [])
 | ||
|         # CRASHES transform_decimal(NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_concat(self):
 | ||
|         """Test PyUnicode_Concat()"""
 | ||
|         from _testlimitedcapi import unicode_concat as concat
 | ||
| 
 | ||
|         self.assertEqual(concat('abc', 'def'), 'abcdef')
 | ||
|         self.assertEqual(concat('abc', 'где'), 'abcгде')
 | ||
|         self.assertEqual(concat('абв', 'def'), 'абвdef')
 | ||
|         self.assertEqual(concat('абв', 'где'), 'абвгде')
 | ||
|         self.assertEqual(concat('a\0b', 'c\0d'), 'a\0bc\0d')
 | ||
| 
 | ||
|         self.assertRaises(TypeError, concat, b'abc', 'def')
 | ||
|         self.assertRaises(TypeError, concat, 'abc', b'def')
 | ||
|         self.assertRaises(TypeError, concat, b'abc', b'def')
 | ||
|         self.assertRaises(TypeError, concat, [], 'def')
 | ||
|         self.assertRaises(TypeError, concat, 'abc', [])
 | ||
|         self.assertRaises(TypeError, concat, [], [])
 | ||
|         # CRASHES concat(NULL, 'def')
 | ||
|         # CRASHES concat('abc', NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_split(self):
 | ||
|         """Test PyUnicode_Split()"""
 | ||
|         from _testlimitedcapi import unicode_split as split
 | ||
| 
 | ||
|         self.assertEqual(split('a|b|c|d', '|'), ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(split('a|b|c|d', '|', 2), ['a', 'b', 'c|d'])
 | ||
|         self.assertEqual(split('a|b|c|d', '|', PY_SSIZE_T_MAX),
 | ||
|                          ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(split('a|b|c|d', '|', -1), ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(split('a|b|c|d', '|', PY_SSIZE_T_MIN),
 | ||
|                          ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(split('a|b|c|d', '\u20ac'), ['a|b|c|d'])
 | ||
|         self.assertEqual(split('a||b|c||d', '||'), ['a', 'b|c', 'd'])
 | ||
|         self.assertEqual(split('а|б|в|г', '|'), ['а', 'б', 'в', 'г'])
 | ||
|         self.assertEqual(split('абабагаламага', 'а'),
 | ||
|                          ['', 'б', 'б', 'г', 'л', 'м', 'г', ''])
 | ||
|         self.assertEqual(split(' a\tb\nc\rd\ve\f', NULL),
 | ||
|                          ['a', 'b', 'c', 'd', 'e'])
 | ||
|         self.assertEqual(split('a\x85b\xa0c\u1680d\u2000e', NULL),
 | ||
|                          ['a', 'b', 'c', 'd', 'e'])
 | ||
| 
 | ||
|         self.assertRaises(ValueError, split, 'a|b|c|d', '')
 | ||
|         self.assertRaises(TypeError, split, 'a|b|c|d', ord('|'))
 | ||
|         self.assertRaises(TypeError, split, [], '|')
 | ||
|         # CRASHES split(NULL, '|')
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_rsplit(self):
 | ||
|         """Test PyUnicode_RSplit()"""
 | ||
|         from _testlimitedcapi import unicode_rsplit as rsplit
 | ||
| 
 | ||
|         self.assertEqual(rsplit('a|b|c|d', '|'), ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(rsplit('a|b|c|d', '|', 2), ['a|b', 'c', 'd'])
 | ||
|         self.assertEqual(rsplit('a|b|c|d', '|', PY_SSIZE_T_MAX),
 | ||
|                          ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(rsplit('a|b|c|d', '|', -1), ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(rsplit('a|b|c|d', '|', PY_SSIZE_T_MIN),
 | ||
|                          ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(rsplit('a|b|c|d', '\u20ac'), ['a|b|c|d'])
 | ||
|         self.assertEqual(rsplit('a||b|c||d', '||'), ['a', 'b|c', 'd'])
 | ||
|         self.assertEqual(rsplit('а|б|в|г', '|'), ['а', 'б', 'в', 'г'])
 | ||
|         self.assertEqual(rsplit('абабагаламага', 'а'),
 | ||
|                          ['', 'б', 'б', 'г', 'л', 'м', 'г', ''])
 | ||
|         self.assertEqual(rsplit('aжbжcжd', 'ж'), ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(rsplit(' a\tb\nc\rd\ve\f', NULL),
 | ||
|                          ['a', 'b', 'c', 'd', 'e'])
 | ||
|         self.assertEqual(rsplit('a\x85b\xa0c\u1680d\u2000e', NULL),
 | ||
|                          ['a', 'b', 'c', 'd', 'e'])
 | ||
| 
 | ||
|         self.assertRaises(ValueError, rsplit, 'a|b|c|d', '')
 | ||
|         self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|'))
 | ||
|         self.assertRaises(TypeError, rsplit, [], '|')
 | ||
|         # CRASHES rsplit(NULL, '|')
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_partition(self):
 | ||
|         """Test PyUnicode_Partition()"""
 | ||
|         from _testlimitedcapi import unicode_partition as partition
 | ||
| 
 | ||
|         self.assertEqual(partition('a|b|c', '|'), ('a', '|', 'b|c'))
 | ||
|         self.assertEqual(partition('a||b||c', '||'), ('a', '||', 'b||c'))
 | ||
|         self.assertEqual(partition('а|б|в', '|'), ('а', '|', 'б|в'))
 | ||
|         self.assertEqual(partition('кабан', 'а'), ('к', 'а', 'бан'))
 | ||
|         self.assertEqual(partition('aжbжc', 'ж'), ('a', 'ж', 'bжc'))
 | ||
| 
 | ||
|         self.assertRaises(ValueError, partition, 'a|b|c', '')
 | ||
|         self.assertRaises(TypeError, partition, b'a|b|c', '|')
 | ||
|         self.assertRaises(TypeError, partition, 'a|b|c', b'|')
 | ||
|         self.assertRaises(TypeError, partition, 'a|b|c', ord('|'))
 | ||
|         self.assertRaises(TypeError, partition, [], '|')
 | ||
|         # CRASHES partition(NULL, '|')
 | ||
|         # CRASHES partition('a|b|c', NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_rpartition(self):
 | ||
|         """Test PyUnicode_RPartition()"""
 | ||
|         from _testlimitedcapi import unicode_rpartition as rpartition
 | ||
| 
 | ||
|         self.assertEqual(rpartition('a|b|c', '|'), ('a|b', '|', 'c'))
 | ||
|         self.assertEqual(rpartition('a||b||c', '||'), ('a||b', '||', 'c'))
 | ||
|         self.assertEqual(rpartition('а|б|в', '|'), ('а|б', '|', 'в'))
 | ||
|         self.assertEqual(rpartition('кабан', 'а'), ('каб', 'а', 'н'))
 | ||
|         self.assertEqual(rpartition('aжbжc', 'ж'), ('aжb', 'ж', 'c'))
 | ||
| 
 | ||
|         self.assertRaises(ValueError, rpartition, 'a|b|c', '')
 | ||
|         self.assertRaises(TypeError, rpartition, b'a|b|c', '|')
 | ||
|         self.assertRaises(TypeError, rpartition, 'a|b|c', b'|')
 | ||
|         self.assertRaises(TypeError, rpartition, 'a|b|c', ord('|'))
 | ||
|         self.assertRaises(TypeError, rpartition, [], '|')
 | ||
|         # CRASHES rpartition(NULL, '|')
 | ||
|         # CRASHES rpartition('a|b|c', NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_splitlines(self):
 | ||
|         """Test PyUnicode_SplitLines()"""
 | ||
|         from _testlimitedcapi import unicode_splitlines as splitlines
 | ||
| 
 | ||
|         self.assertEqual(splitlines('a\nb\rc\r\nd'), ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(splitlines('a\nb\rc\r\nd', True),
 | ||
|                          ['a\n', 'b\r', 'c\r\n', 'd'])
 | ||
|         self.assertEqual(splitlines('a\x85b\u2028c\u2029d'),
 | ||
|                          ['a', 'b', 'c', 'd'])
 | ||
|         self.assertEqual(splitlines('a\x85b\u2028c\u2029d', True),
 | ||
|                          ['a\x85', 'b\u2028', 'c\u2029', 'd'])
 | ||
|         self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г'])
 | ||
| 
 | ||
|         self.assertRaises(TypeError, splitlines, b'a\nb\rc\r\nd')
 | ||
|         # CRASHES splitlines(NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_translate(self):
 | ||
|         """Test PyUnicode_Translate()"""
 | ||
|         from _testlimitedcapi import unicode_translate as translate
 | ||
| 
 | ||
|         self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d')
 | ||
|         self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г')
 | ||
|         self.assertEqual(translate('abc', {}), 'abc')
 | ||
|         self.assertEqual(translate('abc', []), 'abc')
 | ||
|         self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None})
 | ||
|         self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict')
 | ||
|         self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
 | ||
|         self.assertEqual(translate('abc', {ord('b'): None}, 'ignore'), 'ac')
 | ||
|         self.assertEqual(translate('abc', {ord('b'): None}, 'replace'), 'a\ufffdc')
 | ||
|         self.assertEqual(translate('abc', {ord('b'): None}, 'backslashreplace'), r'a\x62c')
 | ||
|         # XXX Other error handlers do not support UnicodeTranslateError
 | ||
|         self.assertRaises(TypeError, translate, b'abc', [])
 | ||
|         self.assertRaises(TypeError, translate, 123, [])
 | ||
|         self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
 | ||
|         self.assertRaises(TypeError, translate, 'abc', 123)
 | ||
|         self.assertRaises(TypeError, translate, 'abc', NULL)
 | ||
|         self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
 | ||
|         # CRASHES translate(NULL, [])
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_join(self):
 | ||
|         """Test PyUnicode_Join()"""
 | ||
|         from _testlimitedcapi import unicode_join as join
 | ||
|         self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c')
 | ||
|         self.assertEqual(join('|', ['a', '', 'c']), 'a||c')
 | ||
|         self.assertEqual(join('', ['a', 'b', 'c']), 'abc')
 | ||
|         self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c')
 | ||
|         self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в')
 | ||
|         self.assertEqual(join('ж', ['а', 'б', 'в']), 'ажбжв')
 | ||
|         self.assertRaises(TypeError, join, b'|', ['a', 'b', 'c'])
 | ||
|         self.assertRaises(TypeError, join, '|', [b'a', b'b', b'c'])
 | ||
|         self.assertRaises(TypeError, join, NULL, [b'a', b'b', b'c'])
 | ||
|         self.assertRaises(TypeError, join, '|', b'123')
 | ||
|         self.assertRaises(TypeError, join, '|', 123)
 | ||
|         self.assertRaises(SystemError, join, '|', NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_count(self):
 | ||
|         """Test PyUnicode_Count()"""
 | ||
|         from _testlimitedcapi import unicode_count
 | ||
| 
 | ||
|         for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
 | ||
|             for i, ch in enumerate(str):
 | ||
|                 self.assertEqual(unicode_count(str, ch, 0, len(str)), 1)
 | ||
| 
 | ||
|         str = "!>_<!"
 | ||
|         self.assertEqual(unicode_count(str, 'z', 0, len(str)), 0)
 | ||
|         self.assertEqual(unicode_count(str, '', 0, len(str)), len(str)+1)
 | ||
|         # start < end
 | ||
|         self.assertEqual(unicode_count(str, '!', 1, len(str)+1), 1)
 | ||
|         self.assertEqual(unicode_count(str, '!', 1, PY_SSIZE_T_MAX), 1)
 | ||
|         # start >= end
 | ||
|         self.assertEqual(unicode_count(str, '!', 0, 0), 0)
 | ||
|         self.assertEqual(unicode_count(str, '!', len(str), 0), 0)
 | ||
|         # negative
 | ||
|         self.assertEqual(unicode_count(str, '!', -len(str), -1), 1)
 | ||
|         self.assertEqual(unicode_count(str, '!', -len(str)-1, -1), 1)
 | ||
|         self.assertEqual(unicode_count(str, '!', PY_SSIZE_T_MIN, -1), 1)
 | ||
|         # bad arguments
 | ||
|         self.assertRaises(TypeError, unicode_count, str, b'!', 0, len(str))
 | ||
|         self.assertRaises(TypeError, unicode_count, b"!>_<!", '!', 0, len(str))
 | ||
|         self.assertRaises(TypeError, unicode_count, str, ord('!'), 0, len(str))
 | ||
|         self.assertRaises(TypeError, unicode_count, [], '!', 0, len(str), 1)
 | ||
|         # CRASHES unicode_count(NULL, '!', 0, len(str))
 | ||
|         # CRASHES unicode_count(str, NULL, 0, len(str))
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_tailmatch(self):
 | ||
|         """Test PyUnicode_Tailmatch()"""
 | ||
|         from _testlimitedcapi import unicode_tailmatch as tailmatch
 | ||
| 
 | ||
|         str = 'ababahalamaha'
 | ||
|         self.assertEqual(tailmatch(str, 'aba', 0, len(str), -1), 1)
 | ||
|         self.assertEqual(tailmatch(str, 'aha', 0, len(str), 1), 1)
 | ||
| 
 | ||
|         self.assertEqual(tailmatch(str, 'aba', 0, PY_SSIZE_T_MAX, -1), 1)
 | ||
|         self.assertEqual(tailmatch(str, 'aba', -len(str), PY_SSIZE_T_MAX, -1), 1)
 | ||
|         self.assertEqual(tailmatch(str, 'aba', PY_SSIZE_T_MIN, len(str), -1), 1)
 | ||
|         self.assertEqual(tailmatch(str, 'aha', 0, PY_SSIZE_T_MAX, 1), 1)
 | ||
|         self.assertEqual(tailmatch(str, 'aha', PY_SSIZE_T_MIN, len(str), 1), 1)
 | ||
| 
 | ||
|         self.assertEqual(tailmatch(str, 'z', 0, len(str), 1), 0)
 | ||
|         self.assertEqual(tailmatch(str, 'z', 0, len(str), -1), 0)
 | ||
|         self.assertEqual(tailmatch(str, '', 0, len(str), 1), 1)
 | ||
|         self.assertEqual(tailmatch(str, '', 0, len(str), -1), 1)
 | ||
| 
 | ||
|         self.assertEqual(tailmatch(str, 'ba', 0, len(str)-1, -1), 0)
 | ||
|         self.assertEqual(tailmatch(str, 'ba', 1, len(str)-1, -1), 1)
 | ||
|         self.assertEqual(tailmatch(str, 'aba', 1, len(str)-1, -1), 0)
 | ||
|         self.assertEqual(tailmatch(str, 'ba', -len(str)+1, -1, -1), 1)
 | ||
|         self.assertEqual(tailmatch(str, 'ah', 0, len(str), 1), 0)
 | ||
|         self.assertEqual(tailmatch(str, 'ah', 0, len(str)-1, 1), 1)
 | ||
|         self.assertEqual(tailmatch(str, 'ah', -len(str), -1, 1), 1)
 | ||
| 
 | ||
|         # bad arguments
 | ||
|         self.assertRaises(TypeError, tailmatch, str, ('aba', 'aha'), 0, len(str), -1)
 | ||
|         self.assertRaises(TypeError, tailmatch, str, ('aba', 'aha'), 0, len(str), 1)
 | ||
|         # CRASHES tailmatch(NULL, 'aba', 0, len(str), -1)
 | ||
|         # CRASHES tailmatch(str, NULL, 0, len(str), -1)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_find(self):
 | ||
|         """Test PyUnicode_Find()"""
 | ||
|         from _testlimitedcapi import unicode_find as find
 | ||
| 
 | ||
|         for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
 | ||
|             for i, ch in enumerate(str):
 | ||
|                 self.assertEqual(find(str, ch, 0, len(str), 1), i)
 | ||
|                 self.assertEqual(find(str, ch, 0, len(str), -1), i)
 | ||
| 
 | ||
|         str = "!>_<!"
 | ||
|         self.assertEqual(find(str, 'z', 0, len(str), 1), -1)
 | ||
|         self.assertEqual(find(str, 'z', 0, len(str), -1), -1)
 | ||
|         self.assertEqual(find(str, '', 0, len(str), 1), 0)
 | ||
|         self.assertEqual(find(str, '', 0, len(str), -1), len(str))
 | ||
|         # start < end
 | ||
|         self.assertEqual(find(str, '!', 1, len(str)+1, 1), 4)
 | ||
|         self.assertEqual(find(str, '!', 1, PY_SSIZE_T_MAX, 1), 4)
 | ||
|         self.assertEqual(find(str, '!', 0, len(str)+1, -1), 4)
 | ||
|         self.assertEqual(find(str, '!', 0, PY_SSIZE_T_MAX, -1), 4)
 | ||
|         # start >= end
 | ||
|         self.assertEqual(find(str, '!', 0, 0, 1), -1)
 | ||
|         self.assertEqual(find(str, '!', 0, 0, -1), -1)
 | ||
|         self.assertEqual(find(str, '!', len(str), 0, 1), -1)
 | ||
|         self.assertEqual(find(str, '!', len(str), 0, -1), -1)
 | ||
|         # negative
 | ||
|         self.assertEqual(find(str, '!', -len(str), -1, 1), 0)
 | ||
|         self.assertEqual(find(str, '!', -len(str), -1, -1), 0)
 | ||
|         self.assertEqual(find(str, '!', PY_SSIZE_T_MIN, -1, 1), 0)
 | ||
|         self.assertEqual(find(str, '!', PY_SSIZE_T_MIN, -1, -1), 0)
 | ||
|         self.assertEqual(find(str, '!', PY_SSIZE_T_MIN, PY_SSIZE_T_MAX, 1), 0)
 | ||
|         self.assertEqual(find(str, '!', PY_SSIZE_T_MIN, PY_SSIZE_T_MAX, -1), 4)
 | ||
|         # bad arguments
 | ||
|         self.assertRaises(TypeError, find, str, b'!', 0, len(str), 1)
 | ||
|         self.assertRaises(TypeError, find, b"!>_<!", '!', 0, len(str), 1)
 | ||
|         self.assertRaises(TypeError, find, str, ord('!'), 0, len(str), 1)
 | ||
|         self.assertRaises(TypeError, find, [], '!', 0, len(str), 1)
 | ||
|         # CRASHES find(NULL, '!', 0, len(str), 1)
 | ||
|         # CRASHES find(str, NULL, 0, len(str), 1)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_findchar(self):
 | ||
|         """Test PyUnicode_FindChar()"""
 | ||
|         from _testlimitedcapi import unicode_findchar
 | ||
| 
 | ||
|         for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
 | ||
|             for i, ch in enumerate(str):
 | ||
|                 self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i)
 | ||
|                 self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i)
 | ||
| 
 | ||
|         str = "!>_<!"
 | ||
|         self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1)
 | ||
|         self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1)
 | ||
|         # start < end
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), 1, PY_SSIZE_T_MAX, 1), 4)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), 0, len(str)+1, -1), 4)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), 0, PY_SSIZE_T_MAX, -1), 4)
 | ||
|         # start >= end
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, -1), -1)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, -1), -1)
 | ||
|         # negative
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), PY_SSIZE_T_MIN, -1, 1), 0)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), PY_SSIZE_T_MIN, -1, -1), 0)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), PY_SSIZE_T_MIN, PY_SSIZE_T_MAX, 1), 0)
 | ||
|         self.assertEqual(unicode_findchar(str, ord('!'), PY_SSIZE_T_MIN, PY_SSIZE_T_MAX, -1), 4)
 | ||
|         # bad arguments
 | ||
|         # CRASHES unicode_findchar(b"!>_<!", ord('!'), 0, len(str), 1)
 | ||
|         # CRASHES unicode_findchar([], ord('!'), 0, len(str), 1)
 | ||
|         # CRASHES unicode_findchar(NULL, ord('!'), 0, len(str), 1), 1)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_replace(self):
 | ||
|         """Test PyUnicode_Replace()"""
 | ||
|         from _testlimitedcapi import unicode_replace as replace
 | ||
| 
 | ||
|         str = 'abracadabra'
 | ||
|         self.assertEqual(replace(str, 'a', '='), '=br=c=d=br=')
 | ||
|         self.assertEqual(replace(str, 'a', '<>'), '<>br<>c<>d<>br<>')
 | ||
|         self.assertEqual(replace(str, 'abra', '='), '=cad=')
 | ||
|         self.assertEqual(replace(str, 'a', '=', 2), '=br=cadabra')
 | ||
|         self.assertEqual(replace(str, 'a', '=', 0), str)
 | ||
|         self.assertEqual(replace(str, 'a', '=', PY_SSIZE_T_MAX), '=br=c=d=br=')
 | ||
|         self.assertEqual(replace(str, 'a', '=', -1), '=br=c=d=br=')
 | ||
|         self.assertEqual(replace(str, 'a', '=', PY_SSIZE_T_MIN), '=br=c=d=br=')
 | ||
|         self.assertEqual(replace(str, 'z', '='), str)
 | ||
|         self.assertEqual(replace(str, '', '='), '=a=b=r=a=c=a=d=a=b=r=a=')
 | ||
|         self.assertEqual(replace(str, 'a', 'ж'), 'жbrжcжdжbrж')
 | ||
|         self.assertEqual(replace('абабагаламага', 'а', '='), '=б=б=г=л=м=г=')
 | ||
|         self.assertEqual(replace('Баден-Баден', 'Баден', 'Baden'), 'Baden-Baden')
 | ||
|         # bad arguments
 | ||
|         self.assertRaises(TypeError, replace, 'a', 'a', b'=')
 | ||
|         self.assertRaises(TypeError, replace, 'a', b'a', '=')
 | ||
|         self.assertRaises(TypeError, replace, b'a', 'a', '=')
 | ||
|         self.assertRaises(TypeError, replace, 'a', 'a', ord('='))
 | ||
|         self.assertRaises(TypeError, replace, 'a', ord('a'), '=')
 | ||
|         self.assertRaises(TypeError, replace, [], 'a', '=')
 | ||
|         # CRASHES replace('a', 'a', NULL)
 | ||
|         # CRASHES replace('a', NULL, '=')
 | ||
|         # CRASHES replace(NULL, 'a', '=')
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_compare(self):
 | ||
|         """Test PyUnicode_Compare()"""
 | ||
|         from _testlimitedcapi import unicode_compare as compare
 | ||
| 
 | ||
|         self.assertEqual(compare('abc', 'abc'), 0)
 | ||
|         self.assertEqual(compare('abc', 'def'), -1)
 | ||
|         self.assertEqual(compare('def', 'abc'), 1)
 | ||
|         self.assertEqual(compare('abc', 'abc\0def'), -1)
 | ||
|         self.assertEqual(compare('abc\0def', 'abc\0def'), 0)
 | ||
|         self.assertEqual(compare('абв', 'abc'), 1)
 | ||
| 
 | ||
|         self.assertRaises(TypeError, compare, b'abc', 'abc')
 | ||
|         self.assertRaises(TypeError, compare, 'abc', b'abc')
 | ||
|         self.assertRaises(TypeError, compare, b'abc', b'abc')
 | ||
|         self.assertRaises(TypeError, compare, [], 'abc')
 | ||
|         self.assertRaises(TypeError, compare, 'abc', [])
 | ||
|         self.assertRaises(TypeError, compare, [], [])
 | ||
|         # CRASHES compare(NULL, 'abc')
 | ||
|         # CRASHES compare('abc', NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_comparewithasciistring(self):
 | ||
|         """Test PyUnicode_CompareWithASCIIString()"""
 | ||
|         from _testlimitedcapi import unicode_comparewithasciistring as comparewithasciistring
 | ||
| 
 | ||
|         self.assertEqual(comparewithasciistring('abc', b'abc'), 0)
 | ||
|         self.assertEqual(comparewithasciistring('abc', b'def'), -1)
 | ||
|         self.assertEqual(comparewithasciistring('def', b'abc'), 1)
 | ||
|         self.assertEqual(comparewithasciistring('abc', b'abc\0def'), 0)
 | ||
|         self.assertEqual(comparewithasciistring('abc\0def', b'abc\0def'), 1)
 | ||
|         self.assertEqual(comparewithasciistring('абв', b'abc'), 1)
 | ||
| 
 | ||
|         # CRASHES comparewithasciistring(b'abc', b'abc')
 | ||
|         # CRASHES comparewithasciistring([], b'abc')
 | ||
|         # CRASHES comparewithasciistring(NULL, b'abc')
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_equaltoutf8(self):
 | ||
|         # Test PyUnicode_EqualToUTF8()
 | ||
|         from _testlimitedcapi import unicode_equaltoutf8 as equaltoutf8
 | ||
|         from _testlimitedcapi import unicode_asutf8andsize as asutf8andsize
 | ||
| 
 | ||
|         strings = [
 | ||
|             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
 | ||
|             '\U0001f600\U0001f601\U0001f602',
 | ||
|             '\U0010ffff',
 | ||
|         ]
 | ||
|         for s in strings:
 | ||
|             # Call PyUnicode_AsUTF8AndSize() which creates the UTF-8
 | ||
|             # encoded string cached in the Unicode object.
 | ||
|             asutf8andsize(s, 0)
 | ||
|             b = s.encode()
 | ||
|             self.assertEqual(equaltoutf8(s, b), 1)  # Use the UTF-8 cache.
 | ||
|             s2 = b.decode()  # New Unicode object without the UTF-8 cache.
 | ||
|             self.assertEqual(equaltoutf8(s2, b), 1)
 | ||
|             self.assertEqual(equaltoutf8(s + 'x', b + b'x'), 1)
 | ||
|             self.assertEqual(equaltoutf8(s + 'x', b + b'y'), 0)
 | ||
|             self.assertEqual(equaltoutf8(s, b + b'\0'), 1)
 | ||
|             self.assertEqual(equaltoutf8(s2, b + b'\0'), 1)
 | ||
|             self.assertEqual(equaltoutf8(s + '\0', b + b'\0'), 0)
 | ||
|             self.assertEqual(equaltoutf8(s + '\0', b), 0)
 | ||
|             self.assertEqual(equaltoutf8(s2, b + b'x'), 0)
 | ||
|             self.assertEqual(equaltoutf8(s2, b[:-1]), 0)
 | ||
|             self.assertEqual(equaltoutf8(s2, b[:-1] + b'x'), 0)
 | ||
| 
 | ||
|         self.assertEqual(equaltoutf8('', b''), 1)
 | ||
|         self.assertEqual(equaltoutf8('', b'\0'), 1)
 | ||
| 
 | ||
|         # embedded null chars/bytes
 | ||
|         self.assertEqual(equaltoutf8('abc', b'abc\0def\0'), 1)
 | ||
|         self.assertEqual(equaltoutf8('a\0bc', b'abc'), 0)
 | ||
|         self.assertEqual(equaltoutf8('abc', b'a\0bc'), 0)
 | ||
| 
 | ||
|         # Surrogate characters are always treated as not equal
 | ||
|         self.assertEqual(equaltoutf8('\udcfe',
 | ||
|                             '\udcfe'.encode("utf8", "surrogateescape")), 0)
 | ||
|         self.assertEqual(equaltoutf8('\udcfe',
 | ||
|                             '\udcfe'.encode("utf8", "surrogatepass")), 0)
 | ||
|         self.assertEqual(equaltoutf8('\ud801',
 | ||
|                             '\ud801'.encode("utf8", "surrogatepass")), 0)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_equaltoutf8andsize(self):
 | ||
|         # Test PyUnicode_EqualToUTF8AndSize()
 | ||
|         from _testlimitedcapi import unicode_equaltoutf8andsize as equaltoutf8andsize
 | ||
|         from _testlimitedcapi import unicode_asutf8andsize as asutf8andsize
 | ||
| 
 | ||
|         strings = [
 | ||
|             'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
 | ||
|             '\U0001f600\U0001f601\U0001f602',
 | ||
|             '\U0010ffff',
 | ||
|         ]
 | ||
|         for s in strings:
 | ||
|             # Call PyUnicode_AsUTF8AndSize() which creates the UTF-8
 | ||
|             # encoded string cached in the Unicode object.
 | ||
|             asutf8andsize(s, 0)
 | ||
|             b = s.encode()
 | ||
|             self.assertEqual(equaltoutf8andsize(s, b), 1)  # Use the UTF-8 cache.
 | ||
|             s2 = b.decode()  # New Unicode object without the UTF-8 cache.
 | ||
|             self.assertEqual(equaltoutf8andsize(s2, b), 1)
 | ||
|             self.assertEqual(equaltoutf8andsize(s + 'x', b + b'x'), 1)
 | ||
|             self.assertEqual(equaltoutf8andsize(s + 'x', b + b'y'), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s, b + b'\0'), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s2, b + b'\0'), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s + '\0', b + b'\0'), 1)
 | ||
|             self.assertEqual(equaltoutf8andsize(s + '\0', b), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s2, b + b'x'), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s2, b[:-1]), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s2, b[:-1] + b'x'), 0)
 | ||
|             # Not null-terminated,
 | ||
|             self.assertEqual(equaltoutf8andsize(s, b + b'x', len(b)), 1)
 | ||
|             self.assertEqual(equaltoutf8andsize(s2, b + b'x', len(b)), 1)
 | ||
|             self.assertEqual(equaltoutf8andsize(s + '\0', b + b'\0x', len(b) + 1), 1)
 | ||
|             self.assertEqual(equaltoutf8andsize(s2, b, len(b) - 1), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s, b, -1), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s, b, PY_SSIZE_T_MAX), 0)
 | ||
|             self.assertEqual(equaltoutf8andsize(s, b, PY_SSIZE_T_MIN), 0)
 | ||
| 
 | ||
|         self.assertEqual(equaltoutf8andsize('', b''), 1)
 | ||
|         self.assertEqual(equaltoutf8andsize('', b'\0'), 0)
 | ||
|         self.assertEqual(equaltoutf8andsize('', b'x', 0), 1)
 | ||
| 
 | ||
|         # embedded null chars/bytes
 | ||
|         self.assertEqual(equaltoutf8andsize('abc\0def', b'abc\0def'), 1)
 | ||
|         self.assertEqual(equaltoutf8andsize('abc\0def\0', b'abc\0def\0'), 1)
 | ||
| 
 | ||
|         # Surrogate characters are always treated as not equal
 | ||
|         self.assertEqual(equaltoutf8andsize('\udcfe',
 | ||
|                             '\udcfe'.encode("utf8", "surrogateescape")), 0)
 | ||
|         self.assertEqual(equaltoutf8andsize('\udcfe',
 | ||
|                             '\udcfe'.encode("utf8", "surrogatepass")), 0)
 | ||
|         self.assertEqual(equaltoutf8andsize('\ud801',
 | ||
|                             '\ud801'.encode("utf8", "surrogatepass")), 0)
 | ||
| 
 | ||
|         def check_not_equal_encoding(text, encoding):
 | ||
|             self.assertEqual(equaltoutf8andsize(text, text.encode(encoding)), 0)
 | ||
|             self.assertNotEqual(text.encode(encoding), text.encode("utf8"))
 | ||
| 
 | ||
|         # Strings encoded to other encodings are not equal to expected UTF8-encoding string
 | ||
|         check_not_equal_encoding('Stéphane', 'latin1')
 | ||
|         check_not_equal_encoding('Stéphane', 'utf-16-le')  # embedded null characters
 | ||
|         check_not_equal_encoding('北京市', 'gbk')
 | ||
| 
 | ||
|         # CRASHES equaltoutf8andsize('abc', b'abc', -1)
 | ||
|         # CRASHES equaltoutf8andsize(b'abc', b'abc')
 | ||
|         # CRASHES equaltoutf8andsize([], b'abc')
 | ||
|         # CRASHES equaltoutf8andsize(NULL, b'abc')
 | ||
|         # CRASHES equaltoutf8andsize('abc', NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_richcompare(self):
 | ||
|         """Test PyUnicode_RichCompare()"""
 | ||
|         from _testlimitedcapi import unicode_richcompare as richcompare
 | ||
| 
 | ||
|         LT, LE, EQ, NE, GT, GE = range(6)
 | ||
|         strings = ('abc', 'абв', '\U0001f600', 'abc\0')
 | ||
|         for s1 in strings:
 | ||
|             for s2 in strings:
 | ||
|                 self.assertIs(richcompare(s1, s2, LT), s1 < s2)
 | ||
|                 self.assertIs(richcompare(s1, s2, LE), s1 <= s2)
 | ||
|                 self.assertIs(richcompare(s1, s2, EQ), s1 == s2)
 | ||
|                 self.assertIs(richcompare(s1, s2, NE), s1 != s2)
 | ||
|                 self.assertIs(richcompare(s1, s2, GT), s1 > s2)
 | ||
|                 self.assertIs(richcompare(s1, s2, GE), s1 >= s2)
 | ||
| 
 | ||
|         for op in LT, LE, EQ, NE, GT, GE:
 | ||
|             self.assertIs(richcompare(b'abc', 'abc', op), NotImplemented)
 | ||
|             self.assertIs(richcompare('abc', b'abc', op), NotImplemented)
 | ||
|             self.assertIs(richcompare(b'abc', b'abc', op), NotImplemented)
 | ||
|             self.assertIs(richcompare([], 'abc', op), NotImplemented)
 | ||
|             self.assertIs(richcompare('abc', [], op), NotImplemented)
 | ||
|             self.assertIs(richcompare([], [], op), NotImplemented)
 | ||
| 
 | ||
|             # CRASHES richcompare(NULL, 'abc', op)
 | ||
|             # CRASHES richcompare('abc', NULL, op)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_format(self):
 | ||
|         """Test PyUnicode_Format()"""
 | ||
|         from _testlimitedcapi import unicode_format as format
 | ||
| 
 | ||
|         self.assertEqual(format('x=%d!', 42), 'x=42!')
 | ||
|         self.assertEqual(format('x=%d!', (42,)), 'x=42!')
 | ||
|         self.assertEqual(format('x=%d y=%s!', (42, [])), 'x=42 y=[]!')
 | ||
| 
 | ||
|         self.assertRaises(SystemError, format, 'x=%d!', NULL)
 | ||
|         self.assertRaises(SystemError, format, NULL, 42)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_contains(self):
 | ||
|         """Test PyUnicode_Contains()"""
 | ||
|         from _testlimitedcapi import unicode_contains as contains
 | ||
| 
 | ||
|         self.assertEqual(contains('abcd', ''), 1)
 | ||
|         self.assertEqual(contains('abcd', 'b'), 1)
 | ||
|         self.assertEqual(contains('abcd', 'x'), 0)
 | ||
|         self.assertEqual(contains('abcd', 'ж'), 0)
 | ||
|         self.assertEqual(contains('abcd', '\0'), 0)
 | ||
|         self.assertEqual(contains('abc\0def', '\0'), 1)
 | ||
|         self.assertEqual(contains('abcd', 'bc'), 1)
 | ||
| 
 | ||
|         self.assertRaises(TypeError, contains, b'abcd', 'b')
 | ||
|         self.assertRaises(TypeError, contains, 'abcd', b'b')
 | ||
|         self.assertRaises(TypeError, contains, b'abcd', b'b')
 | ||
|         self.assertRaises(TypeError, contains, [], 'b')
 | ||
|         self.assertRaises(TypeError, contains, 'abcd', ord('b'))
 | ||
|         # CRASHES contains(NULL, 'b')
 | ||
|         # CRASHES contains('abcd', NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
 | ||
|     def test_isidentifier(self):
 | ||
|         """Test PyUnicode_IsIdentifier()"""
 | ||
|         from _testlimitedcapi import unicode_isidentifier as isidentifier
 | ||
| 
 | ||
|         self.assertEqual(isidentifier("a"), 1)
 | ||
|         self.assertEqual(isidentifier("b0"), 1)
 | ||
|         self.assertEqual(isidentifier("µ"), 1)
 | ||
|         self.assertEqual(isidentifier("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"), 1)
 | ||
| 
 | ||
|         self.assertEqual(isidentifier(""), 0)
 | ||
|         self.assertEqual(isidentifier(" "), 0)
 | ||
|         self.assertEqual(isidentifier("["), 0)
 | ||
|         self.assertEqual(isidentifier("©"), 0)
 | ||
|         self.assertEqual(isidentifier("0"), 0)
 | ||
|         self.assertEqual(isidentifier("32M"), 0)
 | ||
| 
 | ||
|         # CRASHES isidentifier(b"a")
 | ||
|         # CRASHES isidentifier([])
 | ||
|         # CRASHES isidentifier(NULL)
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | ||
|     def test_copycharacters(self):
 | ||
|         """Test PyUnicode_CopyCharacters()"""
 | ||
|         from _testcapi import unicode_copycharacters
 | ||
| 
 | ||
|         strings = [
 | ||
|             # all strings have exactly 5 characters
 | ||
|             'abcde', '\xa1\xa2\xa3\xa4\xa5',
 | ||
|             '\u4f60\u597d\u4e16\u754c\uff01',
 | ||
|             '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
 | ||
|         ]
 | ||
| 
 | ||
|         for idx, from_ in enumerate(strings):
 | ||
|             # wide -> narrow: exceed maxchar limitation
 | ||
|             for to in strings[:idx]:
 | ||
|                 self.assertRaises(
 | ||
|                     SystemError,
 | ||
|                     unicode_copycharacters, to, 0, from_, 0, 5
 | ||
|                 )
 | ||
|             # same kind
 | ||
|             for from_start in range(5):
 | ||
|                 self.assertEqual(
 | ||
|                     unicode_copycharacters(from_, 0, from_, from_start, 5),
 | ||
|                     (from_[from_start:from_start+5].ljust(5, '\0'),
 | ||
|                      5-from_start)
 | ||
|                 )
 | ||
|             for to_start in range(5):
 | ||
|                 self.assertEqual(
 | ||
|                     unicode_copycharacters(from_, to_start, from_, to_start, 5),
 | ||
|                     (from_[to_start:to_start+5].rjust(5, '\0'),
 | ||
|                      5-to_start)
 | ||
|                 )
 | ||
|             # narrow -> wide
 | ||
|             # Tests omitted since this creates invalid strings.
 | ||
| 
 | ||
|         s = strings[0]
 | ||
|         self.assertRaises(IndexError, unicode_copycharacters, s, 6, s, 0, 5)
 | ||
|         self.assertRaises(IndexError, unicode_copycharacters, s, PY_SSIZE_T_MAX, s, 0, 5)
 | ||
|         self.assertRaises(IndexError, unicode_copycharacters, s, -1, s, 0, 5)
 | ||
|         self.assertRaises(IndexError, unicode_copycharacters, s, PY_SSIZE_T_MIN, s, 0, 5)
 | ||
|         self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, 6, 5)
 | ||
|         self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, PY_SSIZE_T_MAX, 5)
 | ||
|         self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, -1, 5)
 | ||
|         self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, PY_SSIZE_T_MIN, 5)
 | ||
|         self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, 5)
 | ||
|         self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, PY_SSIZE_T_MAX)
 | ||
|         self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1)
 | ||
|         self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, PY_SSIZE_T_MIN)
 | ||
|         self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)
 | ||
|         self.assertRaises(SystemError, unicode_copycharacters, s, 0, [], 0, 0)
 | ||
|         # CRASHES unicode_copycharacters(s, 0, NULL, 0, 0)
 | ||
|         # TODO: Test PyUnicode_CopyCharacters() with non-unicode and
 | ||
|         # non-modifiable unicode as "to".
 | ||
| 
 | ||
|     @support.cpython_only
 | ||
|     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
 | ||
|     def test_pep393_utf8_caching_bug(self):
 | ||
|         # Issue #25709: Problem with string concatenation and utf-8 cache
 | ||
|         from _testcapi import getargs_s_hash
 | ||
|         for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
 | ||
|             s = ''
 | ||
|             for i in range(5):
 | ||
|                 # Due to CPython specific optimization the 's' string can be
 | ||
|                 # resized in-place.
 | ||
|                 s += chr(k)
 | ||
|                 # Parsing with the "s#" format code calls indirectly
 | ||
|                 # PyUnicode_AsUTF8AndSize() which creates the UTF-8
 | ||
|                 # encoded string cached in the Unicode object.
 | ||
|                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
 | ||
|                 # Check that the second call returns the same result
 | ||
|                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
 | ||
| 
 | ||
| 
 | ||
| class PyUnicodeWriterTest(unittest.TestCase):
 | ||
|     def create_writer(self, size):
 | ||
|         return _testcapi.PyUnicodeWriter(size)
 | ||
| 
 | ||
|     def test_basic(self):
 | ||
|         writer = self.create_writer(100)
 | ||
| 
 | ||
|         # test PyUnicodeWriter_WriteUTF8()
 | ||
|         writer.write_utf8(b'var', -1)
 | ||
| 
 | ||
|         # test PyUnicodeWriter_WriteChar()
 | ||
|         writer.write_char('=')
 | ||
| 
 | ||
|         # test PyUnicodeWriter_WriteSubstring()
 | ||
|         writer.write_substring("[long]", 1, 5)
 | ||
| 
 | ||
|         # test PyUnicodeWriter_WriteStr()
 | ||
|         writer.write_str(" value ")
 | ||
| 
 | ||
|         # test PyUnicodeWriter_WriteRepr()
 | ||
|         writer.write_repr("repr")
 | ||
| 
 | ||
|         self.assertEqual(writer.finish(),
 | ||
|                          "var=long value 'repr'")
 | ||
| 
 | ||
|     def test_utf8(self):
 | ||
|         writer = self.create_writer(0)
 | ||
|         writer.write_utf8(b"ascii", -1)
 | ||
|         writer.write_char('-')
 | ||
|         writer.write_utf8(b"latin1=\xC3\xA9", -1)
 | ||
|         writer.write_char('-')
 | ||
|         writer.write_utf8(b"euro=\xE2\x82\xAC", -1)
 | ||
|         writer.write_char('.')
 | ||
|         self.assertEqual(writer.finish(),
 | ||
|                          "ascii-latin1=\xE9-euro=\u20AC.")
 | ||
| 
 | ||
|     def test_invalid_utf8(self):
 | ||
|         writer = self.create_writer(0)
 | ||
|         with self.assertRaises(UnicodeDecodeError):
 | ||
|             writer.write_utf8(b"invalid=\xFF", -1)
 | ||
| 
 | ||
|     def test_recover_utf8_error(self):
 | ||
|         # test recovering from PyUnicodeWriter_WriteUTF8() error
 | ||
|         writer = self.create_writer(0)
 | ||
|         writer.write_utf8(b"value=", -1)
 | ||
| 
 | ||
|         # write fails with an invalid string
 | ||
|         with self.assertRaises(UnicodeDecodeError):
 | ||
|             writer.write_utf8(b"invalid\xFF", -1)
 | ||
| 
 | ||
|         # retry write with a valid string
 | ||
|         writer.write_utf8(b"valid", -1)
 | ||
| 
 | ||
|         self.assertEqual(writer.finish(),
 | ||
|                          "value=valid")
 | ||
| 
 | ||
|     def test_decode_utf8(self):
 | ||
|         # test PyUnicodeWriter_DecodeUTF8Stateful()
 | ||
|         writer = self.create_writer(0)
 | ||
|         writer.decodeutf8stateful(b"ign\xFFore", -1, b"ignore")
 | ||
|         writer.write_char('-')
 | ||
|         writer.decodeutf8stateful(b"replace\xFF", -1, b"replace")
 | ||
|         writer.write_char('-')
 | ||
| 
 | ||
|         # incomplete trailing UTF-8 sequence
 | ||
|         writer.decodeutf8stateful(b"incomplete\xC3", -1, b"replace")
 | ||
| 
 | ||
|         self.assertEqual(writer.finish(),
 | ||
|                          "ignore-replace\uFFFD-incomplete\uFFFD")
 | ||
| 
 | ||
|     def test_decode_utf8_consumed(self):
 | ||
|         # test PyUnicodeWriter_DecodeUTF8Stateful() with consumed
 | ||
|         writer = self.create_writer(0)
 | ||
| 
 | ||
|         # valid string
 | ||
|         consumed = writer.decodeutf8stateful(b"text", -1, b"strict", True)
 | ||
|         self.assertEqual(consumed, 4)
 | ||
|         writer.write_char('-')
 | ||
| 
 | ||
|         # non-ASCII
 | ||
|         consumed = writer.decodeutf8stateful(b"\xC3\xA9-\xE2\x82\xAC", 6, b"strict", True)
 | ||
|         self.assertEqual(consumed, 6)
 | ||
|         writer.write_char('-')
 | ||
| 
 | ||
|         # invalid UTF-8 (consumed is 0 on error)
 | ||
|         with self.assertRaises(UnicodeDecodeError):
 | ||
|             writer.decodeutf8stateful(b"invalid\xFF", -1, b"strict", True)
 | ||
| 
 | ||
|         # ignore error handler
 | ||
|         consumed = writer.decodeutf8stateful(b"more\xFF", -1, b"ignore", True)
 | ||
|         self.assertEqual(consumed, 5)
 | ||
|         writer.write_char('-')
 | ||
| 
 | ||
|         # incomplete trailing UTF-8 sequence
 | ||
|         consumed = writer.decodeutf8stateful(b"incomplete\xC3", -1, b"ignore", True)
 | ||
|         self.assertEqual(consumed, 10)
 | ||
| 
 | ||
|         self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete")
 | ||
| 
 | ||
|     def test_widechar(self):
 | ||
|         writer = self.create_writer(0)
 | ||
|         writer.write_widechar("latin1=\xE9")
 | ||
|         writer.write_widechar("-")
 | ||
|         writer.write_widechar("euro=\u20AC")
 | ||
|         writer.write_char("-")
 | ||
|         writer.write_widechar("max=\U0010ffff")
 | ||
|         writer.write_char('.')
 | ||
|         self.assertEqual(writer.finish(),
 | ||
|                          "latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
 | ||
| 
 | ||
|     def test_ucs4(self):
 | ||
|         writer = self.create_writer(0)
 | ||
|         writer.write_ucs4("ascii IGNORED", 5)
 | ||
|         writer.write_char("-")
 | ||
|         writer.write_ucs4("latin1=\xe9", 8)
 | ||
|         writer.write_char("-")
 | ||
|         writer.write_ucs4("euro=\u20ac", 6)
 | ||
|         writer.write_char("-")
 | ||
|         writer.write_ucs4("max=\U0010ffff", 5)
 | ||
|         writer.write_char(".")
 | ||
|         self.assertEqual(writer.finish(),
 | ||
|                          "ascii-latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
 | ||
| 
 | ||
|         # Test some special characters
 | ||
|         writer = self.create_writer(0)
 | ||
|         # Lone surrogate character
 | ||
|         writer.write_ucs4("lone\uDC80", 5)
 | ||
|         writer.write_char("-")
 | ||
|         # Surrogate pair
 | ||
|         writer.write_ucs4("pair\uDBFF\uDFFF", 5)
 | ||
|         writer.write_char("-")
 | ||
|         writer.write_ucs4("null[\0]", 7)
 | ||
|         self.assertEqual(writer.finish(),
 | ||
|                          "lone\udc80-pair\udbff-null[\0]")
 | ||
| 
 | ||
|         # invalid size
 | ||
|         writer = self.create_writer(0)
 | ||
|         with self.assertRaises(ValueError):
 | ||
|             writer.write_ucs4("text", -1)
 | ||
| 
 | ||
|     def test_substring_empty(self):
 | ||
|         writer = self.create_writer(0)
 | ||
|         writer.write_substring("abc", 1, 1)
 | ||
|         self.assertEqual(writer.finish(), '')
 | ||
| 
 | ||
| 
 | ||
| @unittest.skipIf(ctypes is None, 'need ctypes')
 | ||
| class PyUnicodeWriterFormatTest(unittest.TestCase):
 | ||
|     def create_writer(self, size):
 | ||
|         return _testcapi.PyUnicodeWriter(size)
 | ||
| 
 | ||
|     def writer_format(self, writer, *args):
 | ||
|         from ctypes import c_char_p, pythonapi, c_int, c_void_p
 | ||
|         _PyUnicodeWriter_Format = getattr(pythonapi, "PyUnicodeWriter_Format")
 | ||
|         _PyUnicodeWriter_Format.argtypes = (c_void_p, c_char_p,)
 | ||
|         _PyUnicodeWriter_Format.restype = c_int
 | ||
| 
 | ||
|         if _PyUnicodeWriter_Format(writer.get_pointer(), *args) < 0:
 | ||
|             raise ValueError("PyUnicodeWriter_Format failed")
 | ||
| 
 | ||
|     def test_format(self):
 | ||
|         from ctypes import c_int
 | ||
|         writer = self.create_writer(0)
 | ||
|         self.writer_format(writer, b'%s %i', b'abc', c_int(123))
 | ||
|         writer.write_char('.')
 | ||
|         self.assertEqual(writer.finish(), 'abc 123.')
 | ||
| 
 | ||
|     def test_recover_error(self):
 | ||
|         # test recovering from PyUnicodeWriter_Format() error
 | ||
|         writer = self.create_writer(0)
 | ||
|         self.writer_format(writer, b"%s ", b"Hello")
 | ||
| 
 | ||
|         # PyUnicodeWriter_Format() fails with an invalid format string
 | ||
|         with self.assertRaises(ValueError):
 | ||
|             self.writer_format(writer, b"%s\xff", b"World")
 | ||
| 
 | ||
|         # Retry PyUnicodeWriter_Format() with a valid format string
 | ||
|         self.writer_format(writer, b"%s.", b"World")
 | ||
| 
 | ||
|         self.assertEqual(writer.finish(), 'Hello World.')
 | ||
| 
 | ||
|     def test_unicode_equal(self):
 | ||
|         unicode_equal = _testlimitedcapi.unicode_equal
 | ||
| 
 | ||
|         def copy(text):
 | ||
|             return text.encode().decode()
 | ||
| 
 | ||
|         self.assertTrue(unicode_equal("", ""))
 | ||
|         self.assertTrue(unicode_equal("abc", "abc"))
 | ||
|         self.assertTrue(unicode_equal("abc", copy("abc")))
 | ||
|         self.assertTrue(unicode_equal("\u20ac", copy("\u20ac")))
 | ||
|         self.assertTrue(unicode_equal("\U0010ffff", copy("\U0010ffff")))
 | ||
| 
 | ||
|         self.assertFalse(unicode_equal("abc", "abcd"))
 | ||
|         self.assertFalse(unicode_equal("\u20ac", "\u20ad"))
 | ||
|         self.assertFalse(unicode_equal("\U0010ffff", "\U0010fffe"))
 | ||
| 
 | ||
|         # str subclass
 | ||
|         self.assertTrue(unicode_equal("abc", Str("abc")))
 | ||
|         self.assertTrue(unicode_equal(Str("abc"), "abc"))
 | ||
|         self.assertFalse(unicode_equal("abc", Str("abcd")))
 | ||
|         self.assertFalse(unicode_equal(Str("abc"), "abcd"))
 | ||
| 
 | ||
|         # invalid type
 | ||
|         for invalid_type in (b'bytes', 123, ("tuple",)):
 | ||
|             with self.subTest(invalid_type=invalid_type):
 | ||
|                 with self.assertRaises(TypeError):
 | ||
|                     unicode_equal("abc", invalid_type)
 | ||
|                 with self.assertRaises(TypeError):
 | ||
|                     unicode_equal(invalid_type, "abc")
 | ||
| 
 | ||
|         # CRASHES unicode_equal("abc", NULL)
 | ||
|         # CRASHES unicode_equal(NULL, "abc")
 | ||
| 
 | ||
| 
 | ||
| if __name__ == "__main__":
 | ||
|     unittest.main()
 | 
