# Mirror of https://github.com/python/cpython.git
# (synced 2025-10-30 21:21:22 +00:00)
# 516 lines, 20 KiB, Python

|   | import unittest | |||
|  | import sys | |||
|  | from test import support | |||
|  | from test.support import import_helper | |||
|  | 
 | |||
|  | try: | |||
|  |     import _testcapi | |||
|  | except ImportError: | |||
|  |     _testcapi = None | |||
|  | 
 | |||
|  | 
 | |||
|  | class CAPITest(unittest.TestCase): | |||
|  | 
 | |||
|  |     # Test PyUnicode_FromFormat() | |||
|  |     def test_from_format(self): | |||
|  |         import_helper.import_module('ctypes') | |||
|  |         from ctypes import ( | |||
|  |             c_char_p, | |||
|  |             pythonapi, py_object, sizeof, | |||
|  |             c_int, c_long, c_longlong, c_ssize_t, | |||
|  |             c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p) | |||
|  |         name = "PyUnicode_FromFormat" | |||
|  |         _PyUnicode_FromFormat = getattr(pythonapi, name) | |||
|  |         _PyUnicode_FromFormat.argtypes = (c_char_p,) | |||
|  |         _PyUnicode_FromFormat.restype = py_object | |||
|  | 
 | |||
|  |         def PyUnicode_FromFormat(format, *args): | |||
|  |             cargs = tuple( | |||
|  |                 py_object(arg) if isinstance(arg, str) else arg | |||
|  |                 for arg in args) | |||
|  |             return _PyUnicode_FromFormat(format, *cargs) | |||
|  | 
 | |||
|  |         def check_format(expected, format, *args): | |||
|  |             text = PyUnicode_FromFormat(format, *args) | |||
|  |             self.assertEqual(expected, text) | |||
|  | 
 | |||
|  |         # ascii format, non-ascii argument | |||
|  |         check_format('ascii\x7f=unicode\xe9', | |||
|  |                      b'ascii\x7f=%U', 'unicode\xe9') | |||
|  | 
 | |||
|  |         # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV() | |||
|  |         # raises an error | |||
|  |         self.assertRaisesRegex(ValueError, | |||
|  |             r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format ' | |||
|  |             'string, got a non-ASCII byte: 0xe9$', | |||
|  |             PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii') | |||
|  | 
 | |||
|  |         # test "%c" | |||
|  |         check_format('\uabcd', | |||
|  |                      b'%c', c_int(0xabcd)) | |||
|  |         check_format('\U0010ffff', | |||
|  |                      b'%c', c_int(0x10ffff)) | |||
|  |         with self.assertRaises(OverflowError): | |||
|  |             PyUnicode_FromFormat(b'%c', c_int(0x110000)) | |||
|  |         # Issue #18183 | |||
|  |         check_format('\U00010000\U00100000', | |||
|  |                      b'%c%c', c_int(0x10000), c_int(0x100000)) | |||
|  | 
 | |||
|  |         # test "%" | |||
|  |         check_format('%', | |||
|  |                      b'%%') | |||
|  |         check_format('%s', | |||
|  |                      b'%%s') | |||
|  |         check_format('[%]', | |||
|  |                      b'[%%]') | |||
|  |         check_format('%abc', | |||
|  |                      b'%%%s', b'abc') | |||
|  | 
 | |||
|  |         # truncated string | |||
|  |         check_format('abc', | |||
|  |                      b'%.3s', b'abcdef') | |||
|  |         check_format('abc[\ufffd', | |||
|  |                      b'%.5s', 'abc[\u20ac]'.encode('utf8')) | |||
|  |         check_format("'\\u20acABC'", | |||
|  |                      b'%A', '\u20acABC') | |||
|  |         check_format("'\\u20", | |||
|  |                      b'%.5A', '\u20acABCDEF') | |||
|  |         check_format("'\u20acABC'", | |||
|  |                      b'%R', '\u20acABC') | |||
|  |         check_format("'\u20acA", | |||
|  |                      b'%.3R', '\u20acABCDEF') | |||
|  |         check_format('\u20acAB', | |||
|  |                      b'%.3S', '\u20acABCDEF') | |||
|  |         check_format('\u20acAB', | |||
|  |                      b'%.3U', '\u20acABCDEF') | |||
|  |         check_format('\u20acAB', | |||
|  |                      b'%.3V', '\u20acABCDEF', None) | |||
|  |         check_format('abc[\ufffd', | |||
|  |                      b'%.5V', None, 'abc[\u20ac]'.encode('utf8')) | |||
|  | 
 | |||
|  |         # following tests comes from #7330 | |||
|  |         # test width modifier and precision modifier with %S | |||
|  |         check_format("repr=  abc", | |||
|  |                      b'repr=%5S', 'abc') | |||
|  |         check_format("repr=ab", | |||
|  |                      b'repr=%.2S', 'abc') | |||
|  |         check_format("repr=   ab", | |||
|  |                      b'repr=%5.2S', 'abc') | |||
|  | 
 | |||
|  |         # test width modifier and precision modifier with %R | |||
|  |         check_format("repr=   'abc'", | |||
|  |                      b'repr=%8R', 'abc') | |||
|  |         check_format("repr='ab", | |||
|  |                      b'repr=%.3R', 'abc') | |||
|  |         check_format("repr=  'ab", | |||
|  |                      b'repr=%5.3R', 'abc') | |||
|  | 
 | |||
|  |         # test width modifier and precision modifier with %A | |||
|  |         check_format("repr=   'abc'", | |||
|  |                      b'repr=%8A', 'abc') | |||
|  |         check_format("repr='ab", | |||
|  |                      b'repr=%.3A', 'abc') | |||
|  |         check_format("repr=  'ab", | |||
|  |                      b'repr=%5.3A', 'abc') | |||
|  | 
 | |||
|  |         # test width modifier and precision modifier with %s | |||
|  |         check_format("repr=  abc", | |||
|  |                      b'repr=%5s', b'abc') | |||
|  |         check_format("repr=ab", | |||
|  |                      b'repr=%.2s', b'abc') | |||
|  |         check_format("repr=   ab", | |||
|  |                      b'repr=%5.2s', b'abc') | |||
|  | 
 | |||
|  |         # test width modifier and precision modifier with %U | |||
|  |         check_format("repr=  abc", | |||
|  |                      b'repr=%5U', 'abc') | |||
|  |         check_format("repr=ab", | |||
|  |                      b'repr=%.2U', 'abc') | |||
|  |         check_format("repr=   ab", | |||
|  |                      b'repr=%5.2U', 'abc') | |||
|  | 
 | |||
|  |         # test width modifier and precision modifier with %V | |||
|  |         check_format("repr=  abc", | |||
|  |                      b'repr=%5V', 'abc', b'123') | |||
|  |         check_format("repr=ab", | |||
|  |                      b'repr=%.2V', 'abc', b'123') | |||
|  |         check_format("repr=   ab", | |||
|  |                      b'repr=%5.2V', 'abc', b'123') | |||
|  |         check_format("repr=  123", | |||
|  |                      b'repr=%5V', None, b'123') | |||
|  |         check_format("repr=12", | |||
|  |                      b'repr=%.2V', None, b'123') | |||
|  |         check_format("repr=   12", | |||
|  |                      b'repr=%5.2V', None, b'123') | |||
|  | 
 | |||
|  |         # test integer formats (%i, %d, %u) | |||
|  |         check_format('010', | |||
|  |                      b'%03i', c_int(10)) | |||
|  |         check_format('0010', | |||
|  |                      b'%0.4i', c_int(10)) | |||
|  |         check_format('-123', | |||
|  |                      b'%i', c_int(-123)) | |||
|  |         check_format('-123', | |||
|  |                      b'%li', c_long(-123)) | |||
|  |         check_format('-123', | |||
|  |                      b'%lli', c_longlong(-123)) | |||
|  |         check_format('-123', | |||
|  |                      b'%zi', c_ssize_t(-123)) | |||
|  | 
 | |||
|  |         check_format('-123', | |||
|  |                      b'%d', c_int(-123)) | |||
|  |         check_format('-123', | |||
|  |                      b'%ld', c_long(-123)) | |||
|  |         check_format('-123', | |||
|  |                      b'%lld', c_longlong(-123)) | |||
|  |         check_format('-123', | |||
|  |                      b'%zd', c_ssize_t(-123)) | |||
|  | 
 | |||
|  |         check_format('123', | |||
|  |                      b'%u', c_uint(123)) | |||
|  |         check_format('123', | |||
|  |                      b'%lu', c_ulong(123)) | |||
|  |         check_format('123', | |||
|  |                      b'%llu', c_ulonglong(123)) | |||
|  |         check_format('123', | |||
|  |                      b'%zu', c_size_t(123)) | |||
|  | 
 | |||
|  |         # test long output | |||
|  |         min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1)) | |||
|  |         max_longlong = -min_longlong - 1 | |||
|  |         check_format(str(min_longlong), | |||
|  |                      b'%lld', c_longlong(min_longlong)) | |||
|  |         check_format(str(max_longlong), | |||
|  |                      b'%lld', c_longlong(max_longlong)) | |||
|  |         max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1 | |||
|  |         check_format(str(max_ulonglong), | |||
|  |                      b'%llu', c_ulonglong(max_ulonglong)) | |||
|  |         PyUnicode_FromFormat(b'%p', c_void_p(-1)) | |||
|  | 
 | |||
|  |         # test padding (width and/or precision) | |||
|  |         check_format('123'.rjust(10, '0'), | |||
|  |                      b'%010i', c_int(123)) | |||
|  |         check_format('123'.rjust(100), | |||
|  |                      b'%100i', c_int(123)) | |||
|  |         check_format('123'.rjust(100, '0'), | |||
|  |                      b'%.100i', c_int(123)) | |||
|  |         check_format('123'.rjust(80, '0').rjust(100), | |||
|  |                      b'%100.80i', c_int(123)) | |||
|  | 
 | |||
|  |         check_format('123'.rjust(10, '0'), | |||
|  |                      b'%010u', c_uint(123)) | |||
|  |         check_format('123'.rjust(100), | |||
|  |                      b'%100u', c_uint(123)) | |||
|  |         check_format('123'.rjust(100, '0'), | |||
|  |                      b'%.100u', c_uint(123)) | |||
|  |         check_format('123'.rjust(80, '0').rjust(100), | |||
|  |                      b'%100.80u', c_uint(123)) | |||
|  | 
 | |||
|  |         check_format('123'.rjust(10, '0'), | |||
|  |                      b'%010x', c_int(0x123)) | |||
|  |         check_format('123'.rjust(100), | |||
|  |                      b'%100x', c_int(0x123)) | |||
|  |         check_format('123'.rjust(100, '0'), | |||
|  |                      b'%.100x', c_int(0x123)) | |||
|  |         check_format('123'.rjust(80, '0').rjust(100), | |||
|  |                      b'%100.80x', c_int(0x123)) | |||
|  | 
 | |||
|  |         # test %A | |||
|  |         check_format(r"%A:'abc\xe9\uabcd\U0010ffff'", | |||
|  |                      b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') | |||
|  | 
 | |||
|  |         # test %V | |||
|  |         check_format('repr=abc', | |||
|  |                      b'repr=%V', 'abc', b'xyz') | |||
|  | 
 | |||
|  |         # test %p | |||
|  |         # We cannot test the exact result, | |||
|  |         # because it returns a hex representation of a C pointer, | |||
|  |         # which is going to be different each time. But, we can test the format. | |||
|  |         p_format_regex = r'^0x[a-zA-Z0-9]{3,}$' | |||
|  |         p_format1 = PyUnicode_FromFormat(b'%p', 'abc') | |||
|  |         self.assertIsInstance(p_format1, str) | |||
|  |         self.assertRegex(p_format1, p_format_regex) | |||
|  | 
 | |||
|  |         p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz') | |||
|  |         self.assertIsInstance(p_format2, str) | |||
|  |         self.assertRegex(p_format2, | |||
|  |                          r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}') | |||
|  | 
 | |||
|  |         # Extra args are ignored: | |||
|  |         p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz') | |||
|  |         self.assertIsInstance(p_format3, str) | |||
|  |         self.assertRegex(p_format3, p_format_regex) | |||
|  | 
 | |||
|  |         # Test string decode from parameter of %s using utf-8. | |||
|  |         # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of | |||
|  |         # '\u4eba\u6c11' | |||
|  |         check_format('repr=\u4eba\u6c11', | |||
|  |                      b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91') | |||
|  | 
 | |||
|  |         #Test replace error handler. | |||
|  |         check_format('repr=abc\ufffd', | |||
|  |                      b'repr=%V', None, b'abc\xff') | |||
|  | 
 | |||
|  |         # Issue #33817: empty strings | |||
|  |         check_format('', | |||
|  |                      b'') | |||
|  |         check_format('', | |||
|  |                      b'%s', b'') | |||
|  | 
 | |||
|  |         # check for crashes | |||
|  |         for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1', | |||
|  |                     b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc', | |||
|  |                     b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs'): | |||
|  |             with self.subTest(fmt=fmt): | |||
|  |                 self.assertRaisesRegex(SystemError, 'invalid format string', | |||
|  |                     PyUnicode_FromFormat, fmt, b'abc') | |||
|  |         self.assertRaisesRegex(SystemError, 'invalid format string', | |||
|  |             PyUnicode_FromFormat, b'%+i', c_int(10)) | |||
|  | 
 | |||
|  |     # Test PyUnicode_AsWideChar() | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_aswidechar(self): | |||
|  |         from _testcapi import unicode_aswidechar | |||
|  |         import_helper.import_module('ctypes') | |||
|  |         from ctypes import c_wchar, sizeof | |||
|  | 
 | |||
|  |         wchar, size = unicode_aswidechar('abcdef', 2) | |||
|  |         self.assertEqual(size, 2) | |||
|  |         self.assertEqual(wchar, 'ab') | |||
|  | 
 | |||
|  |         wchar, size = unicode_aswidechar('abc', 3) | |||
|  |         self.assertEqual(size, 3) | |||
|  |         self.assertEqual(wchar, 'abc') | |||
|  | 
 | |||
|  |         wchar, size = unicode_aswidechar('abc', 4) | |||
|  |         self.assertEqual(size, 3) | |||
|  |         self.assertEqual(wchar, 'abc\0') | |||
|  | 
 | |||
|  |         wchar, size = unicode_aswidechar('abc', 10) | |||
|  |         self.assertEqual(size, 3) | |||
|  |         self.assertEqual(wchar, 'abc\0') | |||
|  | 
 | |||
|  |         wchar, size = unicode_aswidechar('abc\0def', 20) | |||
|  |         self.assertEqual(size, 7) | |||
|  |         self.assertEqual(wchar, 'abc\0def\0') | |||
|  | 
 | |||
|  |         nonbmp = chr(0x10ffff) | |||
|  |         if sizeof(c_wchar) == 2: | |||
|  |             buflen = 3 | |||
|  |             nchar = 2 | |||
|  |         else: # sizeof(c_wchar) == 4 | |||
|  |             buflen = 2 | |||
|  |             nchar = 1 | |||
|  |         wchar, size = unicode_aswidechar(nonbmp, buflen) | |||
|  |         self.assertEqual(size, nchar) | |||
|  |         self.assertEqual(wchar, nonbmp + '\0') | |||
|  | 
 | |||
|  |     # Test PyUnicode_AsWideCharString() | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_aswidecharstring(self): | |||
|  |         from _testcapi import unicode_aswidecharstring | |||
|  |         import_helper.import_module('ctypes') | |||
|  |         from ctypes import c_wchar, sizeof | |||
|  | 
 | |||
|  |         wchar, size = unicode_aswidecharstring('abc') | |||
|  |         self.assertEqual(size, 3) | |||
|  |         self.assertEqual(wchar, 'abc\0') | |||
|  | 
 | |||
|  |         wchar, size = unicode_aswidecharstring('abc\0def') | |||
|  |         self.assertEqual(size, 7) | |||
|  |         self.assertEqual(wchar, 'abc\0def\0') | |||
|  | 
 | |||
|  |         nonbmp = chr(0x10ffff) | |||
|  |         if sizeof(c_wchar) == 2: | |||
|  |             nchar = 2 | |||
|  |         else: # sizeof(c_wchar) == 4 | |||
|  |             nchar = 1 | |||
|  |         wchar, size = unicode_aswidecharstring(nonbmp) | |||
|  |         self.assertEqual(size, nchar) | |||
|  |         self.assertEqual(wchar, nonbmp + '\0') | |||
|  | 
 | |||
|  |     # Test PyUnicode_AsUCS4() | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_asucs4(self): | |||
|  |         from _testcapi import unicode_asucs4 | |||
|  |         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', | |||
|  |                   'a\ud800b\udfffc', '\ud834\udd1e']: | |||
|  |             l = len(s) | |||
|  |             self.assertEqual(unicode_asucs4(s, l, True), s+'\0') | |||
|  |             self.assertEqual(unicode_asucs4(s, l, False), s+'\uffff') | |||
|  |             self.assertEqual(unicode_asucs4(s, l+1, True), s+'\0\uffff') | |||
|  |             self.assertEqual(unicode_asucs4(s, l+1, False), s+'\0\uffff') | |||
|  |             self.assertRaises(SystemError, unicode_asucs4, s, l-1, True) | |||
|  |             self.assertRaises(SystemError, unicode_asucs4, s, l-2, False) | |||
|  |             s = '\0'.join([s, s]) | |||
|  |             self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0') | |||
|  |             self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff') | |||
|  | 
 | |||
|  |     # Test PyUnicode_AsUTF8() | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_asutf8(self): | |||
|  |         from _testcapi import unicode_asutf8 | |||
|  | 
 | |||
|  |         bmp = '\u0100' | |||
|  |         bmp2 = '\uffff' | |||
|  |         nonbmp = chr(0x10ffff) | |||
|  | 
 | |||
|  |         self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80') | |||
|  |         self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf') | |||
|  |         self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf') | |||
|  |         self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc') | |||
|  | 
 | |||
|  |     # Test PyUnicode_AsUTF8AndSize() | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_asutf8andsize(self): | |||
|  |         from _testcapi import unicode_asutf8andsize | |||
|  | 
 | |||
|  |         bmp = '\u0100' | |||
|  |         bmp2 = '\uffff' | |||
|  |         nonbmp = chr(0x10ffff) | |||
|  | 
 | |||
|  |         self.assertEqual(unicode_asutf8andsize(bmp), (b'\xc4\x80', 2)) | |||
|  |         self.assertEqual(unicode_asutf8andsize(bmp2), (b'\xef\xbf\xbf', 3)) | |||
|  |         self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4)) | |||
|  |         self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc') | |||
|  | 
 | |||
|  |     # Test PyUnicode_Count() | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_count(self): | |||
|  |         from _testcapi import unicode_count | |||
|  | 
 | |||
|  |         st = 'abcabd' | |||
|  |         self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2) | |||
|  |         self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2) | |||
|  |         self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1) | |||
|  |         self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0)  # cyrillic "a" | |||
|  |         # start < end | |||
|  |         self.assertEqual(unicode_count(st, 'a', 3, len(st)), 1) | |||
|  |         self.assertEqual(unicode_count(st, 'a', 4, len(st)), 0) | |||
|  |         self.assertEqual(unicode_count(st, 'a', 0, sys.maxsize), 2) | |||
|  |         # start >= end | |||
|  |         self.assertEqual(unicode_count(st, 'abc', 0, 0), 0) | |||
|  |         self.assertEqual(unicode_count(st, 'a', 3, 2), 0) | |||
|  |         self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0) | |||
|  |         # negative | |||
|  |         self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2) | |||
|  |         self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1) | |||
|  |         # wrong args | |||
|  |         self.assertRaises(TypeError, unicode_count, 'a', 'a') | |||
|  |         self.assertRaises(TypeError, unicode_count, 'a', 'a', 1) | |||
|  |         self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1) | |||
|  |         self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1) | |||
|  |         # empty string | |||
|  |         self.assertEqual(unicode_count('abc', '', 0, 3), 4) | |||
|  |         self.assertEqual(unicode_count('abc', '', 1, 3), 3) | |||
|  |         self.assertEqual(unicode_count('', '', 0, 1), 1) | |||
|  |         self.assertEqual(unicode_count('', 'a', 0, 1), 0) | |||
|  |         # different unicode kinds | |||
|  |         for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": | |||
|  |             for ch in uni: | |||
|  |                 self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1) | |||
|  |                 self.assertEqual(unicode_count(st, ch, 0, len(st)), 0) | |||
|  | 
 | |||
|  |         # subclasses should still work | |||
|  |         class MyStr(str): | |||
|  |             pass | |||
|  | 
 | |||
|  |         self.assertEqual(unicode_count(MyStr('aab'), 'a', 0, 3), 2) | |||
|  | 
 | |||
|  |     # Test PyUnicode_FindChar() | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_findchar(self): | |||
|  |         from _testcapi import unicode_findchar | |||
|  | 
 | |||
|  |         for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": | |||
|  |             for i, ch in enumerate(str): | |||
|  |                 self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i) | |||
|  |                 self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i) | |||
|  | 
 | |||
|  |         str = "!>_<!" | |||
|  |         self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1) | |||
|  |         self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1) | |||
|  |         # start < end | |||
|  |         self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4) | |||
|  |         self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4) | |||
|  |         # start >= end | |||
|  |         self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1) | |||
|  |         self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1) | |||
|  |         # negative | |||
|  |         self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0) | |||
|  |         self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0) | |||
|  | 
 | |||
|  |     # Test PyUnicode_CopyCharacters() | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_copycharacters(self): | |||
|  |         from _testcapi import unicode_copycharacters | |||
|  | 
 | |||
|  |         strings = [ | |||
|  |             'abcde', '\xa1\xa2\xa3\xa4\xa5', | |||
|  |             '\u4f60\u597d\u4e16\u754c\uff01', | |||
|  |             '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604' | |||
|  |         ] | |||
|  | 
 | |||
|  |         for idx, from_ in enumerate(strings): | |||
|  |             # wide -> narrow: exceed maxchar limitation | |||
|  |             for to in strings[:idx]: | |||
|  |                 self.assertRaises( | |||
|  |                     SystemError, | |||
|  |                     unicode_copycharacters, to, 0, from_, 0, 5 | |||
|  |                 ) | |||
|  |             # same kind | |||
|  |             for from_start in range(5): | |||
|  |                 self.assertEqual( | |||
|  |                     unicode_copycharacters(from_, 0, from_, from_start, 5), | |||
|  |                     (from_[from_start:from_start+5].ljust(5, '\0'), | |||
|  |                      5-from_start) | |||
|  |                 ) | |||
|  |             for to_start in range(5): | |||
|  |                 self.assertEqual( | |||
|  |                     unicode_copycharacters(from_, to_start, from_, to_start, 5), | |||
|  |                     (from_[to_start:to_start+5].rjust(5, '\0'), | |||
|  |                      5-to_start) | |||
|  |                 ) | |||
|  |             # narrow -> wide | |||
|  |             # Tests omitted since this creates invalid strings. | |||
|  | 
 | |||
|  |         s = strings[0] | |||
|  |         self.assertRaises(IndexError, unicode_copycharacters, s, 6, s, 0, 5) | |||
|  |         self.assertRaises(IndexError, unicode_copycharacters, s, -1, s, 0, 5) | |||
|  |         self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, 6, 5) | |||
|  |         self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, -1, 5) | |||
|  |         self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, 5) | |||
|  |         self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1) | |||
|  |         self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0) | |||
|  | 
 | |||
|  |     @support.cpython_only | |||
|  |     @unittest.skipIf(_testcapi is None, 'need _testcapi module') | |||
|  |     def test_pep393_utf8_caching_bug(self): | |||
|  |         # Issue #25709: Problem with string concatenation and utf-8 cache | |||
|  |         from _testcapi import getargs_s_hash | |||
|  |         for k in 0x24, 0xa4, 0x20ac, 0x1f40d: | |||
|  |             s = '' | |||
|  |             for i in range(5): | |||
|  |                 # Due to CPython specific optimization the 's' string can be | |||
|  |                 # resized in-place. | |||
|  |                 s += chr(k) | |||
|  |                 # Parsing with the "s#" format code calls indirectly | |||
|  |                 # PyUnicode_AsUTF8AndSize() which creates the UTF-8 | |||
|  |                 # encoded string cached in the Unicode object. | |||
|  |                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) | |||
|  |                 # Check that the second call returns the same result | |||
|  |                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) | |||
|  | 
 | |||
|  | 
 | |||
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()