gh-80480: array: Add 'w' typecode. (#105242)

2026-01-06 23:42:34 +00:00 · 2023-06-05 01:45:00 +09:00 · 2023-06-05 01:45:00 +09:00 · 1237fb6a4b
commit 1237fb6a4b
parent 5a5ed7a3e6
7 changed files with 159 additions and 59 deletions
--- a/Lib/test/test_array.py
+++ b/Lib/test/test_array.py
@ -27,7 +27,7 @@ class ArraySubclassWithKwargs(array.array):
    def __init__(self, typecode, newarg=None):
        array.array.__init__(self)

-typecodes = 'ubBhHiIlLfdqQ'
+typecodes = 'uwbBhHiIlLfdqQ'

 class MiscTest(unittest.TestCase):

@ -186,11 +186,12 @@ def test_unicode(self):
        )
        for testcase in testcases:
            mformat_code, encoding = testcase
-            a = array.array('u', teststr)
-            b = array_reconstructor(
-                array.array, 'u', mformat_code, teststr.encode(encoding))
-            self.assertEqual(a, b,
-                msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase))
+            for c in 'uw':
+                a = array.array(c, teststr)
+                b = array_reconstructor(
+                    array.array, c, mformat_code, teststr.encode(encoding))
+                self.assertEqual(a, b,
+                    msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase))


 class BaseTest:
@ -234,7 +235,7 @@ def test_buffer_info(self):
        self.assertEqual(bi[1], len(a))

    def test_byteswap(self):
-        if self.typecode == 'u':
+        if self.typecode in ('u', 'w'):
            example = '\U00100100'
        else:
            example = self.example
@ -1079,7 +1080,7 @@ def test_buffer(self):
        self.assertEqual(m.tobytes(), expected)
        self.assertRaises(BufferError, a.frombytes, a.tobytes())
        self.assertEqual(m.tobytes(), expected)
-        if self.typecode == 'u':
+        if self.typecode in ('u', 'w'):
            self.assertRaises(BufferError, a.fromunicode, a.tounicode())
            self.assertEqual(m.tobytes(), expected)
        self.assertRaises(BufferError, operator.imul, a, 2)
@ -1135,16 +1136,17 @@ def test_sizeof_without_buffer(self):
        support.check_sizeof(self, a, basesize)

    def test_initialize_with_unicode(self):
-        if self.typecode != 'u':
+        if self.typecode not in ('u', 'w'):
            with self.assertRaises(TypeError) as cm:
                a = array.array(self.typecode, 'foo')
            self.assertIn("cannot use a str", str(cm.exception))
            with self.assertRaises(TypeError) as cm:
-                a = array.array(self.typecode, array.array('u', 'foo'))
+                a = array.array(self.typecode, array.array('w', 'foo'))
            self.assertIn("cannot use a unicode array", str(cm.exception))
        else:
            a = array.array(self.typecode, "foo")
            a = array.array(self.typecode, array.array('u', 'foo'))
+            a = array.array(self.typecode, array.array('w', 'foo'))

    @support.cpython_only
    def test_obsolete_write_lock(self):
@ -1171,40 +1173,45 @@ class UnicodeTest(StringTest, unittest.TestCase):
    smallerexample = '\x01\u263a\x00\ufefe'
    biggerexample = '\x01\u263a\x01\ufeff'
    outside = str('\x33')
-    minitemsize = 2
+    minitemsize = sizeof_wchar

    def test_unicode(self):
        self.assertRaises(TypeError, array.array, 'b', 'foo')

-        a = array.array('u', '\xa0\xc2\u1234')
+        a = array.array(self.typecode, '\xa0\xc2\u1234')
        a.fromunicode(' ')
        a.fromunicode('')
        a.fromunicode('')
        a.fromunicode('\x11abc\xff\u1234')
        s = a.tounicode()
        self.assertEqual(s, '\xa0\xc2\u1234 \x11abc\xff\u1234')
-        self.assertEqual(a.itemsize, sizeof_wchar)
+        self.assertEqual(a.itemsize, self.minitemsize)

        s = '\x00="\'a\\b\x80\xff\u0000\u0001\u1234'
-        a = array.array('u', s)
+        a = array.array(self.typecode, s)
        self.assertEqual(
            repr(a),
-            "array('u', '\\x00=\"\\'a\\\\b\\x80\xff\\x00\\x01\u1234')")
+            f"array('{self.typecode}', '\\x00=\"\\'a\\\\b\\x80\xff\\x00\\x01\u1234')")

        self.assertRaises(TypeError, a.fromunicode)

    def test_issue17223(self):
-        # this used to crash
-        if sizeof_wchar == 4:
-            # U+FFFFFFFF is an invalid code point in Unicode 6.0
-            invalid_str = b'\xff\xff\xff\xff'
-        else:
+        if self.typecode == 'u' and sizeof_wchar == 2:
            # PyUnicode_FromUnicode() cannot fail with 16-bit wchar_t
            self.skipTest("specific to 32-bit wchar_t")
-        a = array.array('u', invalid_str)
+
+        # this used to crash
+        # U+FFFFFFFF is an invalid code point in Unicode 6.0
+        invalid_str = b'\xff\xff\xff\xff'
+
+        a = array.array(self.typecode, invalid_str)
        self.assertRaises(ValueError, a.tounicode)
        self.assertRaises(ValueError, str, a)

+class UCS4Test(UnicodeTest):
+    typecode = 'w'
+    minitemsize = 4
+
 class NumberTest(BaseTest):

    def test_extslice(self):