gh-131878: Fix input of unicode characters with two or more code points in new pyrepl on Windows (gh-131901)

Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
This commit is contained in:
Sergey Miryanov 2025-05-05 09:25:00 -07:00 committed by GitHub
parent d6078ed6d0
commit 0c5151bc81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 68 additions and 27 deletions

View file

@ -69,17 +69,13 @@ def insert(self, event: Event) -> None:
trace('added event {event}', event=event) trace('added event {event}', event=event)
self.events.append(event) self.events.append(event)
def push(self, char: int | bytes | str) -> None: def push(self, char: int | bytes) -> None:
""" """
Processes a character by updating the buffer and handling special key mappings. Processes a character by updating the buffer and handling special key mappings.
""" """
assert isinstance(char, (int, bytes))
ord_char = char if isinstance(char, int) else ord(char) ord_char = char if isinstance(char, int) else ord(char)
if ord_char > 255: char = ord_char.to_bytes()
assert isinstance(char, str)
char = bytes(char.encode(self.encoding, "replace"))
self.buf.extend(char)
else:
char = bytes(bytearray((ord_char,)))
self.buf.append(ord_char) self.buf.append(ord_char)
if char in self.keymap: if char in self.keymap:

View file

@ -485,7 +485,8 @@ def get_event(self, block: bool = True) -> Event | None:
return None return None
elif self.__vt_support: elif self.__vt_support:
# If virtual terminal is enabled, scanning VT sequences # If virtual terminal is enabled, scanning VT sequences
self.event_queue.push(rec.Event.KeyEvent.uChar.UnicodeChar) for char in raw_key.encode(self.event_queue.encoding, "replace"):
self.event_queue.push(char)
continue continue
if key_event.dwControlKeyState & ALT_ACTIVE: if key_event.dwControlKeyState & ALT_ACTIVE:

View file

@ -53,7 +53,7 @@ def test_push_with_key_in_keymap(self, mock_keymap):
mock_keymap.compile_keymap.return_value = {"a": "b"} mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue() eq = self.make_eventqueue()
eq.keymap = {b"a": "b"} eq.keymap = {b"a": "b"}
eq.push("a") eq.push(b"a")
mock_keymap.compile_keymap.assert_called() mock_keymap.compile_keymap.assert_called()
self.assertEqual(eq.events[0].evt, "key") self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "b") self.assertEqual(eq.events[0].data, "b")
@ -63,7 +63,7 @@ def test_push_without_key_in_keymap(self, mock_keymap):
mock_keymap.compile_keymap.return_value = {"a": "b"} mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue() eq = self.make_eventqueue()
eq.keymap = {b"c": "d"} eq.keymap = {b"c": "d"}
eq.push("a") eq.push(b"a")
mock_keymap.compile_keymap.assert_called() mock_keymap.compile_keymap.assert_called()
self.assertEqual(eq.events[0].evt, "key") self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "a") self.assertEqual(eq.events[0].data, "a")
@ -73,13 +73,13 @@ def test_push_with_keymap_in_keymap(self, mock_keymap):
mock_keymap.compile_keymap.return_value = {"a": "b"} mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue() eq = self.make_eventqueue()
eq.keymap = {b"a": {b"b": "c"}} eq.keymap = {b"a": {b"b": "c"}}
eq.push("a") eq.push(b"a")
mock_keymap.compile_keymap.assert_called() mock_keymap.compile_keymap.assert_called()
self.assertTrue(eq.empty()) self.assertTrue(eq.empty())
eq.push("b") eq.push(b"b")
self.assertEqual(eq.events[0].evt, "key") self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "c") self.assertEqual(eq.events[0].data, "c")
eq.push("d") eq.push(b"d")
self.assertEqual(eq.events[1].evt, "key") self.assertEqual(eq.events[1].evt, "key")
self.assertEqual(eq.events[1].data, "d") self.assertEqual(eq.events[1].data, "d")
@ -88,32 +88,32 @@ def test_push_with_keymap_in_keymap_and_escape(self, mock_keymap):
mock_keymap.compile_keymap.return_value = {"a": "b"} mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue() eq = self.make_eventqueue()
eq.keymap = {b"a": {b"b": "c"}} eq.keymap = {b"a": {b"b": "c"}}
eq.push("a") eq.push(b"a")
mock_keymap.compile_keymap.assert_called() mock_keymap.compile_keymap.assert_called()
self.assertTrue(eq.empty()) self.assertTrue(eq.empty())
eq.flush_buf() eq.flush_buf()
eq.push("\033") eq.push(b"\033")
self.assertEqual(eq.events[0].evt, "key") self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\033") self.assertEqual(eq.events[0].data, "\033")
eq.push("b") eq.push(b"b")
self.assertEqual(eq.events[1].evt, "key") self.assertEqual(eq.events[1].evt, "key")
self.assertEqual(eq.events[1].data, "b") self.assertEqual(eq.events[1].data, "b")
def test_push_special_key(self): def test_push_special_key(self):
eq = self.make_eventqueue() eq = self.make_eventqueue()
eq.keymap = {} eq.keymap = {}
eq.push("\x1b") eq.push(b"\x1b")
eq.push("[") eq.push(b"[")
eq.push("A") eq.push(b"A")
self.assertEqual(eq.events[0].evt, "key") self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\x1b") self.assertEqual(eq.events[0].data, "\x1b")
def test_push_unrecognized_escape_sequence(self): def test_push_unrecognized_escape_sequence(self):
eq = self.make_eventqueue() eq = self.make_eventqueue()
eq.keymap = {} eq.keymap = {}
eq.push("\x1b") eq.push(b"\x1b")
eq.push("[") eq.push(b"[")
eq.push("Z") eq.push(b"Z")
self.assertEqual(len(eq.events), 3) self.assertEqual(len(eq.events), 3)
self.assertEqual(eq.events[0].evt, "key") self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\x1b") self.assertEqual(eq.events[0].data, "\x1b")
@ -122,12 +122,54 @@ def test_push_unrecognized_escape_sequence(self):
self.assertEqual(eq.events[2].evt, "key") self.assertEqual(eq.events[2].evt, "key")
self.assertEqual(eq.events[2].data, "Z") self.assertEqual(eq.events[2].data, "Z")
def test_push_unicode_character(self): def test_push_unicode_character_as_str(self):
eq = self.make_eventqueue() eq = self.make_eventqueue()
eq.keymap = {} eq.keymap = {}
with self.assertRaises(AssertionError):
eq.push("ч") eq.push("ч")
self.assertEqual(eq.events[0].evt, "key") with self.assertRaises(AssertionError):
self.assertEqual(eq.events[0].data, "ч") eq.push("ñ")
def test_push_unicode_character_two_bytes(self):
eq = self.make_eventqueue()
eq.keymap = {}
encoded = "ч".encode(eq.encoding, "replace")
self.assertEqual(len(encoded), 2)
eq.push(encoded[0])
e = eq.get()
self.assertIsNone(e)
eq.push(encoded[1])
e = eq.get()
self.assertEqual(e.evt, "key")
self.assertEqual(e.data, "ч")
def test_push_single_chars_and_unicode_character_as_str(self):
eq = self.make_eventqueue()
eq.keymap = {}
def _event(evt, data, raw=None):
r = raw if raw is not None else data.encode(eq.encoding)
e = Event(evt, data, r)
return e
def _push(keys):
for k in keys:
eq.push(k)
self.assertIsInstance("ñ", str)
# If an exception happens during push, the existing events must be
# preserved and we can continue to push.
_push(b"b")
with self.assertRaises(AssertionError):
_push("ñ")
_push(b"a")
self.assertEqual(eq.get(), _event("key", "b"))
self.assertEqual(eq.get(), _event("key", "a"))
@unittest.skipIf(support.MS_WINDOWS, "No Unix event queue on Windows") @unittest.skipIf(support.MS_WINDOWS, "No Unix event queue on Windows")

View file

@ -0,0 +1,2 @@
Fix support of Unicode characters with two or more code points on Windows in
the new REPL.