gh-125434: Fix non-ASCII thread names in faulthandler on Windows (#140700)

Add _Py_DumpWideString() function to dump a wide string as ASCII. It
supports surrogate pairs.

Replace _Py_EncodeLocaleRaw() with _Py_DumpWideString()
in write_thread_name().
This commit is contained in:
Victor Stinner 2025-10-28 14:41:51 +01:00 committed by GitHub
parent 2cefa70eb9
commit 80f20f58b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -980,6 +980,52 @@ _Py_DumpASCII(int fd, PyObject *text)
}
}
#ifdef MS_WINDOWS
/* Write the NUL-terminated wide string str into the file fd using only
   ASCII characters.

   Characters in the range ' '..126 are written unchanged. Every other
   character is written as a backslash escape: 'x' + 2 hex digits for values
   up to 0xff, 'U' + 8 hex digits for a high surrogate immediately followed
   by a low surrogate (the pair is joined into one code point), and
   'u' + 4 hex digits otherwise.

   Only characters starting at an index below MAX_STRING_LENGTH are dumped
   (a low surrogate completing a pair that starts on the last such index is
   still consumed); "..." is appended when the string is longer than
   MAX_STRING_LENGTH. */
static void
_Py_DumpWideString(int fd, wchar_t *str)
{
    Py_ssize_t len = wcslen(str);
    int is_truncated = (MAX_STRING_LENGTH < len);
    if (is_truncated) {
        len = MAX_STRING_LENGTH;
    }

    Py_ssize_t pos = 0;
    while (pos < len) {
        /* Read the current unit and advance; from here on str[pos] is the
           unit that follows it (at worst the terminating NUL). */
        Py_UCS4 code = str[pos];
        pos++;

        if (' ' <= code && code <= 126) {
            /* printable ASCII character */
            dump_char(fd, (char)code);
            continue;
        }

        if (code <= 0xff) {
            PUTS(fd, "\\x");
            _Py_DumpHexadecimal(fd, code, 2);
            continue;
        }

        if (Py_UNICODE_IS_HIGH_SURROGATE(code)
            && Py_UNICODE_IS_LOW_SURROGATE(str[pos]))
        {
            code = Py_UNICODE_JOIN_SURROGATES(code, str[pos]);
            pos++;  // Skip the low surrogate character
            PUTS(fd, "\\U");
            _Py_DumpHexadecimal(fd, code, 8);
            continue;
        }

        /* Four hex digits are enough only because wchar_t is 16-bit. */
        Py_BUILD_ASSERT(sizeof(wchar_t) == 2);
        PUTS(fd, "\\u");
        _Py_DumpHexadecimal(fd, code, 4);
    }

    if (is_truncated) {
        PUTS(fd, "...");
    }
}
#endif
/* Write a frame into the file fd: "File "xxx", line xxx in xxx".
This function is signal safe. */
@ -1149,20 +1195,15 @@ write_thread_name(int fd, PyThreadState *tstate)
return;
}
wchar_t *wname;
HRESULT hr = pGetThreadDescription(thread, &wname);
wchar_t *name;
HRESULT hr = pGetThreadDescription(thread, &name);
if (!FAILED(hr)) {
char *name = _Py_EncodeLocaleRaw(wname, NULL);
if (name != NULL) {
size_t len = strlen(name);
if (len) {
PUTS(fd, " [");
(void)_Py_write_noraise(fd, name, len);
PUTS(fd, "]");
}
PyMem_RawFree(name);
if (name[0] != 0) {
PUTS(fd, " [");
_Py_DumpWideString(fd, name);
PUTS(fd, "]");
}
LocalFree(wname);
LocalFree(name);
}
CloseHandle(thread);
#endif