mirror of
https://github.com/python/cpython.git
synced 2026-04-14 07:41:00 +00:00
[3.14] gh-80667: Fix case-sensitivity of some Unicode literal escapes (GH-107281) (GH-144753)
Lookup of CJK ideographs and Hangul syllables is now case-insensitive,
as is the case for other character names.
(cherry picked from commit e66f4a5a9c)
Co-authored-by: James <snoopjedi@gmail.com>
This commit is contained in:
parent
7f5a3acded
commit
ac9e9e2c8f
3 changed files with 18 additions and 7 deletions
|
|
@@ -88,6 +88,9 @@ def test_hangul_syllables(self):
|
|||
self.checkletter("HANGUL SYLLABLE HWEOK", "\ud6f8")
|
||||
self.checkletter("HANGUL SYLLABLE HIH", "\ud7a3")
|
||||
|
||||
self.checkletter("haNGul SYllABle WAe", '\uc65c')
|
||||
self.checkletter("HAngUL syLLabLE waE", '\uc65c')
|
||||
|
||||
self.assertRaises(ValueError, unicodedata.name, "\ud7a4")
|
||||
|
||||
def test_cjk_unified_ideographs(self):
|
||||
|
|
@@ -103,6 +106,11 @@ def test_cjk_unified_ideographs(self):
|
|||
self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D")
|
||||
self.checkletter("CJK UNIFIED IDEOGRAPH-3134A", "\U0003134A")
|
||||
|
||||
self.checkletter("cjK UniFIeD idEogRAph-3aBc", "\u3abc")
|
||||
self.checkletter("CJk uNIfiEd IDeOGraPH-3AbC", "\u3abc")
|
||||
self.checkletter("cjK UniFIeD idEogRAph-2aBcD", "\U0002abcd")
|
||||
self.checkletter("CJk uNIfiEd IDeOGraPH-2AbCd", "\U0002abcd")
|
||||
|
||||
def test_bmp_characters(self):
|
||||
for code in range(0x10000):
|
||||
char = chr(code)
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,2 @@
|
|||
Literals using the ``\N{name}`` escape syntax can now construct CJK
|
||||
ideographs and Hangul syllables using case-insensitive names.
|
||||
|
|
@@ -1362,7 +1362,7 @@ find_syllable(const char *str, int *len, int *pos, int count, int column)
|
|||
len1 = Py_SAFE_DOWNCAST(strlen(s), size_t, int);
|
||||
if (len1 <= *len)
|
||||
continue;
|
||||
if (strncmp(str, s, len1) == 0) {
|
||||
if (PyOS_strnicmp(str, s, len1) == 0) {
|
||||
*len = len1;
|
||||
*pos = i;
|
||||
}
|
||||
|
|
@@ -1394,7 +1394,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
|
|||
* PUA */
|
||||
|
||||
/* Check for hangul syllables. */
|
||||
if (strncmp(name, "HANGUL SYLLABLE ", 16) == 0) {
|
||||
if (PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0) {
|
||||
int len, L = -1, V = -1, T = -1;
|
||||
const char *pos = name + 16;
|
||||
find_syllable(pos, &len, &L, LCount, 0);
|
||||
|
|
@@ -1412,7 +1412,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
|
|||
}
|
||||
|
||||
/* Check for unified ideographs. */
|
||||
if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
|
||||
if (PyOS_strnicmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
|
||||
/* Four or five hexdigits must follow. */
|
||||
unsigned int v;
|
||||
v = 0;
|
||||
|
|
@@ -1422,10 +1422,11 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
|
|||
return 0;
|
||||
while (namelen--) {
|
||||
v *= 16;
|
||||
if (*name >= '0' && *name <= '9')
|
||||
v += *name - '0';
|
||||
else if (*name >= 'A' && *name <= 'F')
|
||||
v += *name - 'A' + 10;
|
||||
Py_UCS1 c = Py_TOUPPER(*name);
|
||||
if (c >= '0' && c <= '9')
|
||||
v += c - '0';
|
||||
else if (c >= 'A' && c <= 'F')
|
||||
v += c - 'A' + 10;
|
||||
else
|
||||
return 0;
|
||||
name++;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue