mirror of
https://github.com/python/cpython.git
synced 2026-04-22 11:50:50 +00:00
bpo-29456: Fix bugs in unicodedata.normalize: u1176, u11a7 and u11c3 (GH-1958)
The Hangul composition boundary checks were wrong. The second character (vowel)
must fall in [0x1161, 0x1176), not [0x1161, 0x1176], and the third character
(trailing consonant) must fall in (0x11A7, 0x11C3), not [0x11A7, 0x11C3].
(cherry picked from commit d134809cd3)
Co-authored-by: Wonsup Yoon <pusnow@me.com>
This commit is contained in:
parent
a50b825c18
commit
0e2b76ea4e
4 changed files with 22 additions and 3 deletions
|
|
@ -681,15 +681,19 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
|
|||
if (LBase <= code && code < (LBase+LCount) &&
|
||||
i + 1 < len &&
|
||||
VBase <= PyUnicode_READ(kind, data, i+1) &&
|
||||
PyUnicode_READ(kind, data, i+1) <= (VBase+VCount)) {
|
||||
PyUnicode_READ(kind, data, i+1) < (VBase+VCount)) {
|
||||
/* check L character is a modern leading consonant (0x1100 ~ 0x1112)
|
||||
and V character is a modern vowel (0x1161 ~ 0x1175). */
|
||||
int LIndex, VIndex;
|
||||
LIndex = code - LBase;
|
||||
VIndex = PyUnicode_READ(kind, data, i+1) - VBase;
|
||||
code = SBase + (LIndex*VCount+VIndex)*TCount;
|
||||
i+=2;
|
||||
if (i < len &&
|
||||
TBase <= PyUnicode_READ(kind, data, i) &&
|
||||
PyUnicode_READ(kind, data, i) <= (TBase+TCount)) {
|
||||
TBase < PyUnicode_READ(kind, data, i) &&
|
||||
PyUnicode_READ(kind, data, i) < (TBase+TCount)) {
|
||||
/* check T character is a modern trailing consonant
|
||||
(0x11A8 ~ 0x11C2). */
|
||||
code += PyUnicode_READ(kind, data, i)-TBase;
|
||||
i++;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue