mirror of
https://github.com/python/cpython.git
synced 2026-06-05 01:10:53 +00:00
gh-62259: Add support of multi-byte encodings in the XML parser (GH-149860)
Supported encodings: "cp932", "cp949", "cp950", "Big5", "EUC-JP", "GB2312", "GBK", "johab", and "Shift_JIS". Partially supported encodings (only BMP characters): "Big5-HKSCS", "EUC_JIS-2004", "EUC_JISX0213", "Shift_JIS-2004", "Shift_JISX0213", "utf-8-sig" and non-standard aliases like "UTF8" (without hyphen). The parser now raises ValueError for known unsupported multi-byte encodings such as "ISO-2022-JP" or "raw-unicode-escape" instead of failing later, when encountering non-ASCII data.
This commit is contained in:
parent
a34edf7446
commit
8ab7b43a14
47 changed files with 401 additions and 29 deletions
|
|
@ -10,6 +10,7 @@ Copyright (c) Corporation for National Research Initiatives.
|
|||
|
||||
#include "Python.h"
|
||||
#include "pycore_call.h" // _PyObject_CallNoArgs()
|
||||
#include "pycore_codecs.h" // export _PyCodec_LookupTextEncoding()
|
||||
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
|
||||
#include "pycore_pyerrors.h" // _PyErr_FormatNote()
|
||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue