mirror of
https://github.com/python/cpython.git
synced 2025-10-24 02:13:49 +00:00
Marc-Andre Lemburg:
The attached patch set includes a workaround to get Python with Unicode to compile on BSDI 4.x (courtesy of Thomas Wouters; the cause is a bug in the BSDI wchar.h header file) and Python interfaces for the MBCS codec donated by Mark Hammond. Also included are some minor corrections with respect to the docs of the new "es" and "es#" parser markers (use PyMem_Free() instead of free(); thanks to Mark Hammond for finding these). The unicodedata tests are now in a separate file (test_unicodedata.py) to avoid problems if the module cannot be found.
This commit is contained in:
parent
66d4513975
commit
24bdb0474f
9 changed files with 116 additions and 56 deletions
|
@ -82,6 +82,10 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_WCHAR_H
|
#ifdef HAVE_WCHAR_H
|
||||||
|
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
|
||||||
|
# ifdef _HAVE_BSDI
|
||||||
|
# include <time.h>
|
||||||
|
# endif
|
||||||
# include "wchar.h"
|
# include "wchar.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -562,7 +566,9 @@ extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
|
||||||
);
|
);
|
||||||
|
|
||||||
#ifdef MS_WIN32
|
#ifdef MS_WIN32
|
||||||
|
|
||||||
/* --- MBCS codecs for Windows -------------------------------------------- */
|
/* --- MBCS codecs for Windows -------------------------------------------- */
|
||||||
|
|
||||||
extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
|
extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
|
||||||
const char *string, /* MBCS encoded string */
|
const char *string, /* MBCS encoded string */
|
||||||
int length, /* size of string */
|
int length, /* size of string */
|
||||||
|
@ -579,8 +585,8 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
|
||||||
const char *errors /* error handling */
|
const char *errors /* error handling */
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
#endif /* MS_WIN32 */
|
#endif /* MS_WIN32 */
|
||||||
|
|
||||||
/* --- Methods & Slots ----------------------------------------------------
|
/* --- Methods & Slots ----------------------------------------------------
|
||||||
|
|
||||||
These are capable of handling Unicode objects and strings on input
|
These are capable of handling Unicode objects and strings on input
|
||||||
|
|
|
@ -34,4 +34,3 @@ class StreamConverter(StreamWriter,StreamReader):
|
||||||
def getregentry():
|
def getregentry():
|
||||||
|
|
||||||
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
test_unicode
|
test_unicode
|
||||||
Testing Unicode comparisons... done.
|
Testing Unicode comparisons... done.
|
||||||
|
Testing Unicode contains method... done.
|
||||||
Testing Unicode formatting strings... done.
|
Testing Unicode formatting strings... done.
|
||||||
Testing unicodedata module... done.
|
Testing builtin codecs... done.
|
||||||
|
|
2
Lib/test/output/test_unicodedata
Normal file
2
Lib/test/output/test_unicodedata
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
test_unicodedata
|
||||||
|
Testing unicodedata module... done.
|
|
@ -1,6 +1,5 @@
|
||||||
""" Test script for the Unicode implementation.
|
""" Test script for the Unicode implementation.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
@ -250,50 +249,6 @@ def __init__(self): self.seq = [7, u'hello', 123L]
|
||||||
assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
|
assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def'
|
||||||
print 'done.'
|
print 'done.'
|
||||||
|
|
||||||
# Test Unicode database APIs
|
|
||||||
try:
|
|
||||||
import unicodedata
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
print 'Testing unicodedata module...',
|
|
||||||
|
|
||||||
assert unicodedata.digit(u'A',None) is None
|
|
||||||
assert unicodedata.digit(u'9') == 9
|
|
||||||
assert unicodedata.digit(u'\u215b',None) is None
|
|
||||||
assert unicodedata.digit(u'\u2468') == 9
|
|
||||||
|
|
||||||
assert unicodedata.numeric(u'A',None) is None
|
|
||||||
assert unicodedata.numeric(u'9') == 9
|
|
||||||
assert unicodedata.numeric(u'\u215b') == 0.125
|
|
||||||
assert unicodedata.numeric(u'\u2468') == 9.0
|
|
||||||
|
|
||||||
assert unicodedata.decimal(u'A',None) is None
|
|
||||||
assert unicodedata.decimal(u'9') == 9
|
|
||||||
assert unicodedata.decimal(u'\u215b',None) is None
|
|
||||||
assert unicodedata.decimal(u'\u2468',None) is None
|
|
||||||
|
|
||||||
assert unicodedata.category(u'\uFFFE') == 'Cn'
|
|
||||||
assert unicodedata.category(u'a') == 'Ll'
|
|
||||||
assert unicodedata.category(u'A') == 'Lu'
|
|
||||||
|
|
||||||
assert unicodedata.bidirectional(u'\uFFFE') == ''
|
|
||||||
assert unicodedata.bidirectional(u' ') == 'WS'
|
|
||||||
assert unicodedata.bidirectional(u'A') == 'L'
|
|
||||||
|
|
||||||
assert unicodedata.decomposition(u'\uFFFE') == ''
|
|
||||||
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
|
|
||||||
|
|
||||||
assert unicodedata.mirrored(u'\uFFFE') == 0
|
|
||||||
assert unicodedata.mirrored(u'a') == 0
|
|
||||||
assert unicodedata.mirrored(u'\u2201') == 1
|
|
||||||
|
|
||||||
assert unicodedata.combining(u'\uFFFE') == 0
|
|
||||||
assert unicodedata.combining(u'a') == 0
|
|
||||||
assert unicodedata.combining(u'\u20e1') == 230
|
|
||||||
|
|
||||||
print 'done.'
|
|
||||||
|
|
||||||
# Test builtin codecs
|
# Test builtin codecs
|
||||||
print 'Testing builtin codecs...',
|
print 'Testing builtin codecs...',
|
||||||
|
|
||||||
|
|
50
Lib/test/test_unicodedata.py
Normal file
50
Lib/test/test_unicodedata.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
""" Test script for the unicodedata module.
|
||||||
|
|
||||||
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
|
||||||
|
"""#"
|
||||||
|
from test_support import verbose
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Test Unicode database APIs
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
print 'Testing unicodedata module...',
|
||||||
|
|
||||||
|
assert unicodedata.digit(u'A',None) is None
|
||||||
|
assert unicodedata.digit(u'9') == 9
|
||||||
|
assert unicodedata.digit(u'\u215b',None) is None
|
||||||
|
assert unicodedata.digit(u'\u2468') == 9
|
||||||
|
|
||||||
|
assert unicodedata.numeric(u'A',None) is None
|
||||||
|
assert unicodedata.numeric(u'9') == 9
|
||||||
|
assert unicodedata.numeric(u'\u215b') == 0.125
|
||||||
|
assert unicodedata.numeric(u'\u2468') == 9.0
|
||||||
|
|
||||||
|
assert unicodedata.decimal(u'A',None) is None
|
||||||
|
assert unicodedata.decimal(u'9') == 9
|
||||||
|
assert unicodedata.decimal(u'\u215b',None) is None
|
||||||
|
assert unicodedata.decimal(u'\u2468',None) is None
|
||||||
|
|
||||||
|
assert unicodedata.category(u'\uFFFE') == 'Cn'
|
||||||
|
assert unicodedata.category(u'a') == 'Ll'
|
||||||
|
assert unicodedata.category(u'A') == 'Lu'
|
||||||
|
|
||||||
|
assert unicodedata.bidirectional(u'\uFFFE') == ''
|
||||||
|
assert unicodedata.bidirectional(u' ') == 'WS'
|
||||||
|
assert unicodedata.bidirectional(u'A') == 'L'
|
||||||
|
|
||||||
|
assert unicodedata.decomposition(u'\uFFFE') == ''
|
||||||
|
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
|
||||||
|
|
||||||
|
assert unicodedata.mirrored(u'\uFFFE') == 0
|
||||||
|
assert unicodedata.mirrored(u'a') == 0
|
||||||
|
assert unicodedata.mirrored(u'\u2201') == 1
|
||||||
|
|
||||||
|
assert unicodedata.combining(u'\uFFFE') == 0
|
||||||
|
assert unicodedata.combining(u'a') == 0
|
||||||
|
assert unicodedata.combining(u'\u20e1') == 230
|
||||||
|
|
||||||
|
print 'done.'
|
|
@ -740,8 +740,8 @@ These markers are used by the PyArg_ParseTuple() APIs:
|
||||||
On output, a buffer of the needed size is allocated and
|
On output, a buffer of the needed size is allocated and
|
||||||
returned through *buffer as NULL-terminated string.
|
returned through *buffer as NULL-terminated string.
|
||||||
The encoded string may not contain embedded NULL characters.
|
The encoded string may not contain embedded NULL characters.
|
||||||
The caller is responsible for free()ing the allocated *buffer
|
The caller is responsible for calling PyMem_Free()
|
||||||
after usage.
|
to free the allocated *buffer after usage.
|
||||||
|
|
||||||
"es#":
|
"es#":
|
||||||
Takes three parameters: encoding (const char *),
|
Takes three parameters: encoding (const char *),
|
||||||
|
@ -755,8 +755,9 @@ These markers are used by the PyArg_ParseTuple() APIs:
|
||||||
|
|
||||||
If *buffer is NULL, a buffer of the needed size is
|
If *buffer is NULL, a buffer of the needed size is
|
||||||
allocated and output copied into it. *buffer is then
|
allocated and output copied into it. *buffer is then
|
||||||
updated to point to the allocated memory area. The caller
|
updated to point to the allocated memory area.
|
||||||
is responsible for free()ing *buffer after usage.
|
The caller is responsible for calling PyMem_Free()
|
||||||
|
to free the allocated *buffer after usage.
|
||||||
|
|
||||||
In both cases *buffer_len is updated to the number of
|
In both cases *buffer_len is updated to the number of
|
||||||
characters written (excluding the trailing NULL-byte).
|
characters written (excluding the trailing NULL-byte).
|
||||||
|
@ -784,7 +785,7 @@ Using "es#" with auto-allocation:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
str = PyString_FromStringAndSize(buffer, buffer_len);
|
str = PyString_FromStringAndSize(buffer, buffer_len);
|
||||||
free(buffer);
|
PyMem_Free(buffer);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -807,7 +808,7 @@ Using "es" with auto-allocation returning a NULL-terminated string:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
str = PyString_FromString(buffer);
|
str = PyString_FromString(buffer);
|
||||||
free(buffer);
|
PyMem_Free(buffer);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -286,6 +286,26 @@ charmap_decode(PyObject *self,
|
||||||
size);
|
size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef MS_WIN32
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
mbcs_decode(PyObject *self,
|
||||||
|
PyObject *args)
|
||||||
|
{
|
||||||
|
const char *data;
|
||||||
|
int size;
|
||||||
|
const char *errors = NULL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
|
||||||
|
&data, &size, &errors))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
|
||||||
|
size);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* MS_WIN32 */
|
||||||
|
|
||||||
/* --- Encoder ------------------------------------------------------------ */
|
/* --- Encoder ------------------------------------------------------------ */
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -491,6 +511,28 @@ charmap_encode(PyObject *self,
|
||||||
PyUnicode_GET_SIZE(str));
|
PyUnicode_GET_SIZE(str));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef MS_WIN32
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
mbcs_encode(PyObject *self,
|
||||||
|
PyObject *args)
|
||||||
|
{
|
||||||
|
PyObject *str;
|
||||||
|
const char *errors = NULL;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "U|z:mbcs_encode",
|
||||||
|
&str, &errors))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return codec_tuple(PyUnicode_EncodeMBCS(
|
||||||
|
PyUnicode_AS_UNICODE(str),
|
||||||
|
PyUnicode_GET_SIZE(str),
|
||||||
|
errors),
|
||||||
|
PyUnicode_GET_SIZE(str));
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* MS_WIN32 */
|
||||||
|
|
||||||
/* --- Module API --------------------------------------------------------- */
|
/* --- Module API --------------------------------------------------------- */
|
||||||
|
|
||||||
static PyMethodDef _codecs_functions[] = {
|
static PyMethodDef _codecs_functions[] = {
|
||||||
|
@ -519,6 +561,10 @@ static PyMethodDef _codecs_functions[] = {
|
||||||
{"charmap_decode", charmap_decode, 1},
|
{"charmap_decode", charmap_decode, 1},
|
||||||
{"readbuffer_encode", readbuffer_encode, 1},
|
{"readbuffer_encode", readbuffer_encode, 1},
|
||||||
{"charbuffer_encode", charbuffer_encode, 1},
|
{"charbuffer_encode", charbuffer_encode, 1},
|
||||||
|
#ifdef MS_WIN32
|
||||||
|
{"mbcs_encode", mbcs_encode, 1},
|
||||||
|
{"mbcs_decode", mbcs_decode, 1},
|
||||||
|
#endif
|
||||||
{NULL, NULL} /* sentinel */
|
{NULL, NULL} /* sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -704,7 +704,7 @@ convertsimple1(arg, p_format, p_va)
|
||||||
the data copied into it; *buffer is
|
the data copied into it; *buffer is
|
||||||
updated to point to the new buffer;
|
updated to point to the new buffer;
|
||||||
the caller is responsible for
|
the caller is responsible for
|
||||||
free()ing it after usage
|
PyMem_Free()ing it after usage
|
||||||
|
|
||||||
- if *buffer is not NULL, the data
|
- if *buffer is not NULL, the data
|
||||||
is copied to *buffer; *buffer_len
|
is copied to *buffer; *buffer_len
|
||||||
|
@ -752,7 +752,7 @@ convertsimple1(arg, p_format, p_va)
|
||||||
is allocated and the data copied
|
is allocated and the data copied
|
||||||
into it; *buffer is updated to
|
into it; *buffer is updated to
|
||||||
point to the new buffer; the caller
|
point to the new buffer; the caller
|
||||||
is responsible for free()ing it
|
is responsible for PyMem_Free()ing it
|
||||||
after usage
|
after usage
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue